import os
import re
import subprocess
from datetime import datetime
from typing import Dict, Any, List, Optional, Tuple


class GitAnalyzer:
    """Analyze a Git repository for project status information.

    Commit subjects may carry bracketed tags that are collected by
    :meth:`analyze_commits`:

    * ``[status:KEY]`` / ``[feature:KEY]``  -> ``feature_updates``
    * ``[new-feature:KEY:DESCRIPTION]``     -> ``new_features``
    * ``[changelog:TEXT]``                  -> ``changelog_entries``
    * ``[fix:TEXT]``                        -> ``fixes``
    * ``[issue:TEXT]``                      -> ``issues``
    * ``[milestone:KEY]``                   -> ``milestones``
    * ``[roadmap:KEY:DESCRIPTION]``         -> ``roadmap_items``
    """

    # One log record per line: abbreviated hash | subject | author date | author.
    _LOG_FORMAT = "--pretty=format:%h|%s|%ad|%an"

    # Tag patterns, compiled once for the whole class.
    _STATUS_RE = re.compile(r"\[status:(\w+)\]")
    _FEATURE_RE = re.compile(r"\[feature:(\w+)\]")
    _NEW_FEATURE_RE = re.compile(r"\[new-feature:(\w+):(.+?)\]")
    _CHANGELOG_RE = re.compile(r"\[changelog:(.+?)\]")
    _FIX_RE = re.compile(r"\[fix:(.+?)\]")
    _ISSUE_RE = re.compile(r"\[issue:(.+?)\]")
    _MILESTONE_RE = re.compile(r"\[milestone:(\w+)\]")
    _ROADMAP_RE = re.compile(r"\[roadmap:(\w+):(.+?)\]")

    # Tags whose payload is simply appended to a list in the result.
    _LIST_TAGS = (
        ("changelog_entries", _CHANGELOG_RE),
        ("fixes", _FIX_RE),
        ("issues", _ISSUE_RE),
    )

    def __init__(self, repo_path: str = "."):
        """Initialize with repository path.

        Args:
            repo_path: Directory in which the ``git`` commands are executed.
        """
        self.repo_path = repo_path

    def _run_git_command(self, command: List[str]) -> str:
        """Run a Git command and return its stripped standard output.

        Args:
            command: Arguments passed to ``git`` (without the leading "git").

        Returns:
            The command's stdout with surrounding whitespace removed, or an
            empty string when the command cannot be run or exits non-zero.
        """
        try:
            result = subprocess.run(
                ["git"] + command,
                cwd=self.repo_path,
                capture_output=True,
                text=True,
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing git executable or a repo_path that does
            # not exist; both are reported like a failed command so callers
            # keep their best-effort behaviour instead of crashing.
            return ""
        return result.stdout.strip()

    @staticmethod
    def _parse_log_line(line: str) -> Optional[Tuple[str, str, str, str]]:
        """Split one ``hash|subject|date|author`` record into its four fields.

        The hash is taken from the left and date/author from the right, so a
        subject that itself contains ``|`` is kept intact.

        Returns:
            ``(commit_hash, subject, date, author)`` or ``None`` when the line
            has fewer than four fields.
        """
        head = line.split("|", 1)
        if len(head) < 2:
            return None
        tail = head[1].rsplit("|", 2)
        if len(tail) < 3:
            return None
        subject, date, author = tail
        return head[0], subject, date, author

    @staticmethod
    def _summarize_subject(subject: str, max_len: int = 100) -> str:
        """Return the first sentence of *subject*, capped at *max_len* chars.

        A sentence ends at a period followed by a space, so dotted tokens such
        as version numbers or file names are not cut in half.
        """
        summary = subject.split(". ", 1)[0].rstrip(".") or subject
        if len(summary) > max_len:
            summary = summary[: max_len - 3] + "..."
        return summary

    def get_repo_info(self) -> Dict[str, Any]:
        """Get basic repository information.

        Returns:
            A dict with ``remote_url`` (URL of ``origin``) and ``branch``
            (current branch), each an empty string when unavailable, plus
            ``last_commit`` (``hash``, ``message``, ``date``, ``author``) when
            the most recent commit could be read.
        """
        info: Dict[str, Any] = {
            "remote_url": self._run_git_command(["remote", "get-url", "origin"]),
            "branch": self._run_git_command(["branch", "--show-current"]),
        }

        last_commit = self._run_git_command(
            ["log", "-1", self._LOG_FORMAT, "--date=short"]
        )
        parsed = self._parse_log_line(last_commit)
        if parsed is not None:
            commit_hash, subject, date, author = parsed
            info["last_commit"] = {
                "hash": commit_hash,
                "message": subject,
                "date": date,
                "author": author,
            }

        return info

    def analyze_commits(
        self, limit: int = 100, untagged_limit: int = 50
    ) -> Dict[str, Any]:
        """Analyze Git commits for feature updates, changelog entries, etc.

        Args:
            limit: Maximum number of commits read from ``git log``.
            untagged_limit: Maximum number of commits without any recognised
                tag that are reported in ``untagged_commits``.

        Returns:
            A dict with the keys ``feature_updates``, ``new_features``,
            ``changelog_entries``, ``fixes``, ``issues``, ``untagged_commits``,
            ``milestones`` and ``roadmap_items``. Each milestone records the
            earliest (``first_date``) and latest (``last_date``) commit date
            and the hashes of its commits in log order.
        """
        result: Dict[str, Any] = {
            "feature_updates": {},
            "new_features": {},
            "changelog_entries": [],
            "fixes": [],
            "issues": [],
            "untagged_commits": [],
            "milestones": {},
            "roadmap_items": {},
        }

        git_log = self._run_git_command(
            ["log", f"-n{limit}", self._LOG_FORMAT, "--date=short"]
        )

        untagged_count = 0
        for line in git_log.splitlines():
            parsed = self._parse_log_line(line)
            if parsed is None:
                continue
            commit_hash, subject, date, author = parsed
            tagged = False

            # [status:KEY] and [feature:KEY] both mark KEY as completed.
            for pattern in (self._STATUS_RE, self._FEATURE_RE):
                match = pattern.search(subject)
                if match:
                    tagged = True
                    result["feature_updates"][match.group(1)] = {
                        "status": "completed",
                        "date": date,
                        "author": author,
                    }

            match = self._NEW_FEATURE_RE.search(subject)
            if match:
                tagged = True
                result["new_features"][match.group(1)] = {
                    "status": "planned",
                    "description": match.group(2),
                    "date": date,
                    "author": author,
                }

            for result_key, pattern in self._LIST_TAGS:
                match = pattern.search(subject)
                if match:
                    tagged = True
                    result[result_key].append(
                        {
                            "message": match.group(1),
                            "date": date,
                            "commit": commit_hash,
                            "author": author,
                        }
                    )

            match = self._MILESTONE_RE.search(subject)
            if match:
                tagged = True
                milestones = result["milestones"]
                entry = milestones.get(match.group(1))
                if entry is None:
                    milestones[match.group(1)] = {
                        "first_date": date,
                        "last_date": date,
                        "commits": [commit_hash],
                    }
                else:
                    # git log lists newest commits first, so the range is
                    # tracked with min/max instead of relying on visit order.
                    # Short ISO dates (YYYY-MM-DD) order correctly as strings.
                    entry["first_date"] = min(entry["first_date"], date)
                    entry["last_date"] = max(entry["last_date"], date)
                    entry["commits"].append(commit_hash)

            match = self._ROADMAP_RE.search(subject)
            if match:
                tagged = True
                result["roadmap_items"].setdefault(match.group(1), []).append(
                    {
                        "description": match.group(2),
                        "date": date,
                        "commit": commit_hash,
                        "author": author,
                    }
                )

            if tagged or untagged_count >= untagged_limit:
                continue
            # Skip merge commits and very short messages.
            if subject.startswith("Merge ") or len(subject) <= 5:
                continue
            result["untagged_commits"].append(
                {
                    "message": self._summarize_subject(subject),
                    "date": date,
                    "commit": commit_hash,
                    "author": author,
                }
            )
            untagged_count += 1

        return result