import os
import glob
from typing import Dict, Any, List, Optional

# Lower-cased file extension -> language name reported in the statistics.
_LANGUAGE_BY_EXTENSION = {
    ".py": "Python",
    ".cs": "C#",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".html": "HTML",
    ".css": "CSS",
    ".md": "Markdown",
    ".json": "JSON",
    ".xml": "XML",
    ".java": "Java",
    ".cpp": "C++",
    ".c": "C",
    ".h": "C/C++ Header",
    ".go": "Go",
    ".rs": "Rust",
    ".php": "PHP",
    ".rb": "Ruby",
    ".sh": "Shell",
    ".bat": "Batch",
    ".ps1": "PowerShell",
    ".sql": "SQL",
    ".yaml": "YAML",
    ".yml": "YAML",
}

# Defaults used by analyze_code_stats when the caller passes None.
_DEFAULT_EXCLUDE_DIRS = ("node_modules", "venv", ".git", ".vs", "bin", "obj")
_DEFAULT_SOURCE_EXTENSIONS = (".py", ".cs", ".js", ".ts", ".html", ".css", ".md")


def count_lines(file_path: str) -> int:
    """Count lines in a file.

    The file is read as UTF-8 with undecodable bytes ignored and is streamed
    line by line, so large files are not loaded into memory at once.

    Args:
        file_path: Path of the file to read.

    Returns:
        The number of lines, or 0 if the file cannot be opened or read.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            return sum(1 for _ in f)
    except (OSError, ValueError):
        # Best effort: an unreadable file (or an invalid path such as one
        # containing a NUL byte) contributes zero lines instead of aborting.
        return 0


def get_language(file_path: str) -> str:
    """Determine language from file extension.

    Args:
        file_path: Path or file name; only its final extension is inspected,
            case-insensitively.

    Returns:
        The language name for a known extension, otherwise "Other".
    """
    ext = os.path.splitext(file_path)[1].lower()
    return _LANGUAGE_BY_EXTENSION.get(ext, "Other")


def _path_parts(path: str) -> List[str]:
    """Split a path into its components, dropping empty and "." segments."""
    normalized = os.path.normpath(path)
    return [part for part in normalized.split(os.sep) if part and part != "."]


def _is_excluded(
    dir_parts: List[str],
    excluded_names: set,
    excluded_prefixes: List[List[str]],
) -> bool:
    """Tell whether a file located under ``dir_parts`` lies in an excluded directory."""
    if any(part in excluded_names for part in dir_parts):
        return True
    return any(
        dir_parts[: len(prefix)] == prefix for prefix in excluded_prefixes
    )


def analyze_code_stats(
    root_dir: str = ".",
    exclude_dirs: Optional[List[str]] = None,
    source_extensions: Optional[List[str]] = None,
    top_files_limit: int = 20,
) -> Dict[str, Any]:
    """Analyze code statistics for the project.

    Files are discovered with a recursive glob below ``root_dir``.

    Args:
        root_dir: Directory to scan.
        exclude_dirs: Directories to skip. A bare name (e.g. "node_modules")
            excludes every directory with that name at any depth. A
            multi-component or absolute path excludes only that specific
            directory, anchored at ``root_dir``. Matching is done on whole
            path components, so "bin" does not exclude "binary". Defaults to
            common dependency/build/VCS directories.
        source_extensions: Extensions (with leading dot) of the files to
            include. Defaults to a small set of common source extensions.
        top_files_limit: Maximum number of entries in "top_files"; negative
            values are treated as 0.

    Returns:
        A dict with the keys:
            "total_lines": total number of lines over all counted files.
            "file_count": number of counted files.
            "languages": mapping of language name to number of files.
            "top_files": the largest files, each a dict with "path", "name",
                "lines" and "language", sorted by line count descending
                (ties broken by path).
    """
    if exclude_dirs is None:
        exclude_dirs = list(_DEFAULT_EXCLUDE_DIRS)
    if source_extensions is None:
        source_extensions = list(_DEFAULT_SOURCE_EXTENSIONS)

    # Pre-compute the exclusion rules once instead of per file.
    excluded_names = set()
    excluded_prefixes = []
    for entry in exclude_dirs:
        anchored = os.path.isabs(entry)
        if anchored:
            entry = os.path.relpath(entry, root_dir)
        parts = _path_parts(entry)
        if not parts:
            continue
        if len(parts) == 1 and not anchored:
            excluded_names.add(parts[0])
        else:
            excluded_prefixes.append(parts)

    stats = {
        "total_lines": 0,
        "file_count": 0,
        "languages": {},
        "top_files": [],
    }
    file_stats = []
    seen_paths = set()

    for ext in source_extensions:
        pattern = os.path.join(root_dir, "**", f"*{ext}")
        for file_path in glob.glob(pattern, recursive=True):
            # Overlapping extensions must not count the same file twice.
            identity = os.path.normpath(file_path)
            if identity in seen_paths:
                continue
            seen_paths.add(identity)

            # A directory whose name matches the pattern is not a source file.
            if not os.path.isfile(file_path):
                continue

            # Compare whole path components relative to the scan root so the
            # check does not depend on how root_dir was spelled.
            relative_parts = _path_parts(os.path.relpath(file_path, root_dir))
            if _is_excluded(relative_parts[:-1], excluded_names, excluded_prefixes):
                continue

            lines = count_lines(file_path)
            language = get_language(file_path)

            stats["total_lines"] += lines
            stats["file_count"] += 1
            stats["languages"][language] = stats["languages"].get(language, 0) + 1

            file_stats.append(
                {
                    "path": file_path,
                    "name": os.path.basename(file_path),
                    "lines": lines,
                    "language": language,
                }
            )

    # Largest first; the path tie-breaker keeps the result deterministic
    # regardless of the order in which glob yields files.
    file_stats.sort(key=lambda entry: (-entry["lines"], entry["path"]))
    stats["top_files"] = file_stats[: max(top_files_limit, 0)]

    return stats