# AdvChkSys/scripts/track_progress.py

import os
import re
import sys
from datetime import datetime
from subprocess import check_output

# Best-effort: re-wrap stderr as UTF-8 with errors="replace" so that
# non-ASCII text (e.g. emoji) written to it cannot raise UnicodeEncodeError.
try:
    sys.stderr = open(
        sys.stderr.fileno(),
        mode="w",
        encoding="utf-8",
        errors="replace",
        # The descriptor is still owned by the original stream object, so
        # this wrapper must not close it when it is finalized.
        closefd=False,
    )
except (AttributeError, OSError, ValueError):
    # stderr is missing (None) or is not backed by a real file descriptor
    # (for instance an in-memory replacement); keep the stream unchanged.
    pass

# Configuration
PROGRESS_FILE = "docs/progress.json"
STATUS_DOC = "docs/status/ChunkManager-Status.md"
CHANGELOG_FILE = "CHANGELOG.md"
FEATURES_FILE = "docs/features.md"
GIT_LOG_LIMIT = 50  # number of commits to parse

# Regular expressions matching the tags recognised in commit subjects:
#   [status:<key>]              -> key
#   [changelog:<text>]          -> text (shortest match up to the first "]")
#   [feature:<key>]             -> key
#   [new-feature:<key>:<text>]  -> key, text
STATUS_TAG_RE = re.compile(r"\[status:(\w+)\]")
CHANGELOG_TAG_RE = re.compile(r"\[changelog:(.+?)\]")
FEATURE_TAG_RE = re.compile(r"\[feature:(\w+)\]")
NEW_FEATURE_RE = re.compile(r"\[new-feature:(\w+):(.+?)\]")


# Ensure the output directories exist before any document is written
os.makedirs(os.path.dirname(STATUS_DOC), exist_ok=True)
os.makedirs(os.path.dirname(FEATURES_FILE), exist_ok=True)


def count_lines_of_code(pattern="src/AdvChkSys/**/*.cs"):
    """Count the lines of every source file matched by *pattern*.

    Args:
        pattern: Glob pattern selecting the files to count. It is expanded
            with ``recursive=True``, so ``**`` matches any number of
            directory levels. Defaults to the ``.cs`` files below
            ``src/AdvChkSys``.

    Returns:
        A dict with three keys:

        * ``"total_lines"`` - sum of the line counts of all counted files.
        * ``"file_count"`` - number of files counted.
        * ``"files"`` - list of ``(file_name, line_count, file_path)``
          tuples, largest file first; files with equal line counts are
          ordered by path so repeated runs produce the same listing.
    """
    import glob

    file_stats = []
    for file_path in glob.glob(pattern, recursive=True):
        # A directory whose name happens to match the pattern is not source.
        if not os.path.isfile(file_path):
            continue
        # Undecodable bytes are ignored so one odd file cannot abort the run.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            # Stream the file instead of loading every line into memory.
            line_count = sum(1 for _ in f)
        file_stats.append(
            (os.path.basename(file_path), line_count, file_path)
        )

    # Descending by line count; the path breaks ties deterministically
    # (glob returns matches in arbitrary order).
    file_stats.sort(key=lambda entry: (-entry[1], entry[2]))

    return {
        "total_lines": sum(entry[1] for entry in file_stats),
        "file_count": len(file_stats),
        "files": file_stats,
    }


# Load the feature registry from the Markdown file if it exists. The file is
# rewritten by this script on every run, so the parser reads back every field
# that the save step emits:
#
#     ## <feature_key>
#     - Status: <status>
#     - Description: <text>
#     - Last Update: <date>
#     - Updated By: <author>      (only present when an author is known)
features = {}
if os.path.exists(FEATURES_FILE):
    with open(FEATURES_FILE, "r", encoding="utf-8") as f:
        content = f.read()

    # Split on level-2 headings that start a line, so a "## word" sequence
    # inside a description cannot open a bogus section. Because the pattern
    # has a capture group, the result alternates key, body, key, body, ...
    # after the leading text before the first heading is dropped.
    sections = re.split(r"^## (\w+)", content, flags=re.MULTILINE)[1:]
    for feature_key, feature_content in zip(sections[::2], sections[1::2]):
        # Extract status, description, date and author
        status_match = re.search(r"- Status: (\w+)", feature_content)
        desc_match = re.search(
            r"- Description: (.+?)$", feature_content, re.MULTILINE
        )
        date_match = re.search(
            r"- Last Update: (.+?)$", feature_content, re.MULTILINE
        )
        author_match = re.search(
            r"- Updated By: (.+?)$", feature_content, re.MULTILINE
        )

        features[feature_key] = {
            "status": status_match.group(1) if status_match else "planned",
            "description": desc_match.group(1) if desc_match else "",
            "date": date_match.group(1) if date_match else "",
        }
        # Keep the recorded author; otherwise the "Updated By" line would
        # disappear the next time the file is saved.
        if author_match:
            features[feature_key]["author"] = author_match.group(1)
else:
    # Default features
    features = {
        "spatial_queries": {
            "status": "planned",
            "description": "Methods to efficiently find chunks within regions or distances",  # noqa: E501
        },
        "priority_loading": {
            "status": "planned",
            "description": "API to specify which chunks should be loaded first",  # noqa: E501
        },
        "serialization_optimization": {
            "status": "in_progress",
            "description": "Further improvements to chunk saving/loading",
        },
        "chunk_dependency": {
            "status": "planned",
            "description": "For cases where chunks need to reference neighbors",  # noqa: E501
        },
    }

# Get recent git log entries, one commit per line formatted as
# "<abbreviated hash>|<subject>|<author date, YYYY-MM-DD>|<author name>".
# Raises CalledProcessError if git exits with a non-zero status.
git_log = check_output(
    [
        "git",
        "log",
        f"-n{GIT_LOG_LIMIT}",
        "--pretty=format:%h|%s|%ad|%an",
        "--date=short",
    ]
    # Replace undecodable bytes instead of aborting the whole run because a
    # single commit message or author name is not valid UTF-8.
).decode("utf-8", errors="replace")

# Parse the git log and collect the updates requested through commit tags.
# NOTE(review): status_updates is filled here but is not read anywhere in the
# visible part of this script - confirm whether a consumer is still planned.
status_updates = {}
changelog_entries = []
feature_updates = {}
new_features = {}

for line in git_log.splitlines():
    # Each line is "<hash>|<subject>|<date>|<author>". The subject may itself
    # contain "|", so take the hash from the left and date/author from the
    # right; whatever remains in the middle is the subject.
    commit_hash, first_sep, remainder = line.partition("|")
    tail_fields = remainder.rsplit("|", 2)
    if not first_sep or len(tail_fields) != 3:
        continue  # malformed line: fewer than four fields

    subject, date, author = tail_fields

    status_match = STATUS_TAG_RE.search(subject)
    changelog_match = CHANGELOG_TAG_RE.search(subject)
    feature_match = FEATURE_TAG_RE.search(subject)
    new_feature_match = NEW_FEATURE_RE.search(subject)

    # git log lists the newest commit first, so for every keyed update only
    # the first occurrence is kept: the recorded date/author then describe
    # the most recent matching commit rather than the oldest one.
    if status_match:
        status_key = status_match.group(1)
        if status_key not in status_updates:
            status_updates[status_key] = {
                "status": "done",
                "commit": commit_hash,
                "date": date,
                "message": subject,
            }

    if changelog_match:
        changelog_entries.append(
            f"- {date}: {changelog_match.group(1)} ({commit_hash})"
        )

    if feature_match:
        feature_key = feature_match.group(1)
        # Only features that are already registered can be completed.
        if feature_key in features and feature_key not in feature_updates:
            feature_updates[feature_key] = {
                "status": "completed",
                "date": date,
                "author": author,
            }

    if new_feature_match:
        feature_key = new_feature_match.group(1)
        feature_desc = new_feature_match.group(2)
        # Never overwrite a feature that is already registered.
        if feature_key not in features and feature_key not in new_features:
            new_features[feature_key] = {
                "status": "planned",
                "description": feature_desc,
                "date": date,
                "author": author,
            }

# Fold the collected commit information into the feature registry:
# first patch the entries that already exist ...
for feature_key in feature_updates:
    features[feature_key].update(feature_updates[feature_key])

# ... then register the newly announced features in one step.
features.update(new_features)

# Persist the feature registry as Markdown: one "## <key>" section per
# feature followed by its bullet-point fields and a blank separator line.
with open(FEATURES_FILE, "w", encoding="utf-8") as f:
    f.write("# Feature Tracking\n\n")

    for feature_key, feature_data in features.items():
        section = [
            f"## {feature_key}\n",
            f"- Status: {feature_data.get('status', 'planned')}\n",
            f"- Description: {feature_data.get('description', '')}\n",
            f"- Last Update: {feature_data.get('date', '')}\n",
        ]

        # The author line is optional and only written when one is known.
        author = feature_data.get("author", "")
        if author:
            section.append(f"- Updated By: {author}\n")

        section.append("\n")
        f.writelines(section)

def _table_cell(value):
    """Return *value* as text that is safe inside a Markdown table cell."""
    # An unescaped "|" would be read as a column separator and shift every
    # following cell of the row.
    return str(value).replace("|", "\\|")


# Text indicators are used instead of emojis; unknown statuses fall back to
# "[UNKNOWN]" at lookup time.
_STATUS_INDICATORS = {
    "planned": "[PLANNED]",
    "in_progress": "[IN PROGRESS]",
    "completed": "[COMPLETED]",
    "done": "[DONE]",
}

# Gather the statistics before the status document is opened for writing, so
# a failure while counting cannot leave a truncated document behind.
code_stats = count_lines_of_code()

# Generate status document
with open(STATUS_DOC, "w", encoding="utf-8") as f:
    f.write("# AdvChkSys Development Status\n\n")
    f.write(f"Last updated: {datetime.now().strftime('%Y-%m-%d')}\n\n")

    # Code statistics section
    f.write("## Code Statistics\n\n")
    f.write(f"Total lines of code: **{code_stats['total_lines']}**\n\n")
    f.write(f"Number of source files: **{code_stats['file_count']}**\n\n")

    f.write("### All Files by Line Count\n\n")
    f.write("| File | Lines | Path |\n")
    f.write("|------|------:|------|\n")

    for file_name, lines, file_path in code_stats["files"]:
        f.write(
            f"| {_table_cell(file_name)} | {lines} | {_table_cell(file_path)} |\n"  # noqa: E501
        )

    f.write("\n")

    # Feature overview table
    f.write("## Feature Status\n\n")
    f.write("| Feature | Status | Description | Last Update |\n")
    f.write("|---------|--------|-------------|-------------|\n")

    for feature_key, feature_data in features.items():
        status = feature_data.get("status", "unknown")
        description = feature_data.get("description", "")
        date = feature_data.get("date", "")

        status_indicator = _STATUS_INDICATORS.get(status, "[UNKNOWN]")

        f.write(
            f"| {feature_key.replace('_', ' ').title()} | {status_indicator} | {_table_cell(description)} | {_table_cell(date)} |\n"  # noqa: E501
        )

    f.write("\n## Recent Updates\n\n")
    # Show only the first 10 collected entries (they are in git log order).
    for entry in changelog_entries[:10]:
        f.write(f"{entry}\n")

# Update the changelog if there are tagged entries
if changelog_entries and os.path.exists(CHANGELOG_FILE):
    with open(CHANGELOG_FILE, "r", encoding="utf-8") as f:
        existing_changelog = f.read()

    lines = existing_changelog.splitlines()

    # The same commits are re-read on every run, so only insert entries that
    # are not already present as a line of the changelog; otherwise each run
    # would duplicate every tagged commit still inside the log window.
    recorded_lines = {existing_line.strip() for existing_line in lines}
    fresh_entries = [
        entry for entry in changelog_entries if entry not in recorded_lines
    ]

    if fresh_entries:
        # New entries go directly above the first "##" heading; without such
        # a heading they are placed after the first two lines.
        insertion_point = next(
            (i for i, line in enumerate(lines) if line.startswith("##")), 2
        )

        # Build the full text before reopening the file for writing, so the
        # existing changelog is never truncated without a replacement ready.
        head = "\n".join(lines[:insertion_point]).strip("\n")
        tail = "\n".join(lines[insertion_point:]).strip("\n")
        # Join the non-empty parts with exactly one blank line between them,
        # which keeps blank lines from piling up over repeated runs.
        parts = [
            part for part in (head, "\n".join(fresh_entries), tail) if part
        ]
        updated_changelog = "\n\n".join(parts) + "\n"

        with open(CHANGELOG_FILE, "w", encoding="utf-8") as f:
            f.write(updated_changelog)
elif changelog_entries:
    # Create new changelog file
    with open(CHANGELOG_FILE, "w", encoding="utf-8") as f:
        f.write("# Changelog\n\n")
        f.write("\n".join(changelog_entries))
        f.write("\n")

# Tell the operator where the generated documents were written.
for summary_line in (
    f"Progress tracking updated. Status document generated at {STATUS_DOC}",
    f"Features updated at {FEATURES_FILE}",
):
    print(summary_line)