"""Journal entry storage and encrypted monthly-vault management.

Entries live as decrypted ``YYYY-MM-DD.md`` files in ``DATA_DIR`` while the
application runs. They are persisted as one encrypted zip archive per month
in ``VAULT_DIR``. When ``BACKEND_MODE`` is ``"csharp-hybrid"`` the work is
delegated to the C# sidecar instead of being done in Python.
"""

import hashlib
import html
import re
import shutil
import sys
import threading
import time
import zipfile
from datetime import datetime, timezone
from pathlib import Path

from cryptography.exceptions import InvalidTag

# Put the directory three levels above this file on sys.path before the
# package-local imports below are executed.
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))

from .parser import parse_journal_content, parse_journal_file
from .database import get_db_connection, hydrate_database
from .encryption import encrypt_data, decrypt_data
from .csharp_sidecar import call_sidecar_action
from .config import (
    DATA_DIR,
    VAULT_DIR,
    MONTHLY_VAULT_FORMAT,
    BACKEND_MODE,
)

# month key ("YYYY-MM") -> fingerprint of the file set last written to that
# month's vault; lets save_current_month_vault skip no-op saves.
_month_fingerprint_cache: dict[str, str] = {}

# Guards the Python-mode vault load / save / clear paths. Re-entrant so a
# holder may call another guarded helper.
_vault_io_lock = threading.RLock()


def _using_csharp_hybrid() -> bool:
    """Return True when the configured backend delegates to the C# sidecar."""
    return BACKEND_MODE == "csharp-hybrid"


def _looks_like_rich_html(content: str) -> bool:
    """Heuristically decide whether *content* is HTML rather than Markdown.

    True when a document-level marker is present, or when the text contains
    at least eight tags.

    NOTE(review): the marker tuple and the tag-counting pattern were lost from
    the source as received (tag-like text had been stripped out of the string
    literals, leaving invalid syntax). Both are reconstructions -- confirm
    them against version control.
    """
    lowered = content.lower()
    html_markers = (
        "<!doctype html",
        "<html",
        "<head",
        "<body",
    )
    if any(marker in lowered for marker in html_markers):
        return True
    return len(re.findall(r"</?[a-z][^>]*>", lowered)) >= 8


def _strip_rich_html(content: str) -> str:
    """Convert rich HTML to plain text; return non-HTML content untouched.

    Falls back to the original *content* when stripping would leave nothing.

    NOTE(review): the patterns marked "reconstructed" below had their tag text
    stripped in the source as received (they appeared as ``r"(?i)"``,
    ``r"(?i)]*>"`` or a truncated ``.*?``). An empty pattern would insert the
    replacement between every character, so they are rebuilt here from the
    surviving replacement strings -- confirm against version control.
    """
    if not _looks_like_rich_html(content):
        return content

    text = content.replace("\r\n", "\n").replace("\r", "\n")

    # Drop script/style elements together with their bodies (reconstructed
    # closing-tag backreference).
    text = re.sub(r"(?is)<(script|style)\b[^>]*>.*?</\1>", "", text)
    # Line breaks and block-level closers become newlines (reconstructed).
    text = re.sub(r"(?i)<br\s*/?>", "\n", text)
    text = re.sub(r"(?i)</(p|div|h[1-6])>", "\n", text)
    # List items become "- " bullets (reconstructed).
    text = re.sub(r"(?i)<li\b[^>]*>", "\n- ", text)
    # Table rows end a line (reconstructed); cells are pipe-separated.
    text = re.sub(r"(?i)</tr>", "\n", text)
    text = re.sub(r"(?i)<(td|th)\b[^>]*>", " | ", text)
    text = re.sub(r"(?i)</(td|th)>", " ", text)  # reconstructed
    # Horizontal rules become a Markdown rule (reconstructed).
    text = re.sub(r"(?i)<hr\b[^>]*>", "\n---\n", text)
    # Whatever tags remain are removed outright.
    text = re.sub(r"(?is)<[^>]+>", "", text)

    text = html.unescape(text)
    # Normalise non-breaking spaces and drop zero-width spaces.
    text = text.replace("\u00a0", " ").replace("\u200b", "")
    text = "\n".join(line.rstrip() for line in text.splitlines())
    text = re.sub(r"[ \t]{2,}", " ", text)
    text = re.sub(r"\n{3,}", "\n\n", text).strip()

    if text:
        return text
    return content


# --- Monthly Vault Management ---


def _get_monthly_vault_path(date: datetime) -> Path:
    """Return the vault file path for the month containing *date*."""
    return VAULT_DIR / date.strftime(MONTHLY_VAULT_FORMAT)


def _create_monthly_archive(month_path: Path, archive_path: Path) -> None:
    """Zip every entry of *month_path* into *archive_path*.

    Members are stored under their bare file name. Iteration is sorted so the
    archive layout does not depend on directory-listing order.
    """
    with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file_path in sorted(month_path.iterdir()):
            zipf.write(file_path, arcname=file_path.name)


def _extract_monthly_archive(archive_path: Path, extract_to_path: Path) -> None:
    """Extract the zip at *archive_path* into *extract_to_path*."""
    with zipfile.ZipFile(archive_path, "r") as zipf:
        zipf.extractall(extract_to_path)


# --- Public API for Journal Storage ---


def _save_month(password: str, month_key: str, files_in_month: list[Path]) -> None:
    """Encrypt *files_in_month* into the vault for *month_key* ("YYYY-MM").

    Failures while staging, zipping, encrypting or writing are reported with
    ``print`` and swallowed (best-effort save); the fingerprint cache is only
    updated after a successful write.

    Raises:
        ValueError: if *month_key* is not in ``%Y-%m`` form.
    """
    month_as_date = datetime.strptime(month_key, "%Y-%m")
    monthly_vault_path = _get_monthly_vault_path(month_as_date)

    # Not every caller creates VAULT_DIR first, so ensure it here.
    VAULT_DIR.mkdir(parents=True, exist_ok=True)

    temp_month_dir = VAULT_DIR / f"temp_{month_key}"
    temp_zip_path = VAULT_DIR / f"temp_{month_key}.zip"
    # Sibling of the real vault so the final rename stays on one filesystem.
    temp_vault_path = monthly_vault_path.with_name(monthly_vault_path.name + ".tmp")

    # A staging directory left behind by an interrupted run must not leak
    # stale files into the new archive.
    shutil.rmtree(temp_month_dir, ignore_errors=True)
    temp_month_dir.mkdir(exist_ok=True)

    try:
        # Fingerprint BEFORE copying: if a file changes while we are saving,
        # the cached value will not match and the next save is not skipped.
        fingerprint = _compute_month_fingerprint(files_in_month)

        for file_path in files_in_month:
            _ = shutil.copy(file_path, temp_month_dir)

        _create_monthly_archive(temp_month_dir, temp_zip_path)
        encrypted_vault_content = encrypt_data(temp_zip_path.read_bytes(), password)

        # Write to a temp file and swap it in, so a crash mid-write cannot
        # leave a truncated vault in place of the previous good one.
        _ = temp_vault_path.write_bytes(encrypted_vault_content)
        _ = temp_vault_path.replace(monthly_vault_path)

        _month_fingerprint_cache[month_key] = fingerprint
        print(f"Successfully saved {monthly_vault_path.name}")
    except Exception as e:
        print(f"Error saving month {month_key}: {e}")
    finally:
        # The staging directory and zip hold plaintext; always remove them.
        shutil.rmtree(temp_month_dir, ignore_errors=True)
        temp_zip_path.unlink(missing_ok=True)
        temp_vault_path.unlink(missing_ok=True)


def _compute_month_fingerprint(files: list[Path]) -> str:
    """Return a SHA-256 hex digest over each file's name, mtime and size.

    Files are visited in name order; files that cannot be stat'ed are skipped.
    """
    fingerprint = hashlib.sha256()
    for file_path in sorted(files, key=lambda p: p.name):
        try:
            stat = file_path.stat()
        except OSError:
            continue
        fingerprint.update(file_path.name.encode("utf-8"))
        fingerprint.update(str(stat.st_mtime_ns).encode("ascii"))
        fingerprint.update(str(stat.st_size).encode("ascii"))
    return fingerprint.hexdigest()


def get_today_filename() -> Path:
    """Returns the path for today's journal entry in the active DATA_DIR."""
    return DATA_DIR / f"{datetime.now().strftime('%Y-%m-%d')}.md"


def list_journal_files() -> list[tuple[str, str]]:
    """Lists decrypted markdown entries as (file_name, absolute_path).

    In C# hybrid mode the list comes from the sidecar; items that are not
    dicts or lack string name/path fields are ignored, and a non-list
    response yields an empty list.
    """
    if _using_csharp_hybrid():
        results = call_sidecar_action(
            "entries.list",
            payload={"dataDirectory": str(DATA_DIR)},
        )
        if not isinstance(results, list):
            return []

        listed: list[tuple[str, str]] = []
        for item in results:
            if not isinstance(item, dict):
                continue
            # The sidecar may emit either PascalCase or camelCase keys.
            name = item.get("FileName") or item.get("fileName")
            path = item.get("FilePath") or item.get("filePath")
            if isinstance(name, str) and isinstance(path, str):
                listed.append((name, path))
        return listed

    return [(f.name, str(f)) for f in sorted(DATA_DIR.glob("*.md"))]


def load_entry_content(file_path: str | Path) -> str:
    """Loads one journal entry and returns the raw markdown content.

    Rich HTML in the stored content is stripped to plain text on the way out.

    Raises:
        RuntimeError: in C# hybrid mode, when the sidecar response is neither
            a string nor a dict carrying a string ``RawContent``/``rawContent``.
    """
    normalized_path = str(file_path)

    if _using_csharp_hybrid():
        data = call_sidecar_action(
            "entries.load",
            payload={"filePath": normalized_path},
        )
        if isinstance(data, str):
            return _strip_rich_html(data)
        if isinstance(data, dict):
            candidates = [
                value
                for value in (data.get("RawContent"), data.get("rawContent"))
                if isinstance(value, str)
            ]
            if candidates:
                # Prefer a non-empty value, but an empty entry ("") is a valid
                # response and must not be reported as a malformed one.
                raw = next((value for value in candidates if value), candidates[0])
                return _strip_rich_html(raw)
        raise RuntimeError("Unexpected entries.load response shape from C# sidecar.")

    entry = parse_journal_file(normalized_path)
    return _strip_rich_html(entry.raw_content)


def save_entry_content(
    content: str, file_path: Path | None = None, mode: str = "Daily"
) -> None:
    """Write *content* to a journal entry file.

    Args:
        content: Entry text; rich HTML is stripped before saving.
        file_path: Target file. Defaults to today's entry in DATA_DIR.
        mode: ``"Overwrite"`` replaces the file, ``"Fragment"`` appends to it,
            and any other value merges into an existing entry (or creates the
            file when it does not exist yet).
    """
    sanitized_content = _strip_rich_html(content)
    target_file = file_path or get_today_filename()
    target_file.parent.mkdir(parents=True, exist_ok=True)

    if _using_csharp_hybrid():
        _ = call_sidecar_action(
            "entries.save",
            payload={
                "content": sanitized_content,
                "filePath": str(target_file),
                "mode": mode,
            },
        )
        return

    if mode == "Overwrite":
        _ = target_file.write_text(sanitized_content, encoding="utf-8")
        return

    if mode == "Fragment":
        print(f"Appending fragment to {target_file.name}...")
        with open(target_file, "a", encoding="utf-8") as f:
            # Ensure there's a newline before the new content
            _ = f.write("\n\n" + sanitized_content.strip())
        return

    # For Daily, Deep, etc., perform a merge
    if target_file.exists():
        print(f"Merging content into existing file: {target_file.name}")
        existing_entry = parse_journal_file(str(target_file))
        new_entry_data = parse_journal_content(sanitized_content, target_file.stem)
        existing_entry.merge_with(new_entry_data)
        final_content = existing_entry.to_markdown()
    else:
        print(f"Creating new entry: {target_file.name}")
        final_content = sanitized_content

    _ = target_file.write_text(final_content, encoding="utf-8")


def _decrypt_vault_into_data_dir(vault_file: Path, password: str) -> None:
    """Decrypt one vault file and unpack its archive into DATA_DIR.

    Exceptions from decryption or extraction propagate to the caller.
    """
    decrypted_zip_content = decrypt_data(vault_file.read_bytes(), password)

    temp_zip_path = VAULT_DIR / f"temp_{vault_file.name}.zip"
    try:
        _ = temp_zip_path.write_bytes(decrypted_zip_content)
        _extract_monthly_archive(temp_zip_path, DATA_DIR)
    finally:
        # The temp zip is plaintext; remove it even when extraction fails.
        temp_zip_path.unlink(missing_ok=True)


def load_all_vaults(password: str) -> bool:
    """
    Decrypts and extracts all monthly vaults into the DATA_DIR.
    Cleans DATA_DIR before extraction.

    Returns True on success (including when there are no vaults yet). Returns
    False when vault files exist but none could be loaded with *password*, or
    when database / workspace hydration fails.

    Raises:
        ValueError: if *password* is empty.
    """
    if not password:
        raise ValueError("Password cannot be empty.")

    with _vault_io_lock:
        _month_fingerprint_cache.clear()

        if _using_csharp_hybrid():
            load_success = bool(
                call_sidecar_action(
                    "vault.load_all",
                    payload={
                        "password": password,
                        "vaultDirectory": str(VAULT_DIR),
                        "dataDirectory": str(DATA_DIR),
                    },
                )
            )
            if not load_success:
                return False

            try:
                _ = call_sidecar_action(
                    "db.hydrate_workspace",
                    payload={
                        "password": password,
                        "dataDirectory": str(DATA_DIR),
                    },
                )
            except Exception as e:
                print(f"Fatal error during C# workspace hydration: {e}")
                return False
            return True

        # Clear DATA_DIR first
        _clear_data_dir_with_retries()
        DATA_DIR.mkdir(parents=True, exist_ok=True)

        if not VAULT_DIR.exists() or not any(VAULT_DIR.iterdir()):
            print("Vault directory is empty or does not exist. Assuming new vault.")
            return True  # No vaults to load, so it's a success (new vault)

        attempted_any = False
        decryption_successful = False
        # Materialise the listing: the loop may delete files from VAULT_DIR.
        for vault_file in sorted(VAULT_DIR.glob("*.vault")):
            if vault_file.name == "_init_vault.vault":
                print(f"Deleting old dummy vault file: {vault_file.name}")
                vault_file.unlink()
                continue

            attempted_any = True
            try:
                _decrypt_vault_into_data_dir(vault_file, password)
                decryption_successful = True
                print(f"Successfully loaded {vault_file.name}")
                print(
                    f"Contents of DATA_DIR after loading {vault_file.name}: {list(DATA_DIR.iterdir())}"
                )
            except InvalidTag:
                # A single bad file does not fail the whole load.
                print(
                    f"Warning: Could not decrypt '{vault_file.name}'. Invalid password for this file."
                )
            except Exception as e:
                # If any other error occurs, it's not necessarily a password issue
                print(f"Error loading vault '{vault_file.name}': {e}")

        # Judge by the vault files actually tried, not by whatever else sits
        # in VAULT_DIR: a stray non-vault file must not look like a wrong
        # password.
        if attempted_any and not decryption_successful:
            print("Error: No vault files could be decrypted with the provided password.")
            return False

        # --- Database Hydration (Python mode only) ---
        # After successfully decrypting files, hydrate the live, encrypted database.
        conn = None
        try:
            all_entries = [parse_journal_file(str(f)) for f in DATA_DIR.glob("*.md")]
            if all_entries:
                conn = get_db_connection(password)
                hydrate_database(conn, all_entries)
        except Exception as e:
            print(f"Fatal error during database hydration: {e}")
            return False  # Treat DB hydration failure as a critical error
        finally:
            if conn is not None:
                conn.close()

        return True


def rebuild_all_vaults(password: str) -> None:
    """
    Rebuilds all monthly vaults from the files in the DATA_DIR.

    This is a comprehensive but slower operation, intended for use on shutdown
    or via the CLI to ensure all changes, including to older entries, are
    persisted. It iterates through all decrypted files and saves them to their
    respective monthly vaults. Files whose stem is not ``YYYY-MM-DD`` are
    skipped.

    Raises:
        ValueError: if *password* is empty.
    """
    print("rebuild_all_vaults called.")
    if not password:
        raise ValueError("Password cannot be empty.")

    if _using_csharp_hybrid():
        _ = call_sidecar_action(
            "vault.rebuild_all",
            payload={
                "password": password,
                "vaultDirectory": str(VAULT_DIR),
                "dataDirectory": str(DATA_DIR),
            },
        )
        return

    with _vault_io_lock:
        # Group files by month
        monthly_files: dict[str, list[Path]] = {}
        for file_path in DATA_DIR.glob("*.md"):
            try:
                file_date = datetime.strptime(file_path.stem, "%Y-%m-%d")
            except ValueError:
                # Skip files that don't match YYYY-MM-DD format
                print(f"Skipping non-journal file in DATA_DIR: {file_path.name}")
                continue
            monthly_files.setdefault(file_date.strftime("%Y-%m"), []).append(file_path)

        # Ensure VAULT_DIR exists
        VAULT_DIR.mkdir(parents=True, exist_ok=True)

        for month_key, files_in_month in monthly_files.items():
            _save_month(password, month_key, files_in_month)


def save_current_month_vault(password: str) -> None:
    """
    Optimized save function that only rebuilds the current month's vault.

    This is used for frequent, in-session saves from the UI to provide better
    performance, as it only operates on the files for the current month. The
    save is skipped when the month's file fingerprint matches the cached one.

    Raises:
        ValueError: if *password* is empty.
    """
    print("save_current_month_vault called.")
    if not password:
        raise ValueError("Password cannot be empty.")

    if _using_csharp_hybrid():
        # Aware-UTC "now" rendered as naive ISO text plus "Z": the same wire
        # format the deprecated datetime.utcnow() produced.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        _ = call_sidecar_action(
            "vault.save_current_month",
            payload={
                "password": password,
                "vaultDirectory": str(VAULT_DIR),
                "dataDirectory": str(DATA_DIR),
                "nowUtc": now_utc.isoformat() + "Z",
            },
        )
        return

    with _vault_io_lock:
        # Determine current month
        month_key = datetime.now().strftime("%Y-%m")

        # Collect files for the current month
        files_in_month: list[Path] = [
            file_path
            for file_path in DATA_DIR.glob("*.md")
            if file_path.stem.startswith(month_key)
        ]

        if not files_in_month:
            print(f"No files found for the current month ({month_key}) to save.")
            return

        current_fingerprint = _compute_month_fingerprint(files_in_month)
        cached_fingerprint = _month_fingerprint_cache.get(month_key)
        if cached_fingerprint == current_fingerprint:
            print(f"Skipping vault save for {month_key}; no file changes detected.")
            return

        _save_month(password, month_key, files_in_month)


def initialize_vault(password: str) -> None:
    """
    Ensures the VAULT_DIR exists.
    The first save operation will create the initial vault files.

    Raises:
        ValueError: if *password* is empty.
    """
    if not password:
        raise ValueError("Password cannot be empty.")

    if _using_csharp_hybrid():
        _ = call_sidecar_action(
            "vault.initialize",
            payload={
                "password": password,
                "vaultDirectory": str(VAULT_DIR),
            },
        )
        return

    VAULT_DIR.mkdir(parents=True, exist_ok=True)
    print("Vault directory ensured to exist.")


def clear_data_directory() -> None:
    """
    Clears the DATA_DIR. This should only be called on application shutdown.
    """
    print("Clearing DATA_DIR...")
    if _using_csharp_hybrid():
        _ = call_sidecar_action(
            "vault.clear_data_directory",
            payload={"dataDirectory": str(DATA_DIR)},
        )
        print("DATA_DIR cleared.")
        return

    with _vault_io_lock:
        # The encrypted database file lives in DATA_DIR, so it is removed
        # along with all the decrypted .md files. This is an ordinary
        # recursive delete (shutil.rmtree), not a secure wipe.
        _clear_data_dir_with_retries()
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        _month_fingerprint_cache.clear()
    print("DATA_DIR cleared.")


def _clear_data_dir_with_retries(retries: int = 5, delay_seconds: float = 0.2) -> None:
    """Remove DATA_DIR recursively, retrying on PermissionError.

    Does nothing when DATA_DIR does not exist. Sleeps *delay_seconds* between
    attempts and re-raises the PermissionError from the final attempt.
    """
    if not DATA_DIR.exists():
        return

    for attempt in range(retries):
        try:
            shutil.rmtree(DATA_DIR)
            return
        except PermissionError:
            if attempt == retries - 1:
                raise
            time.sleep(delay_seconds)