493 lines
17 KiB
Python
493 lines
17 KiB
Python
import sys
|
|
import hashlib
|
|
import threading
|
|
import time
|
|
import html
|
|
import re
|
|
from cryptography.exceptions import InvalidTag
|
|
import shutil
|
|
import zipfile
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
|
|
from .parser import parse_journal_content, parse_journal_file
|
|
from .database import get_db_connection, hydrate_database
|
|
from .encryption import encrypt_data, decrypt_data
|
|
from .csharp_sidecar import call_sidecar_action
|
|
|
|
from .config import (
|
|
DATA_DIR,
|
|
VAULT_DIR,
|
|
MONTHLY_VAULT_FORMAT,
|
|
BACKEND_MODE,
|
|
)
|
|
|
|
# In-memory map of month key ("YYYY-MM") -> fingerprint hex digest. Written by
# _save_month after a successful save, read by save_current_month_vault to skip
# unchanged months, and cleared by load_all_vaults / clear_data_directory.
_month_fingerprint_cache: dict[str, str] = {}
# Re-entrant lock taken by the Python-mode vault load/rebuild/save/clear paths.
_vault_io_lock = threading.RLock()
|
|
|
|
|
|
def _using_csharp_hybrid() -> bool:
    """Return True when BACKEND_MODE equals the "csharp-hybrid" setting."""
    hybrid_mode = "csharp-hybrid"
    return BACKEND_MODE == hybrid_mode
|
|
|
|
|
|
def _looks_like_rich_html(content: str) -> bool:
|
|
lowered = content.lower()
|
|
html_markers = (
|
|
"<p",
|
|
"</p>",
|
|
"<div",
|
|
"<span",
|
|
"<table",
|
|
"<tr",
|
|
"<td",
|
|
"<li",
|
|
"<ul",
|
|
"<ol",
|
|
"style=",
|
|
"font-family:",
|
|
"-webkit-text-stroke",
|
|
)
|
|
if any(marker in lowered for marker in html_markers):
|
|
return True
|
|
return len(re.findall(r"</?[a-z][^>]*>", lowered)) >= 8
|
|
|
|
|
|
def _strip_rich_html(content: str) -> str:
    """Convert rich HTML content to plain text; pass other content through.

    Content that ``_looks_like_rich_html`` does not flag is returned
    unchanged. Otherwise tags are replaced or removed, entities are
    unescaped and whitespace is normalised. If that leaves nothing, the
    original ``content`` is returned instead of an empty string.
    """
    if not _looks_like_rich_html(content):
        return content

    # Ordered (pattern, replacement) pairs; the catch-all tag remover is last
    # so the structural tags above it can leave line breaks/bullets behind.
    tag_rewrites = (
        (r"(?is)<(script|style)\b[^>]*>.*?</\1>", ""),
        (r"(?i)<br\s*/?>", "\n"),
        (r"(?i)</(p|div|h[1-6]|tr|table|ul|ol|blockquote)>", "\n"),
        (r"(?i)<li\b[^>]*>", "\n- "),
        (r"(?i)</li>", "\n"),
        (r"(?i)<(td|th)\b[^>]*>", " | "),
        (r"(?i)</(td|th)>", " "),
        (r"(?i)<hr\b[^>]*>", "\n---\n"),
        (r"(?is)<[^>]+>", ""),
    )

    plain = content.replace("\r\n", "\n").replace("\r", "\n")
    for pattern, replacement in tag_rewrites:
        plain = re.sub(pattern, replacement, plain)

    plain = html.unescape(plain)
    # Non-breaking spaces become regular spaces; zero-width spaces are dropped.
    plain = plain.replace("\u00a0", " ").replace("\u200b", "")

    # Trim trailing whitespace per line, then collapse space runs and
    # excess blank lines.
    plain = "\n".join(row.rstrip() for row in plain.splitlines())
    plain = re.sub(r"[ \t]{2,}", " ", plain)
    plain = re.sub(r"\n{3,}", "\n\n", plain).strip()

    return plain if plain else content
|
|
|
|
|
|
# --- Monthly Vault Management ---
|
|
|
|
|
|
def _get_monthly_vault_path(date: datetime) -> Path:
    """Return VAULT_DIR joined with ``date`` formatted by MONTHLY_VAULT_FORMAT."""
    vault_file_name = date.strftime(MONTHLY_VAULT_FORMAT)
    return VAULT_DIR / vault_file_name
|
|
|
|
|
|
def _create_monthly_archive(month_path: Path, archive_path: Path):
|
|
"""Creates a zip archive of a temporary monthly directory."""
|
|
with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED) as zipf:
|
|
for file_path in month_path.iterdir():
|
|
zipf.write(file_path, arcname=file_path.name) # Store only filename in zip
|
|
|
|
|
|
def _extract_monthly_archive(archive_path: Path, extract_to_path: Path):
|
|
"""Extracts a zip archive to a specified directory."""
|
|
with zipfile.ZipFile(archive_path, "r") as zipf:
|
|
zipf.extractall(extract_to_path)
|
|
|
|
|
|
# --- Public API for Journal Storage ---
|
|
|
|
|
|
def _save_month(password: str, month_key: str, files_in_month: list[Path]):
    """Zip, encrypt and write one month's entries to its vault file.

    The files are copied into a scratch directory under VAULT_DIR, zipped,
    encrypted with ``encrypt_data`` and written to the path returned by
    ``_get_monthly_vault_path``. The save is best-effort: any failure inside
    the staging/encrypt/write sequence is printed and swallowed, and the
    fingerprint cache is only updated after the vault has been written.

    Args:
        password: Passphrase handed to ``encrypt_data``.
        month_key: Month identifier in ``%Y-%m`` form.
        files_in_month: Entry files to include in the vault.

    Raises:
        ValueError: If ``month_key`` does not match ``%Y-%m``.
    """
    # We need a datetime object to generate the vault path, strptime is perfect.
    month_as_date = datetime.strptime(month_key, "%Y-%m")
    monthly_vault_path = _get_monthly_vault_path(month_as_date)

    temp_month_dir = VAULT_DIR / f"temp_{month_key}"
    temp_zip_path = VAULT_DIR / f"temp_{month_key}.zip"
    # The new vault is written next to the real one and swapped in afterwards,
    # so a failure mid-write cannot truncate the existing encrypted copy.
    pending_vault_path = monthly_vault_path.with_name(
        monthly_vault_path.name + ".tmp"
    )

    # A staging directory left behind by an interrupted run may hold files that
    # no longer exist; start from an empty one so they are not archived again.
    shutil.rmtree(temp_month_dir, ignore_errors=True)
    temp_month_dir.mkdir(parents=True, exist_ok=True)

    try:
        # Fingerprint BEFORE copying: if a file changes while we are saving,
        # the next comparison sees a mismatch and saves again.
        saved_fingerprint = _compute_month_fingerprint(files_in_month)

        for file_path in files_in_month:
            _ = shutil.copy(file_path, temp_month_dir)

        _create_monthly_archive(temp_month_dir, temp_zip_path)
        zip_content = temp_zip_path.read_bytes()

        encrypted_vault_content = encrypt_data(zip_content, password)
        _ = pending_vault_path.write_bytes(encrypted_vault_content)
        _ = pending_vault_path.replace(monthly_vault_path)

        _month_fingerprint_cache[month_key] = saved_fingerprint
        print(f"Successfully saved {monthly_vault_path.name}")
    except Exception as e:
        print(f"Error saving month {month_key}: {e}")
    finally:
        # Remove every plaintext/partial artefact regardless of outcome.
        shutil.rmtree(temp_month_dir, ignore_errors=True)
        temp_zip_path.unlink(missing_ok=True)
        pending_vault_path.unlink(missing_ok=True)
|
|
|
|
|
|
def _compute_month_fingerprint(files: list[Path]) -> str:
|
|
fingerprint = hashlib.sha256()
|
|
for file_path in sorted(files, key=lambda p: p.name):
|
|
try:
|
|
stat = file_path.stat()
|
|
except OSError:
|
|
continue
|
|
fingerprint.update(file_path.name.encode("utf-8"))
|
|
fingerprint.update(str(stat.st_mtime_ns).encode("ascii"))
|
|
fingerprint.update(str(stat.st_size).encode("ascii"))
|
|
return fingerprint.hexdigest()
|
|
|
|
|
|
def get_today_filename() -> Path:
    """Return ``DATA_DIR/<YYYY-MM-DD>.md`` for the current local date."""
    today_stamp = datetime.now().strftime('%Y-%m-%d')
    return DATA_DIR / f"{today_stamp}.md"
|
|
|
|
|
|
def list_journal_files() -> list[tuple[str, str]]:
    """Return journal entries as ``(file_name, path)`` pairs.

    In Python mode the pairs come from the ``*.md`` files in DATA_DIR, sorted
    by path. In C# hybrid mode they come from the sidecar's ``entries.list``
    response; a non-list response yields an empty list, and items that are
    not dicts or lack string name/path values are ignored.
    """
    if not _using_csharp_hybrid():
        return [(md.name, str(md)) for md in sorted(DATA_DIR.glob("*.md"))]

    response = call_sidecar_action(
        "entries.list",
        payload={"dataDirectory": str(DATA_DIR)},
    )
    if not isinstance(response, list):
        return []

    listed: list[tuple[str, str]] = []
    for record in response:
        if not isinstance(record, dict):
            continue
        # Both PascalCase and camelCase keys are accepted.
        entry_name = record.get("FileName") or record.get("fileName")
        entry_path = record.get("FilePath") or record.get("filePath")
        if isinstance(entry_name, str) and isinstance(entry_path, str):
            listed.append((entry_name, entry_path))
    return listed
|
|
|
|
|
|
def load_entry_content(file_path: str | Path) -> str:
    """Load one journal entry and return its content with rich HTML stripped.

    In Python mode the entry is parsed with ``parse_journal_file`` and its
    ``raw_content`` is used. In C# hybrid mode the sidecar's ``entries.load``
    response may be a string, or a dict carrying ``RawContent``/``rawContent``.

    Raises:
        RuntimeError: If the sidecar response yields no string content.
    """
    path_text = str(file_path)

    if not _using_csharp_hybrid():
        parsed_entry = parse_journal_file(path_text)
        return _strip_rich_html(parsed_entry.raw_content)

    response = call_sidecar_action(
        "entries.load",
        payload={"filePath": path_text},
    )
    if isinstance(response, dict):
        raw_text = response.get("RawContent") or response.get("rawContent")
    else:
        raw_text = response

    if isinstance(raw_text, str):
        return _strip_rich_html(raw_text)
    raise RuntimeError("Unexpected entries.load response shape from C# sidecar.")
|
|
|
|
|
|
def save_entry_content(
    content: str, file_path: str | Path | None = None, mode: str = "Daily"
):
    """Write journal content to an entry file after stripping rich HTML.

    Args:
        content: Text to store; passed through ``_strip_rich_html`` first.
        file_path: Destination file as a ``Path`` or string. When omitted (or
            empty), the path from ``get_today_filename()`` is used.
        mode: ``"Overwrite"`` replaces the file, ``"Fragment"`` appends to
            it, and any other value merges into an existing file via the
            parser or creates the file if it does not exist.

    In C# hybrid mode the write is delegated to the sidecar's
    ``entries.save`` action and ``mode`` is forwarded unchanged.
    """
    sanitized_content = _strip_rich_html(content)
    # Coerce so string paths (which load_entry_content also accepts) work with
    # the Path-only attributes used below.
    target_file = Path(file_path) if file_path else get_today_filename()
    target_file.parent.mkdir(parents=True, exist_ok=True)

    if _using_csharp_hybrid():
        _ = call_sidecar_action(
            "entries.save",
            payload={
                "content": sanitized_content,
                "filePath": str(target_file),
                "mode": mode,
            },
        )
        return

    if mode == "Overwrite":
        _ = target_file.write_text(sanitized_content, encoding="utf-8")
        return

    if mode == "Fragment":
        print(f"Appending fragment to {target_file.name}...")
        # Only separate from previous content when there is some; a new or
        # empty file should not start with blank lines.
        has_existing_text = target_file.exists() and target_file.stat().st_size > 0
        separator = "\n\n" if has_existing_text else ""
        with open(target_file, "a", encoding="utf-8") as f:
            _ = f.write(separator + sanitized_content.strip())
        return

    # For Daily, Deep, etc., perform a merge
    if target_file.exists():
        print(f"Merging content into existing file: {target_file.name}")
        existing_entry = parse_journal_file(str(target_file))
        new_entry_data = parse_journal_content(sanitized_content, target_file.stem)
        existing_entry.merge_with(new_entry_data)
        final_content = existing_entry.to_markdown()
    else:
        print(f"Creating new entry: {target_file.name}")
        final_content = sanitized_content

    _ = target_file.write_text(final_content, encoding="utf-8")
|
|
|
|
|
|
def _load_vaults_via_sidecar(password: str) -> bool:
    """Run the sidecar's vault load, then its workspace hydration."""
    load_success = bool(
        call_sidecar_action(
            "vault.load_all",
            payload={
                "password": password,
                "vaultDirectory": str(VAULT_DIR),
                "dataDirectory": str(DATA_DIR),
            },
        )
    )
    if not load_success:
        return False
    try:
        _ = call_sidecar_action(
            "db.hydrate_workspace",
            payload={
                "password": password,
                "dataDirectory": str(DATA_DIR),
            },
        )
    except Exception as e:
        print(f"Fatal error during C# workspace hydration: {e}")
        return False
    return True


def _decrypt_vault_into_data_dir(vault_file: Path, password: str) -> None:
    """Decrypt one vault file and extract its zip archive into DATA_DIR."""
    decrypted_zip_content = decrypt_data(vault_file.read_bytes(), password)

    # Write decrypted content to a temporary zip file
    temp_zip_path = VAULT_DIR / f"temp_{vault_file.name}.zip"
    try:
        _ = temp_zip_path.write_bytes(decrypted_zip_content)
        _extract_monthly_archive(temp_zip_path, DATA_DIR)
    finally:
        # The temp zip is decrypted data; never leave it behind in VAULT_DIR,
        # even when writing or extraction fails.
        temp_zip_path.unlink(missing_ok=True)


def _hydrate_python_database(password: str) -> bool:
    """Parse every ``*.md`` file in DATA_DIR and hydrate the database with them."""
    conn = None
    try:
        all_entries = [parse_journal_file(str(f)) for f in DATA_DIR.glob("*.md")]
        if all_entries:
            conn = get_db_connection(password)
            hydrate_database(conn, all_entries)
    except Exception as e:
        print(f"Fatal error during database hydration: {e}")
        return False  # Treat DB hydration failure as a critical error
    finally:
        if conn is not None:
            conn.close()
    return True


def load_all_vaults(password: str) -> bool:
    """Decrypt and extract all monthly vaults into DATA_DIR, then hydrate the DB.

    In Python mode DATA_DIR is cleared first. Each ``*.vault`` file in
    VAULT_DIR is decrypted and extracted; a file that fails is reported and
    skipped. In C# hybrid mode the work is delegated to the sidecar.

    Args:
        password: Passphrase used to decrypt the vaults.

    Returns:
        True when there were no vaults to load, or at least one vault was
        loaded and hydration succeeded. False when vaults exist but none
        could be loaded, or when hydration failed.

    Raises:
        ValueError: If ``password`` is empty.
    """
    if not password:
        raise ValueError("Password cannot be empty.")

    with _vault_io_lock:
        _month_fingerprint_cache.clear()

        if _using_csharp_hybrid():
            return _load_vaults_via_sidecar(password)

        # Clear DATA_DIR first
        _clear_data_dir_with_retries()
        DATA_DIR.mkdir(parents=True, exist_ok=True)

        if not VAULT_DIR.exists() or not any(VAULT_DIR.iterdir()):
            print("Vault directory is empty or does not exist. Assuming new vault.")
            return True  # No vaults to load, so it's a success (new vault)

        vaults_attempted = 0
        decryption_successful = False
        # Materialise the listing: files are unlinked while looping, and a
        # sorted order makes the extraction order deterministic.
        for vault_file in sorted(VAULT_DIR.glob("*.vault")):
            if vault_file.name == "_init_vault.vault":
                print(f"Deleting old dummy vault file: {vault_file.name}")
                vault_file.unlink()
                continue

            vaults_attempted += 1
            try:
                _decrypt_vault_into_data_dir(vault_file, password)
                decryption_successful = True
                print(f"Successfully loaded {vault_file.name}")
                print(
                    f"Contents of DATA_DIR after loading {vault_file.name}: {list(DATA_DIR.iterdir())}"
                )
            except InvalidTag:
                print(
                    f"Warning: Could not decrypt '{vault_file.name}'. Invalid password for this file."
                )
            except Exception as e:
                # Any other error is not necessarily a password issue.
                print(f"Error loading vault '{vault_file.name}': {e}")

        # Judge failure by the vaults actually attempted, not by whatever else
        # happens to sit in VAULT_DIR (stray or leftover non-vault files).
        if vaults_attempted and not decryption_successful:
            print("Error: No vault files could be decrypted with the provided password.")
            return False

        # --- Database Hydration (Python mode only) ---
        return _hydrate_python_database(password)
|
|
|
|
|
|
def rebuild_all_vaults(password: str):
    """Re-save every monthly vault from the ``*.md`` files in DATA_DIR.

    Files are grouped by the month parsed from their ``YYYY-MM-DD`` stem and
    each group is handed to ``_save_month``. Files whose stem does not parse
    as a date are reported and skipped. In C# hybrid mode the whole operation
    is delegated to the sidecar's ``vault.rebuild_all`` action.

    Raises:
        ValueError: If ``password`` is empty.
    """
    print("rebuild_all_vaults called.")
    if not password:
        raise ValueError("Password cannot be empty.")

    if _using_csharp_hybrid():
        sidecar_payload = {
            "password": password,
            "vaultDirectory": str(VAULT_DIR),
            "dataDirectory": str(DATA_DIR),
        }
        _ = call_sidecar_action("vault.rebuild_all", payload=sidecar_payload)
        return

    with _vault_io_lock:
        # Bucket entry files under their "YYYY-MM" key.
        files_by_month: dict[str, list[Path]] = {}
        for entry_path in DATA_DIR.glob("*.md"):
            try:
                entry_date = datetime.strptime(entry_path.stem, "%Y-%m-%d")
                bucket_key = entry_date.strftime("%Y-%m")
            except ValueError:  # stem is not a YYYY-MM-DD date
                print(f"Skipping non-journal file in DATA_DIR: {entry_path.name}")
            else:
                files_by_month.setdefault(bucket_key, []).append(entry_path)

        # The vault directory must exist before any month is written.
        VAULT_DIR.mkdir(parents=True, exist_ok=True)

        for bucket_key, bucket_files in files_by_month.items():
            _save_month(password, bucket_key, bucket_files)
|
|
|
|
|
|
def save_current_month_vault(password: str):
    """Save only the current month's vault, skipping it when nothing changed.

    In Python mode the ``*.md`` files in DATA_DIR whose stem starts with the
    current local ``YYYY-MM`` are fingerprinted; if the fingerprint matches
    the cached one the save is skipped, otherwise ``_save_month`` is called.
    In C# hybrid mode the work is delegated to the sidecar's
    ``vault.save_current_month`` action along with the current UTC time.

    Raises:
        ValueError: If ``password`` is empty.
    """
    print("save_current_month_vault called.")
    if not password:
        raise ValueError("Password cannot be empty.")

    if _using_csharp_hybrid():
        # datetime.utcnow() is deprecated; build the same naive-UTC ISO string
        # from an aware timestamp so the payload format is unchanged.
        from datetime import timezone

        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        # NOTE(review): Python mode picks the month from local time while the
        # sidecar receives UTC - confirm both agree around month boundaries.
        _ = call_sidecar_action(
            "vault.save_current_month",
            payload={
                "password": password,
                "vaultDirectory": str(VAULT_DIR),
                "dataDirectory": str(DATA_DIR),
                "nowUtc": now_utc.isoformat() + "Z",
            },
        )
        return

    with _vault_io_lock:
        # Determine current month
        month_key = datetime.now().strftime("%Y-%m")

        # Collect files for the current month
        files_in_month: list[Path] = [
            file_path
            for file_path in DATA_DIR.glob("*.md")
            if file_path.stem.startswith(month_key)
        ]

        if not files_in_month:
            print(f"No files found for the current month ({month_key}) to save.")
            return

        current_fingerprint = _compute_month_fingerprint(files_in_month)
        cached_fingerprint = _month_fingerprint_cache.get(month_key)
        if cached_fingerprint == current_fingerprint:
            print(f"Skipping vault save for {month_key}; no file changes detected.")
            return

        # Same precondition rebuild_all_vaults establishes before saving:
        # the vault directory must exist for staging and for the vault file.
        VAULT_DIR.mkdir(parents=True, exist_ok=True)

        _save_month(password, month_key, files_in_month)
|
|
|
|
|
|
def initialize_vault(password: str):
    """Make sure the vault location exists; no vault files are written here.

    In Python mode this only creates VAULT_DIR. In C# hybrid mode the request
    is forwarded to the sidecar's ``vault.initialize`` action.

    Raises:
        ValueError: If ``password`` is empty.
    """
    if not password:
        raise ValueError("Password cannot be empty.")

    if not _using_csharp_hybrid():
        VAULT_DIR.mkdir(parents=True, exist_ok=True)
        print("Vault directory ensured to exist.")
        return

    sidecar_payload = {
        "password": password,
        "vaultDirectory": str(VAULT_DIR),
    }
    _ = call_sidecar_action("vault.initialize", payload=sidecar_payload)
|
|
|
|
|
|
def clear_data_directory():
    """Empty DATA_DIR; intended to be called on application shutdown.

    In Python mode the directory is removed via
    ``_clear_data_dir_with_retries``, recreated empty, and the month
    fingerprint cache is reset. In C# hybrid mode the sidecar's
    ``vault.clear_data_directory`` action does the clearing.
    """
    print("Clearing DATA_DIR...")

    if not _using_csharp_hybrid():
        with _vault_io_lock:
            # Everything under DATA_DIR goes, not just the .md entries.
            # NOTE(review): an earlier comment says the encrypted database
            # file also lives here - confirm against the database module.
            _clear_data_dir_with_retries()
            DATA_DIR.mkdir(parents=True, exist_ok=True)
            _month_fingerprint_cache.clear()
    else:
        _ = call_sidecar_action(
            "vault.clear_data_directory",
            payload={"dataDirectory": str(DATA_DIR)},
        )

    print("DATA_DIR cleared.")
|
|
|
|
|
|
def _clear_data_dir_with_retries(retries: int = 5, delay_seconds: float = 0.2) -> None:
    """Remove DATA_DIR recursively, retrying when the removal is refused.

    Args:
        retries: Maximum number of removal attempts. Values below 1 are
            treated as 1 so the directory is never silently left in place.
        delay_seconds: Pause between attempts.

    Raises:
        PermissionError: If the final attempt is still refused.
        FileNotFoundError: If the final attempt fails because something inside
            DATA_DIR disappeared while DATA_DIR itself still exists.
    """
    if not DATA_DIR.exists():
        return

    attempts = max(1, retries)
    for attempt in range(attempts):
        try:
            shutil.rmtree(DATA_DIR)
            return
        except (PermissionError, FileNotFoundError) as exc:
            # The directory vanishing underneath us is the outcome we wanted.
            if isinstance(exc, FileNotFoundError) and not DATA_DIR.exists():
                return
            if attempt == attempts - 1:
                raise
            time.sleep(delay_seconds)
|