2026-02-21 18:35:20 -06:00

311 lines
11 KiB
Python

import sys
import hashlib
import threading
import time
from cryptography.exceptions import InvalidTag
import shutil
import zipfile
from datetime import datetime
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
from .parser import parse_journal_content, parse_journal_file
from .database import get_db_connection, hydrate_database
from .encryption import encrypt_data, decrypt_data
from .config import (
DATA_DIR,
VAULT_DIR,
MONTHLY_VAULT_FORMAT,
)
# In-memory map of month key ("YYYY-MM") -> fingerprint of the files that were
# last written to that month's vault. save_current_month_vault() consults it to
# skip a save when nothing changed; it is emptied whenever DATA_DIR is reloaded
# or cleared.
_month_fingerprint_cache: dict[str, str] = {}
# Re-entrant lock held by load_all_vaults(), rebuild_all_vaults(),
# save_current_month_vault() and clear_data_directory() while they work on
# DATA_DIR / VAULT_DIR.
_vault_io_lock = threading.RLock()
# --- Monthly Vault Management ---
def _get_monthly_vault_path(date: datetime) -> Path:
    """Build the vault file location for the month containing ``date``.

    The file name comes from formatting ``date`` with
    ``MONTHLY_VAULT_FORMAT``; the file sits directly inside ``VAULT_DIR``.
    """
    vault_name = date.strftime(MONTHLY_VAULT_FORMAT)
    return VAULT_DIR / vault_name
def _create_monthly_archive(month_path: Path, archive_path: Path):
    """Zip every direct child of ``month_path`` into ``archive_path``.

    Each entry is stored under its bare name (no directory prefix) and the
    archive is written with DEFLATE compression.
    """
    archive = zipfile.ZipFile(archive_path, mode="w", compression=zipfile.ZIP_DEFLATED)
    with archive:
        for entry in month_path.iterdir():
            # arcname drops the staging directory from the stored path.
            archive.write(entry, arcname=entry.name)
def _extract_monthly_archive(archive_path: Path, extract_to_path: Path):
    """Unpack every member of the zip at ``archive_path`` into ``extract_to_path``."""
    with zipfile.ZipFile(archive_path, mode="r") as archive:
        archive.extractall(path=extract_to_path)
# --- Public API for Journal Storage ---
def _save_month(password: str, month_key: str, files_in_month: list[Path]):
    """Encrypt one month's journal files into that month's vault file.

    The files are zipped (stored by bare file name, DEFLATE-compressed),
    the archive is encrypted with ``encrypt_data`` and the result replaces
    the month's vault. On success the month's entry in
    ``_month_fingerprint_cache`` is updated.

    Args:
        password: Passed to ``encrypt_data``.
        month_key: Month identifier in ``YYYY-MM`` form.
        files_in_month: Journal files to store in the vault.

    Raises:
        ValueError: If ``month_key`` is not in ``YYYY-MM`` form. Any failure
            while archiving, encrypting or writing is reported on stdout and
            NOT raised; the previous vault file is left untouched in that case.
    """
    import io  # local import: only this function needs an in-memory buffer

    # We need a datetime object to generate the vault path, strptime is perfect.
    month_as_date = datetime.strptime(month_key, "%Y-%m")
    monthly_vault_path = _get_monthly_vault_path(month_as_date)
    # Sibling scratch file; its ".tmp" suffix keeps it out of "*.vault" globs.
    partial_vault_path = monthly_vault_path.with_name(monthly_vault_path.name + ".tmp")
    try:
        VAULT_DIR.mkdir(parents=True, exist_ok=True)
        # Fingerprint BEFORE reading the files: if one changes while we are
        # archiving, the cached value will not match next time and the month
        # gets saved again instead of being skipped.
        fingerprint = _compute_month_fingerprint(files_in_month)
        # Build the archive in memory so no decrypted copy is staged on disk
        # next to the vaults and nothing stale can leak into the archive.
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
            for file_path in files_in_month:
                zipf.write(file_path, arcname=file_path.name)  # Store only filename in zip
        encrypted_vault_content = encrypt_data(zip_buffer.getvalue(), password)
        # Write to the scratch file and rename it over the vault, so a failed
        # or interrupted write never truncates the existing good vault.
        with open(partial_vault_path, "wb") as f_out:
            _ = f_out.write(encrypted_vault_content)
        _ = partial_vault_path.replace(monthly_vault_path)
        _month_fingerprint_cache[month_key] = fingerprint
        print(f"Successfully saved {monthly_vault_path.name}")
    except Exception as e:
        # Best-effort by design: callers are not expected to handle save errors.
        print(f"Error saving month {month_key}: {e}")
    finally:
        # Only present if the rename did not happen.
        partial_vault_path.unlink(missing_ok=True)
def _compute_month_fingerprint(files: list[Path]) -> str:
    """Return a SHA-256 hex digest summarising the state of ``files``.

    For every file (visited in file-name order, so the input order does not
    matter) the digest covers its name, modification time in nanoseconds and
    size. File contents are not read. Files that cannot be stat'ed are left
    out of the digest instead of raising.

    Args:
        files: Paths to include in the fingerprint.

    Returns:
        A 64-character hexadecimal digest; the digest of the empty input when
        ``files`` is empty or none of the files can be stat'ed.
    """
    fingerprint = hashlib.sha256()
    for file_path in sorted(files, key=lambda p: p.name):
        try:
            stat = file_path.stat()
        except OSError:
            # Vanished or unreadable file: skip it rather than fail the save.
            continue
        record = (file_path.name, str(stat.st_mtime_ns), str(stat.st_size))
        for field in record:
            fingerprint.update(field.encode("utf-8"))
            # NUL-terminate every field (file names cannot contain NUL) so
            # adjacent fields cannot run together: without it, name "x1" +
            # mtime "2..." hashes the same as name "x" + mtime "12...".
            fingerprint.update(b"\0")
    return fingerprint.hexdigest()
def get_today_filename() -> Path:
    """Return ``DATA_DIR/<YYYY-MM-DD>.md`` for the current local date."""
    today_stamp = datetime.now().strftime("%Y-%m-%d")
    return DATA_DIR / f"{today_stamp}.md"
def save_entry_content(
content: str, file_path: Path | None = None, mode: str = "Daily"
):
target_file = file_path or get_today_filename()
target_file.parent.mkdir(parents=True, exist_ok=True)
if mode == "Fragment":
print(f"Appending fragment to {target_file.name}...")
with open(target_file, "a", encoding="utf-8") as f:
# Ensure there's a newline before the new content
_ = f.write("\n\n" + content.strip())
return
# For Daily, Deep, etc., perform a merge
if target_file.exists():
print(f"Merging content into existing file: {target_file.name}")
existing_entry = parse_journal_file(str(target_file))
new_entry_data = parse_journal_content(content, target_file.stem)
existing_entry.merge_with(new_entry_data)
final_content = existing_entry.to_markdown()
else:
print(f"Creating new entry: {target_file.name}")
final_content = content
_ = target_file.write_text(final_content, encoding="utf-8")
def _load_single_vault(vault_file: Path, password: str) -> None:
    """Decrypt one vault file and unpack its archive into ``DATA_DIR``.

    Propagates whatever the read, decryption or extraction step raises; the
    caller treats ``InvalidTag`` as "wrong password for this file".
    """
    with open(vault_file, "rb") as f_in:
        encrypted_data = f_in.read()
    decrypted_zip_content = decrypt_data(encrypted_data, password)
    # Write decrypted content to a temporary zip file
    temp_zip_path = VAULT_DIR / f"temp_{vault_file.name}.zip"
    try:
        with open(temp_zip_path, "wb") as f_out:
            _ = f_out.write(decrypted_zip_content)
        _extract_monthly_archive(temp_zip_path, DATA_DIR)
    finally:
        # The temp file holds decrypted data: remove it even when the write
        # or the extraction fails.
        temp_zip_path.unlink(missing_ok=True)


def load_all_vaults(password: str) -> bool:
    """Decrypt and extract all monthly vaults into ``DATA_DIR``.

    ``DATA_DIR`` is wiped and recreated first and the fingerprint cache is
    reset. Every ``*.vault`` file in ``VAULT_DIR`` is then decrypted and
    unpacked (a legacy ``_init_vault.vault`` placeholder is deleted instead).
    Finally, if any ``*.md`` entries are present, they are parsed and loaded
    into the database.

    Args:
        password: Used for decryption and for opening the database.

    Returns:
        True when there was nothing to load, or when loading and database
        hydration succeeded (individual vaults may still have failed; those
        are reported on stdout). False when vault files were present but
        none of them could be loaded, or when database hydration failed.

    Raises:
        ValueError: If ``password`` is empty.
    """
    if not password:
        raise ValueError("Password cannot be empty.")
    with _vault_io_lock:
        _month_fingerprint_cache.clear()
        # Clear DATA_DIR first
        _clear_data_dir_with_retries()
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        if not VAULT_DIR.exists() or not any(VAULT_DIR.iterdir()):
            print("Vault directory is empty or does not exist. Assuming new vault.")
            return True  # No vaults to load, so it's a success (new vault)

        vaults_attempted = 0
        decryption_successful = False
        # sorted() snapshots the listing (the loop may delete a file) and
        # makes the load order stable.
        for vault_file in sorted(VAULT_DIR.glob("*.vault")):
            if vault_file.name == "_init_vault.vault":
                print(f"Deleting old dummy vault file: {vault_file.name}")
                vault_file.unlink()
                continue
            vaults_attempted += 1
            try:
                _load_single_vault(vault_file, password)
                decryption_successful = True
                print(f"Successfully loaded {vault_file.name}")
                print(
                    f"Contents of DATA_DIR after loading {vault_file.name}: {list(DATA_DIR.iterdir())}"
                )
            except InvalidTag:
                print(
                    f"Warning: Could not decrypt '{vault_file.name}'. Invalid password for this file."
                )
                # A single failing file does not mark the whole load as failed.
            except Exception as e:
                print(f"Error loading vault '{vault_file.name}': {e}")
                # Any other error is not necessarily a password issue.

        # Judge the password only on real vault files that were tried; stray
        # non-vault files in VAULT_DIR must not turn "no vaults" into a
        # wrong-password failure.
        if vaults_attempted and not decryption_successful:
            print("Error: No vault files could be decrypted with the provided password.")
            return False

        # --- Database Hydration ---
        # After successfully decrypting files, hydrate the live, encrypted database.
        conn = None
        try:
            all_entries = [parse_journal_file(str(f)) for f in DATA_DIR.glob("*.md")]
            if all_entries:
                conn = get_db_connection(password)
                hydrate_database(conn, all_entries)
        except Exception as e:
            print(f"Fatal error during database hydration: {e}")
            return False  # Treat DB hydration failure as a critical error
        finally:
            if conn is not None:
                conn.close()
        return True
def rebuild_all_vaults(password: str):
    """Re-save every monthly vault from the ``*.md`` files in ``DATA_DIR``.

    Files whose stem parses as ``YYYY-MM-DD`` are grouped by month and each
    group is handed to ``_save_month``; other files are reported and skipped.
    This touches every month, so it is the slow, thorough counterpart of
    ``save_current_month_vault``.

    Raises:
        ValueError: If ``password`` is empty.
    """
    print("rebuild_all_vaults called.")
    if not password:
        raise ValueError("Password cannot be empty.")
    with _vault_io_lock:
        # month key -> files belonging to that month
        monthly_files: dict[str, list[Path]] = {}
        for file_path in DATA_DIR.glob("*.md"):
            try:
                month_key = datetime.strptime(file_path.stem, "%Y-%m-%d").strftime("%Y-%m")
            except ValueError:
                # Stem is not a YYYY-MM-DD date, so this is not a journal entry.
                print(f"Skipping non-journal file in DATA_DIR: {file_path.name}")
                continue
            monthly_files.setdefault(month_key, []).append(file_path)

        # The vault directory must exist before any month is written.
        VAULT_DIR.mkdir(parents=True, exist_ok=True)
        for month_key, files_in_month in monthly_files.items():
            _save_month(password, month_key, files_in_month)
def save_current_month_vault(password: str):
    """Save only the current month's vault, and only if its files changed.

    Collects the ``*.md`` files in ``DATA_DIR`` whose stem starts with the
    current ``YYYY-MM``, compares their fingerprint with the cached one and
    calls ``_save_month`` when they differ. Returns without saving when the
    month has no files or the fingerprint is unchanged.

    Raises:
        ValueError: If ``password`` is empty.
    """
    print("save_current_month_vault called.")
    if not password:
        raise ValueError("Password cannot be empty.")
    with _vault_io_lock:
        month_key = datetime.now().strftime("%Y-%m")
        files_in_month: list[Path] = [
            candidate
            for candidate in DATA_DIR.glob("*.md")
            if candidate.stem.startswith(month_key)
        ]
        if not files_in_month:
            print(f"No files found for the current month ({month_key}) to save.")
            return

        # A missing cache entry (None) never equals a digest, so the first
        # save of a session always goes through.
        current_fingerprint = _compute_month_fingerprint(files_in_month)
        if _month_fingerprint_cache.get(month_key) == current_fingerprint:
            print(f"Skipping vault save for {month_key}; no file changes detected.")
            return
        _save_month(password, month_key, files_in_month)
def initialize_vault(password: str):
    """Make sure ``VAULT_DIR`` exists; no vault file is written here.

    Vault files only appear once a save operation runs.

    Raises:
        ValueError: If ``password`` is empty.
    """
    if not password:
        raise ValueError("Password cannot be empty.")
    VAULT_DIR.mkdir(exist_ok=True, parents=True)
    print("Vault directory ensured to exist.")
def clear_data_directory():
    """Delete everything in ``DATA_DIR`` and leave an empty directory behind.

    Intended for application shutdown. The fingerprint cache is reset as
    well, since it describes files that no longer exist.
    """
    print("Clearing DATA_DIR...")
    with _vault_io_lock:
        # Removing the whole tree also removes the database file that is
        # kept in DATA_DIR next to the decrypted .md entries.
        _clear_data_dir_with_retries()
        DATA_DIR.mkdir(exist_ok=True, parents=True)
        _month_fingerprint_cache.clear()
    print("DATA_DIR cleared.")
def _clear_data_dir_with_retries(retries: int = 5, delay_seconds: float = 0.2) -> None:
    """Remove the ``DATA_DIR`` tree, retrying when a ``PermissionError`` occurs.

    Does nothing if ``DATA_DIR`` does not exist. Up to ``retries`` removal
    attempts are made, sleeping ``delay_seconds`` between them; the
    ``PermissionError`` from the final attempt is re-raised.
    """
    if not DATA_DIR.exists():
        return
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            shutil.rmtree(DATA_DIR)
        except PermissionError:
            if attempt == final_attempt:
                raise
            # Give whoever holds the file a moment to release it.
            time.sleep(delay_seconds)
        else:
            return