2026-02-21 18:35:20 -06:00

97 lines
3.9 KiB
Python

import sys
import re
from pathlib import Path
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
from .models import JournalEntry, Fragment, ParsedSection, SECTION_TITLES
# Matches a Markdown-style checkbox list item such as "- [x] text" or "* [ ] text",
# allowing leading whitespace before the bullet.
# Group 1 is the mark inside the brackets ("x", "X" or a single space);
# group 2 is the remaining text of the line after the closing bracket.
CHECKBOX_PATTERN = re.compile(r"^\s*[-*]\s*\[([xX ])\]\s*(.*)$")
def parse_journal_file(file_path: str) -> JournalEntry:
    """Read a journal file from disk and parse it into a ``JournalEntry``.

    Args:
        file_path: Location of the journal file; it is decoded as UTF-8.

    Returns:
        The result of ``parse_journal_content`` applied to the file's text,
        with the file name (minus its final suffix) passed as ``file_stem``.
    """
    journal_path = Path(file_path)
    text = journal_path.read_text(encoding="utf-8")
    return parse_journal_content(text, journal_path.stem)
# "Date:" optionally preceded by Markdown bold markers; group 1 is the rest of the line.
_DATE_PATTERN = re.compile(r"(?:\*\*Date:|Date:)\s*(.+)")

# A Markdown heading of level two or deeper; group 1 is the heading text.
_SECTION_HEADER_PATTERN = re.compile(r"^\#\#+\s*(.*)$")

# A fragment header line "!TYPE [@time] [#tag ...]" followed by its content lines.
# Groups: 1 = type (including the "!"), 2 = time token, 3 = tag tokens,
# 4 = every following newline-terminated line up to the next line starting with "!TYPE".
_FRAGMENT_PATTERN = re.compile(
    r"^(!\w+)\s*((?:@\S+\s*)?)(?:\s*((?:#\S+\s*)*))?\s*\n"
    + r"((?:(?!^!\w+\s*).*\n)*)",
    re.MULTILINE,
)


def parse_journal_content(content: str, file_stem: str) -> JournalEntry:
    """Parse the raw text of a journal entry.

    Three things are extracted from *content*:

    * the date, taken from the first ``Date:`` marker (or *file_stem* when
      there is none),
    * the sections introduced by ``##`` (or deeper) headings whose text
      contains one of ``SECTION_TITLES``,
    * the ``!TYPE`` fragments together with their optional time and tags.

    Args:
        content: Full text of the journal entry.
        file_stem: Fallback used as the date when *content* has no usable
            ``Date:`` marker.

    Returns:
        A ``JournalEntry`` holding the date, the unmodified *content*, the
        parsed fragments and the recognised sections keyed by title.
    """
    return JournalEntry(
        date=_extract_date(content, file_stem),
        raw_content=content,
        fragments=_parse_fragments(content),
        sections=_parse_sections(content),
    )


def _extract_date(content: str, file_stem: str) -> str:
    """Return the text following the first ``Date:`` marker, else *file_stem*."""
    found = _DATE_PATTERN.search(content)
    if found:
        # With "**Date:** 2024-01-01" or "**Date: 2024-01-01**" the capture
        # still carries the Markdown emphasis markers; remove them so only
        # the date text remains.
        cleaned = found.group(1).strip().strip("*").strip()
        if cleaned:
            return cleaned
    return file_stem


def _parse_sections(content: str) -> dict[str, ParsedSection]:
    """Group the lines of *content* under the recognised section headings."""
    sections: dict[str, ParsedSection] = {}
    title: str | None = None
    body: list[str] = []
    checkboxes: dict[str, bool] = {}

    for line in content.splitlines():
        header = _SECTION_HEADER_PATTERN.match(line.strip())
        if header:
            # A new heading closes whatever recognised section was open.
            if title:
                sections[title] = ParsedSection(
                    title=title, content=body, checkboxes=checkboxes
                )
            header_text = header.group(1).strip().lower()
            # The first configured title contained in the heading wins; a
            # heading that matches none leaves no section open, so the lines
            # below it are skipped.
            title = (
                next(
                    (key for key in SECTION_TITLES if key.lower() in header_text),
                    None,
                )
                or None
            )
            body = []
            checkboxes = {}
            continue  # The heading line itself is not part of the content.

        if not title:
            continue

        checkbox = CHECKBOX_PATTERN.match(line)
        if checkbox:
            label = checkbox.group(2).strip()
            checkboxes[label] = checkbox.group(1).strip().lower() == "x"
        # Checkbox lines are kept in the section content as well.
        body.append(line)

    # The final section has no following heading to close it.
    if title:
        sections[title] = ParsedSection(
            title=title, content=body, checkboxes=checkboxes
        )
    return sections


def _parse_fragments(content: str) -> list[Fragment]:
    """Collect every ``!TYPE [@time] [#tags]`` fragment found in *content*."""
    # The pattern only consumes newline-terminated lines, so without a final
    # newline the last description line (or a bare trailing "!TYPE" header)
    # would be missed. Search a newline-terminated copy instead.
    searchable = content if content.endswith("\n") else content + "\n"

    fragments: list[Fragment] = []
    for found in _FRAGMENT_PATTERN.finditer(searchable):
        time_token = found.group(2)
        tag_text = (found.group(3) or "").strip()
        fragments.append(
            Fragment(
                type=found.group(1),
                description=found.group(4).strip(),
                time=time_token.strip().lstrip("@") if time_token else None,
                tags=[tag.lstrip("#") for tag in tag_text.split()],
            )
        )
    return fragments