import sys
import re
from pathlib import Path

# Put the directory three levels above this file on the import path.
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))

from .models import JournalEntry, Fragment, ParsedSection, SECTION_TITLES

# Task-list item such as "- [ ] text" or "* [x] text".
# Group 1 is the mark ("x", "X" or a space); group 2 is the item text.
CHECKBOX_PATTERN = re.compile(r"^\s*[-*]\s*\[([xX ])\]\s*(.*)$")

# "Date: ..." or "**Date:** ..." anywhere in the entry; group 1 is the value.
_DATE_PATTERN = re.compile(r"(?:\*\*Date:\*\*|Date:)\s*(.+)")

# Heading written with two or more "#" characters; group 1 is the heading text.
_SECTION_HEADER_PATTERN = re.compile(r"^\#\#+\s*(.*)$")

# Fragment: a header line "!TYPE [@time] [#tag ...]" followed by the lines up
# to (but excluding) the next line that starts with "!word".
# Groups: 1 = type (including the "!"), 2 = time, 3 = tags, 4 = body lines.
# The "(?!\S)" after each tag forces a tag to end at whitespace. Without it a
# token like "#a#b#c" can be split in exponentially many ways, and a header
# that ultimately fails to match makes the regex engine try all of them.
_FRAGMENT_PATTERN = re.compile(
    r"^(!\w+)\s*((?:@\S+\s*)?)(?:\s*((?:#\S+(?!\S)\s*)*))?\s*\n"
    r"((?:(?!^!\w+\s*).*\n)*)",
    re.MULTILINE,
)


def parse_journal_file(file_path: str) -> JournalEntry:
    """Read a journal file as UTF-8 and parse it.

    Args:
        file_path: Path of the journal file. Its stem (file name without the
            final suffix) is used as the entry date when the text itself
            contains no date line.

    Returns:
        The parsed entry, as produced by ``parse_journal_content``.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    path = Path(file_path)
    return parse_journal_content(path.read_text(encoding="utf-8"), path.stem)


def _store_section(sections, title, body, checkboxes) -> None:
    """Record the section collected so far; do nothing if no section is open."""
    if title:
        sections[title] = ParsedSection(
            title=title, content=body, checkboxes=checkboxes
        )


def _parse_sections(content: str) -> dict[str, ParsedSection]:
    """Collect the lines and checkboxes that follow each recognised heading."""
    sections: dict[str, ParsedSection] = {}
    title = None  # Key from SECTION_TITLES for the open section, if any.
    body: list[str] = []
    checkboxes: dict[str, bool] = {}

    for line in content.splitlines():
        header = _SECTION_HEADER_PATTERN.match(line.strip())
        if header:
            # Any heading closes the section that was open before it.
            _store_section(sections, title, body, checkboxes)
            header_text = header.group(1).strip().lower()
            # A heading is recognised when it contains a known title,
            # ignoring case; the first matching title wins. Unrecognised
            # headings leave no section open, so their lines are skipped.
            title = next(
                (key for key in SECTION_TITLES if key.lower() in header_text),
                None,
            )
            # Fresh containers, so the section just stored is not mutated.
            body, checkboxes = [], {}
            continue  # The heading line itself is not part of the content.

        if not title:
            continue

        box = CHECKBOX_PATTERN.match(line)
        if box:
            checkboxes[box.group(2).strip()] = box.group(1).lower() == "x"
        # Every line of a recognised section is kept, checkbox or not.
        body.append(line)

    _store_section(sections, title, body, checkboxes)
    return sections


def _parse_fragments(content: str) -> list[Fragment]:
    """Extract every "!TYPE [@time] [#tag ...]" fragment from the text."""
    # The pattern only matches lines terminated by "\n". Terminate the last
    # line if needed so a fragment at the very end of the text keeps its
    # final line instead of losing it (or being missed altogether).
    text = content if content.endswith("\n") else content + "\n"

    fragments: list[Fragment] = []
    for match in _FRAGMENT_PATTERN.finditer(text):
        time_field = match.group(2)
        tag_field = (match.group(3) or "").strip()
        fragments.append(
            Fragment(
                type=match.group(1),
                description=match.group(4).strip(),
                time=time_field.strip().lstrip("@") if time_field else None,
                tags=[tag.lstrip("#") for tag in tag_field.split()],
            )
        )
    return fragments


def parse_journal_content(content: str, file_stem: str) -> JournalEntry:
    """Parse the raw text of a journal entry.

    Three things are extracted:

    * The date: the text after the first "Date:" or "**Date:**" marker,
      falling back to ``file_stem`` when there is none.
    * Sections: the lines under each heading of two or more "#" whose text
      contains one of ``SECTION_TITLES`` (case-insensitive), together with
      the checkboxes found on those lines. If a title occurs more than once,
      the last occurrence replaces the earlier ones.
    * Fragments: blocks that start with a "!TYPE [@time] [#tag ...]" header
      line and run until the next such header or the end of the text.

    NOTE(review): a header line that carries anything other than a time and
    tags (for example an inline description) is not recognised as a fragment;
    the description has to start on the following line. Confirm that this is
    the intended format.

    Args:
        content: Full text of the entry.
        file_stem: Fallback date, normally the file name without its suffix.

    Returns:
        A ``JournalEntry`` holding the date, the unmodified ``content``, the
        fragments and the sections keyed by their ``SECTION_TITLES`` entry.
    """
    date_match = _DATE_PATTERN.search(content)
    date = date_match.group(1).strip() if date_match else file_stem

    return JournalEntry(
        date=date,
        raw_content=content,
        fragments=_parse_fragments(content),
        sections=_parse_sections(content),
    )