97 lines
3.9 KiB
Python
97 lines
3.9 KiB
Python
import sys
|
|
import re
|
|
from pathlib import Path
|
|
|
|
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
|
|
from .models import JournalEntry, Fragment, ParsedSection, SECTION_TITLES
|
|
|
|
# Matches a list item carrying a checkbox, e.g. "- [x] text" or "* [ ] text",
# with optional leading whitespace.
# Group 1 is the mark inside the brackets ("x", "X" or a single space);
# group 2 is the remaining text of the line.
CHECKBOX_PATTERN = re.compile(r"^\s*[-*]\s*\[([xX ])\]\s*(.*)$")
|
|
|
|
|
|
def parse_journal_file(file_path: str) -> JournalEntry:
    """Read a journal file from disk and parse it.

    Args:
        file_path: Location of the journal file; it is decoded as UTF-8.

    Returns:
        The result of ``parse_journal_content`` applied to the file's text,
        with the file's stem (name without suffix) passed as ``file_stem``.
    """
    journal_path = Path(file_path)
    raw_text = journal_path.read_text(encoding="utf-8")
    return parse_journal_content(raw_text, journal_path.stem)
|
|
|
|
|
|
# Patterns are compiled once at import time instead of on every call / line.
_DATE_PATTERN = re.compile(r"(?:\*\*Date:\*\*|Date:)\s*(.+)")
_SECTION_HEADER_PATTERN = re.compile(r"^\#\#+\s*(.*)$")
# A fragment is a header line "!TYPE [@time] [#tag ...]" followed by zero or
# more description lines. Every line, including the header, must end in "\n"
# for this pattern to see it (see _parse_fragments for how that is ensured).
# NOTE(review): text after the tags on the header line itself prevents a
# match, so the description has to start on the following line - confirm this
# is the intended journal syntax.
_FRAGMENT_PATTERN = re.compile(
    r"^(!\w+)\s*((?:@\S+\s*)?)(?:\s*((?:#\S+\s*)*))?\s*\n"  # Type, optional time, optional tags, newline
    + r"((?:(?!^!\w+\s*).*\n)*)",  # Content lines (non-fragment start) until next fragment or end
    re.MULTILINE,
)


# Return annotations on the helpers below are quoted so they are never
# evaluated at import time, whatever Python version runs this module.
def _match_section_title(header_text: str) -> "str | None":
    """Return the first ``SECTION_TITLES`` entry contained in *header_text*.

    The comparison is a case-insensitive substring test; ``None`` is returned
    when no entry matches.
    """
    lowered = header_text.lower()
    for title_key in SECTION_TITLES:
        if title_key.lower() in lowered:
            return title_key
    return None


def _parse_sections(content: str) -> "dict[str, ParsedSection]":
    """Collect the recognized sections of *content*, keyed by section title.

    A section starts at a heading line of two or more ``#`` characters whose
    text matches a ``SECTION_TITLES`` entry and runs until the next such
    heading line. Lines under an unrecognized heading are discarded. If the
    same title is matched more than once, the later section replaces the
    earlier one.
    """
    sections: dict[str, ParsedSection] = {}
    title: str | None = None
    body: list[str] = []
    checkboxes: dict[str, bool] = {}

    for line in content.splitlines():
        header_match = _SECTION_HEADER_PATTERN.match(line.strip())
        if header_match:
            # A new heading closes whatever section was being collected.
            if title:
                sections[title] = ParsedSection(
                    title=title, content=body, checkboxes=checkboxes
                )
            title = _match_section_title(header_match.group(1).strip())
            # Fresh containers: the ones stored above must not be mutated.
            body = []
            checkboxes = {}
            continue  # The heading itself is not part of the section content.

        if title:
            checkbox_match = CHECKBOX_PATTERN.match(line)
            if checkbox_match:
                checkbox_text = checkbox_match.group(2).strip()
                checkboxes[checkbox_text] = checkbox_match.group(1).lower() == "x"
            # Checkbox lines are kept in the content as well.
            body.append(line)

    # The final section has no following heading to close it.
    if title:
        sections[title] = ParsedSection(
            title=title, content=body, checkboxes=checkboxes
        )
    return sections


def _parse_fragments(content: str) -> "list[Fragment]":
    """Extract every ``!TYPE [@time] [#tag ...]`` fragment from *content*.

    The description of a fragment is made of the lines after its header up to
    (not including) the next line that starts with ``!`` plus word characters,
    or the end of the text.
    """
    # The pattern only consumes newline-terminated lines. Without this, text
    # lacking a final newline would lose the last description line, or the
    # whole fragment when its header is the last line.
    text = content if content.endswith("\n") else content + "\n"

    fragments: list[Fragment] = []
    for match in _FRAGMENT_PATTERN.finditer(text):
        raw_time = match.group(2)
        time_str = raw_time.strip().lstrip("@") if raw_time else None
        # Group 3 is None when the optional tag part did not participate.
        tags = [tag.lstrip("#") for tag in (match.group(3) or "").split()]
        fragments.append(
            Fragment(
                type=match.group(1),
                description=match.group(4).strip(),
                time=time_str,
                tags=tags,
            )
        )
    return fragments


def parse_journal_content(content: str, file_stem: str) -> JournalEntry:
    """Parse the raw text content of a journal entry.

    Args:
        content: Full text of the journal entry.
        file_stem: Value used as the entry date when the text contains no
            ``**Date:** ...`` or ``Date: ...`` marker.

    Returns:
        A ``JournalEntry`` holding the date, the unmodified ``content`` as
        ``raw_content``, the fragments found anywhere in the text and the
        recognized sections keyed by title.
    """
    date_match = _DATE_PATTERN.search(content)
    date = date_match.group(1).strip() if date_match else file_stem

    return JournalEntry(
        date=date,
        raw_content=content,
        fragments=_parse_fragments(content),
        sections=_parse_sections(content),
    )
|