journal/Journal.Core/Services/JournalParser.cs

176 lines
6.0 KiB
C#

using System.Text.RegularExpressions;
using Journal.Core.Models;
namespace Journal.Core.Services;
/// <summary>
/// Parses the raw text of a journal file into its date, its canonical sections and its
/// <c>!type</c> fragments.
/// </summary>
public static partial class JournalParser
{
    /// <summary>Line terminators recognised when splitting journal content into lines.</summary>
    private static readonly string[] LineSeparators = ["\r\n", "\n", "\r"];

    // Matches "**Date:** value", "**Date: value" or "Date: value" anywhere in the text; group 1 is the value.
    // NOTE: "\s*" may span a line break, so a marker with nothing after it on its own line
    // picks up the next non-blank line as its value.
    [GeneratedRegex(@"(?:\*\*Date:\*\*|\*\*Date:|Date:)\s*(.+)")]
    private static partial Regex DatePattern();

    // Matches a markdown header of level two or deeper ("##", "###", ...); group 1 is the header text.
    [GeneratedRegex(@"^\#\#+\s*(.*)$")]
    private static partial Regex SectionHeaderPattern();

    // Matches "- [ ] text" / "* [x] text"; group 1 is the mark (x, X or space), group 2 the text.
    [GeneratedRegex(@"^\s*[-*]\s*\[([xX ])\]\s*(.*)$")]
    private static partial Regex CheckboxPattern();

    // Matches a fragment header line: "!type", optionally one "@time" token, then any number of
    // "#tag" tokens and nothing else. Group 1 = "!type", group 2 = "@time", group 3 = the tag list.
    [GeneratedRegex(@"^(!\w+)\s*((?:@\S+\s*)?)(?:\s*((?:#\S+\s*)*))?\s*$")]
    private static partial Regex FragmentHeaderPattern();

    // Matches any line that starts with "!word". Such a line terminates the previous fragment's
    // description even when it is not a well-formed fragment header.
    [GeneratedRegex(@"^!\w+\s*")]
    private static partial Regex FragmentBoundaryPattern();

    /// <summary>
    /// Builds a <see cref="JournalEntry"/> from the full text of a journal file.
    /// </summary>
    /// <param name="content">Raw journal text; stored unchanged as the entry's raw content.</param>
    /// <param name="fileStem">Fallback date source, used when <paramref name="content"/> has no date marker.</param>
    /// <returns>The entry with its date, raw content, parsed sections and parsed fragments.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException"><paramref name="fileStem"/> is null, empty or whitespace.</exception>
    public static JournalEntry ParseJournalContent(string content, string fileStem)
    {
        ArgumentNullException.ThrowIfNull(content);

        return new JournalEntry(
            date: ExtractDate(content, fileStem),
            rawContent: content,
            sections: ParseSections(content),
            fragments: ParseFragments(content));
    }

    /// <summary>
    /// Returns the value of the first date marker found in <paramref name="content"/>, or the trimmed
    /// <paramref name="fileStem"/> when there is no marker or its value is blank.
    /// </summary>
    /// <param name="content">Raw journal text to search.</param>
    /// <param name="fileStem">Fallback value; must not be blank.</param>
    /// <returns>The trimmed date text. The value is returned as written; it is not parsed as a date.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentException"><paramref name="fileStem"/> is null, empty or whitespace.</exception>
    public static string ExtractDate(string content, string fileStem)
    {
        ArgumentNullException.ThrowIfNull(content);
        if (string.IsNullOrWhiteSpace(fileStem))
        {
            throw new ArgumentException("File stem is required", nameof(fileStem));
        }

        var match = DatePattern().Match(content);
        if (match.Success)
        {
            var captured = match.Groups[1].Value;

            // The regex "." only stops at '\n'. Cut at the first CR or LF so that content using
            // lone '\r' line endings (which the other parsers accept) yields a single-line date.
            var lineEnd = captured.IndexOfAny(['\r', '\n']);
            if (lineEnd >= 0)
            {
                captured = captured[..lineEnd];
            }

            var parsed = captured.Trim();
            if (parsed.Length > 0)
            {
                return parsed;
            }
        }

        return fileStem.Trim();
    }

    /// <summary>
    /// Collects the lines and checkboxes that follow each recognised section header.
    /// </summary>
    /// <remarks>
    /// A header is any line that, once trimmed, starts with two or more '#'. Headers whose text does
    /// not contain a canonical title end the current section and their lines are skipped until the
    /// next recognised header. When two headers resolve to the same canonical title, the later
    /// section replaces the earlier one. Checkbox lines are recorded both in the checkbox map
    /// (later duplicates of the same text win) and in the section's content lines.
    /// </remarks>
    /// <param name="content">Raw journal text.</param>
    /// <returns>Parsed sections keyed by canonical title.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is <see langword="null"/>.</exception>
    public static Dictionary<string, ParsedSection> ParseSections(string content)
    {
        ArgumentNullException.ThrowIfNull(content);

        var parsedSections = new Dictionary<string, ParsedSection>();
        string? currentTitle = null;
        List<string> currentContent = new();
        Dictionary<string, bool> currentCheckboxes = new();

        foreach (var line in SplitLines(content))
        {
            var headerMatch = SectionHeaderPattern().Match(line.Trim());
            if (headerMatch.Success)
            {
                FlushCurrentSection();

                // A null title means "unrecognised header": lines are ignored until the next header.
                currentTitle = FindCanonicalSectionTitle(headerMatch.Groups[1].Value.Trim());
                currentContent = new();
                currentCheckboxes = new();
                continue;
            }

            if (currentTitle is null)
            {
                continue;
            }

            var checkboxMatch = CheckboxPattern().Match(line);
            if (checkboxMatch.Success)
            {
                var isChecked = checkboxMatch.Groups[1].Value.Trim().Equals("x", StringComparison.OrdinalIgnoreCase);
                var checkboxText = checkboxMatch.Groups[2].Value.Trim();
                currentCheckboxes[checkboxText] = isChecked;
            }

            currentContent.Add(line);
        }

        FlushCurrentSection();
        return parsedSections;

        // Stores the section being accumulated, if any, under its canonical title.
        void FlushCurrentSection()
        {
            if (currentTitle is not null)
            {
                parsedSections[currentTitle] = new ParsedSection(currentTitle, currentContent, currentCheckboxes);
            }
        }
    }

    /// <summary>
    /// Extracts fragments: a header line of the form <c>!type [@time] [#tag ...]</c> followed by
    /// description lines that run until the next line starting with <c>!word</c> or the end of input.
    /// </summary>
    /// <remarks>
    /// Fragments with a blank description are dropped. The time token is parsed with the invariant
    /// culture so the result does not depend on the machine's regional settings; a token that does
    /// not parse leaves the fragment's time unset.
    /// </remarks>
    /// <param name="content">Raw journal text.</param>
    /// <returns>The fragments in the order they appear.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="content"/> is <see langword="null"/>.</exception>
    public static List<Fragment> ParseFragments(string content)
    {
        ArgumentNullException.ThrowIfNull(content);

        var fragments = new List<Fragment>();
        var lines = SplitLines(content);

        var index = 0;
        while (index < lines.Length)
        {
            var headerMatch = FragmentHeaderPattern().Match(lines[index]);
            index++;
            if (!headerMatch.Success)
            {
                continue;
            }

            var type = headerMatch.Groups[1].Value.Trim();
            var timeToken = headerMatch.Groups[2].Value.Trim().TrimStart('@');
            var tagsToken = headerMatch.Groups[3].Value.Trim();

            // Consume description lines; stop on the boundary line so the outer loop examines it next.
            var bodyStart = index;
            while (index < lines.Length && !FragmentBoundaryPattern().IsMatch(lines[index]))
            {
                index++;
            }

            var description = string.Join("\n", lines, bodyStart, index - bodyStart).Trim();
            if (description.Length == 0)
            {
                continue;
            }

            var fragment = new Fragment(type, description);

            if (!string.IsNullOrWhiteSpace(timeToken)
                && DateTimeOffset.TryParse(
                    timeToken,
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.None,
                    out var parsedTime))
            {
                fragment.Time = parsedTime;
            }

            if (!string.IsNullOrWhiteSpace(tagsToken))
            {
                // The header pattern allows any whitespace between tags, so split on all
                // whitespace (null separator), not just ' ', to keep tab-separated tags apart.
                fragment.Tags =
                [
                    .. tagsToken.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries)
                        .Where(t => t.StartsWith('#'))
                        .Select(t => t.TrimStart('#'))
                        .Where(t => !string.IsNullOrWhiteSpace(t))
                ];
            }

            fragments.Add(fragment);
        }

        return fragments;
    }

    /// <summary>Splits text into lines on CRLF, LF or CR, keeping empty lines.</summary>
    private static string[] SplitLines(string content) =>
        content.Split(LineSeparators, StringSplitOptions.None);

    /// <summary>
    /// Returns the first entry of <c>SectionTitles.Canonical</c> that occurs (case-insensitively)
    /// inside <paramref name="headerText"/>, or <see langword="null"/> when none does.
    /// </summary>
    private static string? FindCanonicalSectionTitle(string headerText)
    {
        foreach (var title in SectionTitles.Canonical)
        {
            if (headerText.Contains(title, StringComparison.OrdinalIgnoreCase))
            {
                return title;
            }
        }

        return null;
    }
}