diff --git a/Journal.Core/Services/Entries/HtmlSanitizer.cs b/Journal.Core/Services/Entries/HtmlSanitizer.cs
index 163c538..7b31c48 100644
--- a/Journal.Core/Services/Entries/HtmlSanitizer.cs
+++ b/Journal.Core/Services/Entries/HtmlSanitizer.cs
@@ -3,8 +3,43 @@ using System.Text.RegularExpressions;
namespace Journal.Core.Services.Entries;
-public static class HtmlSanitizer
+public static partial class HtmlSanitizer
{
+ [GeneratedRegex("<(script|style)\\b[^>]*>.*?</\\1>", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
+ private static partial Regex ScriptStyleRegex();
+
+ [GeneratedRegex("<br\\s*/?>", RegexOptions.IgnoreCase)]
+ private static partial Regex BrTagRegex();
+
+ [GeneratedRegex("</(p|div|h[1-6]|tr|table|ul|ol|blockquote)>", RegexOptions.IgnoreCase)]
+ private static partial Regex BlockEndTagRegex();
+
+ [GeneratedRegex("<li\\b[^>]*>", RegexOptions.IgnoreCase)]
+ private static partial Regex LiStartTagRegex();
+
+ [GeneratedRegex("</li>", RegexOptions.IgnoreCase)]
+ private static partial Regex LiEndTagRegex();
+
+ [GeneratedRegex("<(td|th)\\b[^>]*>", RegexOptions.IgnoreCase)]
+ private static partial Regex CellStartTagRegex();
+
+ [GeneratedRegex("</(td|th)>", RegexOptions.IgnoreCase)]
+ private static partial Regex CellEndTagRegex();
+
+ [GeneratedRegex("<hr\\b[^>]*>", RegexOptions.IgnoreCase)]
+ private static partial Regex HrTagRegex();
+
+ [GeneratedRegex("<[^>]+>", RegexOptions.Singleline)]
+ private static partial Regex AllTagsRegex();
+
+ [GeneratedRegex("[ \\t]{2,}")]
+ private static partial Regex MultipleSpacesRegex();
+
+ [GeneratedRegex("\n{3,}")]
+ private static partial Regex MultipleNewlinesRegex();
+
+ [GeneratedRegex("</?[a-z][^>]*>")]
+ private static partial Regex HtmlTagCountRegex();
public static string StripRichHtml(string content)
{
if (string.IsNullOrWhiteSpace(content))
@@ -13,21 +48,21 @@ public static class HtmlSanitizer
return content;
var text = content.Replace("\r\n", "\n").Replace("\r", "\n");
- text = Regex.Replace(text, "<(script|style)\\b[^>]*>.*?</\\1>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
- text = Regex.Replace(text, "<br\\s*/?>", "\n", RegexOptions.IgnoreCase);
- text = Regex.Replace(text, "</(p|div|h[1-6]|tr|table|ul|ol|blockquote)>", "\n", RegexOptions.IgnoreCase);
- text = Regex.Replace(text, "<li\\b[^>]*>", "\n- ", RegexOptions.IgnoreCase);
- text = Regex.Replace(text, "</li>", "\n", RegexOptions.IgnoreCase);
- text = Regex.Replace(text, "<(td|th)\\b[^>]*>", " | ", RegexOptions.IgnoreCase);
- text = Regex.Replace(text, "</(td|th)>", " ", RegexOptions.IgnoreCase);
- text = Regex.Replace(text, "<hr\\b[^>]*>", "\n---\n", RegexOptions.IgnoreCase);
- text = Regex.Replace(text, "<[^>]+>", "", RegexOptions.Singleline);
- text = Regex.Replace(text, "<[^>]+>", "", RegexOptions.Singleline);
+ text = ScriptStyleRegex().Replace(text, "");
+ text = BrTagRegex().Replace(text, "\n");
+ text = BlockEndTagRegex().Replace(text, "\n");
+ text = LiStartTagRegex().Replace(text, "\n- ");
+ text = LiEndTagRegex().Replace(text, "\n");
+ text = CellStartTagRegex().Replace(text, " | ");
+ text = CellEndTagRegex().Replace(text, " ");
+ text = HrTagRegex().Replace(text, "\n---\n");
+ text = AllTagsRegex().Replace(text, "");
text = WebUtility.HtmlDecode(text)
.Replace('\u00a0', ' ')
.Replace("\u200b", "", StringComparison.Ordinal);
text = string.Join("\n", text.Split('\n').Select(line => line.TrimEnd()));
- text = Regex.Replace(text, "[ \\t]{2,}", " ");
- text = Regex.Replace(text, "\n{3,}", "\n\n").Trim();
+ text = MultipleSpacesRegex().Replace(text, " ");
+ text = MultipleNewlinesRegex().Replace(text, "\n\n").Trim();
return string.IsNullOrEmpty(text) ? content : text;
}
@@ -41,6 +76,6 @@ public static class HtmlSanitizer
];
if (markers.Any(marker => lowered.Contains(marker, StringComparison.Ordinal)))
return true;
- return Regex.Matches(lowered, "</?[a-z][^>]*>").Count >= 8;
+ return HtmlTagCountRegex().Matches(lowered).Count >= 8;
}
}
diff --git a/REFACTORING_SUMMARY.md b/REFACTORING_SUMMARY.md
index bda1ebd..a52d005 100644
--- a/REFACTORING_SUMMARY.md
+++ b/REFACTORING_SUMMARY.md
@@ -9,7 +9,7 @@
Removed all business logic, HTML processing, logging implementation, and private record types. `Entry` now only parses the incoming JSON command, routes to the correct service, and returns the `{ok, data}` / `{ok: false, error}` envelope.
### 2. Extracted `HtmlSanitizer` (new file)
-`StripRichHtml` and `LooksLikeRichHtml` moved from `Entry.cs` to `Services/HtmlSanitizer.cs` as a static utility class.
+`StripRichHtml` and `LooksLikeRichHtml` moved from `Entry.cs` to `Services/Entries/HtmlSanitizer.cs` as a static utility class. Refactored to use `[GeneratedRegex]` attributes for compile-time regex generation, improving performance by eliminating runtime regex compilation overhead.
### 3. Extracted `CommandLogger` (new file)
`LogStart`, `LogSuccess`, `LogFailure`, `EmitLog`, `ShouldLog`, and `LogLevelRank` moved from `Entry.cs` to `Services/CommandLogger.cs`. Entry now receives this as a dependency.