stan44 e0f298ba36 Add LyricFlow .NET backend API and Python bridge integration
- introduce `LyricFlow.Core.Backend` with shared DTOs, rhyme/spellcheck engines, and REST endpoints
- wire Python GUI/core to run and call the backend via new bridge/client modules
- add backend parity/integration tests and update packaging/ignore settings
2026-03-15 01:44:56 -05:00

456 lines
13 KiB
C#

using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using LyricFlow.Core.Dtos;
using LyricFlow.Core.Services;
namespace LyricFlow.Core.Engine;
public class SpellcheckEngine
{
private readonly PhoneticProcessor _processor;
private readonly WordNetLexicon _wordNet;
private Dictionary<char, List<string>>? _cmuByInitial;
// MARK: - Lifecycle
#region Lifecycle
/// <summary>
/// Creates a spellcheck engine over the supplied collaborators.
/// </summary>
/// <param name="processor">Provides <c>NormalizeWord</c> and the <c>Dictionary</c> whose keys are used as suggestion candidates.</param>
/// <param name="wordNet">Secondary lexicon consulted by <see cref="IsKnownWord"/> and by autocorrect ranking.</param>
public SpellcheckEngine(PhoneticProcessor processor, WordNetLexicon wordNet)
    => (_processor, _wordNet) = (processor, wordNet);
#endregion
// MARK: - Dictionary Index
#region Dictionary Index
/// <summary>
/// Lazily built index of the processor dictionary's non-empty keys, grouped by
/// their first character.
/// </summary>
/// <remarks>
/// The index is assembled in a local variable and stored in the backing field
/// only once it is complete. If enumeration throws, or another caller reads the
/// field while the build is in progress, a half-populated index is never
/// observed or cached. No lock is taken: simultaneous first accesses may each
/// build their own copy, and the last one stored wins.
/// </remarks>
private Dictionary<char, List<string>> CmuWordsByInitial
{
    get
    {
        if (_cmuByInitial is not null)
        {
            return _cmuByInitial;
        }

        var index = new Dictionary<char, List<string>>();
        foreach (var word in _processor.Dictionary.Keys)
        {
            if (string.IsNullOrEmpty(word))
            {
                continue;
            }

            char initial = word[0];
            // Single lookup per word instead of ContainsKey followed by two indexer calls.
            if (!index.TryGetValue(initial, out var bucket))
            {
                bucket = new List<string>();
                index[initial] = bucket;
            }

            bucket.Add(word);
        }

        _cmuByInitial = index;
        return index;
    }
}
#endregion
// MARK: - Suggestion Queries
#region Suggestion Queries
/// <summary>
/// Reports whether <paramref name="word"/>, after normalization, is present in
/// either the processor dictionary or the WordNet lexicon.
/// </summary>
/// <param name="word">Raw word to look up.</param>
/// <returns>
/// <c>true</c> when the normalized form is empty (nothing to flag) or is found
/// in one of the two lexicons; otherwise <c>false</c>.
/// </returns>
public bool IsKnownWord(string word)
{
    string key = _processor.NormalizeWord(word);

    // A word that normalizes to nothing is treated as known so it is never reported.
    if (string.IsNullOrEmpty(key))
    {
        return true;
    }

    if (_processor.Dictionary.ContainsKey(key))
    {
        return true;
    }

    return _wordNet.ContainsWord(key);
}
/// <summary>
/// Produces spelling suggestions for a word that is not recognized.
/// </summary>
/// <param name="word">Raw word to find replacements for.</param>
/// <param name="limit">Maximum number of suggestions to return.</param>
/// <returns>
/// An empty list when the word normalizes to nothing or is already known;
/// otherwise the best matches chosen by <c>GetCloseMatches</c> with a 0.75
/// similarity cutoff.
/// </returns>
public List<string> GetSpellingSuggestions(string word, int limit = 6)
{
    string target = _processor.NormalizeWord(word);
    if (string.IsNullOrEmpty(target) || IsKnownWord(target))
    {
        return new List<string>();
    }

    // Prefer dictionary words sharing the first character; fall back to every
    // dictionary key when no bucket exists for that character.
    List<string> pool = CmuWordsByInitial.TryGetValue(target[0], out var bucket)
        ? bucket
        : _processor.Dictionary.Keys.ToList();

    // Narrow to words within three characters of the target's length, unless
    // that would leave nothing to rank.
    List<string> nearLength = pool.FindAll(entry => Math.Abs(entry.Length - target.Length) <= 3);
    List<string> shortlist = nearLength.Count == 0 ? pool : nearLength;

    return GetCloseMatches(target, shortlist, limit, 0.75);
}
/// <summary>
/// Selects the candidates most similar to <paramref name="word"/>.
/// </summary>
/// <param name="word">Normalized word being corrected.</param>
/// <param name="possibilities">Candidate words to score.</param>
/// <param name="n">Maximum number of matches to return.</param>
/// <param name="cutoff">Minimum similarity ratio a candidate must reach to be kept.</param>
/// <returns>
/// Up to <paramref name="n"/> candidates ordered by: heuristic rank, edit
/// distance, LCS similarity (descending), length difference, shared prefix
/// (descending), shared suffix (descending), then ordinal text order.
/// </returns>
private List<string> GetCloseMatches(string word, List<string> possibilities, int n, double cutoff)
{
    var scored = new List<(int HeuristicRank, int Distance, double Similarity, int LengthDelta, int SharedPrefix, int SharedSuffix, string Match)>();
    foreach (var candidate in possibilities)
    {
        // The edit distance is the expensive part; compute it once and reuse it
        // for both the cutoff ratio and the ranking tuple.
        int distance = DamerauLevenshteinDistance(word, candidate);
        int totalLength = word.Length + candidate.Length;

        // Same formula as CalculateSimilarityRatio: 1.0 for two empty strings,
        // otherwise the share of combined length not consumed by edits.
        double ratio = totalLength == 0
            ? 1.0
            : (double)(totalLength - distance) / totalLength;

        if (ratio >= cutoff)
        {
            scored.Add((
                HeuristicRank(word, candidate),
                distance,
                SequenceSimilarity(word, candidate),
                Math.Abs(word.Length - candidate.Length),
                SharedPrefixLength(word, candidate),
                SharedSuffixLength(word, candidate),
                candidate
            ));
        }
    }

    return scored
        .OrderBy(item => item.HeuristicRank)
        .ThenBy(item => item.Distance)
        .ThenByDescending(item => item.Similarity)
        .ThenBy(item => item.LengthDelta)
        .ThenByDescending(item => item.SharedPrefix)
        .ThenByDescending(item => item.SharedSuffix)
        .ThenBy(item => item.Match, StringComparer.Ordinal)
        .Take(n)
        .Select(item => item.Match)
        .ToList();
}
/// <summary>
/// Converts the Damerau-Levenshtein distance between two strings into a ratio.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>
/// <c>(len(a) + len(b) - distance) / (len(a) + len(b))</c>, or 1.0 when both
/// strings are empty.
/// </returns>
private double CalculateSimilarityRatio(string a, string b)
{
    int edits = DamerauLevenshteinDistance(a, b);
    int combinedLength = a.Length + b.Length;

    return combinedLength == 0
        ? 1.0
        : (combinedLength - edits) / (double)combinedLength;
}
#endregion
// MARK: - Similarity Helpers
#region Similarity Helpers
/// <summary>
/// Computes the Damerau-Levenshtein distance between two strings: the minimum
/// number of insertions, deletions, substitutions and transpositions of two
/// adjacent characters needed to turn <paramref name="a"/> into
/// <paramref name="b"/>.
/// </summary>
/// <param name="a">Source string; <c>null</c> is treated as empty.</param>
/// <param name="b">Target string; <c>null</c> is treated as empty.</param>
/// <returns>The edit distance (0 when the strings are equal).</returns>
public static int DamerauLevenshteinDistance(string a, string b)
{
    // Treat null uniformly as empty. Previously (null, "x") worked but
    // ("", null) dereferenced the null argument.
    a ??= string.Empty;
    b ??= string.Empty;

    if (a == b) return 0;
    if (a.Length == 0) return b.Length;
    if (b.Length == 0) return a.Length;

    // For every character in either string: the last row of `a` (1-based) in
    // which that character has been seen so far; 0 means "not seen yet".
    var lastRowOf = new Dictionary<char, int>();
    foreach (var ch in a) lastRowOf[ch] = 0;
    foreach (var ch in b) lastRowOf[ch] = 0;

    // The table is offset by one in both dimensions: row/column 0 is a sentinel
    // border holding maxDistance so that transposition lookups with no prior
    // occurrence can never win the minimum.
    int maxDistance = a.Length + b.Length;
    int[,] d = new int[a.Length + 2, b.Length + 2];
    d[0, 0] = maxDistance;
    for (int i = 0; i <= a.Length; i++)
    {
        d[i + 1, 0] = maxDistance;
        d[i + 1, 1] = i;
    }
    for (int j = 0; j <= b.Length; j++)
    {
        d[0, j + 1] = maxDistance;
        d[1, j + 1] = j;
    }

    for (int i = 1; i <= a.Length; i++)
    {
        // Last column in this row where a[i - 1] matched a character of b.
        int lastMatchColumn = 0;
        for (int j = 1; j <= b.Length; j++)
        {
            int i1 = lastRowOf[b[j - 1]];
            int j1 = lastMatchColumn;
            int cost = 1;
            if (a[i - 1] == b[j - 1])
            {
                cost = 0;
                lastMatchColumn = j;
            }

            int substitution = d[i, j] + cost;
            int insertion = d[i + 1, j] + 1;
            int deletion = d[i, j + 1] + 1;
            // Transpose the characters at (i1, j1) and (i, j), paying for every
            // character skipped in between on either side.
            int transposition = d[i1, j1] + (i - i1 - 1) + 1 + (j - j1 - 1);

            d[i + 1, j + 1] = Math.Min(
                Math.Min(substitution, insertion),
                Math.Min(deletion, transposition));
        }
        lastRowOf[a[i - 1]] = i;
    }

    return d[a.Length + 1, b.Length + 1];
}
/// <summary>
/// Computes the Levenshtein distance between two strings: the minimum number
/// of single-character insertions, deletions and substitutions needed to turn
/// <paramref name="a"/> into <paramref name="b"/>.
/// </summary>
/// <param name="a">Source string; <c>null</c> is treated as empty.</param>
/// <param name="b">Target string; <c>null</c> is treated as empty.</param>
/// <returns>The edit distance (0 when the strings are equal).</returns>
public static int LevenshteinDistance(string a, string b)
{
    // Treat null uniformly as empty. Previously ("", null) dereferenced the
    // null argument while (null, "") did not.
    a ??= string.Empty;
    b ??= string.Empty;

    if (a == b) return 0;
    if (a.Length == 0) return b.Length;
    if (b.Length == 0) return a.Length;

    // Only the previous and current rows of the table are ever read, so two
    // buffers are swapped instead of allocating a new row per character of `a`.
    int[] previous = new int[b.Length + 1];
    int[] current = new int[b.Length + 1];
    for (int j = 0; j <= b.Length; j++)
    {
        previous[j] = j;
    }

    for (int i = 1; i <= a.Length; i++)
    {
        current[0] = i;
        for (int j = 1; j <= b.Length; j++)
        {
            int insertCost = current[j - 1] + 1;
            int deleteCost = previous[j] + 1;
            int replaceCost = previous[j - 1] + (a[i - 1] == b[j - 1] ? 0 : 1);
            current[j] = Math.Min(Math.Min(insertCost, deleteCost), replaceCost);
        }

        (previous, current) = (current, previous);
    }

    // After the final swap the last computed row lives in `previous`.
    return previous[b.Length];
}
/// <summary>
/// Counts how many leading characters two strings have in common.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>The length of the longest common prefix.</returns>
private static int SharedPrefixLength(string a, string b)
{
    int shorter = Math.Min(a.Length, b.Length);
    for (int index = 0; index < shorter; index++)
    {
        if (a[index] != b[index])
        {
            return index;
        }
    }

    // One string is a prefix of the other (or both are empty).
    return shorter;
}
/// <summary>
/// Counts how many trailing characters two strings have in common.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>The length of the longest common suffix.</returns>
private static int SharedSuffixLength(string a, string b)
{
    int matched = 0;

    // Walk both strings backwards in lockstep until either runs out or the
    // characters diverge.
    for (int ia = a.Length - 1, ib = b.Length - 1;
         ia >= 0 && ib >= 0 && a[ia] == b[ib];
         ia--, ib--)
    {
        matched++;
    }

    return matched;
}
/// <summary>
/// Similarity score based on the longest common subsequence of two strings.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>
/// <c>2 * LCS / (len(a) + len(b))</c>, or 1.0 when both strings are empty.
/// </returns>
private static double SequenceSimilarity(string a, string b)
{
    int common = LongestCommonSubsequenceLength(a, b);
    int combinedLength = a.Length + b.Length;

    return combinedLength == 0
        ? 1.0
        : (2.0 * common) / combinedLength;
}
/// <summary>
/// Computes the length of the longest common subsequence of two strings
/// (characters in the same relative order, not necessarily contiguous).
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>The subsequence length; 0 when either string is empty.</returns>
private static int LongestCommonSubsequenceLength(string a, string b)
{
    // Each cell depends only on the row above and the cell to its left, so the
    // table is kept as two rolling rows. Column 0 stays 0 in both rows.
    int[] above = new int[b.Length + 1];
    int[] row = new int[b.Length + 1];

    foreach (char fromA in a)
    {
        for (int col = 1; col <= b.Length; col++)
        {
            row[col] = fromA == b[col - 1]
                ? above[col - 1] + 1
                : Math.Max(above[col], row[col - 1]);
        }

        (above, row) = (row, above);
    }

    // After the last swap the most recently filled row is `above`.
    return above[b.Length];
}
/// <summary>
/// Classifies how <paramref name="candidate"/> relates to
/// <paramref name="source"/>; lower values sort first.
/// </summary>
/// <param name="source">The word as typed.</param>
/// <param name="candidate">A possible correction.</param>
/// <returns>
/// 0 when the candidate is the source with two adjacent characters swapped,
/// 1 when the candidate is the source with one letter doubled, otherwise 2.
/// </returns>
private static int HeuristicRank(string source, string candidate)
{
    if (IsAdjacentTransposition(source, candidate))
    {
        return 0;
    }

    return IsRepeatedLetterExpansion(source, candidate) ? 1 : 2;
}
/// <summary>
/// Determines whether <paramref name="candidate"/> equals
/// <paramref name="source"/> with exactly one pair of adjacent characters
/// swapped.
/// </summary>
/// <param name="source">The word as typed.</param>
/// <param name="candidate">A possible correction.</param>
/// <returns>
/// <c>true</c> only when the strings have equal length, first differ at some
/// position before the last character, the two characters at that position are
/// exchanged, and everything after the pair is identical.
/// </returns>
private static bool IsAdjacentTransposition(string source, string candidate)
{
    if (source.Length != candidate.Length)
    {
        return false;
    }

    // Locate the first position where the strings disagree, looking no further
    // than the second-to-last character (a swap needs a following neighbour).
    int lastPairStart = source.Length - 1;
    int first = 0;
    while (first < lastPairStart && source[first] == candidate[first])
    {
        first++;
    }

    if (first >= lastPairStart)
    {
        return false;
    }

    bool pairSwapped = source[first] == candidate[first + 1]
        && source[first + 1] == candidate[first];

    // Everything before `first` already matched; only the tail after the
    // swapped pair remains to be checked.
    return pairSwapped
        && string.CompareOrdinal(source, first + 2, candidate, first + 2, source.Length) == 0;
}
/// <summary>
/// Determines whether <paramref name="candidate"/> is
/// <paramref name="source"/> with one letter doubled (for example
/// "hapy" -> "happy").
/// </summary>
/// <param name="source">The word as typed.</param>
/// <param name="candidate">A possible correction.</param>
/// <returns>
/// <c>true</c> when the candidate is exactly one character longer and removing
/// one character of some adjacent identical pair yields the source.
/// </returns>
private static bool IsRepeatedLetterExpansion(string source, string candidate)
{
    return candidate.Length == source.Length + 1
        && Enumerable.Range(0, candidate.Length - 1)
            .Any(position =>
                candidate[position] == candidate[position + 1]
                && candidate.Remove(position, 1) == source);
}
#endregion
// MARK: - Autocorrect
#region Autocorrect
/// <summary>
/// Picks a single replacement for a misspelled word, or <c>null</c> when no
/// suggestion is close enough to apply automatically.
/// </summary>
/// <param name="word">Raw word to correct.</param>
/// <param name="minRatio">Minimum similarity ratio a suggestion must reach.</param>
/// <param name="maxEditDistance">Maximum Damerau-Levenshtein distance a suggestion may have.</param>
/// <returns>
/// <c>null</c> when the word normalizes to nothing, is shorter than three
/// characters, is already known, or has no qualifying suggestion. Otherwise,
/// in priority order: the best suggestion ending in "ing" when the input ends
/// in "ign"; the best suggestion of the same length as the input; the best
/// suggestion overall.
/// </returns>
public string? GetAutocorrectCandidate(string word, double minRatio = 0.75, int maxEditDistance = 2)
{
    var normalized = _processor.NormalizeWord(word);
    if (string.IsNullOrEmpty(normalized) || normalized.Length < 3 || IsKnownWord(normalized))
    {
        return null;
    }

    var suggestions = GetSpellingSuggestions(normalized, 3);
    if (suggestions.Count == 0)
    {
        return null;
    }

    var scored = new List<(double Ratio, int LexicalRank, int ApostrophePenalty, int LengthDelta, int Distance, string Word)>();
    foreach (var candidate in suggestions)
    {
        var ratio = CalculateSimilarityRatio(normalized, candidate);
        var distance = DamerauLevenshteinDistance(normalized, candidate);
        if (ratio < minRatio || distance > maxEditDistance)
        {
            continue;
        }

        scored.Add((
            ratio,
            _wordNet.ContainsWord(candidate) ? 1 : 0,
            candidate.Contains('\'') ? 1 : 0,
            Math.Abs(candidate.Length - normalized.Length),
            distance,
            candidate
        ));
    }

    // Rank once: WordNet words first, then no apostrophe, smallest length
    // difference, smallest edit distance, highest ratio. LINQ ordering is
    // stable, so filtering this list afterwards keeps the same relative order
    // and the first survivor of any filter is that filter's best entry.
    var ranked = scored
        .OrderByDescending(item => item.LexicalRank)
        .ThenBy(item => item.ApostrophePenalty)
        .ThenBy(item => item.LengthDelta)
        .ThenBy(item => item.Distance)
        .ThenByDescending(item => item.Ratio)
        .ToList();
    if (ranked.Count == 0)
    {
        return null;
    }

    // "-ign" is treated as a likely mistyped "-ing": prefer a suggestion with
    // that ending when one qualified.
    if (normalized.EndsWith("ign", StringComparison.Ordinal))
    {
        var ingWord = ranked
            .Where(item => item.Word.EndsWith("ing", StringComparison.Ordinal))
            .Select(item => item.Word)
            .FirstOrDefault();
        if (!string.IsNullOrWhiteSpace(ingWord))
        {
            return ingWord;
        }
    }

    // A same-length suggestion wins over the overall best, even when the
    // overall best ranks higher on the WordNet/apostrophe keys.
    var exactLengthWord = ranked
        .Where(item => item.LengthDelta == 0)
        .Select(item => item.Word)
        .FirstOrDefault();
    if (!string.IsNullOrWhiteSpace(exactLengthWord))
    {
        return exactLengthWord;
    }

    return ranked[0].Word;
}
#endregion
// MARK: - Text Analysis
#region Text Analysis
// Matches a bracketed tag such as "[chorus]" (non-greedy, within one line).
private static readonly Regex TagPattern = new(@"\[.*?\]", RegexOptions.Compiled);

// Matches a run of word characters delimited by word boundaries.
private static readonly Regex WordPattern = new(@"\b\w+\b", RegexOptions.Compiled);

/// <summary>
/// Scans <paramref name="text"/> line by line and reports every word that is
/// not recognized, together with spelling suggestions.
/// </summary>
/// <param name="text">Text to analyse; <c>null</c> or empty yields no issues.</param>
/// <param name="suggestionLimit">Maximum number of suggestions attached to each issue.</param>
/// <returns>
/// One <see cref="SpellingIssueDto"/> per unknown word occurrence, carrying the
/// word as written, its normalized form, the zero-based line index and the
/// suggestions. Lines whose trimmed text starts with '#', '@' or '&gt;' are
/// skipped, and bracketed tags are removed before words are extracted.
/// </returns>
public List<SpellingIssueDto> GetTextSpellingIssues(string text, int suggestionLimit = 6)
{
    var issues = new List<SpellingIssueDto>();
    if (string.IsNullOrEmpty(text))
    {
        return issues;
    }

    // The same misspelling often recurs in one text; the suggestion search
    // scans a large slice of the dictionary, so run it once per distinct word.
    var suggestionCache = new Dictionary<string, List<string>>(StringComparer.Ordinal);

    var lines = text.Split('\n');
    for (int i = 0; i < lines.Length; i++)
    {
        var line = lines[i].Trim();

        // Char overloads compare ordinally, independent of the current culture.
        if (line.StartsWith('#') || line.StartsWith('@') || line.StartsWith('>'))
        {
            continue;
        }

        var analysisText = TagPattern.Replace(line, "");

        // NOTE(review): \w+ splits contractions at the apostrophe ("doesn't"
        // becomes "doesn" and "t"). Whether that is intended depends on
        // NormalizeWord and the parity tests, which are not visible here.
        foreach (Match match in WordPattern.Matches(analysisText))
        {
            var rawWord = match.Value;
            var normalized = _processor.NormalizeWord(rawWord);
            if (string.IsNullOrEmpty(normalized)) continue;
            if (IsKnownWord(normalized)) continue;

            if (!suggestionCache.TryGetValue(normalized, out var suggestions))
            {
                suggestions = GetSpellingSuggestions(normalized, suggestionLimit);
                suggestionCache[normalized] = suggestions;
            }

            issues.Add(new SpellingIssueDto(
                rawWord,
                normalized,
                i,
                // Each issue gets its own list so callers mutating one result
                // cannot affect another.
                new List<string>(suggestions)
            ));
        }
    }

    return issues;
}
#endregion
}