stan44 e0f298ba36 Add LyricFlow .NET backend API and Python bridge integration
- introduce `LyricFlow.Core.Backend` with shared DTOs, rhyme/spellcheck engines, and REST endpoints
- wire Python GUI/core to run and call the backend via new bridge/client modules
- add backend parity/integration tests and update packaging/ignore settings
2026-03-15 01:44:56 -05:00

70 lines
1.9 KiB
C#

using System.Text.RegularExpressions;
using System.IO;
using System.Collections.Generic;
using System.Linq;
namespace LyricFlow.Core.Engine;
/// <summary>
/// Looks up phoneme sequences for words using a CMU-style pronouncing
/// dictionary loaded from a text file.
/// </summary>
/// <remarks>
/// All casing and prefix/suffix checks are culture-invariant so that keys and
/// lookups do not depend on the host machine's current culture (for example,
/// Turkish casing rules would otherwise map "I" to a dotless "ı").
/// </remarks>
public class PhoneticProcessor
{
    /// <summary>Lower-cased word mapped to each pronunciation variation read from the file.</summary>
    private readonly Dictionary<string, List<List<string>>> _dictionary = new();

    /// <summary>
    /// Matches every character that is neither a letter a-z (case-insensitively)
    /// nor an apostrophe. CultureInvariant keeps the case folding independent of
    /// the current culture.
    /// </summary>
    private static readonly Regex WordCleanupRegex = new(
        @"[^a-z']",
        RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Creates a processor and, when a path is supplied, loads the dictionary from it.
    /// </summary>
    /// <param name="cmudictPath">
    /// Path to the dictionary file. When null, empty or whitespace, nothing is
    /// loaded and every lookup returns an empty result.
    /// </param>
    public PhoneticProcessor(string? cmudictPath)
    {
        if (!string.IsNullOrWhiteSpace(cmudictPath))
        {
            LoadCmuDict(cmudictPath);
        }
    }

    /// <summary>
    /// Reads the dictionary file line by line and appends each pronunciation to
    /// the entry for its word. A missing file is silently ignored.
    /// </summary>
    /// <param name="path">Path of the dictionary file to read.</param>
    private void LoadCmuDict(string path)
    {
        // Best effort: a missing file leaves the dictionary empty instead of throwing.
        if (!File.Exists(path))
        {
            return;
        }

        foreach (var line in File.ReadLines(path))
        {
            // Skip blank lines and ";;;" comment lines.
            if (string.IsNullOrWhiteSpace(line) || line.StartsWith(";;;", StringComparison.Ordinal))
            {
                continue;
            }

            var parts = line.Split(' ', StringSplitOptions.RemoveEmptyEntries);

            // A usable entry needs a word, a variation id and at least one phoneme.
            if (parts.Length < 3)
            {
                continue;
            }

            // Invariant casing keeps keys identical regardless of the current culture.
            var word = parts[0].ToLowerInvariant();

            // The format from NLTK has [WORD] [VARIATION_ID] [PH1] [PH2]...
            var phonemes = parts.Skip(2).ToList();

            if (!_dictionary.TryGetValue(word, out var variations))
            {
                variations = new List<List<string>>();
                _dictionary[word] = variations;
            }

            variations.Add(phonemes);
        }
    }

    /// <summary>
    /// Normalizes a word for lookup: lower-cases it, trims it, removes every
    /// character matched by the cleanup regex, and expands a trailing
    /// <c>in'</c> to <c>ing</c> (for example "runnin'" becomes "running").
    /// </summary>
    /// <param name="word">The raw word to normalize.</param>
    /// <returns>The normalized word; may be empty when nothing survives the cleanup.</returns>
    public string NormalizeWord(string word)
    {
        word = word.ToLowerInvariant().Trim();
        word = WordCleanupRegex.Replace(word, "");

        // Replace the trailing apostrophe of a dropped-g ending with "g".
        if (word.EndsWith("in'", StringComparison.Ordinal))
        {
            word = word[..^1] + "g";
        }

        return word;
    }

    /// <summary>
    /// Returns every pronunciation stored for the normalized form of <paramref name="word"/>.
    /// </summary>
    /// <param name="word">The word to look up; it is passed through <see cref="NormalizeWord"/> first.</param>
    /// <returns>
    /// The stored list of phoneme sequences (the same instance held by the
    /// dictionary), or a new empty list when the word is not present.
    /// </returns>
    public List<List<string>> GetPhonemes(string word)
    {
        var normalized = NormalizeWord(word);

        if (_dictionary.TryGetValue(normalized, out var phones))
        {
            return phones;
        }

        return new List<List<string>>();
    }

    /// <summary>
    /// The underlying word-to-pronunciations map. Keys are the lower-cased words
    /// as read from the file; they are not passed through <see cref="NormalizeWord"/>.
    /// </summary>
    public Dictionary<string, List<List<string>>> Dictionary => _dictionary;
}