# pyLyricFlow/src/lyricflow_core/engine/phonetics.py

import logging
import re
from functools import lru_cache

from nltk.corpus import cmudict

class PhoneticProcessor:
    """Look up pronunciations for words in the CMU Pronouncing Dictionary.

    Words are normalized before lookup (see ``normalize_word``). If the
    corpus cannot be loaded, the processor falls back to an empty dictionary
    and every lookup yields no pronunciations.

    Both public methods are memoized with ``lru_cache``; because they are
    instance methods the cache key includes ``self``, so each instance has
    its own entries and is kept alive while they remain cached.
    """

    # Everything except lowercase ASCII letters and the ASCII apostrophe is
    # dropped (this includes digits, whitespace and punctuation).
    _DISALLOWED_CHARS = re.compile(r"[^a-z']")

    # Typographic apostrophe look-alikes are folded to the ASCII apostrophe
    # so that "don\u2019t" and "runnin\u2019" are treated like "don't" and
    # "runnin'" instead of silently losing the apostrophe.
    _APOSTROPHE_VARIANTS = str.maketrans(
        {"\u2018": "'", "\u2019": "'", "\u02bc": "'"}
    )

    def __init__(self):
        # Mapping of normalized word -> list of pronunciations.
        self.dict = self._load_cmudict()

    @staticmethod
    def _load_cmudict():
        """Return the CMU dictionary mapping, or ``{}`` if it is unavailable.

        A missing corpus is tolerated (best effort), but a warning is logged
        so that empty lookup results can be traced back to their cause.
        """
        try:
            return cmudict.dict()
        except LookupError:
            logging.getLogger(__name__).warning(
                "CMU Pronouncing Dictionary corpus is not available; "
                "phoneme lookups will return no results."
            )
            return {}

    @lru_cache(maxsize=8192)
    def normalize_word(self, word: str) -> str:
        """Return *word* in the canonical form used for dictionary lookup.

        The word is lowercased, stripped of surrounding whitespace, has
        typographic apostrophes folded to ``'``, and loses every character
        other than ``a``-``z`` and ``'``. A trailing ``in'`` (dropped-g
        contraction, e.g. "runnin'") is expanded to ``ing``.
        """
        word = word.lower().strip().translate(self._APOSTROPHE_VARIANTS)
        word = self._DISALLOWED_CHARS.sub("", word)

        # Handle common rap contractions: "runnin'" -> "running".
        if word.endswith("in'"):
            word = word[:-1] + "g"

        return word

    @lru_cache(maxsize=8192)
    def get_phonemes(self, word: str):
        """Return all known pronunciations of *word*.

        The result is a tuple of pronunciations, each itself a tuple of
        phoneme symbols (tuples rather than lists so cached results cannot
        be mutated by callers). An empty tuple is returned when the
        normalized word is not in the dictionary.
        """
        normalized = self.normalize_word(word)
        return tuple(tuple(phones) for phones in self.dict.get(normalized, []))

# Shared module-level instance, created once at import time. Constructing it
# runs PhoneticProcessor.__init__, which loads the CMU dictionary.
processor = PhoneticProcessor()