import queue
import sys
from typing import Protocol

import speech_recognition as sr

from .config import (
    FASTER_WHISPER_COMPUTE_TYPE,
    FASTER_WHISPER_DEVICE,
    MICROPHONE_DEVICE_INDEX,
)

# Windows fallback: pyaudiowpatch ships wheels for newer Python versions
# where PyAudio source builds can fail due to missing PortAudio headers.
try:
    import pyaudio  # type: ignore  # noqa: F401
except Exception:
    try:
        import pyaudiowpatch as _pyaudio_patch  # type: ignore

        # Register the patch under the "pyaudio" name so a later
        # `import pyaudio` resolves to it.
        sys.modules.setdefault("pyaudio", _pyaudio_patch)
    except Exception:
        # Deliberate best-effort: with neither backend importable, the failure
        # is expected to surface when the microphone is constructed in
        # start_background_listening(), which reports it through the queue.
        pass


class Stoppable(Protocol):
    """Protocol for a callable that can be stopped, like the background listener."""

    def __call__(self, wait_for_stop: bool = True) -> None: ...


# This global variable will hold the background listening process handle
background_listener_stop: Stoppable | None = None


def _normalized_setting(value: str | None, fallback: str | None) -> str:
    """Return *value* stripped and lower-cased, or the normalised *fallback*.

    The fallback gets the same normalisation as the explicit value so the
    comparisons against "auto" / "default" behave the same for both sources.
    An empty or missing result is treated as "auto".
    """
    explicit = (value or "").strip().lower()
    if explicit:
        return explicit
    return (fallback or "").strip().lower() or "auto"


def _recognize_local_whisper(
    recognizer: sr.Recognizer,
    audio: sr.AudioData,
    engine: str,
    whisper_model: str,
    faster_whisper_device: str | None = None,
    faster_whisper_compute_type: str | None = None,
) -> str:
    """Transcribe *audio* with a local Whisper-family model.

    Args:
        recognizer: Recognizer whose ``recognize_*`` method is invoked.
        audio: The captured phrase to transcribe.
        engine: ``"faster-whisper"`` selects the faster-whisper wrapper when
            the recognizer exposes it; any other value uses ``recognize_whisper``.
        whisper_model: Model name forwarded as ``model=``.
        faster_whisper_device: Optional device override; blank or ``None``
            falls back to ``FASTER_WHISPER_DEVICE``.
        faster_whisper_compute_type: Optional compute-type override; blank or
            ``None`` falls back to ``FASTER_WHISPER_COMPUTE_TYPE``.

    Returns:
        Whatever the selected recognizer method returns for the audio.
    """
    # Older/newer SpeechRecognition builds may not expose this wrapper.
    faster_available = hasattr(recognizer, "recognize_faster_whisper")
    if engine != "faster-whisper" or not faster_available:
        return recognizer.recognize_whisper(audio, model=whisper_model)

    active_device = _normalized_setting(faster_whisper_device, FASTER_WHISPER_DEVICE)
    active_compute = _normalized_setting(
        faster_whisper_compute_type, FASTER_WHISPER_COMPUTE_TYPE
    )

    # Only forward settings the user actually pinned; "auto"/"default" mean
    # "let the backend decide", so they are left out of init_options.
    init_options: dict[str, str] = {}
    if active_device != "auto":
        init_options["device"] = active_device
    if active_compute not in {"auto", "default"}:
        init_options["compute_type"] = active_compute

    return recognizer.recognize_faster_whisper(
        audio,
        model=whisper_model,
        init_options=init_options or None,
    )


def start_background_listening(
    message_queue: queue.Queue[tuple[str, str]],
    engine: str,
    whisper_model: str,
    faster_whisper_device: str | None = None,
    faster_whisper_compute_type: str | None = None,
) -> None:
    """
    Starts listening to the microphone in a background thread.
    Puts status and result messages into the provided queue.

    Messages are ``(kind, text)`` tuples where ``kind`` is ``"status"`` or
    ``"result"``. If a listener is already registered, or the microphone
    cannot be created or opened, a status message is queued and the function
    returns without starting a listener.

    Args:
        message_queue: Queue receiving status and result tuples.
        engine: ``"google"``, ``"whisper"`` or ``"faster-whisper"``; any other
            value falls through to Sphinx.
        whisper_model: Model name used by the Whisper-family engines.
        faster_whisper_device: Optional faster-whisper device override.
        faster_whisper_compute_type: Optional faster-whisper compute-type override.
    """
    global background_listener_stop

    if background_listener_stop is not None:
        message_queue.put(("status", "Already listening."))
        return

    try:
        recognizer = sr.Recognizer()
        microphone = sr.Microphone(device_index=MICROPHONE_DEVICE_INDEX)
    except (OSError, AttributeError) as e:
        message_queue.put(("status", f"Error: No microphone found. ({e})"))
        return

    # Opening the input stream can fail even after the Microphone object was
    # created, so report that through the queue like every other failure
    # instead of letting it escape to the caller.
    try:
        with microphone as source:
            message_queue.put(("status", "Adjusting for ambient noise..."))
            recognizer.adjust_for_ambient_noise(source)
    except OSError as e:
        message_queue.put(("status", f"Error: Could not open microphone. ({e})"))
        return
    message_queue.put(("status", "Listening..."))

    def recognition_callback(rec: sr.Recognizer, audio: sr.AudioData) -> None:
        """Transcribe one captured phrase and queue the outcome."""
        message_queue.put(("status", "Processing..."))
        try:
            if engine == "google":
                text = rec.recognize_google(audio)
            elif engine in {"whisper", "faster-whisper"}:
                # Use local Whisper-family inference for privacy.
                # Models are downloaded automatically on first use.
                if engine == "faster-whisper" and not hasattr(
                    rec, "recognize_faster_whisper"
                ):
                    message_queue.put(
                        (
                            "status",
                            "faster-whisper not exposed by this SpeechRecognition build; falling back to whisper.",
                        )
                    )
                text = _recognize_local_whisper(
                    rec,
                    audio,
                    engine,
                    whisper_model,
                    faster_whisper_device=faster_whisper_device,
                    faster_whisper_compute_type=faster_whisper_compute_type,
                )
            else:
                # Default to the fast, offline, but less accurate sphinx
                text = rec.recognize_sphinx(audio)

            # Add space for continuous dictation
            message_queue.put(("result", f"{text} "))
            # Ready for the next phrase
            message_queue.put(("status", "Listening..."))
        except sr.UnknownValueError:
            message_queue.put(
                ("status", "Could not understand audio. Still listening...")
            )
        except sr.RequestError as e:
            error_msg = f"API error: {e}"
            if engine == "google":
                error_msg += ". Check internet connection."
            message_queue.put(("status", error_msg))
        except Exception as e:
            # Boundary of the listener thread: report rather than raise.
            message_queue.put(
                ("status", f"An unexpected error occurred in speech recognition: {e}")
            )

    # `listen_in_background` returns a function to stop the background listener
    background_listener_stop = recognizer.listen_in_background(
        microphone, recognition_callback
    )


def stop_background_listening() -> None:
    """Stops the background listener if it is running.

    The module-level handle is cleared before the stopper is called, so an
    exception raised by the stopper cannot leave a stale handle that would
    make every later start report "Already listening.". Such an exception
    still propagates to the caller.
    """
    global background_listener_stop

    stopper = background_listener_stop
    if stopper is None:
        return
    background_listener_stop = None
    stopper(wait_for_stop=False)