2026-02-23 20:12:10 -06:00

154 lines
5.5 KiB
Python

import speech_recognition as sr
from typing import Protocol
import queue
import sys
from .config import (
MICROPHONE_DEVICE_INDEX,
FASTER_WHISPER_DEVICE,
FASTER_WHISPER_COMPUTE_TYPE,
)
# Windows fallback: pyaudiowpatch ships wheels for newer Python versions
# where PyAudio source builds can fail due to missing PortAudio headers.
try:
    import pyaudio  # type: ignore # noqa: F401
except Exception:
    # PyAudio is unavailable: try the drop-in replacement and register it
    # under the "pyaudio" name so later `import pyaudio` statements resolve
    # to it. This is best effort; if neither package can be imported the
    # module still loads and nothing is registered.
    try:
        import pyaudiowpatch as _pyaudio_patch  # type: ignore
    except Exception:
        pass
    else:
        sys.modules.setdefault("pyaudio", _pyaudio_patch)
class Stoppable(Protocol):
    """Structural type for a stop handle, such as the background listener's.

    Any callable that accepts an optional ``wait_for_stop`` flag (default
    ``True``) and returns ``None`` satisfies this protocol.
    """

    def __call__(self, wait_for_stop: bool = True) -> None:
        ...
# Stop handle for the active background listener: holds the callable returned
# by `recognizer.listen_in_background` while listening, and None otherwise.
background_listener_stop: Stoppable | None = None
def _recognize_local_whisper(
    recognizer: sr.Recognizer,
    audio: sr.AudioData,
    engine: str,
    whisper_model: str,
    faster_whisper_device: str | None = None,
    faster_whisper_compute_type: str | None = None,
) -> str:
    """Transcribe ``audio`` with a local Whisper-family recognizer.

    ``recognizer.recognize_faster_whisper`` is used only when ``engine`` is
    ``"faster-whisper"`` and the recognizer exposes that method; in every
    other case the call goes to ``recognizer.recognize_whisper``.

    Args:
        recognizer: Recognizer whose ``recognize_*`` methods are invoked.
        audio: Captured audio to transcribe.
        engine: Requested engine name.
        whisper_model: Model name forwarded as ``model=``.
        faster_whisper_device: Optional device override; blank or ``None``
            falls back to ``FASTER_WHISPER_DEVICE``.
        faster_whisper_compute_type: Optional compute-type override; blank or
            ``None`` falls back to ``FASTER_WHISPER_COMPUTE_TYPE``.

    Returns:
        Whatever the selected recognizer method returns.
    """
    # Older/newer SpeechRecognition builds may not expose the faster-whisper
    # wrapper, so anything that cannot use it takes the plain whisper path.
    can_use_faster = engine == "faster-whisper" and hasattr(
        recognizer, "recognize_faster_whisper"
    )
    if not can_use_faster:
        return recognizer.recognize_whisper(audio, model=whisper_model)

    # Explicit arguments are normalized; blank ones defer to the config value.
    device = (faster_whisper_device or "").strip().lower()
    if not device:
        device = FASTER_WHISPER_DEVICE
    compute_type = (faster_whisper_compute_type or "").strip().lower()
    if not compute_type:
        compute_type = FASTER_WHISPER_COMPUTE_TYPE

    # Only forward settings that actually override the backend defaults.
    overrides: dict[str, str] = {}
    if device != "auto":
        overrides["device"] = device
    if compute_type not in {"auto", "default"}:
        overrides["compute_type"] = compute_type

    return recognizer.recognize_faster_whisper(
        audio,
        model=whisper_model,
        init_options=overrides if overrides else None,
    )
def start_background_listening(
message_queue: queue.Queue[tuple[str, str]],
engine: str,
whisper_model: str,
faster_whisper_device: str | None = None,
faster_whisper_compute_type: str | None = None,
):
"""
Starts listening to the microphone in a background thread.
Puts status and result messages into the provided queue.
"""
global background_listener_stop
if background_listener_stop:
message_queue.put(("status", "Already listening."))
return
try:
recognizer = sr.Recognizer()
microphone = sr.Microphone(device_index=MICROPHONE_DEVICE_INDEX)
except (OSError, AttributeError) as e:
message_queue.put(("status", f"Error: No microphone found. ({e})"))
return
with microphone as source:
message_queue.put(("status", "Adjusting for ambient noise..."))
recognizer.adjust_for_ambient_noise(source)
message_queue.put(("status", "Listening..."))
def recognition_callback(recognizer: sr.Recognizer, audio: sr.AudioData) -> None:
message_queue.put(("status", "Processing..."))
try:
if engine == "google":
text = recognizer.recognize_google(audio)
elif engine in {"whisper", "faster-whisper"}:
# Use local Whisper-family inference for privacy.
# Models are downloaded automatically on first use.
if engine == "faster-whisper" and not hasattr(
recognizer, "recognize_faster_whisper"
):
message_queue.put(
(
"status",
"faster-whisper not exposed by this SpeechRecognition build; falling back to whisper.",
)
)
text = _recognize_local_whisper(
recognizer,
audio,
engine,
whisper_model,
faster_whisper_device=faster_whisper_device,
faster_whisper_compute_type=faster_whisper_compute_type,
)
else: # Default to the fast, offline, but less accurate sphinx
text = recognizer.recognize_sphinx(audio)
message_queue.put(
("result", f"{text} ")
) # Add space for continuous dictation
message_queue.put(("status", "Listening...")) # Ready for the next phrase
except sr.UnknownValueError:
message_queue.put(
("status", "Could not understand audio. Still listening...")
)
except sr.RequestError as e:
error_msg = f"API error: {e}"
if engine == "google":
error_msg += ". Check internet connection."
message_queue.put(("status", error_msg))
except Exception as e:
message_queue.put(
("status", f"An unexpected error occurred in speech recognition: {e}")
)
# `listen_in_background` returns a function to stop the background listener
stop_listening = recognizer.listen_in_background(microphone, recognition_callback)
background_listener_stop = stop_listening
def stop_background_listening():
    """Stop the background listener if one is running.

    The module-level handle is cleared *before* the stop callable is invoked,
    so a failure while stopping cannot leave the module stuck in the
    "already listening" state. Any exception from the stop callable still
    propagates to the caller. Calling this when nothing is running is a no-op.
    """
    global background_listener_stop

    stopper = background_listener_stop
    if stopper is None:
        return

    background_listener_stop = None
    # Request the stop without blocking on the listener to finish.
    stopper(wait_for_stop=False)