83 lines
3.1 KiB
Python
83 lines
3.1 KiB
Python
import speech_recognition as sr
|
|
from typing import Protocol
|
|
import queue
|
|
from .config import MICROPHONE_DEVICE_INDEX
|
|
|
|
|
|
class Stoppable(Protocol):
    """Structural type for a stop handle, such as the one the background listener provides.

    Any callable that accepts an optional ``wait_for_stop`` flag and returns
    nothing satisfies this protocol.
    """

    def __call__(self, wait_for_stop: bool = True) -> None:
        """Invoke the stop handle; ``wait_for_stop`` defaults to ``True``."""
        ...
|
|
|
|
|
|
# Stop handle for the currently running background listener.
# start_background_listening() stores the callable returned by
# `Recognizer.listen_in_background` here; stop_background_listening() calls it
# and resets this back to None. None means no listener is active.
background_listener_stop: Stoppable | None = None
|
|
|
|
|
|
def start_background_listening(
    message_queue: queue.Queue[tuple[str, str]], engine: str, whisper_model: str
) -> None:
    """Start listening to the microphone in a background thread.

    Progress is reported through ``message_queue`` as ``("status", text)``
    tuples and each recognized phrase as a ``("result", text)`` tuple. If a
    listener is already active, or the microphone cannot be set up or opened,
    a status message is queued and the function returns without starting a
    listener.

    Args:
        message_queue: Queue that receives ``(kind, text)`` tuples, where
            ``kind`` is ``"status"`` or ``"result"``.
        engine: ``"google"`` or ``"whisper"``; any other value falls back to
            Sphinx.
        whisper_model: Model name forwarded to ``recognize_whisper``; only
            used when ``engine == "whisper"``.
    """
    global background_listener_stop
    if background_listener_stop is not None:
        message_queue.put(("status", "Already listening."))
        return

    try:
        recognizer = sr.Recognizer()
        microphone = sr.Microphone(device_index=MICROPHONE_DEVICE_INDEX)
    except (OSError, AttributeError) as e:
        message_queue.put(("status", f"Error: No microphone found. ({e})"))
        return

    # Entering the microphone context opens the audio device, which can fail
    # even though constructing the Microphone object succeeded. Report that
    # through the queue like every other failure instead of letting it
    # propagate to the caller.
    try:
        with microphone as source:
            message_queue.put(("status", "Adjusting for ambient noise..."))
            recognizer.adjust_for_ambient_noise(source)
    except (OSError, AttributeError) as e:
        message_queue.put(("status", f"Error: Could not open microphone. ({e})"))
        return
    message_queue.put(("status", "Listening..."))

    def recognition_callback(rec: sr.Recognizer, audio: sr.AudioData) -> None:
        """Transcribe one captured phrase and post the outcome to the queue."""
        message_queue.put(("status", "Processing..."))
        try:
            if engine == "google":
                text = rec.recognize_google(audio)
            elif engine == "whisper":
                # Use local Whisper for high accuracy and privacy.
                # The model will be downloaded automatically on first use.
                text = rec.recognize_whisper(audio, model=whisper_model)
            else:  # Default to the fast, offline, but less accurate sphinx
                text = rec.recognize_sphinx(audio)

            # Trailing space so consecutive phrases concatenate cleanly.
            message_queue.put(("result", f"{text} "))
            message_queue.put(("status", "Listening..."))  # Ready for the next phrase
        except sr.UnknownValueError:
            message_queue.put(
                ("status", "Could not understand audio. Still listening...")
            )
        except sr.RequestError as e:
            error_msg = f"API error: {e}"
            if engine == "google":
                error_msg += ". Check internet connection."
            message_queue.put(("status", error_msg))
        except Exception as e:
            # Catch-all at the callback boundary: report the failure through
            # the queue rather than letting it escape into the listener thread.
            message_queue.put(
                ("status", f"An unexpected error occurred in speech recognition: {e}")
            )

    # `listen_in_background` returns a function that stops the background
    # listener; keep it so stop_background_listening() can call it later.
    background_listener_stop = recognizer.listen_in_background(
        microphone, recognition_callback
    )
|
|
|
|
|
|
def stop_background_listening():
    """Stop the background listener, if one is active, and clear its handle.

    Does nothing when no listener handle is stored. The handle is invoked with
    ``wait_for_stop=False`` and then the module-level reference is reset to
    ``None``.
    """
    global background_listener_stop
    active_stopper = background_listener_stop
    if not active_stopper:
        return
    active_stopper(wait_for_stop=False)
    background_listener_stop = None
|