# Project_Journal-Csharp_back.../journal/core/speech.py

import speech_recognition as sr
from typing import Protocol
import queue
from .config import MICROPHONE_DEVICE_INDEX


class Stoppable(Protocol):
    """Structural type for a stop handle, such as the background listener's.

    Any callable that accepts an optional ``wait_for_stop`` flag and returns
    ``None`` satisfies this protocol.
    """

    def __call__(self, wait_for_stop: bool = True) -> None:
        """Stop the underlying activity.

        NOTE(review): ``wait_for_stop`` presumably controls whether the call
        blocks until the listener has shut down — confirm against the
        library that supplies the handle.
        """


# Module-level handle for the background listener: holds the callable that
# stops it while a listener is registered, and ``None`` when there is none.
background_listener_stop: Stoppable | None = None


def start_background_listening(
    message_queue: queue.Queue[tuple[str, str]], engine: str, whisper_model: str
) -> None:
    """
    Starts listening to the microphone in a background thread.

    Puts status and result messages into the provided queue as
    ``(kind, text)`` tuples, where ``kind`` is ``"status"`` or ``"result"``.
    Microphone set-up failures are reported as status messages instead of
    being raised. If a listener is already registered, only an
    "Already listening." status is posted and nothing else happens.

    Args:
        message_queue: Queue that receives ``("status", ...)`` and
            ``("result", ...)`` messages.
        engine: ``"google"`` or ``"whisper"``; any other value falls back
            to Sphinx.
        whisper_model: Model name handed to Whisper when ``engine`` is
            ``"whisper"``; ignored otherwise.
    """
    global background_listener_stop
    if background_listener_stop:
        message_queue.put(("status", "Already listening."))
        return

    try:
        recognizer = sr.Recognizer()
        microphone = sr.Microphone(device_index=MICROPHONE_DEVICE_INDEX)
    except (OSError, AttributeError) as e:
        message_queue.put(("status", f"Error: No microphone found. ({e})"))
        return

    # Entering the microphone context opens the input stream, which can fail
    # even after the Microphone object was constructed. Report that through
    # the queue like every other error rather than letting it escape.
    # NOTE(review): depending on the SpeechRecognition version, a failed
    # stream open may surface as AssertionError/AttributeError instead of
    # OSError — confirm against the pinned version.
    try:
        with microphone as source:
            message_queue.put(("status", "Adjusting for ambient noise..."))
            recognizer.adjust_for_ambient_noise(source)
    except (OSError, AttributeError, AssertionError) as e:
        message_queue.put(
            ("status", f"Error: Could not access the microphone. ({e})")
        )
        return
    # Only announce readiness once calibration finished and the stream was
    # released cleanly.
    message_queue.put(("status", "Listening..."))

    def recognition_callback(recognizer: sr.Recognizer, audio: sr.AudioData) -> None:
        """Transcribe one captured phrase and post the outcome to the queue."""
        message_queue.put(("status", "Processing..."))
        try:
            if engine == "google":
                text = recognizer.recognize_google(audio)
            elif engine == "whisper":
                # Use local Whisper for high accuracy and privacy.
                # The model will be downloaded automatically on first use.
                text = recognizer.recognize_whisper(audio, model=whisper_model)
            else:  # Default to the fast, offline, but less accurate sphinx
                text = recognizer.recognize_sphinx(audio)

            # Trailing space so consecutive phrases concatenate cleanly.
            message_queue.put(("result", f"{text} "))
            message_queue.put(("status", "Listening..."))  # Ready for the next phrase
        except sr.UnknownValueError:
            message_queue.put(
                ("status", "Could not understand audio. Still listening...")
            )
        except sr.RequestError as e:
            error_msg = f"API error: {e}"
            if engine == "google":
                error_msg += ". Check internet connection."
            message_queue.put(("status", error_msg))
        except Exception as e:
            # Deliberately broad: any failure is reported as a status message
            # instead of propagating out of the callback.
            message_queue.put(
                ("status", f"An unexpected error occurred in speech recognition: {e}")
            )

    # `listen_in_background` returns a function to stop the background listener;
    # keep it in the module-level handle so it can be called later.
    background_listener_stop = recognizer.listen_in_background(
        microphone, recognition_callback
    )


def stop_background_listening():
    """Stop the background listener, if any, and clear the module-level handle.

    The stop callable is invoked with ``wait_for_stop=False``. When no
    listener is registered the function does nothing.
    """
    global background_listener_stop
    stopper = background_listener_stop
    if not stopper:
        return
    stopper(wait_for_stop=False)
    background_listener_stop = None