Project_Journal-Csharp_back.../journal/core/speech.py

import speech_recognition as sr
from typing import Protocol
import queue
import sys
from .config import (
    MICROPHONE_DEVICE_INDEX,
    FASTER_WHISPER_DEVICE,
    FASTER_WHISPER_COMPUTE_TYPE,
)

# Windows fallback: pyaudiowpatch ships wheels for newer Python versions
# where PyAudio source builds can fail due to missing PortAudio headers.
try:
    import pyaudio  # type: ignore  # noqa: F401
except Exception:
    try:
        import pyaudiowpatch as _pyaudio_patch  # type: ignore

        # Register the patched build under the name "pyaudio" so that later
        # `import pyaudio` statements resolve to it. `setdefault` leaves any
        # entry that is already present untouched.
        sys.modules.setdefault("pyaudio", _pyaudio_patch)
    except Exception:
        # Best effort: neither backend could be imported, so nothing is
        # registered and the module still loads.
        pass


class Stoppable(Protocol):
    """Structural type for a stop callback, e.g. the background listener's stopper."""

    def __call__(self, wait_for_stop: bool = True) -> None:
        """Stop the running task; ``wait_for_stop`` defaults to ``True``."""
        ...


# Stop callback for the active background listener. It is set by
# start_background_listening() to the function returned by
# `Recognizer.listen_in_background` and reset to None by
# stop_background_listening(). None means no listener is running.
background_listener_stop: Stoppable | None = None


def _recognize_local_whisper(
    recognizer: sr.Recognizer,
    audio: sr.AudioData,
    engine: str,
    whisper_model: str,
    faster_whisper_device: str | None = None,
    faster_whisper_compute_type: str | None = None,
) -> str:
    """Transcribe ``audio`` with a local Whisper-family backend.

    ``recognize_faster_whisper`` is used only when ``engine`` is
    ``"faster-whisper"`` and the recognizer exposes that method; in every
    other case the call is delegated to ``recognize_whisper``.

    Args:
        recognizer: Recognizer instance that performs the transcription.
        audio: Captured audio to transcribe.
        engine: Requested engine name.
        whisper_model: Model name forwarded as ``model``.
        faster_whisper_device: Optional device override; blank or ``None``
            falls back to ``FASTER_WHISPER_DEVICE``.
        faster_whisper_compute_type: Optional compute-type override; blank or
            ``None`` falls back to ``FASTER_WHISPER_COMPUTE_TYPE``.

    Returns:
        Whatever the selected recognizer method returns.
    """
    use_faster_whisper = engine == "faster-whisper" and hasattr(
        recognizer, "recognize_faster_whisper"
    )
    if not use_faster_whisper:
        # Covers plain "whisper" as well as SpeechRecognition builds that do
        # not expose the faster-whisper wrapper.
        return recognizer.recognize_whisper(audio, model=whisper_model)

    # Caller-supplied overrides are normalised; blank values fall back to the
    # configured defaults.
    device = (faster_whisper_device or "").strip().lower()
    if not device:
        device = FASTER_WHISPER_DEVICE
    compute_type = (faster_whisper_compute_type or "").strip().lower()
    if not compute_type:
        compute_type = FASTER_WHISPER_COMPUTE_TYPE

    # "auto" (and "default" for the compute type) are omitted from the
    # options instead of being passed through.
    model_options: dict[str, str] = {}
    if device != "auto":
        model_options["device"] = device
    if compute_type not in {"auto", "default"}:
        model_options["compute_type"] = compute_type

    return recognizer.recognize_faster_whisper(
        audio,
        model=whisper_model,
        init_options=model_options if model_options else None,
    )


def start_background_listening(
    message_queue: queue.Queue[tuple[str, str]],
    engine: str,
    whisper_model: str,
    faster_whisper_device: str | None = None,
    faster_whisper_compute_type: str | None = None,
) -> None:
    """Start listening to the microphone in a background thread.

    Progress and outcomes are reported through ``message_queue`` as
    ``(kind, text)`` tuples, where ``kind`` is ``"status"`` or ``"result"``.
    If a listener is already registered, or the microphone cannot be created
    or opened, a status message is queued and the function returns without
    starting anything.

    Args:
        message_queue: Queue that receives status and result messages.
        engine: ``"google"``, ``"whisper"`` or ``"faster-whisper"``; any other
            value selects Sphinx.
        whisper_model: Model name used by the Whisper-family engines.
        faster_whisper_device: Optional device override for faster-whisper.
        faster_whisper_compute_type: Optional compute-type override for
            faster-whisper.
    """
    global background_listener_stop
    if background_listener_stop:
        message_queue.put(("status", "Already listening."))
        return

    # Both constructing the Microphone and opening it for calibration can
    # fail: the audio backend may be missing (AttributeError), the device may
    # be unavailable or busy (OSError), or the configured device index may be
    # rejected by SpeechRecognition's own assertions (AssertionError).
    # Opening therefore has to sit inside the guarded region too, otherwise a
    # failure there escapes instead of being reported on the queue.
    try:
        recognizer = sr.Recognizer()
        microphone = sr.Microphone(device_index=MICROPHONE_DEVICE_INDEX)
        with microphone as source:
            message_queue.put(("status", "Adjusting for ambient noise..."))
            recognizer.adjust_for_ambient_noise(source)
    except (OSError, AttributeError, AssertionError) as e:
        message_queue.put(("status", f"Error: No microphone found. ({e})"))
        return

    message_queue.put(("status", "Listening..."))

    def recognition_callback(recognizer: sr.Recognizer, audio: sr.AudioData) -> None:
        """Transcribe one captured phrase and report the outcome on the queue."""
        message_queue.put(("status", "Processing..."))
        try:
            if engine == "google":
                text = recognizer.recognize_google(audio)
            elif engine in {"whisper", "faster-whisper"}:
                # Use local Whisper-family inference for privacy.
                # Models are downloaded automatically on first use.
                if engine == "faster-whisper" and not hasattr(
                    recognizer, "recognize_faster_whisper"
                ):
                    message_queue.put(
                        (
                            "status",
                            "faster-whisper not exposed by this SpeechRecognition build; falling back to whisper.",
                        )
                    )
                text = _recognize_local_whisper(
                    recognizer,
                    audio,
                    engine,
                    whisper_model,
                    faster_whisper_device=faster_whisper_device,
                    faster_whisper_compute_type=faster_whisper_compute_type,
                )
            else:  # Default to the fast, offline, but less accurate sphinx
                text = recognizer.recognize_sphinx(audio)

            message_queue.put(
                ("result", f"{text} ")
            )  # Add space for continuous dictation
            message_queue.put(("status", "Listening..."))  # Ready for the next phrase
        except sr.UnknownValueError:
            message_queue.put(
                ("status", "Could not understand audio. Still listening...")
            )
        except sr.RequestError as e:
            error_msg = f"API error: {e}"
            if engine == "google":
                error_msg += ". Check internet connection."
            message_queue.put(("status", error_msg))
        except Exception as e:
            # Boundary of the listener thread: report the failure on the
            # queue instead of raising out of the callback.
            message_queue.put(
                ("status", f"An unexpected error occurred in speech recognition: {e}")
            )

    # `listen_in_background` returns a function that stops the background
    # listener; it is stored only once the listener has actually started.
    stop_listening = recognizer.listen_in_background(microphone, recognition_callback)
    background_listener_stop = stop_listening


def stop_background_listening():
    """Stop the background listener, if one is active, and clear its handle."""
    global background_listener_stop
    stop_handle = background_listener_stop
    if not stop_handle:
        # Nothing is running; leave the module state untouched.
        return
    # Request the stop without waiting, then forget the handle.
    stop_handle(wait_for_stop=False)
    background_listener_stop = None