2026-02-23 20:12:10 -06:00

105 lines
3.9 KiB
Python

from nicegui import ui, run, app
from typing import Callable, cast
import sys
from pathlib import Path
import queue
# Add project root to sys.path to allow for absolute imports
sys.path.append(str(Path(__file__).resolve().parent.parent.parent))
from journal.core.config import (
SPEECH_RECOGNITION_ENGINE,
WHISPER_MODEL_SIZE,
FASTER_WHISPER_DEVICE,
FASTER_WHISPER_COMPUTE_TYPE,
)
from journal.core.speech import start_background_listening, stop_background_listening
def speech_to_text(on_result: Callable[[str], None]) -> None:
    """
    Render a speech-to-text card that transcribes from the server's microphone.

    This approach is more reliable for desktop webview apps than browser-based
    APIs. The background listener communicates through a queue of
    ``(message_type, text)`` tuples; a UI timer polls that queue and applies
    the updates ("status" -> status label + notification, "result" ->
    ``on_result``).

    Args:
        on_result: Callback invoked with the text of every "result" message
            taken from the queue.
    """
    # Queue for thread-safe communication from the background listener.
    message_queue: queue.Queue[tuple[str, str]] = queue.Queue()
    # Assigned below; declared first so the closures can reference it.
    queue_timer = None

    with ui.card().tight() as card:
        with ui.row().classes("w-full items-center justify-between px-4"):
            _ = ui.label("Speech to Text").classes("text-lg font-bold")
            # Elements attach to the container that is open when they are
            # constructed, so the status label must be created here to end up
            # inside the header row (merely naming it inside the row is a no-op).
            status_label = ui.label("Ready").classes(
                "text-sm text-gray-500 my-auto"
            )

    def process_queue() -> None:
        """Apply every pending message from the background thread to the UI."""
        try:
            # Drain the whole queue each tick; handling a single message per
            # tick would make a burst of messages lag by 100 ms each.
            while True:
                message_type, data = message_queue.get_nowait()
                if message_type == "status":
                    status_label.set_text(data)
                    ui.notify(data)
                elif message_type == "result":
                    on_result(data)
        except queue.Empty:
            pass  # Queue drained; nothing more to do until the next tick.
        except RuntimeError:
            # Parent UI container may be gone (page switch/refresh); stop polling.
            if queue_timer is not None:
                queue_timer.active = False

    # A timer polls the queue every 100ms; it stays idle until listening starts.
    queue_timer = ui.timer(0.1, process_queue, active=False)

    def _user_setting(key: str, default: str) -> str:
        """Return the per-user override stored under *key*, else *default*."""
        # Explicit membership test + cast keeps the result typed as ``str``.
        if key in app.storage.user:
            return cast(str, app.storage.user[key])
        return default

    async def start_listening_task() -> None:
        """Runs the blocking speech recognition function in an executor thread."""
        engine = _user_setting("speech_engine", SPEECH_RECOGNITION_ENGINE)
        whisper_model = _user_setting("whisper_model", WHISPER_MODEL_SIZE)
        faster_whisper_device = _user_setting(
            "faster_whisper_device", FASTER_WHISPER_DEVICE
        )
        faster_whisper_compute_type = _user_setting(
            "faster_whisper_compute_type", FASTER_WHISPER_COMPUTE_TYPE
        )

        # Start polling before the listener runs so its messages are picked up.
        if queue_timer is not None:
            queue_timer.active = True

        await run.io_bound(
            start_background_listening,
            message_queue,
            engine,
            whisper_model,
            faster_whisper_device,
            faster_whisper_compute_type,
        )

    async def stop_listening_task() -> None:
        """Runs the blocking stop function in an executor thread."""
        await run.io_bound(stop_background_listening)
        if queue_timer is not None:
            queue_timer.active = False
        status_label.set_text("Stopped listening.")
        ui.notify("Stopped listening.")

    # Re-enter the card to add the control row now that the handlers exist.
    with card:
        with ui.row().classes("w-full items-center px-4"):
            _ = ui.button(icon="mic", on_click=start_listening_task).props(
                "flat round dense"
            )
            _ = ui.button(icon="stop", on_click=stop_listening_task).props(
                "flat round dense"
            )