using System.Text.Json;
using Journal.Core.Dtos;
using Journal.Core.Models;

namespace Journal.Core.Services;

/// <summary>
/// <see cref="ISpeechBridgeService"/> implementation that sends speech requests to the Python
/// sidecar through a <see cref="PythonSidecarClient"/> and maps the JSON replies onto DTOs.
/// </summary>
public sealed class PythonSidecarSpeechService : ISpeechBridgeService
{
    /// <summary>Engine name reported when neither the reply nor the request names one.</summary>
    private const string DefaultEngine = "whisper";

    private readonly PythonSidecarClient _client;

    /// <summary>
    /// Validates the configured sidecar path and creates the client used for all requests.
    /// </summary>
    /// <param name="config">Configuration carrying <c>PythonAiSidecarPath</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="ArgumentException">The sidecar path is null, empty or whitespace.</exception>
    /// <exception cref="FileNotFoundException">No file exists at the sidecar path.</exception>
    public PythonSidecarSpeechService(JournalConfig config)
    {
        // Fail with a descriptive exception instead of a NullReferenceException below.
        ArgumentNullException.ThrowIfNull(config);

        if (string.IsNullOrWhiteSpace(config.PythonAiSidecarPath))
        {
            throw new ArgumentException("Python sidecar path is required.");
        }

        if (!File.Exists(config.PythonAiSidecarPath))
        {
            throw new FileNotFoundException($"Python sidecar not found: {config.PythonAiSidecarPath}");
        }

        _client = new PythonSidecarClient(config);
    }

    /// <summary>
    /// Sends <c>speech.devices.list</c> to the sidecar and converts the reply into a device list.
    /// </summary>
    /// <param name="cancellationToken">Token passed through to the sidecar request.</param>
    /// <returns>
    /// The parsed devices plus the reply's optional <c>warning</c>. When the reply is missing or is
    /// not a JSON object, an empty list with a fixed warning message is returned instead.
    /// </returns>
    public async Task<SpeechDevicesResultDto> ListDevicesAsync(CancellationToken cancellationToken = default)
    {
        var data = await _client.SendAsync("speech.devices.list", new { }, cancellationToken);
        if (data is null || data.Value.ValueKind != JsonValueKind.Object)
        {
            return new SpeechDevicesResultDto([], "Unexpected speech device response from Python sidecar.");
        }

        var payload = data.Value;
        var warning = GetStringOrNull(payload, "warning");
        var devices = new List<SpeechDeviceDto>();

        if (payload.TryGetProperty("devices", out var devicesNode)
            && devicesNode.ValueKind == JsonValueKind.Array)
        {
            foreach (var device in devicesNode.EnumerateArray())
            {
                // Entries that are not JSON objects are skipped rather than failing the whole call.
                if (device.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }

                // -1 marks a missing or unusable index. TryGetInt32 also covers numbers that are
                // fractional or outside the Int32 range, which GetInt32 would reject by throwing.
                var index = -1;
                if (device.TryGetProperty("index", out var indexNode)
                    && indexNode.ValueKind == JsonValueKind.Number
                    && indexNode.TryGetInt32(out var parsedIndex))
                {
                    index = parsedIndex;
                }

                var name = GetStringOrNull(device, "name") ?? "";
                devices.Add(new SpeechDeviceDto(index, name));
            }
        }

        return new SpeechDevicesResultDto(devices, warning);
    }

    /// <summary>
    /// Sends <c>speech.transcribe</c> to the sidecar with the request's fields and maps the reply.
    /// </summary>
    /// <param name="request">Transcription request whose fields are forwarded to the sidecar.</param>
    /// <param name="cancellationToken">Token passed through to the sidecar request.</param>
    /// <returns>
    /// The reply's <c>text</c> (empty when absent), its <c>engine</c> (falling back to the requested
    /// engine, then to <c>"whisper"</c>) and its optional <c>warning</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The reply is missing or is not a JSON object.</exception>
    public async Task<SpeechTranscribeResultDto> TranscribeAsync(
        SpeechTranscribeRequestDto request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var data = await _client.SendAsync(
            "speech.transcribe",
            new
            {
                audio_base64 = request.AudioBase64,
                engine = request.Engine,
                whisper_model = request.WhisperModel,
                text = request.Text,
                simulate_delay_ms = request.SimulateDelayMs
            },
            cancellationToken);

        if (data is null || data.Value.ValueKind != JsonValueKind.Object)
        {
            throw new InvalidOperationException("Python sidecar speech response must be a JSON object.");
        }

        var payload = data.Value;
        var text = GetStringOrNull(payload, "text") ?? "";
        var engine = GetStringOrNull(payload, "engine") ?? request.Engine ?? DefaultEngine;
        var warning = GetStringOrNull(payload, "warning");

        return new SpeechTranscribeResultDto(text, engine, warning);
    }

    /// <summary>
    /// Reads a string property from a JSON object, returning null when the property is absent or
    /// holds anything other than a JSON string (calling GetString on such a value would throw).
    /// </summary>
    private static string? GetStringOrNull(JsonElement parent, string propertyName) =>
        parent.TryGetProperty(propertyName, out var node) && node.ValueKind == JsonValueKind.String
            ? node.GetString()
            : null;
}