journal/Journal.Core/Services/PythonSidecarSpeechService.cs

185 lines
6.7 KiB
C#

using System.Diagnostics;
using System.Text.Json;
using Journal.Core.Dtos;
using Journal.Core.Models;
namespace Journal.Core.Services;
/// <summary>
/// <see cref="ISpeechBridgeService"/> implementation backed by a Python sidecar script.
/// Every call launches a new Python process, writes one JSON request line
/// (<c>{"action": ..., "payload": ...}</c>) to its stdin and parses the last JSON object
/// line the process printed on stdout (<c>{"ok": bool, "data": ..., "error": ...}</c>).
/// </summary>
public sealed class PythonSidecarSpeechService : ISpeechBridgeService
{
    private const string UnknownSidecarError = "Unknown sidecar error.";
    private const string DefaultEngine = "whisper";

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true
    };

    private readonly JournalConfig _config;

    /// <summary>Creates the service and verifies that the configured sidecar script exists.</summary>
    /// <param name="config">Configuration providing the Python executable, sidecar path, working directory and timeout.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="ArgumentException">The configured sidecar path is null, empty or whitespace.</exception>
    /// <exception cref="FileNotFoundException">No file exists at the configured sidecar path.</exception>
    public PythonSidecarSpeechService(JournalConfig config)
    {
        ArgumentNullException.ThrowIfNull(config);
        _config = config;

        if (string.IsNullOrWhiteSpace(_config.PythonAiSidecarPath))
            throw new ArgumentException("Python sidecar path is required.");
        if (!File.Exists(_config.PythonAiSidecarPath))
            throw new FileNotFoundException(
                $"Python sidecar not found: {_config.PythonAiSidecarPath}",
                _config.PythonAiSidecarPath);
    }

    /// <summary>Sends the <c>speech.devices.list</c> action to the sidecar and maps the reply to DTOs.</summary>
    /// <param name="cancellationToken">Cancels the request; the sidecar process is killed on cancellation.</param>
    /// <returns>
    /// The devices reported by the sidecar plus its optional warning. When the reply carries no
    /// JSON object in <c>data</c>, an empty device list with an explanatory warning is returned.
    /// </returns>
    public async Task<SpeechDevicesResultDto> ListDevicesAsync(CancellationToken cancellationToken = default)
    {
        var data = await SendAsync("speech.devices.list", new { }, cancellationToken);
        if (data is null || data.Value.ValueKind != JsonValueKind.Object)
            return new SpeechDevicesResultDto([], "Unexpected speech device response from Python sidecar.");

        var payload = data.Value;
        var warning = ReadString(payload, "warning");
        var devices = new List<SpeechDeviceDto>();

        if (payload.TryGetProperty("devices", out var devicesNode) && devicesNode.ValueKind == JsonValueKind.Array)
        {
            foreach (var device in devicesNode.EnumerateArray())
            {
                if (device.ValueKind != JsonValueKind.Object)
                    continue;

                // TryGetInt32 (rather than GetInt32) keeps a fractional or out-of-range number
                // from throwing; such entries get the same -1 placeholder as a missing index.
                var index = device.TryGetProperty("index", out var indexNode)
                            && indexNode.ValueKind == JsonValueKind.Number
                            && indexNode.TryGetInt32(out var parsedIndex)
                    ? parsedIndex
                    : -1;
                var name = ReadString(device, "name") ?? "";

                devices.Add(new SpeechDeviceDto(index, name));
            }
        }

        return new SpeechDevicesResultDto(devices, warning);
    }

    /// <summary>Sends the <c>speech.transcribe</c> action to the sidecar and returns its transcription.</summary>
    /// <param name="request">Transcription input forwarded to the sidecar.</param>
    /// <param name="cancellationToken">Cancels the request; the sidecar process is killed on cancellation.</param>
    /// <returns>
    /// The transcribed text (empty when absent), the engine reported by the sidecar (falling back
    /// to the requested engine, then to <c>"whisper"</c>) and the optional warning.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The sidecar reply carries no JSON object in <c>data</c>.</exception>
    public async Task<SpeechTranscribeResultDto> TranscribeAsync(
        SpeechTranscribeRequestDto request,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(request);

        var data = await SendAsync("speech.transcribe", new
        {
            audio_base64 = request.AudioBase64,
            engine = request.Engine,
            whisper_model = request.WhisperModel,
            text = request.Text,
            simulate_delay_ms = request.SimulateDelayMs
        }, cancellationToken);

        if (data is null || data.Value.ValueKind != JsonValueKind.Object)
            throw new InvalidOperationException("Python sidecar speech response must be a JSON object.");

        var payload = data.Value;
        var text = ReadString(payload, "text") ?? "";
        var engine = ReadString(payload, "engine") ?? request.Engine ?? DefaultEngine;
        var warning = ReadString(payload, "warning");

        return new SpeechTranscribeResultDto(text, engine, warning);
    }

    /// <summary>
    /// Runs the sidecar once: writes the request to stdin, waits for the process to exit within
    /// the configured timeout, and returns a detached copy of the <c>data</c> property of the reply.
    /// </summary>
    /// <param name="action">Action name placed in the request envelope.</param>
    /// <param name="payload">Object serialized as the request payload.</param>
    /// <param name="cancellationToken">Caller cancellation; linked with the configured timeout.</param>
    /// <returns>The cloned <c>data</c> element, or null when the reply has no <c>data</c> property.</returns>
    /// <exception cref="TimeoutException">The sidecar did not finish within the configured timeout.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
    /// <exception cref="InvalidOperationException">
    /// The process could not be started, produced no or invalid JSON, or reported <c>ok: false</c>.
    /// </exception>
    private async Task<JsonElement?> SendAsync(string action, object payload, CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var request = JsonSerializer.Serialize(new { action, payload }, JsonOptions);

        using var process = new Process();
        process.StartInfo = new ProcessStartInfo
        {
            FileName = _config.PythonExecutable,
            UseShellExecute = false,
            RedirectStandardInput = true,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true,
            WorkingDirectory = _config.ProjectRoot
        };
        process.StartInfo.ArgumentList.Add(_config.PythonAiSidecarPath);

        if (!process.Start())
            throw new InvalidOperationException("Failed to start Python sidecar process.");

        // Drain both redirected pipes from the start. Waiting for exit first would deadlock
        // as soon as the child writes more than the OS pipe buffer holds, because it would
        // block on the write and never exit.
        var stdoutTask = process.StandardOutput.ReadToEndAsync();
        var stderrTask = process.StandardError.ReadToEndAsync();

        string stdout;
        string stderr;
        try
        {
            await process.StandardInput.WriteLineAsync(request);
            process.StandardInput.Close();

            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
            timeoutCts.CancelAfter(_config.AiSidecarTimeoutMs);

            await process.WaitForExitAsync(timeoutCts.Token);

            // The reads stay under the same deadline: a descendant process that inherited the
            // pipes could otherwise keep them open after the sidecar itself has exited.
            stdout = await stdoutTask.WaitAsync(timeoutCts.Token);
            stderr = await stderrTask.WaitAsync(timeoutCts.Token);
        }
        catch (OperationCanceledException)
        {
            TryKill(process);

            // A caller-initiated cancellation must surface as cancellation, not as a timeout.
            cancellationToken.ThrowIfCancellationRequested();
            throw new TimeoutException($"Python sidecar timed out after {_config.AiSidecarTimeoutMs} ms.");
        }
        catch
        {
            // Any other failure (e.g. a broken stdin pipe): do not leave the interpreter running.
            TryKill(process);
            throw;
        }

        var line = LastJsonLine(stdout);
        if (string.IsNullOrWhiteSpace(line))
            throw new InvalidOperationException($"Python sidecar returned no JSON response. stderr: {stderr}".Trim());

        JsonDocument doc;
        try
        {
            doc = JsonDocument.Parse(line);
        }
        catch (JsonException ex)
        {
            throw new InvalidOperationException($"Invalid JSON from Python sidecar: {line}", ex);
        }

        using (doc)
        {
            var root = doc.RootElement;

            if (!root.TryGetProperty("ok", out var okNode)
                || okNode.ValueKind is not (JsonValueKind.True or JsonValueKind.False))
            {
                throw new InvalidOperationException("Python sidecar response missing boolean 'ok' field.");
            }

            if (!okNode.GetBoolean())
                throw new InvalidOperationException(DescribeError(root));

            // Clone so the returned element remains usable after the document is disposed.
            return root.TryGetProperty("data", out var dataNode) ? dataNode.Clone() : null;
        }
    }

    /// <summary>
    /// Builds the failure message from the <c>error</c> property of a reply: the string itself,
    /// the raw JSON text for structured values, or a generic message when absent or null.
    /// </summary>
    private static string DescribeError(JsonElement root)
    {
        if (!root.TryGetProperty("error", out var errorNode))
            return UnknownSidecarError;

        return errorNode.ValueKind switch
        {
            JsonValueKind.String => errorNode.GetString() ?? UnknownSidecarError,
            JsonValueKind.Null or JsonValueKind.Undefined => UnknownSidecarError,
            _ => errorNode.GetRawText()
        };
    }

    /// <summary>
    /// Returns the named property of a JSON object when it is a JSON string; returns null when
    /// the property is missing or holds any other kind of value (instead of throwing).
    /// </summary>
    private static string? ReadString(JsonElement obj, string propertyName) =>
        obj.TryGetProperty(propertyName, out var node) && node.ValueKind == JsonValueKind.String
            ? node.GetString()
            : null;

    /// <summary>
    /// Scans the output from the end and returns the last non-empty line that, once trimmed,
    /// starts with <c>{</c> and ends with <c>}</c>; returns an empty string when there is none.
    /// </summary>
    private static string LastJsonLine(string text)
    {
        var lines = text.Split(['\r', '\n'], StringSplitOptions.RemoveEmptyEntries);
        for (var i = lines.Length - 1; i >= 0; i--)
        {
            var candidate = lines[i].Trim();
            if (candidate.StartsWith('{') && candidate.EndsWith('}'))
                return candidate;
        }

        return "";
    }

    /// <summary>Best-effort termination of the sidecar and its child processes; never throws.</summary>
    private static void TryKill(Process process)
    {
        try
        {
            if (!process.HasExited)
                process.Kill(entireProcessTree: true);
        }
        catch
        {
            // Deliberately ignored: this only runs while another failure is already being
            // reported, and a cleanup error must not replace that original exception.
        }
    }
}