Filter silent audio and blank placeholder transcripts in S2T

This commit is contained in:
Jacob Schmidt 2026-03-01 00:39:11 -06:00
parent b1f0791f04
commit ee96c05d15

View File

@ -14,6 +14,7 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
private const int Channels = 1; private const int Channels = 1;
private const int ChunkMs = 2000; private const int ChunkMs = 2000;
private const int MaxBufferedItems = 256; private const int MaxBufferedItems = 256;
private const int SilenceRmsThreshold = 150;
private readonly object _sync = new(); private readonly object _sync = new();
private readonly object _segmentLock = new(); private readonly object _segmentLock = new();
@ -202,6 +203,9 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
{ {
try try
{ {
if (IsLikelySilence(pcmChunk))
continue;
using var pcmStream = new MemoryStream(pcmChunk, writable: false); using var pcmStream = new MemoryStream(pcmChunk, writable: false);
using var raw = new RawSourceWaveStream(pcmStream, waveFormat); using var raw = new RawSourceWaveStream(pcmStream, waveFormat);
using var wavStream = new MemoryStream(); using var wavStream = new MemoryStream();
@ -213,6 +217,8 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
var text = result.Text?.Trim(); var text = result.Text?.Trim();
if (string.IsNullOrWhiteSpace(text)) if (string.IsNullOrWhiteSpace(text))
continue; continue;
if (IsPlaceholderTranscript(text))
continue;
EnqueueTranscript(text); EnqueueTranscript(text);
} }
} }
@ -253,6 +259,37 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
return modelPath; return modelPath;
} }
/// <summary>
/// Decides whether a PCM chunk is quiet enough to skip by comparing its
/// root-mean-square amplitude against <c>SilenceRmsThreshold</c>.
/// </summary>
/// <param name="pcmChunk">
/// Raw audio bytes, decoded here as consecutive 16-bit little-endian signed samples.
/// </param>
/// <returns>
/// <c>true</c> when the chunk is null, holds less than one full sample, or its RMS
/// is below the threshold; otherwise <c>false</c>.
/// </returns>
private static bool IsLikelySilence(byte[] pcmChunk)
{
    // A missing buffer, or one shorter than a single 16-bit sample, carries no audio.
    if (pcmChunk is null || pcmChunk.Length < 2)
        return true;

    // Integer division drops a trailing odd byte; the loop below never reads it either.
    int samples = pcmChunk.Length / 2;

    long sumSquares = 0;
    for (int s = 0; s < samples; s++)
    {
        int offset = s * 2;

        // Low byte first, then high byte; the cast to short restores the sign.
        short sample = (short)(pcmChunk[offset] | (pcmChunk[offset + 1] << 8));
        sumSquares += (long)sample * sample;
    }

    // samples >= 1 here thanks to the length guard, so the division is always defined.
    double rms = Math.Sqrt(sumSquares / (double)samples);
    return rms < SilenceRmsThreshold;
}
/// <summary>
/// Reports whether a transcript is one of the bracketed markers that stand in
/// for "no speech" rather than real recognized text.
/// </summary>
/// <param name="text">Transcript text; surrounding whitespace is ignored.</param>
/// <returns>
/// <c>true</c> when the trimmed text equals <c>[BLANK_AUDIO]</c>, <c>[NO AUDIO]</c>
/// or <c>[SILENCE]</c>, ignoring case; otherwise <c>false</c>.
/// </returns>
private static bool IsPlaceholderTranscript(string text)
{
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    var candidate = text.Trim();

    // Cheap rejection: every known marker is wrapped in square brackets.
    bool bracketed = candidate.Length > 0
        && candidate[0] == '['
        && candidate[^1] == ']';
    if (!bracketed)
        return false;

    if (string.Equals(candidate, "[BLANK_AUDIO]", IgnoreCase))
        return true;
    if (string.Equals(candidate, "[NO AUDIO]", IgnoreCase))
        return true;
    return string.Equals(candidate, "[SILENCE]", IgnoreCase);
}
private void EnqueueTranscript(string text) private void EnqueueTranscript(string text)
{ {
_transcripts.Enqueue(text); _transcripts.Enqueue(text);