Filter silent audio and blank placeholder transcripts in S2T

This commit is contained in:
Jacob Schmidt 2026-03-01 00:39:11 -06:00
parent b1f0791f04
commit ee96c05d15

View File

@ -14,6 +14,7 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
// NOTE(review): presumably the audio channel count (1 = mono) — usage is outside this view; confirm.
private const int Channels = 1;
// NOTE(review): presumably the duration of one audio chunk in milliseconds — confirm against the capture code.
private const int ChunkMs = 2000;
// NOTE(review): looks like an upper bound on buffered items — confirm which buffer it applies to.
private const int MaxBufferedItems = 256;
// RMS cutoff used by IsLikelySilence: a chunk whose sample RMS is below this value is treated as silence.
private const int SilenceRmsThreshold = 150;
// NOTE(review): lock objects; what each one guards is not visible in this view.
private readonly object _sync = new();
private readonly object _segmentLock = new();
@ -202,6 +203,9 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
{
try
{
if (IsLikelySilence(pcmChunk))
continue;
using var pcmStream = new MemoryStream(pcmChunk, writable: false);
using var raw = new RawSourceWaveStream(pcmStream, waveFormat);
using var wavStream = new MemoryStream();
@ -213,6 +217,8 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
var text = result.Text?.Trim();
if (string.IsNullOrWhiteSpace(text))
continue;
if (IsPlaceholderTranscript(text))
continue;
EnqueueTranscript(text);
}
}
@ -253,6 +259,37 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
return modelPath;
}
/// <summary>
/// Decides whether a PCM chunk is quiet enough to be treated as silence.
/// </summary>
/// <remarks>
/// The bytes are read as consecutive 16-bit signed samples, low byte first; an odd
/// trailing byte is ignored. The chunk counts as silence when the root-mean-square
/// of those samples is below <see cref="SilenceRmsThreshold"/>.
/// </remarks>
/// <param name="pcmChunk">Raw PCM bytes to inspect.</param>
/// <returns>
/// <c>true</c> when the chunk holds no complete sample or its RMS is below the
/// threshold; otherwise <c>false</c>.
/// </returns>
private static bool IsLikelySilence(byte[] pcmChunk)
{
    // A single guard covers both "too short" and "no samples": fewer than two
    // bytes means there is not even one complete 16-bit sample.
    int sampleCount = pcmChunk.Length / 2;
    if (sampleCount == 0)
        return true;

    long sumOfSquares = 0;
    for (int s = 0; s < sampleCount; s++)
    {
        int offset = s * 2;
        // Assemble the sample explicitly (low byte, then high byte) so the result
        // does not depend on the host's byte order.
        short sample = (short)(pcmChunk[offset] | (pcmChunk[offset + 1] << 8));
        // Widen before multiplying so the running sum is accumulated as long.
        sumOfSquares += (long)sample * sample;
    }

    double rms = Math.Sqrt(sumOfSquares / (double)sampleCount);
    return rms < SilenceRmsThreshold;
}
/// <summary>
/// Tells whether a transcript is one of the known bracketed "no speech" markers
/// rather than real text.
/// </summary>
/// <param name="text">Transcript text; surrounding whitespace is ignored.</param>
/// <returns>
/// <c>true</c> for <c>[BLANK_AUDIO]</c>, <c>[NO AUDIO]</c> or <c>[SILENCE]</c>
/// in any letter case; otherwise <c>false</c>.
/// </returns>
private static bool IsPlaceholderTranscript(string text)
{
    string candidate = text.Trim();

    // Cheap pre-check: every known marker is wrapped in square brackets.
    if (!candidate.StartsWith('[') || !candidate.EndsWith(']'))
        return false;

    string[] markers = { "[BLANK_AUDIO]", "[NO AUDIO]", "[SILENCE]" };
    foreach (string marker in markers)
    {
        if (string.Equals(candidate, marker, StringComparison.OrdinalIgnoreCase))
            return true;
    }

    return false;
}
private void EnqueueTranscript(string text)
{
_transcripts.Enqueue(text);