Filter silent audio and blank placeholder transcripts in S2T
This commit is contained in:
parent
b1f0791f04
commit
ee96c05d15
@ -14,6 +14,7 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
||||
private const int Channels = 1;
|
||||
private const int ChunkMs = 2000;
|
||||
private const int MaxBufferedItems = 256;
|
||||
// RMS amplitude, in 16-bit PCM sample units, below which IsLikelySilence reports a chunk as silent.
private const int SilenceRmsThreshold = 150;
|
||||
|
||||
private readonly object _sync = new();
|
||||
private readonly object _segmentLock = new();
|
||||
@ -202,6 +203,9 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
||||
{
|
||||
try
|
||||
{
|
||||
if (IsLikelySilence(pcmChunk))
|
||||
continue;
|
||||
|
||||
using var pcmStream = new MemoryStream(pcmChunk, writable: false);
|
||||
using var raw = new RawSourceWaveStream(pcmStream, waveFormat);
|
||||
using var wavStream = new MemoryStream();
|
||||
@ -213,6 +217,8 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
||||
var text = result.Text?.Trim();
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
continue;
|
||||
if (IsPlaceholderTranscript(text))
|
||||
continue;
|
||||
EnqueueTranscript(text);
|
||||
}
|
||||
}
|
||||
@ -253,6 +259,37 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
||||
return modelPath;
|
||||
}
|
||||
|
||||
/// <summary>
/// Decides whether a chunk of raw PCM audio is quiet enough to be treated as silence,
/// based on the root-mean-square amplitude of its samples.
/// </summary>
/// <param name="pcmChunk">
/// Raw audio bytes, read as consecutive 16-bit little-endian signed samples.
/// A trailing odd byte, if any, is ignored.
/// </param>
/// <returns>
/// <c>true</c> when the chunk is <c>null</c>, holds less than one full sample, or its RMS
/// amplitude is below <see cref="SilenceRmsThreshold"/>; otherwise <c>false</c>.
/// </returns>
private static bool IsLikelySilence(byte[] pcmChunk)
{
    // A missing chunk, or one too short for a single 16-bit sample, carries no audio.
    if (pcmChunk is null || pcmChunk.Length < 2)
    {
        return true;
    }

    // At least 1 thanks to the guard above, so the division below cannot divide by zero.
    int sampleCount = pcmChunk.Length / 2;
    long sumSquares = 0;

    for (int i = 0; i + 1 < pcmChunk.Length; i += 2)
    {
        // Low byte first, then high byte. The combined value is 0..65535, so the narrowing
        // to short must wrap to recover the sign; 'unchecked' keeps that true even when the
        // project is compiled with overflow checking enabled.
        short sample = unchecked((short)(pcmChunk[i] | (pcmChunk[i + 1] << 8)));

        // Widen before multiplying so the square is accumulated as a long.
        sumSquares += (long)sample * sample;
    }

    double rms = Math.Sqrt(sumSquares / (double)sampleCount);
    return rms < SilenceRmsThreshold;
}
|
||||
|
||||
/// <summary>
/// Reports whether a transcript consists solely of one of the known bracketed
/// placeholder markers rather than real text.
/// </summary>
/// <param name="text">Transcript text to inspect; surrounding whitespace is ignored.</param>
/// <returns>
/// <c>true</c> when the trimmed text equals <c>[BLANK_AUDIO]</c>, <c>[NO AUDIO]</c> or
/// <c>[SILENCE]</c>, compared case-insensitively; otherwise <c>false</c>.
/// </returns>
private static bool IsPlaceholderTranscript(string text)
{
    var candidate = text.Trim();

    // Quick reject: every placeholder marker is wrapped in square brackets.
    if (candidate.Length == 0 || candidate[0] != '[' || candidate[^1] != ']')
    {
        return false;
    }

    foreach (var marker in new[] { "[BLANK_AUDIO]", "[NO AUDIO]", "[SILENCE]" })
    {
        if (string.Equals(candidate, marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
private void EnqueueTranscript(string text)
|
||||
{
|
||||
_transcripts.Enqueue(text);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user