Filter silent audio and blank placeholder transcripts in S2T
This commit is contained in:
parent
b1f0791f04
commit
ee96c05d15
@ -14,6 +14,7 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
|||||||
private const int Channels = 1;
|
private const int Channels = 1;
|
||||||
private const int ChunkMs = 2000;
|
private const int ChunkMs = 2000;
|
||||||
private const int MaxBufferedItems = 256;
|
private const int MaxBufferedItems = 256;
|
||||||
|
// Root-mean-square amplitude, in signed 16-bit PCM sample units, below which
// IsLikelySilence treats a chunk as silent and the chunk is skipped before transcription.
private const int SilenceRmsThreshold = 150;
|
||||||
|
|
||||||
private readonly object _sync = new();
|
private readonly object _sync = new();
|
||||||
private readonly object _segmentLock = new();
|
private readonly object _segmentLock = new();
|
||||||
@ -202,6 +203,9 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
if (IsLikelySilence(pcmChunk))
|
||||||
|
continue;
|
||||||
|
|
||||||
using var pcmStream = new MemoryStream(pcmChunk, writable: false);
|
using var pcmStream = new MemoryStream(pcmChunk, writable: false);
|
||||||
using var raw = new RawSourceWaveStream(pcmStream, waveFormat);
|
using var raw = new RawSourceWaveStream(pcmStream, waveFormat);
|
||||||
using var wavStream = new MemoryStream();
|
using var wavStream = new MemoryStream();
|
||||||
@ -213,6 +217,8 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
|||||||
var text = result.Text?.Trim();
|
var text = result.Text?.Trim();
|
||||||
if (string.IsNullOrWhiteSpace(text))
|
if (string.IsNullOrWhiteSpace(text))
|
||||||
continue;
|
continue;
|
||||||
|
if (IsPlaceholderTranscript(text))
|
||||||
|
continue;
|
||||||
EnqueueTranscript(text);
|
EnqueueTranscript(text);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -253,6 +259,37 @@ public sealed class LocalWhisperS2TService : IS2TService, IDisposable
|
|||||||
return modelPath;
|
return modelPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Decides whether a raw PCM chunk is quiet enough to be treated as silence.
/// </summary>
/// <param name="pcmChunk">
/// Raw audio bytes, read as consecutive little-endian signed 16-bit samples.
/// A trailing odd byte, if any, is ignored.
/// </param>
/// <returns>
/// <c>true</c> when the chunk holds no complete sample or when the root-mean-square
/// amplitude of its samples is below <c>SilenceRmsThreshold</c>; otherwise <c>false</c>.
/// </returns>
private static bool IsLikelySilence(byte[] pcmChunk)
{
    // Two bytes per sample; integer division drops a dangling odd byte.
    int sampleCount = pcmChunk.Length / 2;
    if (sampleCount == 0)
    {
        return true;
    }

    long energy = 0;
    for (int index = 0; index < sampleCount; index++)
    {
        int offset = index * 2;

        // Low byte comes first, high byte second (little-endian).
        short value = (short)((pcmChunk[offset + 1] << 8) | pcmChunk[offset]);

        // Widen before multiplying so the square cannot overflow.
        energy += (long)value * value;
    }

    double rootMeanSquare = Math.Sqrt((double)energy / sampleCount);
    return rootMeanSquare < SilenceRmsThreshold;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether a transcript is one of the known bracketed placeholder markers
/// rather than real text.
/// </summary>
/// <param name="text">Transcript text; surrounding whitespace is ignored.</param>
/// <returns>
/// <c>true</c> when the trimmed text equals <c>[BLANK_AUDIO]</c>, <c>[NO AUDIO]</c>
/// or <c>[SILENCE]</c>, compared case-insensitively; otherwise <c>false</c>.
/// </returns>
private static bool IsPlaceholderTranscript(string text)
{
    string candidate = text.Trim();

    // Cheap rejection: every recognised marker is wrapped in square brackets.
    if (!candidate.StartsWith('[') || !candidate.EndsWith(']'))
    {
        return false;
    }

    string[] markers = { "[BLANK_AUDIO]", "[NO AUDIO]", "[SILENCE]" };
    foreach (string marker in markers)
    {
        if (string.Equals(candidate, marker, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
private void EnqueueTranscript(string text)
|
private void EnqueueTranscript(string text)
|
||||||
{
|
{
|
||||||
_transcripts.Enqueue(text);
|
_transcripts.Enqueue(text);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user