Manjunath Kudlur committed on
Commit
7e3cf0f
·
1 Parent(s): 252a98f

Minimum segment duration to 2s

Browse files
Files changed (1) hide show
  1. streaming_asr.js +12 -3
streaming_asr.js CHANGED
@@ -16,8 +16,10 @@ const VAD_CHUNK_SAMPLES = 160; // 10ms - optimal for TenVAD
16
  const ASR_CHUNK_SAMPLES = 320; // 20ms - Moonshine frame size
17
  const ENCODER_BATCH_SAMPLES = 5120; // 320ms - batch size for encoder
18
 
19
- const PRE_BUFFER_CHUNKS = 15; // ~300ms at 20ms chunks
20
- const POST_BUFFER_CHUNKS = 3; // ~60ms at 20ms chunks
 
 
21
 
22
  const MODEL_CONFIGS = {
23
  sleeker: {
@@ -210,6 +212,7 @@ class PipelinedStreamingASR {
210
  this.emaProb = 0;
211
  this.onsetCounter = 0;
212
  this.offsetCounter = 0;
 
213
 
214
  // Buffers
215
  this.vadBuffer = [];
@@ -569,9 +572,14 @@ class PipelinedStreamingASR {
569
  this.onsetCounter = 0;
570
  }
571
  } else if (this.state === 'speech') {
 
 
 
 
572
  if (this.emaProb < this.offsetThreshold) {
573
  this.offsetCounter++;
574
- if (this.offsetCounter >= 3) {
 
575
  this.endSegment();
576
  }
577
  } else {
@@ -585,6 +593,7 @@ class PipelinedStreamingASR {
585
  this.state = 'speech';
586
  this.onsetCounter = 0;
587
  this.offsetCounter = 0;
 
588
  this.encoderBatchBuffer = []; // Reset batch buffer for new segment
589
 
590
  // Tell encoder to start new segment
 
16
  const ASR_CHUNK_SAMPLES = 320; // 20ms - Moonshine frame size
17
  const ENCODER_BATCH_SAMPLES = 5120; // 320ms - batch size for encoder
18
 
19
+ const PRE_BUFFER_CHUNKS = 25; // ~500ms at 20ms chunks - capture more audio before onset
20
+ const POST_BUFFER_CHUNKS = 5; // ~100ms at 20ms chunks
21
+ const MIN_SEGMENT_DURATION_MS = 2000; // Minimum 2 seconds before allowing segment end
22
+ const OFFSET_CHUNKS_REQUIRED = 10; // ~100ms of silence needed to end segment
23
 
24
  const MODEL_CONFIGS = {
25
  sleeker: {
 
212
  this.emaProb = 0;
213
  this.onsetCounter = 0;
214
  this.offsetCounter = 0;
215
+ this.segmentStartTime = 0;
216
 
217
  // Buffers
218
  this.vadBuffer = [];
 
572
  this.onsetCounter = 0;
573
  }
574
  } else if (this.state === 'speech') {
575
+ // Check if minimum segment duration has passed
576
+ const segmentDuration = Date.now() - this.segmentStartTime;
577
+ const minDurationMet = segmentDuration >= MIN_SEGMENT_DURATION_MS;
578
+
579
  if (this.emaProb < this.offsetThreshold) {
580
  this.offsetCounter++;
581
+ // Only end segment if minimum duration met AND enough silence chunks
582
+ if (minDurationMet && this.offsetCounter >= OFFSET_CHUNKS_REQUIRED) {
583
  this.endSegment();
584
  }
585
  } else {
 
593
  this.state = 'speech';
594
  this.onsetCounter = 0;
595
  this.offsetCounter = 0;
596
+ this.segmentStartTime = Date.now();
597
  this.encoderBatchBuffer = []; // Reset batch buffer for new segment
598
 
599
  // Tell encoder to start new segment