Manjunath Kudlur
committed on
Commit
·
7e3cf0f
1
Parent(s):
252a98f
Minimum segment duration to 2s
Browse files - streaming_asr.js +12 -3
streaming_asr.js
CHANGED
|
@@ -16,8 +16,10 @@ const VAD_CHUNK_SAMPLES = 160; // 10ms - optimal for TenVAD
|
|
| 16 |
const ASR_CHUNK_SAMPLES = 320; // 20ms - Moonshine frame size
|
| 17 |
const ENCODER_BATCH_SAMPLES = 5120; // 320ms - batch size for encoder
|
| 18 |
|
| 19 |
-
const PRE_BUFFER_CHUNKS =
|
| 20 |
-
const POST_BUFFER_CHUNKS =
|
|
|
|
|
|
|
| 21 |
|
| 22 |
const MODEL_CONFIGS = {
|
| 23 |
sleeker: {
|
|
@@ -210,6 +212,7 @@ class PipelinedStreamingASR {
|
|
| 210 |
this.emaProb = 0;
|
| 211 |
this.onsetCounter = 0;
|
| 212 |
this.offsetCounter = 0;
|
|
|
|
| 213 |
|
| 214 |
// Buffers
|
| 215 |
this.vadBuffer = [];
|
|
@@ -569,9 +572,14 @@ class PipelinedStreamingASR {
|
|
| 569 |
this.onsetCounter = 0;
|
| 570 |
}
|
| 571 |
} else if (this.state === 'speech') {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
if (this.emaProb < this.offsetThreshold) {
|
| 573 |
this.offsetCounter++;
|
| 574 |
-
if
|
|
|
|
| 575 |
this.endSegment();
|
| 576 |
}
|
| 577 |
} else {
|
|
@@ -585,6 +593,7 @@ class PipelinedStreamingASR {
|
|
| 585 |
this.state = 'speech';
|
| 586 |
this.onsetCounter = 0;
|
| 587 |
this.offsetCounter = 0;
|
|
|
|
| 588 |
this.encoderBatchBuffer = []; // Reset batch buffer for new segment
|
| 589 |
|
| 590 |
// Tell encoder to start new segment
|
|
|
|
| 16 |
const ASR_CHUNK_SAMPLES = 320; // 20ms - Moonshine frame size
|
| 17 |
const ENCODER_BATCH_SAMPLES = 5120; // 320ms - batch size for encoder
|
| 18 |
|
| 19 |
+
const PRE_BUFFER_CHUNKS = 25; // ~500ms at 20ms chunks - capture more audio before onset
|
| 20 |
+
const POST_BUFFER_CHUNKS = 5; // ~100ms at 20ms chunks
|
| 21 |
+
const MIN_SEGMENT_DURATION_MS = 2000; // Minimum 2 seconds before allowing segment end
|
| 22 |
+
const OFFSET_CHUNKS_REQUIRED = 10; // ~100ms of silence needed to end segment
|
| 23 |
|
| 24 |
const MODEL_CONFIGS = {
|
| 25 |
sleeker: {
|
|
|
|
| 212 |
this.emaProb = 0;
|
| 213 |
this.onsetCounter = 0;
|
| 214 |
this.offsetCounter = 0;
|
| 215 |
+
this.segmentStartTime = 0;
|
| 216 |
|
| 217 |
// Buffers
|
| 218 |
this.vadBuffer = [];
|
|
|
|
| 572 |
this.onsetCounter = 0;
|
| 573 |
}
|
| 574 |
} else if (this.state === 'speech') {
|
| 575 |
+
// Check if minimum segment duration has passed
|
| 576 |
+
const segmentDuration = Date.now() - this.segmentStartTime;
|
| 577 |
+
const minDurationMet = segmentDuration >= MIN_SEGMENT_DURATION_MS;
|
| 578 |
+
|
| 579 |
if (this.emaProb < this.offsetThreshold) {
|
| 580 |
this.offsetCounter++;
|
| 581 |
+
// Only end segment if minimum duration met AND enough silence chunks
|
| 582 |
+
if (minDurationMet && this.offsetCounter >= OFFSET_CHUNKS_REQUIRED) {
|
| 583 |
this.endSegment();
|
| 584 |
}
|
| 585 |
} else {
|
|
|
|
| 593 |
this.state = 'speech';
|
| 594 |
this.onsetCounter = 0;
|
| 595 |
this.offsetCounter = 0;
|
| 596 |
+
this.segmentStartTime = Date.now();
|
| 597 |
this.encoderBatchBuffer = []; // Reset batch buffer for new segment
|
| 598 |
|
| 599 |
// Tell encoder to start new segment
|