Manjunath Kudlur committed on
Commit
f5961bb
·
1 Parent(s): 7e3cf0f

Detect repetition while decoding

Browse files
Files changed (2) hide show
  1. decoder_worker.js +52 -2
  2. index.html +3 -3
decoder_worker.js CHANGED
@@ -73,6 +73,46 @@ async function fetchModelWithProgress(url, modelName) {
73
  let cfg = null;
74
  let tailLatency = 0;
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  // Sessions
77
  let adapterSession = null;
78
  let decoderInitSession = null;
@@ -203,7 +243,9 @@ async function decodeAccumulated() {
203
  await initDecoderCache(context);
204
 
205
  const numFrames = accumulatedFeatures.dims[1];
206
- const maxTokens = Math.max(10, Math.floor(numFrames * 1.5));
 
 
207
 
208
  const tokens = [1]; // BOS
209
  for (let step = 0; step < maxTokens; step++) {
@@ -219,7 +261,15 @@ async function decodeAccumulated() {
219
  }
220
 
221
  tokens.push(maxIdx);
222
- if (maxIdx === 2) break; // EOS
 
 
 
 
 
 
 
 
223
  }
224
 
225
  return tokenizer.decode(tokens, true);
 
73
  let cfg = null;
74
  let tailLatency = 0;
75
 
76
+ // Decoding config
77
+ const TOKENS_PER_SECOND = 6.5; // Max tokens per second of audio
78
+ const FRAME_DURATION_MS = 20; // Each encoder frame is 20ms
79
+
80
/**
 * Report whether the tail of `tokens` is a single `period`-long sequence
 * repeated `repeats` times back to back.
 *
 * Only the last `period * repeats` entries are inspected, and nothing is
 * allocated, so this is cheap to call after every decoded token.
 *
 * @param {ArrayLike<number>} tokens - Token ids produced so far.
 * @param {number} period - Length of the repeating unit (1 = single token).
 * @param {number} repeats - How many consecutive copies must be present.
 * @returns {boolean} True when the tail is periodic with the given period.
 */
function hasRepeatedTail(tokens, period, repeats) {
  const span = period * repeats;
  const len = tokens.length;
  if (len < span) return false;

  // Every element in the window must equal the one `period` positions later;
  // that is exactly the condition for `repeats` identical consecutive units.
  for (let i = len - span; i < len - period; i++) {
    if (tokens[i] !== tokens[i + period]) return false;
  }
  return true;
}

/**
 * Check for repetitive token patterns that indicate decoding should stop.
 *
 * The sequence is considered repetitive when its tail matches any of:
 *   - the same token 5 times in a row (e.g. [A,A,A,A,A]),
 *   - the same pair 3 times in a row (e.g. [A,B,A,B,A,B]),
 *   - the same triple 2 times in a row (e.g. [A,B,C,A,B,C]).
 *
 * @param {ArrayLike<number>} tokens - Token ids produced so far.
 * @returns {boolean} True when one of the patterns above ends the sequence;
 *   false otherwise, including when `tokens` is missing or not array-like.
 */
function hasRepetition(tokens) {
  // Guard against a missing or malformed sequence instead of throwing.
  if (tokens == null || typeof tokens.length !== 'number') return false;

  return (
    hasRepeatedTail(tokens, 1, 5) ||
    hasRepeatedTail(tokens, 2, 3) ||
    hasRepeatedTail(tokens, 3, 2)
  );
}
115
+
116
  // Sessions
117
  let adapterSession = null;
118
  let decoderInitSession = null;
 
243
  await initDecoderCache(context);
244
 
245
  const numFrames = accumulatedFeatures.dims[1];
246
+ // Calculate duration in seconds and max tokens based on that
247
+ const durationSeconds = (numFrames * FRAME_DURATION_MS) / 1000;
248
+ const maxTokens = Math.max(10, Math.floor(durationSeconds * TOKENS_PER_SECOND));
249
 
250
  const tokens = [1]; // BOS
251
  for (let step = 0; step < maxTokens; step++) {
 
261
  }
262
 
263
  tokens.push(maxIdx);
264
+
265
+ // Stop on EOS
266
+ if (maxIdx === 2) break;
267
+
268
+ // Stop on repetitive patterns
269
+ if (hasRepetition(tokens)) {
270
+ console.log('Stopping decode due to repetition detected');
271
+ break;
272
+ }
273
  }
274
 
275
  return tokenizer.decode(tokens, true);
index.html CHANGED
@@ -207,8 +207,8 @@
207
  display: flex;
208
  flex-direction: column;
209
  justify-content: center;
210
- align-items: center;
211
- text-align: center;
212
  backdrop-filter: blur(10px);
213
  border: 1px solid rgba(255, 255, 255, 0.1);
214
  box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
@@ -404,7 +404,7 @@
404
  background: rgba(0, 0, 0, 0.9);
405
  border-radius: 0 0 12px 12px;
406
  min-height: 80px;
407
- text-align: center;
408
  backdrop-filter: blur(10px);
409
  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5);
410
  }
 
207
  display: flex;
208
  flex-direction: column;
209
  justify-content: center;
210
+ align-items: flex-start;
211
+ text-align: left;
212
  backdrop-filter: blur(10px);
213
  border: 1px solid rgba(255, 255, 255, 0.1);
214
  box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
 
404
  background: rgba(0, 0, 0, 0.9);
405
  border-radius: 0 0 12px 12px;
406
  min-height: 80px;
407
+ text-align: left;
408
  backdrop-filter: blur(10px);
409
  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5);
410
  }