/**
* Streaming ASR Demo - JavaScript Implementation with Web Workers
*
* Architecture:
* - Main thread: Audio capture, VAD, UI updates
* - Encoder Worker: Preprocessor + Encoder ONNX inference
* - Decoder Worker: Adapter + Decoder ONNX inference
*/
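/*
 * Message flow (as implemented below):
 *   Main thread -> encoder worker: {init, audio, segment_start, segment_end}
 *   Encoder worker -> main thread: {ready, error, status, progress, model_done,
 *     segment_start, segment_end, features}; features and segment markers are
 *     forwarded on to the decoder worker.
 *   Decoder worker -> main thread: {ready, error, status, progress, model_done,
 *     transcript, live_caption}
 */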
// =============================================================================
// Constants
// =============================================================================
const SAMPLE_RATE = 16000;
const VAD_CHUNK_SAMPLES = 160; // 10ms - optimal for TenVAD
const ASR_CHUNK_SAMPLES = 320; // 20ms - Moonshine frame size
const ENCODER_BATCH_SAMPLES = 5120; // 320ms - batch size for encoder
const PRE_BUFFER_CHUNKS = 25; // ~500ms at 20ms chunks - capture more audio before onset
const POST_BUFFER_CHUNKS = 5; // ~100ms at 20ms chunks
const MIN_SEGMENT_DURATION_MS = 2000; // Minimum 2 seconds before allowing segment end
const OFFSET_RAMP_START_MS = 6000; // Start ramping offset threshold at 6 seconds
const OFFSET_RAMP_END_MS = 8000; // Reach max offset threshold (1.0) at 8 seconds
const OFFSET_CHUNKS_REQUIRED = 10; // ~100ms of silence (10 x 10ms VAD chunks) needed to end segment
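// Sanity check on the timing constants above, at SAMPLE_RATE = 16000:
//   VAD_CHUNK_SAMPLES:     160 / 16000 = 10 ms
//   ASR_CHUNK_SAMPLES:     320 / 16000 = 20 ms
//   ENCODER_BATCH_SAMPLES: 5120 / 16000 = 320 ms (= 16 ASR chunks)
//   PRE_BUFFER_CHUNKS:     25 x 20 ms = 500 ms of pre-roll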
const MODEL_CONFIGS = {
sleeker: {
dim: 336,
dec_dim: 288,
depth: 6,
encoder_depth: 6,
n_past: 16,
n_future: 4,
nheads: 8,
head_dim: 36,
vocab_size: 32768
},
spindlier: {
dim: 620,
dec_dim: 512,
depth: 10,
encoder_depth: 10,
n_past: 16,
n_future: 4,
nheads: 8,
head_dim: 64,
vocab_size: 32768
}
};
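// Note: in both configs nheads x head_dim equals dec_dim (8 x 36 = 288,
// 8 x 64 = 512), so the decoder attention width appears to match the decoder
// dimension. Each config object is posted verbatim to both workers at init.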
// =============================================================================
// TenVAD - WebAssembly-based Voice Activity Detection
// =============================================================================
class TenVAD {
constructor(hopSize = 160, threshold = 0.5) {
this.hopSize = hopSize;
this.threshold = threshold;
this.module = null;
this.vadHandle = null;
this.audioPtr = null;
this.probPtr = null;
this.flagPtr = null;
this.ready = false;
}
async init(wasmUrl = './ten_vad.js') {
const wasmBinaryUrl = wasmUrl.replace('.js', '.wasm');
// Dynamic import of the ES module
const vadModule = await import(wasmUrl);
const createTenVadModule = vadModule.default;
this.module = await createTenVadModule({
locateFile: (path) => {
if (path.endsWith('.wasm')) {
return wasmBinaryUrl;
}
return path;
}
});
// Create VAD instance
const vadHandlePtr = this.module._malloc(4);
const result = this.module._ten_vad_create(vadHandlePtr, this.hopSize, this.threshold);
if (result !== 0) {
this.module._free(vadHandlePtr);
throw new Error(`Failed to create TenVAD instance: ${result}`);
}
this.vadHandle = this.module.HEAP32[vadHandlePtr / 4];
this.module._free(vadHandlePtr);
// Allocate buffers
this.audioPtr = this.module._malloc(this.hopSize * 2);
this.probPtr = this.module._malloc(4);
this.flagPtr = this.module._malloc(4);
this.ready = true;
}
process(audioChunkFloat32) {
if (!this.ready) return -1;
const int16Data = new Int16Array(this.hopSize);
for (let i = 0; i < this.hopSize && i < audioChunkFloat32.length; i++) {
int16Data[i] = Math.max(-32768, Math.min(32767, Math.round(audioChunkFloat32[i] * 32767)));
}
this.module.HEAP16.set(int16Data, this.audioPtr / 2);
this.module._ten_vad_process(this.vadHandle, this.audioPtr, this.hopSize, this.probPtr, this.flagPtr);
return this.module.HEAPF32[this.probPtr / 4];
}
destroy() {
if (!this.ready || !this.module) return;
this.ready = false; // Prevent further use
try {
if (this.audioPtr) {
this.module._free(this.audioPtr);
this.audioPtr = null;
}
if (this.probPtr) {
this.module._free(this.probPtr);
this.probPtr = null;
}
if (this.flagPtr) {
this.module._free(this.flagPtr);
this.flagPtr = null;
}
// Skip _ten_vad_destroy as it causes memory access errors
// The WASM memory will be cleaned up when the module is garbage collected
this.vadHandle = null;
} catch (e) {
console.warn('TenVAD cleanup error:', e);
}
this.module = null;
}
}
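// Illustrative standalone usage (not executed here; assumes ten_vad.js and
// ten_vad.wasm are served alongside this file):
//
//   const vad = new TenVAD(160, 0.5);
//   await vad.init('./ten_vad.js');
//   const prob = vad.process(tenMsFloat32Chunk); // 160 samples @ 16 kHz
//   vad.destroy();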
// Fallback simple energy-based VAD
class SimpleVAD {
constructor(sampleRate = 16000, frameSize = 160) {
this.frameSize = frameSize;
this.energyHistory = [];
this.historySize = 50;
this.noiseFloor = 0.001;
this.ready = true;
}
async init() {}
process(audioChunk) {
let energy = 0;
for (let i = 0; i < audioChunk.length; i++) {
energy += audioChunk[i] * audioChunk[i];
}
energy = Math.sqrt(energy / audioChunk.length);
this.energyHistory.push(energy);
if (this.energyHistory.length > this.historySize) {
this.energyHistory.shift();
}
if (this.energyHistory.length > 10) {
const sorted = [...this.energyHistory].sort((a, b) => a - b);
this.noiseFloor = sorted[Math.floor(sorted.length * 0.1)] || 0.001;
}
const snr = energy / (this.noiseFloor + 1e-10);
return 1 / (1 + Math.exp(-2 * (snr - 3)));
}
destroy() {}
}
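// The SNR-to-probability mapping above is a sigmoid centered at SNR = 3:
// prob = 1 / (1 + e^(-2*(snr - 3))). Worked values: snr 3 -> 0.50,
// snr 5 -> ~0.98, snr 1 -> ~0.02, so speech well above the 10th-percentile
// noise floor saturates toward 1.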
// =============================================================================
// Pipelined Streaming ASR with Web Workers
// =============================================================================
class PipelinedStreamingASR {
constructor(config) {
this.modelName = config.modelName || 'sleeker';
this.onnxUrl = config.onnxUrl || './models';
this.backendChoice = config.backend || 'wasm';
this.onsetThreshold = config.onsetThreshold || 0.5;
this.offsetThreshold = config.offsetThreshold || 0.3;
this.emaAlpha = config.emaAlpha || 0.3;
this.cfg = MODEL_CONFIGS[this.modelName];
// Workers
this.encoderWorker = null;
this.decoderWorker = null;
this.encoderReady = false;
this.decoderReady = false;
// VAD (runs on main thread for low latency)
this.vad = null;
// Audio capture
this.mediaStream = null;
this.audioContext = null;
this.sourceNode = null;
this.workletNode = null;
// State
this.running = false;
this.state = 'idle';
this.currentSegmentId = 0;
this.emaProb = 0;
this.onsetCounter = 0;
this.offsetCounter = 0;
this.segmentStartTime = 0;
// Buffers
this.vadBuffer = [];
this.asrBuffer = [];
this.preBuffer = [];
this.postBufferRemaining = 0;
this.encoderBatchBuffer = []; // Accumulate 320ms before sending to encoder
// Display state
this.vadHistory = [];
this.vadUpdateCounter = 0;
this.vadUpdateInterval = 5; // Update display every 5 VAD chunks (50ms)
this.segmentEvents = []; // Track segment start/end for visualization
this.vadHistoryStartTime = 0; // When the current history window started
// Callbacks
this.onVadUpdate = null;
this.onTranscript = null;
this.onLiveCaption = null;
this.onStatusUpdate = null;
this.onQueueUpdate = null;
this.onBackendUpdate = null;
// Backend info
this.backend = 'unknown';
}
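// Example config, matching the fields consumed above (all optional):
//   new PipelinedStreamingASR({
//     modelName: 'sleeker',    // key into MODEL_CONFIGS
//     onnxUrl: './models',     // base URL for the ONNX files
//     backend: 'wasm',         // or 'webgl' / 'webgpu'
//     onsetThreshold: 0.5,
//     offsetThreshold: 0.3,
//     emaAlpha: 0.3
//   });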
async loadModels(progressCallback, detailedProgressCallback) {
// Track overall progress
const totalModels = 7; // VAD, Preprocessor, Encoder, Tokenizer, Adapter, Decoder Init, Decoder Step
let completedModels = 0;
let currentModel = '';
let currentProgress = { loaded: 0, total: 0 };
const updateProgress = () => {
const overallPercent = (completedModels / totalModels) * 100;
detailedProgressCallback?.({
completedModels,
totalModels,
overallPercent,
currentModel,
currentProgress
});
};
// Initialize VAD
try {
currentModel = 'VAD';
progressCallback?.('Loading TenVAD...');
updateProgress();
this.vad = new TenVAD(VAD_CHUNK_SAMPLES, 0.5);
await this.vad.init('./ten_vad.js');
console.log('Using TenVAD');
completedModels++;
updateProgress();
} catch (e) {
console.warn('TenVAD failed, using SimpleVAD:', e.message);
this.vad = new SimpleVAD(SAMPLE_RATE, VAD_CHUNK_SAMPLES);
await this.vad.init();
completedModels++;
updateProgress();
}
// Initialize Encoder Worker with progress tracking
progressCallback?.('Loading encoder models...');
await this.initEncoderWorker((model, progress) => {
currentModel = model;
currentProgress = progress;
updateProgress();
}, () => {
completedModels++;
updateProgress();
});
// Initialize Decoder Worker with progress tracking
progressCallback?.('Loading decoder models...');
await this.initDecoderWorker((model, progress) => {
currentModel = model;
currentProgress = progress;
updateProgress();
}, () => {
completedModels++;
updateProgress();
});
progressCallback?.('Ready!');
}
initEncoderWorker(onProgress, onModelDone) {
return new Promise((resolve, reject) => {
this.encoderWorker = new Worker('./encoder_worker.js');
// Fail fast if the worker script itself cannot load or throws uncaught
this.encoderWorker.onerror = (e) => reject(new Error(e.message || 'Encoder worker failed to load'));
this.encoderWorker.onmessage = (e) => {
const { type } = e.data;
switch (type) {
case 'ready':
this.encoderReady = true;
resolve();
break;
case 'error':
reject(new Error(e.data.message));
break;
case 'status':
// Progress update from worker
break;
case 'progress':
onProgress?.(e.data.model, { loaded: e.data.loaded, total: e.data.total, cached: e.data.cached });
break;
case 'model_done':
onModelDone?.(e.data.model);
break;
case 'segment_start':
this.decoderWorker?.postMessage({ type: 'segment_start', data: { segmentId: e.data.segmentId } });
break;
case 'segment_end':
this.decoderWorker?.postMessage({ type: 'segment_end', data: { segmentId: e.data.segmentId } });
break;
case 'features':
// Forward features to decoder worker
this.decoderWorker?.postMessage({
type: 'features',
data: {
segmentId: e.data.segmentId,
features: e.data.features,
dims: e.data.dims
}
}, [e.data.features.buffer]);
break;
}
};
this.encoderWorker.postMessage({
type: 'init',
data: {
cfg: this.cfg,
onnxUrl: this.onnxUrl,
modelName: this.modelName,
backend: this.backendChoice
}
});
});
}
initDecoderWorker(onProgress, onModelDone) {
return new Promise((resolve, reject) => {
this.decoderWorker = new Worker('./decoder_worker.js');
// Fail fast if the worker script itself cannot load or throws uncaught
this.decoderWorker.onerror = (e) => reject(new Error(e.message || 'Decoder worker failed to load'));
this.decoderWorker.onmessage = (e) => {
const { type } = e.data;
switch (type) {
case 'ready':
this.decoderReady = true;
this.backend = e.data.backend || 'wasm';
this.onBackendUpdate?.(this.backend);
resolve();
break;
case 'error':
reject(new Error(e.data.message));
break;
case 'status':
break;
case 'progress':
onProgress?.(e.data.model, { loaded: e.data.loaded, total: e.data.total, cached: e.data.cached });
break;
case 'model_done':
onModelDone?.(e.data.model);
break;
case 'transcript':
this.onTranscript?.(e.data.text, e.data.segmentId);
break;
case 'live_caption':
this.onLiveCaption?.(e.data.text);
break;
}
};
this.decoderWorker.postMessage({
type: 'init',
data: {
cfg: this.cfg,
onnxUrl: this.onnxUrl,
modelName: this.modelName,
backend: this.backendChoice
}
});
});
}
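// Contract implied by the two init functions above (the workers ship as
// separate files, so this summarizes rather than implements them):
// encoder_worker.js consumes {init, audio, segment_start, segment_end} and
// emits {ready, error, status, progress, model_done, segment_start,
// segment_end, features}; decoder_worker.js consumes {init, features,
// segment_start, segment_end} and emits {ready, error, status, progress,
// model_done, transcript, live_caption}.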
async start() {
if (this.running) return;
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: SAMPLE_RATE,
channelCount: 1,
echoCancellation: false,
noiseSuppression: false,
autoGainControl: false
}
});
this.mediaStream = stream; // Keep a handle so stop() can release the mic
this.audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
// Check actual sample rate
console.log(`Requested sample rate: ${SAMPLE_RATE}, Actual: ${this.audioContext.sampleRate}`);
this.sourceNode = this.audioContext.createMediaStreamSource(stream);
// Use AudioWorklet for better performance
try {
await this.audioContext.audioWorklet.addModule('./audio_processor.js');
this.workletNode = new AudioWorkletNode(this.audioContext, 'audio-processor');
this.workletNode.port.onmessage = (e) => {
if (this.running) {
this.processAudioChunk(e.data.audio);
}
};
this.sourceNode.connect(this.workletNode);
this.workletNode.connect(this.audioContext.destination);
} catch (e) {
// Fallback to ScriptProcessor
console.warn('AudioWorklet not available, using ScriptProcessor:', e.message);
const bufferSize = 2048;
this.scriptNode = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
this.scriptNode.onaudioprocess = (e) => {
if (this.running) {
const inputData = e.inputBuffer.getChannelData(0);
this.processAudioChunk(new Float32Array(inputData));
}
};
this.sourceNode.connect(this.scriptNode);
this.scriptNode.connect(this.audioContext.destination);
}
this.running = true;
this.state = 'idle';
this.onsetCounter = 0;
this.offsetCounter = 0;
this.emaProb = 0;
this.onStatusUpdate?.('listening', 'Listening...');
}
stop() {
this.running = false;
if (this.workletNode) {
this.workletNode.disconnect();
this.workletNode = null;
}
if (this.scriptNode) {
this.scriptNode.disconnect();
this.scriptNode = null;
}
if (this.sourceNode) {
this.sourceNode.disconnect();
this.sourceNode = null;
}
if (this.mediaStream) {
// Stop the capture tracks so the browser releases the microphone
this.mediaStream.getTracks().forEach((track) => track.stop());
this.mediaStream = null;
}
if (this.audioContext) {
this.audioContext.close();
this.audioContext = null;
}
if (this.vad) {
this.vad.destroy();
}
if (this.encoderWorker) {
this.encoderWorker.terminate();
this.encoderWorker = null;
}
if (this.decoderWorker) {
this.decoderWorker.terminate();
this.decoderWorker = null;
}
this.onStatusUpdate?.('idle', 'Stopped');
}
processAudioChunk(audioData) {
// Accumulate for VAD (10ms chunks)
this.vadBuffer.push(...audioData);
// Accumulate for ASR (20ms chunks)
this.asrBuffer.push(...audioData);
// Process VAD chunks
while (this.vadBuffer.length >= VAD_CHUNK_SAMPLES) {
const vadChunk = new Float32Array(this.vadBuffer.splice(0, VAD_CHUNK_SAMPLES));
const prob = this.vad.process(vadChunk);
if (prob >= 0) {
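// Exponential moving average: ema = alpha * p + (1 - alpha) * ema, with
// alpha = emaAlpha (default 0.3), smoothing single-chunk VAD spikes
// before thresholding.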
this.emaProb = this.emaAlpha * prob + (1 - this.emaAlpha) * this.emaProb;
// Throttle display updates (every 50ms instead of 10ms)
this.vadUpdateCounter++;
if (this.vadUpdateCounter >= this.vadUpdateInterval) {
this.vadUpdateCounter = 0;
const now = Date.now();
// Initialize history start time
if (this.vadHistory.length === 0) {
this.vadHistoryStartTime = now;
}
this.vadHistory.push(this.emaProb);
if (this.vadHistory.length > 100) {
this.vadHistory.shift();
// Shift the start time by 50ms (one history entry)
this.vadHistoryStartTime += 50;
}
// Remove old segment events that are outside the history window
const historyDuration = this.vadHistory.length * 50; // ms
const historyStart = now - historyDuration;
this.segmentEvents = this.segmentEvents.filter(e => e.time >= historyStart);
this.onVadUpdate?.(this.emaProb, this.vadHistory, this.segmentEvents, this.vadHistoryStartTime);
}
this.updateSegmentState();
}
}
// Extract complete ASR chunks
while (this.asrBuffer.length >= ASR_CHUNK_SAMPLES) {
const chunkData = this.asrBuffer.splice(0, ASR_CHUNK_SAMPLES);
const chunk = new Float32Array(chunkData);
if (this.state === 'speech') {
this.sendAudioToEncoder(chunk);
} else {
this.preBuffer.push(chunk);
if (this.preBuffer.length > PRE_BUFFER_CHUNKS) {
this.preBuffer.shift();
}
if (this.postBufferRemaining > 0) {
this.sendAudioToEncoder(chunk);
this.postBufferRemaining--;
if (this.postBufferRemaining === 0) {
this.finalizeSegmentEnd();
}
}
}
}
}
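// Audio is batched before crossing the worker boundary: 20 ms ASR chunks
// accumulate until ENCODER_BATCH_SAMPLES, then the batch is posted with its
// buffer in the transfer list, so the Float32Array moves to the worker
// without a copy.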
sendAudioToEncoder(chunk, flush = false) {
if (!this.encoderWorker || !this.encoderReady) return;
// Accumulate chunks into batch buffer
this.encoderBatchBuffer.push(...chunk);
// Send when we have 320ms worth of audio, or on flush
if (this.encoderBatchBuffer.length >= ENCODER_BATCH_SAMPLES || flush) {
if (this.encoderBatchBuffer.length > 0) {
const batch = new Float32Array(this.encoderBatchBuffer);
this.encoderBatchBuffer = [];
this.encoderWorker.postMessage({
type: 'audio',
data: {
audio: batch,
segmentId: this.currentSegmentId
}
}, [batch.buffer]);
}
}
}
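// Segment state machine with hysteresis. Worked example of the offset ramp:
// with offsetThreshold = 0.3, at segmentDuration = 7000 ms the ramp progress
// is (7000 - 6000) / (8000 - 6000) = 0.5, so the effective threshold is
// 0.3 + 0.5 * (1.0 - 0.3) = 0.65; long segments get progressively easier to
// end, and past 8 s any VAD level counts as silence.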
updateSegmentState() {
if (this.state === 'idle') {
if (this.emaProb >= this.onsetThreshold) {
this.onsetCounter++;
if (this.onsetCounter >= 2) {
this.startSegment();
}
} else {
this.onsetCounter = 0;
}
} else if (this.state === 'speech') {
const segmentDuration = Date.now() - this.segmentStartTime;
// Calculate effective offset threshold with gradual ramp
// Before OFFSET_RAMP_START_MS: use normal offsetThreshold
// Between OFFSET_RAMP_START_MS and OFFSET_RAMP_END_MS: linearly ramp to 1.0
// After OFFSET_RAMP_END_MS: use 1.0 (any VAD level will trigger offset)
let effectiveOffsetThreshold = this.offsetThreshold;
if (segmentDuration >= OFFSET_RAMP_START_MS) {
const rampProgress = Math.min(1.0,
(segmentDuration - OFFSET_RAMP_START_MS) / (OFFSET_RAMP_END_MS - OFFSET_RAMP_START_MS)
);
// Lerp from offsetThreshold to 1.0
effectiveOffsetThreshold = this.offsetThreshold + rampProgress * (1.0 - this.offsetThreshold);
}
// Check if minimum segment duration has passed
const minDurationMet = segmentDuration >= MIN_SEGMENT_DURATION_MS;
if (this.emaProb < effectiveOffsetThreshold) {
this.offsetCounter++;
// Only end segment if minimum duration met AND enough silence chunks
if (minDurationMet && this.offsetCounter >= OFFSET_CHUNKS_REQUIRED) {
this.endSegment();
}
} else {
this.offsetCounter = 0;
}
}
}
startSegment() {
// If previous segment wasn't fully finalized, finalize it now
if (this.postBufferRemaining > 0) {
this.finalizeSegmentEnd();
this.postBufferRemaining = 0;
}
this.currentSegmentId++;
this.state = 'speech';
this.onsetCounter = 0;
this.offsetCounter = 0;
this.segmentStartTime = Date.now();
this.encoderBatchBuffer = []; // Reset batch buffer for new segment
// Record segment start for visualization
this.segmentEvents.push({ type: 'start', time: this.segmentStartTime });
// Tell encoder to start new segment
this.encoderWorker?.postMessage({
type: 'segment_start',
data: { segmentId: this.currentSegmentId }
});
// Drain pre-buffer
while (this.preBuffer.length > 0) {
const chunk = this.preBuffer.shift();
this.sendAudioToEncoder(chunk);
}
this.onStatusUpdate?.('recording', 'Recording...');
}
endSegment() {
this.state = 'idle';
this.offsetCounter = 0;
this.postBufferRemaining = POST_BUFFER_CHUNKS;
// Record segment end for visualization
this.segmentEvents.push({ type: 'end', time: Date.now() });
// Degenerate case: only fires if POST_BUFFER_CHUNKS is configured to 0,
// in which case there is no tail to collect and we finalize immediately.
if (this.postBufferRemaining === 0) {
this.finalizeSegmentEnd();
}
this.onStatusUpdate?.('listening', 'Listening...');
}
finalizeSegmentEnd() {
// Process remaining complete chunks
while (this.asrBuffer.length >= ASR_CHUNK_SAMPLES) {
const chunkData = this.asrBuffer.splice(0, ASR_CHUNK_SAMPLES);
const chunk = new Float32Array(chunkData);
this.sendAudioToEncoder(chunk);
}
// Pad and send partial chunk
if (this.asrBuffer.length > 0) {
const padded = new Float32Array(ASR_CHUNK_SAMPLES);
padded.set(this.asrBuffer);
this.sendAudioToEncoder(padded);
}
this.asrBuffer = [];
// Flush any remaining audio in the batch buffer
this.sendAudioToEncoder(new Float32Array(0), true);
// Signal segment end
this.encoderWorker?.postMessage({
type: 'segment_end',
data: { segmentId: this.currentSegmentId }
});
}
}
// =============================================================================
// UI Controller
// =============================================================================
class ASRDemoUI {
constructor() {
this.asr = null;
this.vadCanvas = null;
this.vadCtx = null;
this.initElements();
this.initCanvas();
this.bindEvents();
}
initElements() {
this.loadingOverlay = document.getElementById('loadingOverlay');
this.loadingText = document.getElementById('loadingText');
this.loadingProgressFill = document.getElementById('loadingProgressFill');
this.loadingProgressText = document.getElementById('loadingProgressText');
this.loadingDetails = document.getElementById('loadingDetails');
this.errorMessage = document.getElementById('errorMessage');
this.statusDot = document.getElementById('statusDot');
this.statusText = document.getElementById('statusText');
this.startBtn = document.getElementById('startBtn');
this.stopBtn = document.getElementById('stopBtn');
this.vadBarFill = document.getElementById('vadBarFill');
this.vadValue = document.getElementById('vadValue');
this.audioQueueSize = document.getElementById('audioQueueSize');
this.featuresQueueSize = document.getElementById('featuresQueueSize');
this.droppedChunksEl = document.getElementById('droppedChunks');
this.transcriptsList = document.getElementById('transcriptsList');
this.liveCaption = document.getElementById('liveCaption');
this.liveCaptionText = document.getElementById('liveCaptionText');
this.liveCaptionMobile = document.getElementById('liveCaptionMobile');
this.liveCaptionTextMobile = document.getElementById('liveCaptionTextMobile');
this.backendBadge = document.getElementById('backendBadge');
this.modelSelect = document.getElementById('modelSelect');
this.backendSelect = document.getElementById('backendSelect');
this.onnxUrl = document.getElementById('onnxUrl');
this.onsetThreshold = document.getElementById('onsetThreshold');
this.offsetThreshold = document.getElementById('offsetThreshold');
}
initCanvas() {
this.vadCanvas = document.getElementById('vadCanvas');
this.vadCtx = this.vadCanvas.getContext('2d');
const rect = this.vadCanvas.getBoundingClientRect();
this.vadCanvas.width = rect.width * window.devicePixelRatio;
this.vadCanvas.height = rect.height * window.devicePixelRatio;
this.vadCtx.scale(window.devicePixelRatio, window.devicePixelRatio);
}
bindEvents() {
this.startBtn.addEventListener('click', () => this.handleStart());
this.stopBtn.addEventListener('click', () => this.handleStop());
// Mobile: collapsible sections
const configSection = document.querySelector('.config-section');
const vadSection = document.querySelector('.vad-section');
configSection?.querySelector('h3')?.addEventListener('click', () => {
configSection.classList.toggle('collapsed');
});
vadSection?.querySelector('h3')?.addEventListener('click', () => {
vadSection.classList.toggle('collapsed');
// Re-init canvas on expand in case it needs redrawing
if (!vadSection.classList.contains('collapsed')) {
this.initCanvas();
}
});
// Start with config collapsed on mobile
if (window.innerWidth <= 768) {
configSection?.classList.add('collapsed');
}
}
async handleStart() {
try {
this.showLoading('Initializing...');
const config = {
modelName: this.modelSelect.value,
onnxUrl: this.onnxUrl.value || './models',
backend: this.backendSelect.value,
onsetThreshold: parseFloat(this.onsetThreshold.value),
offsetThreshold: parseFloat(this.offsetThreshold.value)
};
this.asr = new PipelinedStreamingASR(config);
this.asr.onVadUpdate = (prob, history, segmentEvents, historyStartTime) => this.updateVadDisplay(prob, history, segmentEvents, historyStartTime);
this.asr.onTranscript = (text, segmentId) => this.addTranscript(text, segmentId);
this.asr.onLiveCaption = (text) => this.updateLiveCaption(text);
this.asr.onStatusUpdate = (status, text) => this.updateStatus(status, text);
this.asr.onBackendUpdate = (backend) => this.updateBackendBadge(backend);
await this.asr.loadModels(
(text) => {
this.loadingText.textContent = text;
},
(progress) => {
this.updateLoadingProgress(progress);
}
);
await this.asr.start();
this.hideLoading();
this.startBtn.disabled = true;
this.stopBtn.disabled = false;
this.disableConfig(true);
} catch (error) {
console.error('Start error:', error);
this.hideLoading();
this.showError(`Failed to start: ${error.message}`);
}
}
handleStop() {
if (this.asr) {
this.asr.stop();
this.asr = null;
}
this.startBtn.disabled = false;
this.stopBtn.disabled = true;
this.disableConfig(false);
this.updateStatus('idle', 'Ready');
this.backendBadge.classList.remove('visible');
}
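// Canvas time mapping: the history window spans history.length x 50 ms
// (5 s when full at 100 entries). A segment event 2 s old in a full window
// lands at x = (5000 - 2000) / 5000 * width = 0.6 * width, matching the
// eventPos arithmetic below.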
updateVadDisplay(prob, history, segmentEvents = [], historyStartTime = 0) {
this.vadBarFill.style.width = `${prob * 100}%`;
this.vadValue.textContent = `${Math.round(prob * 100)}%`;
const ctx = this.vadCtx;
const rect = this.vadCanvas.getBoundingClientRect();
const width = rect.width;
const height = rect.height;
// Leave space for x-axis labels
const graphHeight = height - 20;
const graphTop = 0;
ctx.fillStyle = '#0f0f23';
ctx.fillRect(0, 0, width, height);
if (history.length < 2) return;
const historyDuration = history.length * 50; // ms (each entry is 50ms)
const now = Date.now();
// Draw x-axis ticks (every 0.1 seconds = 100ms)
ctx.strokeStyle = '#333';
ctx.fillStyle = '#666';
ctx.font = '10px monospace';
ctx.textAlign = 'center';
ctx.lineWidth = 1;
for (let t = 0; t <= historyDuration; t += 100) {
const x = (t / historyDuration) * width;
// Draw tick mark
ctx.beginPath();
ctx.moveTo(x, graphHeight);
ctx.lineTo(x, graphHeight + 5);
ctx.stroke();
// Draw vertical grid line (brighter for the major half-second ticks)
if (t % 500 === 0) {
ctx.strokeStyle = '#444';
} else {
ctx.strokeStyle = '#222';
}
ctx.beginPath();
ctx.moveTo(x, graphTop);
ctx.lineTo(x, graphHeight);
ctx.stroke();
ctx.strokeStyle = '#333';
// Draw label every 0.5 seconds
if (t % 500 === 0) {
const seconds = (t / 1000).toFixed(1);
ctx.fillText(seconds + 's', x, height - 2);
}
}
// Draw segment events (start = green line, end = red line)
for (const event of segmentEvents) {
const eventAge = now - event.time; // ms ago
const eventPos = historyDuration - eventAge; // position in history
if (eventPos < 0 || eventPos > historyDuration) continue;
const x = (eventPos / historyDuration) * width;
ctx.lineWidth = 2;
if (event.type === 'start') {
ctx.strokeStyle = '#00ff88'; // Green for start
} else {
ctx.strokeStyle = '#ff4444'; // Red for end
}
ctx.beginPath();
ctx.moveTo(x, graphTop);
ctx.lineTo(x, graphHeight);
ctx.stroke();
}
// Draw threshold lines: onset in translucent red, offset in translucent green
const onsetY = graphHeight * (1 - parseFloat(this.onsetThreshold.value));
const offsetY = graphHeight * (1 - parseFloat(this.offsetThreshold.value));
ctx.strokeStyle = '#ff444466';
ctx.lineWidth = 1;
ctx.beginPath();
ctx.moveTo(0, onsetY);
ctx.lineTo(width, onsetY);
ctx.stroke();
ctx.strokeStyle = '#00ff8866';
ctx.beginPath();
ctx.moveTo(0, offsetY);
ctx.lineTo(width, offsetY);
ctx.stroke();
// Draw VAD probability line
ctx.strokeStyle = '#00d4ff';
ctx.lineWidth = 2;
ctx.beginPath();
for (let i = 0; i < history.length; i++) {
const x = (i / (history.length - 1)) * width;
const y = graphHeight * (1 - history[i]);
if (i === 0) {
ctx.moveTo(x, y);
} else {
ctx.lineTo(x, y);
}
}
ctx.stroke();
}
addTranscript(text, segmentId) {
if (!text || !text.trim()) return;
const item = document.createElement('div');
item.className = 'transcript-item';
item.innerHTML = `
#${segmentId}
${this.escapeHtml(text)}
`;
this.transcriptsList.appendChild(item);
this.transcriptsList.scrollTop = this.transcriptsList.scrollHeight;
}
updateLiveCaption(text) {
if (text) {
// Desktop
this.liveCaptionText.textContent = text;
this.liveCaptionText.classList.remove('placeholder');
this.liveCaption.classList.add('active');
// Mobile
this.liveCaptionTextMobile.textContent = text;
this.liveCaptionTextMobile.classList.remove('placeholder');
this.liveCaptionMobile.classList.add('active');
} else {
// Desktop
this.liveCaptionText.textContent = 'Waiting for speech...';
this.liveCaptionText.classList.add('placeholder');
this.liveCaption.classList.remove('active');
// Mobile
this.liveCaptionTextMobile.textContent = 'Waiting for speech...';
this.liveCaptionTextMobile.classList.add('placeholder');
this.liveCaptionMobile.classList.remove('active');
}
}
updateStatus(status, text) {
this.statusDot.className = 'status-dot ' + status;
this.statusText.textContent = text;
}
updateBackendBadge(backend) {
const labels = { 'wasm': 'WASM', 'webgl': 'WebGL', 'webgpu': 'WebGPU' };
this.backendBadge.textContent = labels[backend] || backend;
this.backendBadge.className = 'backend-badge visible ' + backend;
}
showLoading(text) {
this.errorMessage.classList.remove('visible'); // Clear any stale error from a previous run
this.loadingText.textContent = text;
this.loadingProgressFill.style.width = '0%';
this.loadingProgressText.textContent = '0 / 7 models';
this.loadingDetails.textContent = '';
this.loadingOverlay.classList.remove('hidden');
}
hideLoading() {
this.loadingOverlay.classList.add('hidden');
}
updateLoadingProgress(progress) {
const { completedModels, totalModels, currentModel, currentProgress } = progress;
// Update overall progress bar
const overallPercent = (completedModels / totalModels) * 100;
this.loadingProgressFill.style.width = `${overallPercent}%`;
this.loadingProgressText.textContent = `${completedModels} / ${totalModels} models`;
// Update details with current model and byte progress
if (currentModel && currentProgress.total > 0) {
if (currentProgress.cached) {
const sizeMB = (currentProgress.total / (1024 * 1024)).toFixed(1);
this.loadingDetails.textContent = `${currentModel}: ${sizeMB} MB (cached)`;
} else {
const loadedMB = (currentProgress.loaded / (1024 * 1024)).toFixed(1);
const totalMB = (currentProgress.total / (1024 * 1024)).toFixed(1);
const percent = Math.round((currentProgress.loaded / currentProgress.total) * 100);
this.loadingDetails.textContent = `${currentModel}: ${loadedMB} / ${totalMB} MB (${percent}%)`;
}
} else if (currentModel) {
this.loadingDetails.textContent = `Loading ${currentModel}...`;
}
}
showError(message) {
this.errorMessage.textContent = message;
this.errorMessage.classList.add('visible');
}
disableConfig(disabled) {
this.modelSelect.disabled = disabled;
this.backendSelect.disabled = disabled;
this.onnxUrl.disabled = disabled;
this.onsetThreshold.disabled = disabled;
this.offsetThreshold.disabled = disabled;
}
escapeHtml(text) {
const div = document.createElement('div');
div.textContent = text;
return div.innerHTML;
}
}
// Initialize on page load
document.addEventListener('DOMContentLoaded', () => {
window.asrDemo = new ASRDemoUI();
});