/**
 * Streaming ASR Demo - JavaScript Implementation with Web Workers
 *
 * Architecture:
 * - Main thread:    Audio capture, VAD, UI updates
 * - Encoder Worker: Preprocessor + Encoder ONNX inference
 * - Decoder Worker: Adapter + Decoder ONNX inference
 */

// =============================================================================
// Constants
// =============================================================================

const SAMPLE_RATE = 16000;
const VAD_CHUNK_SAMPLES = 160;        // 10ms - optimal for TenVAD
const ASR_CHUNK_SAMPLES = 320;        // 20ms - Moonshine frame size
const ENCODER_BATCH_SAMPLES = 5120;   // 320ms - batch size for encoder
const PRE_BUFFER_CHUNKS = 25;         // ~500ms at 20ms chunks - capture more audio before onset
const POST_BUFFER_CHUNKS = 5;         // ~100ms at 20ms chunks
const MIN_SEGMENT_DURATION_MS = 2000; // Minimum 2 seconds before allowing segment end
const OFFSET_RAMP_START_MS = 6000;    // Start ramping offset threshold at 6 seconds
const OFFSET_RAMP_END_MS = 8000;      // Reach max offset threshold (1.0) at 8 seconds
const OFFSET_CHUNKS_REQUIRED = 10;    // ~100ms of silence needed to end segment
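// The offset ramp makes long segments progressively easier to end: between
// OFFSET_RAMP_START_MS and OFFSET_RAMP_END_MS the effective offset threshold
// is linearly interpolated from the configured offsetThreshold up to 1.0
// (see updateSegmentState below). Worked through with the defaults above and
// an offsetThreshold of 0.3:
//
//   segmentDuration = 7000ms
//   rampProgress    = (7000 - 6000) / (8000 - 6000) = 0.5
//   effective       = 0.3 + 0.5 * (1.0 - 0.3)       = 0.65
//
// so 7s into a segment, any smoothed VAD probability below 0.65 counts as
// silence, and from 8s on the threshold is 1.0, which nearly any level meets.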
const MODEL_CONFIGS = {
    sleeker: {
        dim: 336, dec_dim: 288, depth: 6, encoder_depth: 6,
        n_past: 16, n_future: 4, nheads: 8, head_dim: 36, vocab_size: 32768
    },
    spindlier: {
        dim: 620, dec_dim: 512, depth: 10, encoder_depth: 10,
        n_past: 16, n_future: 4, nheads: 8, head_dim: 64, vocab_size: 32768
    }
};

// =============================================================================
// TenVAD - WebAssembly-based Voice Activity Detection
// =============================================================================

class TenVAD {
    constructor(hopSize = 160, threshold = 0.5) {
        this.hopSize = hopSize;
        this.threshold = threshold;
        this.module = null;
        this.vadHandle = null;
        this.audioPtr = null;
        this.probPtr = null;
        this.flagPtr = null;
        this.ready = false;
    }

    async init(wasmUrl = './ten_vad.js') {
        const wasmBinaryUrl = wasmUrl.replace('.js', '.wasm');

        // Dynamic import of the ES module
        const vadModule = await import(wasmUrl);
        const createTenVadModule = vadModule.default;

        this.module = await createTenVadModule({
            locateFile: (path) => {
                if (path.endsWith('.wasm')) {
                    return wasmBinaryUrl;
                }
                return path;
            }
        });

        // Create VAD instance
        const vadHandlePtr = this.module._malloc(4);
        const result = this.module._ten_vad_create(vadHandlePtr, this.hopSize, this.threshold);
        if (result !== 0) {
            this.module._free(vadHandlePtr);
            throw new Error(`Failed to create TenVAD instance: ${result}`);
        }
        this.vadHandle = this.module.HEAP32[vadHandlePtr / 4];
        this.module._free(vadHandlePtr);

        // Allocate reusable buffers: int16 audio in, float prob / int flag out
        this.audioPtr = this.module._malloc(this.hopSize * 2);
        this.probPtr = this.module._malloc(4);
        this.flagPtr = this.module._malloc(4);

        this.ready = true;
    }

    process(audioChunkFloat32) {
        if (!this.ready) return -1;

        // Convert float32 samples in [-1, 1] to int16 for the WASM API
        const int16Data = new Int16Array(this.hopSize);
        for (let i = 0; i < this.hopSize && i < audioChunkFloat32.length; i++) {
            int16Data[i] = Math.max(-32768, Math.min(32767, Math.round(audioChunkFloat32[i] * 32767)));
        }
        this.module.HEAP16.set(int16Data, this.audioPtr / 2);

        this.module._ten_vad_process(this.vadHandle, this.audioPtr, this.hopSize, this.probPtr, this.flagPtr);
        return this.module.HEAPF32[this.probPtr / 4];
    }

    destroy() {
        if (!this.ready || !this.module) return;
        this.ready = false; // Prevent further use
        try {
            if (this.audioPtr) {
                this.module._free(this.audioPtr);
                this.audioPtr = null;
            }
            if (this.probPtr) {
                this.module._free(this.probPtr);
                this.probPtr = null;
            }
            if (this.flagPtr) {
                this.module._free(this.flagPtr);
                this.flagPtr = null;
            }
            // Skip _ten_vad_destroy as it causes memory access errors.
            // The WASM memory will be cleaned up when the module is garbage collected.
            this.vadHandle = null;
        } catch (e) {
            console.warn('TenVAD cleanup error:', e);
        }
        this.module = null;
    }
}
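// Minimal standalone usage sketch for TenVAD, assuming ./ten_vad.js and
// ./ten_vad.wasm are served alongside this file (`frames` here is a
// hypothetical iterable of Float32Array chunks):
//
//   const vad = new TenVAD(160, 0.5);        // 10ms hop at 16kHz
//   await vad.init('./ten_vad.js');
//   for (const frame of frames) {            // each frame: 160 float samples
//       const prob = vad.process(frame);     // speech probability in [0, 1]
//       console.log(prob.toFixed(2));
//   }
//   vad.destroy();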
// Fallback simple energy-based VAD
class SimpleVAD {
    constructor(sampleRate = 16000, frameSize = 160) {
        this.frameSize = frameSize;
        this.energyHistory = [];
        this.historySize = 50;
        this.noiseFloor = 0.001;
        this.ready = true;
    }

    async init() {}

    process(audioChunk) {
        // RMS energy of the chunk
        let energy = 0;
        for (let i = 0; i < audioChunk.length; i++) {
            energy += audioChunk[i] * audioChunk[i];
        }
        energy = Math.sqrt(energy / audioChunk.length);

        this.energyHistory.push(energy);
        if (this.energyHistory.length > this.historySize) {
            this.energyHistory.shift();
        }

        // Track the noise floor as the 10th percentile of recent energies
        if (this.energyHistory.length > 10) {
            const sorted = [...this.energyHistory].sort((a, b) => a - b);
            this.noiseFloor = sorted[Math.floor(sorted.length * 0.1)] || 0.001;
        }

        // Map SNR through a sigmoid to a pseudo-probability
        const snr = energy / (this.noiseFloor + 1e-10);
        return 1 / (1 + Math.exp(-2 * (snr - 3)));
    }

    destroy() {}
}
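// The sigmoid above centers at an SNR of 3 with slope 2, so the fallback VAD
// outputs (rounded):
//
//   SNR = 1 (near noise floor)  ->  1 / (1 + e^4)   ~= 0.018
//   SNR = 3 (transition point)  ->  0.5
//   SNR = 5 (clear speech)      ->  1 / (1 + e^-4)  ~= 0.982
//
// i.e. it behaves as a soft threshold on energy relative to the noise floor.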
// =============================================================================
// Pipelined Streaming ASR with Web Workers
// =============================================================================

class PipelinedStreamingASR {
    constructor(config) {
        this.modelName = config.modelName || 'sleeker';
        this.onnxUrl = config.onnxUrl || './models';
        this.backendChoice = config.backend || 'wasm';
        this.onsetThreshold = config.onsetThreshold || 0.5;
        this.offsetThreshold = config.offsetThreshold || 0.3;
        this.emaAlpha = config.emaAlpha || 0.3;
        this.cfg = MODEL_CONFIGS[this.modelName];

        // Workers
        this.encoderWorker = null;
        this.decoderWorker = null;
        this.encoderReady = false;
        this.decoderReady = false;

        // VAD (runs on main thread for low latency)
        this.vad = null;

        // Audio capture
        this.audioContext = null;
        this.sourceNode = null;
        this.workletNode = null;
        this.scriptNode = null;
        this.mediaStream = null;

        // State
        this.running = false;
        this.state = 'idle';
        this.currentSegmentId = 0;
        this.emaProb = 0;
        this.onsetCounter = 0;
        this.offsetCounter = 0;
        this.segmentStartTime = 0;

        // Buffers
        this.vadBuffer = [];
        this.asrBuffer = [];
        this.preBuffer = [];
        this.postBufferRemaining = 0;
        this.encoderBatchBuffer = []; // Accumulate 320ms before sending to encoder

        // Display state
        this.vadHistory = [];
        this.vadUpdateCounter = 0;
        this.vadUpdateInterval = 5;   // Update display every 5 VAD chunks (50ms)
        this.segmentEvents = [];      // Track segment start/end for visualization
        this.vadHistoryStartTime = 0; // When the current history window started

        // Callbacks
        this.onVadUpdate = null;
        this.onTranscript = null;
        this.onLiveCaption = null;
        this.onStatusUpdate = null;
        this.onQueueUpdate = null;
        this.onBackendUpdate = null;

        // Backend info
        this.backend = 'unknown';
    }

    async loadModels(progressCallback, detailedProgressCallback) {
        // Track overall progress
        const totalModels = 7; // VAD, Preprocessor, Encoder, Tokenizer, Adapter, Decoder Init, Decoder Step
        let completedModels = 0;
        let currentModel = '';
        let currentProgress = { loaded: 0, total: 0 };

        const updateProgress = () => {
            const overallPercent = (completedModels / totalModels) * 100;
            detailedProgressCallback?.({
                completedModels, totalModels, overallPercent, currentModel, currentProgress
            });
        };

        // Initialize VAD
        try {
            currentModel = 'VAD';
            progressCallback?.('Loading TenVAD...');
            updateProgress();
            this.vad = new TenVAD(VAD_CHUNK_SAMPLES, 0.5);
            await this.vad.init('./ten_vad.js');
            console.log('Using TenVAD');
            completedModels++;
            updateProgress();
        } catch (e) {
            console.warn('TenVAD failed, using SimpleVAD:', e.message);
            this.vad = new SimpleVAD(SAMPLE_RATE, VAD_CHUNK_SAMPLES);
            await this.vad.init();
            completedModels++;
            updateProgress();
        }

        // Initialize Encoder Worker with progress tracking
        progressCallback?.('Loading encoder models...');
        await this.initEncoderWorker(
            (model, progress) => {
                currentModel = model;
                currentProgress = progress;
                updateProgress();
            },
            () => {
                completedModels++;
                updateProgress();
            }
        );

        // Initialize Decoder Worker with progress tracking
        progressCallback?.('Loading decoder models...');
        await this.initDecoderWorker(
            (model, progress) => {
                currentModel = model;
                currentProgress = progress;
                updateProgress();
            },
            () => {
                completedModels++;
                updateProgress();
            }
        );

        progressCallback?.('Ready!');
    }

    initEncoderWorker(onProgress, onModelDone) {
        return new Promise((resolve, reject) => {
            this.encoderWorker = new Worker('./encoder_worker.js');

            this.encoderWorker.onmessage = (e) => {
                const { type } = e.data;
                switch (type) {
                    case 'ready':
                        this.encoderReady = true;
                        resolve();
                        break;
                    case 'error':
                        reject(new Error(e.data.message));
                        break;
                    case 'status':
                        // Progress update from worker
                        break;
                    case 'progress':
                        onProgress?.(e.data.model, {
                            loaded: e.data.loaded, total: e.data.total, cached: e.data.cached
                        });
                        break;
                    case 'model_done':
                        onModelDone?.(e.data.model);
                        break;
                    case 'segment_start':
                        this.decoderWorker?.postMessage({
                            type: 'segment_start',
                            data: { segmentId: e.data.segmentId }
                        });
                        break;
                    case 'segment_end':
                        this.decoderWorker?.postMessage({
                            type: 'segment_end',
                            data: { segmentId: e.data.segmentId }
                        });
                        break;
                    case 'features':
                        // Forward features to decoder worker, transferring the buffer
                        this.decoderWorker?.postMessage({
                            type: 'features',
                            data: {
                                segmentId: e.data.segmentId,
                                features: e.data.features,
                                dims: e.data.dims
                            }
                        }, [e.data.features.buffer]);
                        break;
                }
            };

            this.encoderWorker.postMessage({
                type: 'init',
                data: {
                    cfg: this.cfg,
                    onnxUrl: this.onnxUrl,
                    modelName: this.modelName,
                    backend: this.backendChoice
                }
            });
        });
    }

    initDecoderWorker(onProgress, onModelDone) {
        return new Promise((resolve, reject) => {
            this.decoderWorker = new Worker('./decoder_worker.js');

            this.decoderWorker.onmessage = (e) => {
                const { type } = e.data;
                switch (type) {
                    case 'ready':
                        this.decoderReady = true;
                        this.backend = e.data.backend || 'wasm';
                        this.onBackendUpdate?.(this.backend);
                        resolve();
                        break;
                    case 'error':
                        reject(new Error(e.data.message));
                        break;
                    case 'status':
                        break;
                    case 'progress':
                        onProgress?.(e.data.model, {
                            loaded: e.data.loaded, total: e.data.total, cached: e.data.cached
                        });
                        break;
                    case 'model_done':
                        onModelDone?.(e.data.model);
                        break;
                    case 'transcript':
                        this.onTranscript?.(e.data.text, e.data.segmentId);
                        break;
                    case 'live_caption':
                        this.onLiveCaption?.(e.data.text);
                        break;
                }
            };

            this.decoderWorker.postMessage({
                type: 'init',
                data: {
                    cfg: this.cfg,
                    onnxUrl: this.onnxUrl,
                    modelName: this.modelName,
                    backend: this.backendChoice
                }
            });
        });
    }
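    // The worker files are not part of this listing. Based on the message
    // protocol above, a minimal encoder_worker.js skeleton would look roughly
    // like the sketch below; the ONNX session setup is elided and the
    // `runEncoder` helper is hypothetical:
    //
    //   self.onmessage = async (e) => {
    //       const { type, data } = e.data;
    //       switch (type) {
    //           case 'init':
    //               // ... load preprocessor + encoder ONNX sessions ...
    //               self.postMessage({ type: 'ready' });
    //               break;
    //           case 'segment_start':
    //           case 'segment_end':
    //               // Echo segment boundaries so the main thread can relay them
    //               self.postMessage({ type, segmentId: data.segmentId });
    //               break;
    //           case 'audio': {
    //               const { features, dims } = await runEncoder(data.audio);
    //               self.postMessage(
    //                   { type: 'features', segmentId: data.segmentId, features, dims },
    //                   [features.buffer]);
    //               break;
    //           }
    //       }
    //   };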
    async start() {
        if (this.running) return;

        const stream = await navigator.mediaDevices.getUserMedia({
            audio: {
                sampleRate: SAMPLE_RATE,
                channelCount: 1,
                echoCancellation: false,
                noiseSuppression: false,
                autoGainControl: false
            }
        });
        this.mediaStream = stream; // Keep a reference so stop() can release the microphone

        this.audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });

        // Check actual sample rate
        console.log(`Requested sample rate: ${SAMPLE_RATE}, Actual: ${this.audioContext.sampleRate}`);

        this.sourceNode = this.audioContext.createMediaStreamSource(stream);

        // Use AudioWorklet for better performance
        try {
            await this.audioContext.audioWorklet.addModule('./audio_processor.js');
            this.workletNode = new AudioWorkletNode(this.audioContext, 'audio-processor');
            this.workletNode.port.onmessage = (e) => {
                if (this.running) {
                    this.processAudioChunk(e.data.audio);
                }
            };
            this.sourceNode.connect(this.workletNode);
            this.workletNode.connect(this.audioContext.destination);
        } catch (e) {
            // Fallback to ScriptProcessor
            console.warn('AudioWorklet not available, using ScriptProcessor');
            const bufferSize = 2048;
            this.scriptNode = this.audioContext.createScriptProcessor(bufferSize, 1, 1);
            this.scriptNode.onaudioprocess = (e) => {
                if (this.running) {
                    const inputData = e.inputBuffer.getChannelData(0);
                    this.processAudioChunk(new Float32Array(inputData));
                }
            };
            this.sourceNode.connect(this.scriptNode);
            this.scriptNode.connect(this.audioContext.destination);
        }

        this.running = true;
        this.state = 'idle';
        this.onsetCounter = 0;
        this.offsetCounter = 0;
        this.emaProb = 0;
        this.onStatusUpdate?.('listening', 'Listening...');
    }

    stop() {
        this.running = false;

        if (this.workletNode) {
            this.workletNode.disconnect();
            this.workletNode = null;
        }
        if (this.scriptNode) {
            this.scriptNode.disconnect();
            this.scriptNode = null;
        }
        if (this.sourceNode) {
            this.sourceNode.disconnect();
            this.sourceNode = null;
        }
        if (this.mediaStream) {
            // Stop capture tracks; otherwise the browser's mic indicator stays on
            this.mediaStream.getTracks().forEach((track) => track.stop());
            this.mediaStream = null;
        }
        if (this.audioContext) {
            this.audioContext.close();
            this.audioContext = null;
        }
        if (this.vad) {
            this.vad.destroy();
        }
        if (this.encoderWorker) {
            this.encoderWorker.terminate();
            this.encoderWorker = null;
        }
        if (this.decoderWorker) {
            this.decoderWorker.terminate();
            this.decoderWorker = null;
        }

        this.onStatusUpdate?.('idle', 'Stopped');
    }

    processAudioChunk(audioData) {
        // Accumulate for VAD (10ms chunks)
        this.vadBuffer.push(...audioData);
        // Accumulate for ASR (20ms chunks)
        this.asrBuffer.push(...audioData);

        // Process VAD chunks
        while (this.vadBuffer.length >= VAD_CHUNK_SAMPLES) {
            const vadChunk = new Float32Array(this.vadBuffer.splice(0, VAD_CHUNK_SAMPLES));
            const prob = this.vad.process(vadChunk);
            if (prob >= 0) {
                // Exponential moving average: ema = alpha * prob + (1 - alpha) * ema
                this.emaProb = this.emaAlpha * prob + (1 - this.emaAlpha) * this.emaProb;

                // Throttle display updates (every 50ms instead of 10ms)
                this.vadUpdateCounter++;
                if (this.vadUpdateCounter >= this.vadUpdateInterval) {
                    this.vadUpdateCounter = 0;
                    const now = Date.now();

                    // Initialize history start time
                    if (this.vadHistory.length === 0) {
                        this.vadHistoryStartTime = now;
                    }

                    this.vadHistory.push(this.emaProb);
                    if (this.vadHistory.length > 100) {
                        this.vadHistory.shift();
                        // Shift the start time by 50ms (one history entry)
                        this.vadHistoryStartTime += 50;
                    }

                    // Remove old segment events that are outside the history window
                    const historyDuration = this.vadHistory.length * 50; // ms
                    const historyStart = now - historyDuration;
                    this.segmentEvents = this.segmentEvents.filter(e => e.time >= historyStart);

                    this.onVadUpdate?.(this.emaProb, this.vadHistory, this.segmentEvents, this.vadHistoryStartTime);
                }

                this.updateSegmentState();
            }
        }

        // Extract complete ASR chunks
        while (this.asrBuffer.length >= ASR_CHUNK_SAMPLES) {
            const chunkData = this.asrBuffer.splice(0, ASR_CHUNK_SAMPLES);
            const chunk = new Float32Array(chunkData);

            if (this.state === 'speech') {
                this.sendAudioToEncoder(chunk);
            } else {
                // Keep a rolling pre-buffer so speech onsets are not clipped
                this.preBuffer.push(chunk);
                if (this.preBuffer.length > PRE_BUFFER_CHUNKS) {
                    this.preBuffer.shift();
                }
                // After a segment ends, forward a short post-buffer tail
                if (this.postBufferRemaining > 0) {
                    this.sendAudioToEncoder(chunk);
                    this.postBufferRemaining--;
                    if (this.postBufferRemaining === 0) {
                        this.finalizeSegmentEnd();
                    }
                }
            }
        }
    }

    sendAudioToEncoder(chunk, flush = false) {
        if (!this.encoderWorker || !this.encoderReady) return;

        // Accumulate chunks into batch buffer
        this.encoderBatchBuffer.push(...chunk);

        // Send when we have 320ms worth of audio, or on flush
        if (this.encoderBatchBuffer.length >= ENCODER_BATCH_SAMPLES || flush) {
            if (this.encoderBatchBuffer.length > 0) {
                const batch = new Float32Array(this.encoderBatchBuffer);
                this.encoderBatchBuffer = [];
                this.encoderWorker.postMessage({
                    type: 'audio',
                    data: { audio: batch, segmentId: this.currentSegmentId }
                }, [batch.buffer]);
            }
        }
    }

    updateSegmentState() {
        if (this.state === 'idle') {
            if (this.emaProb >= this.onsetThreshold) {
                this.onsetCounter++;
                if (this.onsetCounter >= 2) {
                    this.startSegment();
                }
            } else {
                this.onsetCounter = 0;
            }
        } else if (this.state === 'speech') {
            const segmentDuration = Date.now() - this.segmentStartTime;

            // Calculate effective offset threshold with gradual ramp:
            // - Before OFFSET_RAMP_START_MS: use the normal offsetThreshold
            // - Between OFFSET_RAMP_START_MS and OFFSET_RAMP_END_MS: linearly ramp to 1.0
            // - After OFFSET_RAMP_END_MS: use 1.0 (any VAD level will trigger offset)
            let effectiveOffsetThreshold = this.offsetThreshold;
            if (segmentDuration >= OFFSET_RAMP_START_MS) {
                const rampProgress = Math.min(1.0,
                    (segmentDuration - OFFSET_RAMP_START_MS) / (OFFSET_RAMP_END_MS - OFFSET_RAMP_START_MS)
                );
                // Lerp from offsetThreshold to 1.0
                effectiveOffsetThreshold = this.offsetThreshold + rampProgress * (1.0 - this.offsetThreshold);
            }

            // Check if minimum segment duration has passed
            const minDurationMet = segmentDuration >= MIN_SEGMENT_DURATION_MS;

            if (this.emaProb < effectiveOffsetThreshold) {
                this.offsetCounter++;
                // Only end segment if minimum duration met AND enough silence chunks
                if (minDurationMet && this.offsetCounter >= OFFSET_CHUNKS_REQUIRED) {
                    this.endSegment();
                }
            } else {
                this.offsetCounter = 0;
            }
        }
    }

    startSegment() {
        // If previous segment wasn't fully finalized, finalize it now
        if (this.postBufferRemaining > 0) {
            this.finalizeSegmentEnd();
            this.postBufferRemaining = 0;
        }

        this.currentSegmentId++;
        this.state = 'speech';
        this.onsetCounter = 0;
        this.offsetCounter = 0;
        this.segmentStartTime = Date.now();
        this.encoderBatchBuffer = []; // Reset batch buffer for new segment

        // Record segment start for visualization
        this.segmentEvents.push({ type: 'start', time: this.segmentStartTime });

        // Tell encoder to start new segment
        this.encoderWorker?.postMessage({
            type: 'segment_start',
            data: { segmentId: this.currentSegmentId }
        });

        // Drain pre-buffer
        while (this.preBuffer.length > 0) {
            const chunk = this.preBuffer.shift();
            this.sendAudioToEncoder(chunk);
        }

        this.onStatusUpdate?.('recording', 'Recording...');
    }

    endSegment() {
        this.state = 'idle';
        this.offsetCounter = 0;
        this.postBufferRemaining = POST_BUFFER_CHUNKS;

        // Record segment end for visualization
        this.segmentEvents.push({ type: 'end', time: Date.now() });

        // Covers the POST_BUFFER_CHUNKS === 0 configuration; otherwise the
        // post-buffer drains in processAudioChunk before finalizing
        if (this.postBufferRemaining === 0) {
            this.finalizeSegmentEnd();
        }

        this.onStatusUpdate?.('listening', 'Listening...');
    }

    finalizeSegmentEnd() {
        // Process remaining complete chunks
        while (this.asrBuffer.length >= ASR_CHUNK_SAMPLES) {
            const chunkData = this.asrBuffer.splice(0, ASR_CHUNK_SAMPLES);
            const chunk = new Float32Array(chunkData);
            this.sendAudioToEncoder(chunk);
        }

        // Pad and send partial chunk
        if (this.asrBuffer.length > 0) {
            const padded = new Float32Array(ASR_CHUNK_SAMPLES);
            padded.set(this.asrBuffer);
            this.sendAudioToEncoder(padded);
        }
        this.asrBuffer = [];

        // Flush any remaining audio in the batch buffer
        this.sendAudioToEncoder(new Float32Array(0), true);

        // Signal segment end
        this.encoderWorker?.postMessage({
            type: 'segment_end',
            data: { segmentId: this.currentSegmentId }
        });
    }
}
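// ./audio_processor.js is loaded in start() above but is not part of this
// listing. A minimal AudioWorkletProcessor matching the 'audio-processor'
// name and the { audio: Float32Array } message shape the main thread expects
// might look like this (a sketch, not the actual file):
//
//   class AudioProcessor extends AudioWorkletProcessor {
//       process(inputs) {
//           const channel = inputs[0]?.[0];
//           if (channel) {
//               // Copy, since the input buffer is reused between callbacks
//               this.port.postMessage({ audio: new Float32Array(channel) });
//           }
//           return true; // keep the processor alive
//       }
//   }
//   registerProcessor('audio-processor', AudioProcessor);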
// =============================================================================
// UI Controller
// =============================================================================

class ASRDemoUI {
    constructor() {
        this.asr = null;
        this.vadCanvas = null;
        this.vadCtx = null;

        this.initElements();
        this.initCanvas();
        this.bindEvents();
    }

    initElements() {
        this.loadingOverlay = document.getElementById('loadingOverlay');
        this.loadingText = document.getElementById('loadingText');
        this.loadingProgressFill = document.getElementById('loadingProgressFill');
        this.loadingProgressText = document.getElementById('loadingProgressText');
        this.loadingDetails = document.getElementById('loadingDetails');
        this.errorMessage = document.getElementById('errorMessage');
        this.statusDot = document.getElementById('statusDot');
        this.statusText = document.getElementById('statusText');
        this.startBtn = document.getElementById('startBtn');
        this.stopBtn = document.getElementById('stopBtn');
        this.vadBarFill = document.getElementById('vadBarFill');
        this.vadValue = document.getElementById('vadValue');
        this.audioQueueSize = document.getElementById('audioQueueSize');
        this.featuresQueueSize = document.getElementById('featuresQueueSize');
        this.droppedChunksEl = document.getElementById('droppedChunks');
        this.transcriptsList = document.getElementById('transcriptsList');
        this.liveCaption = document.getElementById('liveCaption');
        this.liveCaptionText = document.getElementById('liveCaptionText');
        this.liveCaptionMobile = document.getElementById('liveCaptionMobile');
        this.liveCaptionTextMobile = document.getElementById('liveCaptionTextMobile');
        this.backendBadge = document.getElementById('backendBadge');
        this.modelSelect = document.getElementById('modelSelect');
        this.backendSelect = document.getElementById('backendSelect');
        this.onnxUrl = document.getElementById('onnxUrl');
        this.onsetThreshold = document.getElementById('onsetThreshold');
        this.offsetThreshold = document.getElementById('offsetThreshold');
    }

    initCanvas() {
        this.vadCanvas = document.getElementById('vadCanvas');
        this.vadCtx = this.vadCanvas.getContext('2d');
        // Render at device resolution to avoid blur on high-DPI screens
        const rect = this.vadCanvas.getBoundingClientRect();
        this.vadCanvas.width = rect.width * window.devicePixelRatio;
        this.vadCanvas.height = rect.height * window.devicePixelRatio;
        this.vadCtx.scale(window.devicePixelRatio, window.devicePixelRatio);
    }

    bindEvents() {
        this.startBtn.addEventListener('click', () => this.handleStart());
        this.stopBtn.addEventListener('click', () => this.handleStop());

        // Mobile: collapsible sections
        const configSection = document.querySelector('.config-section');
        const vadSection = document.querySelector('.vad-section');

        configSection?.querySelector('h3')?.addEventListener('click', () => {
            configSection.classList.toggle('collapsed');
        });
        vadSection?.querySelector('h3')?.addEventListener('click', () => {
            vadSection.classList.toggle('collapsed');
            // Re-init canvas on expand in case it needs redrawing
            if (!vadSection.classList.contains('collapsed')) {
                this.initCanvas();
            }
        });

        // Start with config collapsed on mobile
        if (window.innerWidth <= 768) {
            configSection?.classList.add('collapsed');
        }
    }
    async handleStart() {
        try {
            this.showLoading('Initializing...');
            this.errorMessage.classList.remove('visible'); // Clear any stale error

            const config = {
                modelName: this.modelSelect.value,
                onnxUrl: this.onnxUrl.value || './models',
                backend: this.backendSelect.value,
                onsetThreshold: parseFloat(this.onsetThreshold.value),
                offsetThreshold: parseFloat(this.offsetThreshold.value)
            };

            this.asr = new PipelinedStreamingASR(config);
            this.asr.onVadUpdate = (prob, history, segmentEvents, historyStartTime) =>
                this.updateVadDisplay(prob, history, segmentEvents, historyStartTime);
            this.asr.onTranscript = (text, segmentId) => this.addTranscript(text, segmentId);
            this.asr.onLiveCaption = (text) => this.updateLiveCaption(text);
            this.asr.onStatusUpdate = (status, text) => this.updateStatus(status, text);
            this.asr.onBackendUpdate = (backend) => this.updateBackendBadge(backend);

            await this.asr.loadModels(
                (text) => { this.loadingText.textContent = text; },
                (progress) => { this.updateLoadingProgress(progress); }
            );
            await this.asr.start();

            this.hideLoading();
            this.startBtn.disabled = true;
            this.stopBtn.disabled = false;
            this.disableConfig(true);
        } catch (error) {
            console.error('Start error:', error);
            this.hideLoading();
            this.showError(`Failed to start: ${error.message}`);
        }
    }

    handleStop() {
        if (this.asr) {
            this.asr.stop();
            this.asr = null;
        }
        this.startBtn.disabled = false;
        this.stopBtn.disabled = true;
        this.disableConfig(false);
        this.updateStatus('idle', 'Ready');
        this.backendBadge.classList.remove('visible');
    }

    updateVadDisplay(prob, history, segmentEvents = [], historyStartTime = 0) {
        this.vadBarFill.style.width = `${prob * 100}%`;
        this.vadValue.textContent = `${Math.round(prob * 100)}%`;

        const ctx = this.vadCtx;
        const rect = this.vadCanvas.getBoundingClientRect();
        const width = rect.width;
        const height = rect.height;
        // Leave space for x-axis labels
        const graphHeight = height - 20;
        const graphTop = 0;

        ctx.fillStyle = '#0f0f23';
        ctx.fillRect(0, 0, width, height);

        if (history.length < 2) return;

        const historyDuration = history.length * 50; // ms (each entry is 50ms)
        const now = Date.now();

        // Draw x-axis ticks (every 0.1 seconds = 100ms)
        ctx.strokeStyle = '#333';
        ctx.fillStyle = '#666';
        ctx.font = '10px monospace';
        ctx.textAlign = 'center';
        ctx.lineWidth = 1;
        for (let t = 0; t <= historyDuration; t += 100) {
            const x = (t / historyDuration) * width;

            // Draw tick mark
            ctx.beginPath();
            ctx.moveTo(x, graphHeight);
            ctx.lineTo(x, graphHeight + 5);
            ctx.stroke();

            // Draw vertical grid line (lighter for minor ticks)
            ctx.strokeStyle = (t % 500 === 0) ? '#444' : '#222';
            ctx.beginPath();
            ctx.moveTo(x, graphTop);
            ctx.lineTo(x, graphHeight);
            ctx.stroke();
            ctx.strokeStyle = '#333';

            // Draw label every 0.5 seconds
            if (t % 500 === 0) {
                const seconds = (t / 1000).toFixed(1);
                ctx.fillText(seconds + 's', x, height - 2);
            }
        }

        // Draw segment events (start = green line, end = red line)
        for (const event of segmentEvents) {
            const eventAge = now - event.time;           // ms ago
            const eventPos = historyDuration - eventAge; // position in history
            if (eventPos < 0 || eventPos > historyDuration) continue;
            const x = (eventPos / historyDuration) * width;

            ctx.lineWidth = 2;
            ctx.strokeStyle = (event.type === 'start') ? '#00ff88' : '#ff4444';
            ctx.beginPath();
            ctx.moveTo(x, graphTop);
            ctx.lineTo(x, graphHeight);
            ctx.stroke();
        }

        // Draw threshold lines
        const onsetY = graphHeight * (1 - parseFloat(this.onsetThreshold.value));
        const offsetY = graphHeight * (1 - parseFloat(this.offsetThreshold.value));

        ctx.strokeStyle = '#ff444466';
        ctx.lineWidth = 1;
        ctx.beginPath();
        ctx.moveTo(0, onsetY);
        ctx.lineTo(width, onsetY);
        ctx.stroke();

        ctx.strokeStyle = '#00ff8866';
        ctx.beginPath();
        ctx.moveTo(0, offsetY);
        ctx.lineTo(width, offsetY);
        ctx.stroke();

        // Draw VAD probability line
        ctx.strokeStyle = '#00d4ff';
        ctx.lineWidth = 2;
        ctx.beginPath();
        for (let i = 0; i < history.length; i++) {
            const x = (i / (history.length - 1)) * width;
            const y = graphHeight * (1 - history[i]);
            if (i === 0) {
                ctx.moveTo(x, y);
            } else {
                ctx.lineTo(x, y);
            }
        }
        ctx.stroke();
    }
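    // Time-to-x mapping used above, worked through: with a full history of
    // 100 entries (5000ms) and a canvas 500px wide, a segment event from
    // 1200ms ago sits at eventPos = 5000 - 1200 = 3800ms, i.e.
    // x = (3800 / 5000) * 500 = 380px. The newest samples render at the
    // right edge, and events scroll left as they age out of the window.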
    addTranscript(text, segmentId) {
        if (!text || !text.trim()) return;

        const item = document.createElement('div');
        item.className = 'transcript-item';
        item.innerHTML = ` #${segmentId} ${this.escapeHtml(text)} `;
        this.transcriptsList.appendChild(item);
        this.transcriptsList.scrollTop = this.transcriptsList.scrollHeight;
    }

    updateLiveCaption(text) {
        if (text) {
            // Desktop
            this.liveCaptionText.textContent = text;
            this.liveCaptionText.classList.remove('placeholder');
            this.liveCaption.classList.add('active');
            // Mobile
            this.liveCaptionTextMobile.textContent = text;
            this.liveCaptionTextMobile.classList.remove('placeholder');
            this.liveCaptionMobile.classList.add('active');
        } else {
            // Desktop
            this.liveCaptionText.textContent = 'Waiting for speech...';
            this.liveCaptionText.classList.add('placeholder');
            this.liveCaption.classList.remove('active');
            // Mobile
            this.liveCaptionTextMobile.textContent = 'Waiting for speech...';
            this.liveCaptionTextMobile.classList.add('placeholder');
            this.liveCaptionMobile.classList.remove('active');
        }
    }

    updateStatus(status, text) {
        this.statusDot.className = 'status-dot ' + status;
        this.statusText.textContent = text;
    }

    updateBackendBadge(backend) {
        const labels = { 'wasm': 'WASM', 'webgl': 'WebGL', 'webgpu': 'WebGPU' };
        this.backendBadge.textContent = labels[backend] || backend;
        this.backendBadge.className = 'backend-badge visible ' + backend;
    }

    showLoading(text) {
        this.loadingText.textContent = text;
        this.loadingProgressFill.style.width = '0%';
        this.loadingProgressText.textContent = '0 / 7 models';
        this.loadingDetails.textContent = '';
        this.loadingOverlay.classList.remove('hidden');
    }

    hideLoading() {
        this.loadingOverlay.classList.add('hidden');
    }

    updateLoadingProgress(progress) {
        const { completedModels, totalModels, currentModel, currentProgress } = progress;

        // Update overall progress bar
        const overallPercent = (completedModels / totalModels) * 100;
        this.loadingProgressFill.style.width = `${overallPercent}%`;
        this.loadingProgressText.textContent = `${completedModels} / ${totalModels} models`;

        // Update details with current model and byte progress
        if (currentModel && currentProgress.total > 0) {
            if (currentProgress.cached) {
                const sizeMB = (currentProgress.total / (1024 * 1024)).toFixed(1);
                this.loadingDetails.textContent = `${currentModel}: ${sizeMB} MB (cached)`;
            } else {
                const loadedMB = (currentProgress.loaded / (1024 * 1024)).toFixed(1);
                const totalMB = (currentProgress.total / (1024 * 1024)).toFixed(1);
                const percent = Math.round((currentProgress.loaded / currentProgress.total) * 100);
                this.loadingDetails.textContent = `${currentModel}: ${loadedMB} / ${totalMB} MB (${percent}%)`;
            }
        } else if (currentModel) {
            this.loadingDetails.textContent = `Loading ${currentModel}...`;
        }
    }

    showError(message) {
        this.errorMessage.textContent = message;
        this.errorMessage.classList.add('visible');
    }

    disableConfig(disabled) {
        this.modelSelect.disabled = disabled;
        this.backendSelect.disabled = disabled;
        this.onnxUrl.disabled = disabled;
        this.onsetThreshold.disabled = disabled;
        this.offsetThreshold.disabled = disabled;
    }

    escapeHtml(text) {
        const div = document.createElement('div');
        div.textContent = text;
        return div.innerHTML;
    }
}

// Initialize on page load
document.addEventListener('DOMContentLoaded', () => {
    window.asrDemo = new ASRDemoUI();
});
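// The pipeline can also be driven headlessly, without ASRDemoUI. A minimal
// sketch using only the public PipelinedStreamingASR API defined above
// (model and worker files are assumed to be served from the same directory):
//
//   const asr = new PipelinedStreamingASR({ modelName: 'sleeker', backend: 'wasm' });
//   asr.onTranscript = (text, segmentId) => console.log(`#${segmentId}: ${text}`);
//   asr.onLiveCaption = (text) => console.log('caption:', text);
//   await asr.loadModels((status) => console.log(status));
//   await asr.start();   // prompts for microphone access
//   // ... later ...
//   asr.stop();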