Spaces:
Running
Running
// Configuration
// Hugging Face model id loaded by the text-to-speech pipeline below.
const MODEL_ID = 'onnx-community/Supertonic-TTS-ONNX';
// Base URL for speaker-embedding files; joined with the voice <select> value
// at generation time.
const VOICE_BASE_URL = 'https://huggingface.co/onnx-community/Supertonic-TTS-ONNX/resolve/main/voices/';

// DOM Elements — looked up once at startup; the matching ids must exist in
// the host page before this script runs.
const generateBtn = document.getElementById('generate-btn');
const inputText = document.getElementById('input-text');
const voiceSelect = document.getElementById('voice-select');
const gpuToggle = document.getElementById('gpu-toggle');
const deviceLabel = document.getElementById('device-label');
const statusContainer = document.getElementById('status-container');
const statusText = document.getElementById('status-text');
const progressBar = document.getElementById('progress-bar');
const outputCard = document.getElementById('output-card');
const audioPlayer = document.getElementById('audio-player');
const downloadLink = document.getElementById('download-link');
const errorMsg = document.getElementById('error-msg');

// State
// Lazily-created TTS pipeline; reset to null when the device toggle changes
// so the next generation reloads the model on the new backend.
let ttsPipeline = null;
// Inference backend passed to the pipeline: 'wasm' (CPU) or 'webgpu'.
let currentDevice = 'wasm';
// Helper: Check WebGPU support.
// When the browser does not expose `navigator.gpu`, disable the GPU toggle
// AND clear any pre-checked state, forcing the device back to 'wasm' — the
// original only disabled the toggle, which could leave a checked toggle (and
// a 'webgpu' currentDevice) the user had no way to change.
// @returns {Promise<boolean>} true when WebGPU is available.
async function checkWebGPU() {
    if (!navigator.gpu) {
        gpuToggle.checked = false;   // keep UI consistent with forced CPU mode
        gpuToggle.disabled = true;
        deviceLabel.innerText = "WebGPU not supported (CPU only)";
        currentDevice = 'wasm';      // never let inference request an unavailable backend
        return false;
    }
    return true;
}
// Fire-and-forget at startup; only the UI side effects matter here.
checkWebGPU();
// UI Event Listeners
// Switching device invalidates the cached pipeline so the next generation
// reloads the model on the chosen backend.
gpuToggle.addEventListener('change', (e) => {
    const useGPU = e.target.checked;
    currentDevice = useGPU ? 'webgpu' : 'wasm';
    deviceLabel.innerText = useGPU ? 'Run on WebGPU' : 'Run on CPU';
    // Reset pipeline to force reload with new device setting next time
    ttsPipeline = null;
});
// Keep the "N / 500" character counter in sync with the textarea.
// NOTE(review): the 500 limit is display-only here — presumably enforced by
// a maxlength attribute in the HTML; verify.
inputText.addEventListener('input', () => {
    document.querySelector('.char-count').innerText = `${inputText.value.length} / 500`;
});
// Main generation flow: lazily loads the TTS pipeline (recreated after a
// device change — see the gpu-toggle handler), runs inference with the
// selected voice, and wires the resulting WAV into the player and the
// download link.
generateBtn.addEventListener('click', async () => {
    const text = inputText.value.trim();
    if (!text) return; // nothing to synthesize

    resetUI();
    statusContainer.classList.remove('hidden');
    generateBtn.disabled = true;

    try {
        // 1. Initialize Pipeline if needed
        if (!ttsPipeline) {
            updateStatus('Loading model... (this may take a moment)', 0);
            // Import pipeline from window (set in HTML)
            const { pipeline } = window;
            ttsPipeline = await pipeline('text-to-speech', MODEL_ID, {
                device: currentDevice,
                dtype: 'fp32', // Required for this specific model as per prompt
                progress_callback: (data) => {
                    if (data.status === 'progress') {
                        // `progress` may be absent on the first event for a
                        // file; show 0% rather than an "undefined%" width.
                        updateStatus(`Downloading ${data.file}...`, data.progress ?? 0);
                    } else if (data.status === 'ready') {
                        updateStatus('Model ready!', 100);
                    }
                }
            });
        }

        // 2. Generate Audio
        updateStatus('Generating audio...', 100);
        progressBar.classList.add('pulsing'); // Add animation for inference time

        const voiceFile = voiceSelect.value;
        const speaker_embeddings = `${VOICE_BASE_URL}${voiceFile}`;

        // Run inference
        const output = await ttsPipeline(text, {
            speaker_embeddings: speaker_embeddings
        });

        // 3. Process Output
        // output.audio is a Float32Array, output.sampling_rate is a number.
        // Revoke the previous blob URL (if any) first — the original created
        // a fresh object URL per generation and never released the old one,
        // leaking audio buffers for the lifetime of the page.
        if (audioPlayer.src.startsWith('blob:')) {
            URL.revokeObjectURL(audioPlayer.src);
        }
        const wavUrl = createWavUrl(output.audio, output.sampling_rate);
        audioPlayer.src = wavUrl;
        downloadLink.href = wavUrl;
        outputCard.classList.remove('hidden');

        // Auto-play result (may be rejected by autoplay policies; non-fatal)
        try {
            await audioPlayer.play();
        } catch (e) {
            console.log("Auto-play blocked by browser policy");
        }
    } catch (err) {
        console.error(err);
        showError(err.message);
    } finally {
        generateBtn.disabled = false;
        progressBar.classList.remove('pulsing');
        statusContainer.classList.add('hidden');
    }
});
// Helper: Update Progress UI — refresh the status line and stretch the
// progress bar to the given percentage.
function updateStatus(text, progressPercent) {
    progressBar.style.width = `${progressPercent}%`;
    statusText.innerText = text;
}
// Hide any previous output/error panels and rewind the progress bar before
// a new generation starts.
function resetUI() {
    progressBar.style.width = '0%';
    for (const panel of [outputCard, errorMsg]) {
        panel.classList.add('hidden');
    }
}
// Surface an error message in the (normally hidden) error element.
function showError(msg) {
    const box = errorMsg;
    box.innerText = `Error: ${msg}`;
    box.classList.remove('hidden');
}
// Audio Utility: Convert Float32Array to WAV Blob URL.
// Caller owns the returned object URL and should revoke it when done.
function createWavUrl(audioData, sampleRate) {
    const wavBytes = encodeWAV(audioData, sampleRate);
    return URL.createObjectURL(new Blob([wavBytes], { type: 'audio/wav' }));
}
// Build a complete WAV file (44-byte RIFF header followed by 16-bit mono
// little-endian PCM) in an ArrayBuffer. Each float sample is clamped to
// [-1, 1] and scaled asymmetrically to the int16 range.
function encodeWAV(samples, sampleRate) {
    const BYTES_PER_SAMPLE = 2; // 16-bit PCM, mono
    const dataSize = samples.length * BYTES_PER_SAMPLE;
    const view = new DataView(new ArrayBuffer(44 + dataSize));

    // Write a 4-char ASCII tag into the header at the given byte offset.
    const putTag = (offset, tag) => {
        for (let i = 0; i < tag.length; i++) {
            view.setUint8(offset + i, tag.charCodeAt(i));
        }
    };

    // RIFF chunk descriptor
    putTag(0, 'RIFF');
    view.setUint32(4, 36 + dataSize, true);
    putTag(8, 'WAVE');

    // fmt sub-chunk
    putTag(12, 'fmt ');
    view.setUint32(16, 16, true);                            // fmt chunk size
    view.setUint16(20, 1, true);                             // PCM format
    view.setUint16(22, 1, true);                             // mono
    view.setUint32(24, sampleRate, true);                    // sample rate
    view.setUint32(28, sampleRate * BYTES_PER_SAMPLE, true); // byte rate
    view.setUint16(32, BYTES_PER_SAMPLE, true);              // block align
    view.setUint16(34, 16, true);                            // bits per sample

    // data sub-chunk
    putTag(36, 'data');
    view.setUint32(40, dataSize, true);

    // PCM payload: clamp, then scale (negative half by 0x8000, positive by
    // 0x7FFF, matching the int16 range asymmetry).
    let offset = 44;
    for (const raw of samples) {
        const clamped = Math.max(-1, Math.min(1, raw));
        view.setInt16(offset, clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF, true);
        offset += BYTES_PER_SAMPLE;
    }

    return view.buffer;
}
// Write the UTF-16 code units of `string` as single bytes into `view`,
// starting at `offset` (intended for 4-char ASCII RIFF tags).
function writeString(view, offset, string) {
    let i = 0;
    while (i < string.length) {
        view.setUint8(offset + i, string.charCodeAt(i));
        i += 1;
    }
}
// Store each float sample as little-endian int16 starting at `offset`.
// Samples are clamped to [-1, 1]; the negative half scales by 0x8000 and
// the positive half by 0x7FFF to match the asymmetric int16 range.
function floatTo16BitPCM(view, offset, input) {
    let pos = offset;
    for (const raw of input) {
        const clamped = Math.min(1, Math.max(-1, raw));
        const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
        view.setInt16(pos, scaled, true);
        pos += 2;
    }
}