Spaces:
Paused
Paused
| import { debounce_timeout } from '../../constants.js'; | |
| import { debounceAsync, splitRecursive } from '../../utils.js'; | |
| import { getPreviewString, saveTtsProviderSettings } from './index.js'; | |
/**
 * TTS provider that runs the Kokoro model inside a dedicated Web Worker.
 *
 * The provider talks to the worker with `{ action, data }` messages and tracks every
 * outstanding request in `pendingRequests`, keyed either by the literal string
 * `'initialization'` or by a numeric request id. Each entry holds the `resolve`/`reject`
 * callbacks of the promise that is waiting for the worker's answer.
 */
export class KokoroTtsProvider {
    constructor() {
        this.settings = {
            modelId: 'onnx-community/Kokoro-82M-v1.0-ONNX',
            dtype: 'q8',
            device: 'wasm',
            voiceMap: {},
            defaultVoice: 'af_heart',
            speakingRate: 1.0,
        };
        this.ready = false;
        this.voices = [
            'af_heart',
            'af_alloy',
            'af_aoede',
            'af_bella',
            'af_jessica',
            'af_kore',
            'af_nicole',
            'af_nova',
            'af_river',
            'af_sarah',
            'af_sky',
            'am_adam',
            'am_echo',
            'am_eric',
            'am_fenrir',
            'am_liam',
            'am_michael',
            'am_onyx',
            'am_puck',
            'am_santa',
            'bf_emma',
            'bf_isabella',
            'bm_george',
            'bm_lewis',
            'bf_alice',
            'bf_lily',
            'bm_daniel',
            'bm_fable',
        ];
        this.worker = null;
        // Not read anywhere inside this class; kept because it is part of the public shape.
        this.separator = ' ... ... ... ';
        this.pendingRequests = new Map();
        this.nextRequestId = 1;
        // Promise of the initialization attempt currently in flight, or null when none is running.
        this.initPromise = null;
        // Update display values immediately but only reinitialize TTS after a delay
        this.initTtsDebounced = debounceAsync(this.initializeWorker.bind(this), debounce_timeout.relaxed);
    }

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Every tilde is replaced with a full stop.
        return text.replace(/~/g, '.');
    }

    /**
     * Copy the known keys of a saved settings object into `this.settings` and sync the
     * settings controls (values plus change listeners) with the result.
     * @param {object} settings Saved settings; keys that are `undefined` keep their current value
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        for (const key of ['modelId', 'dtype', 'device', 'voiceMap', 'defaultVoice', 'speakingRate']) {
            if (settings[key] !== undefined) {
                this.settings[key] = settings[key];
            }
        }

        const onChange = this.onSettingsChange.bind(this);
        $('#kokoro_model_id').val(this.settings.modelId).on('input', onChange);
        $('#kokoro_dtype').val(this.settings.dtype).on('change', onChange);
        $('#kokoro_device').val(this.settings.device).on('change', onChange);
        $('#kokoro_speaking_rate').val(this.settings.speakingRate).on('input', onChange);
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
    }

    /**
     * Reject and forget every outstanding request.
     * Used whenever the worker that would have answered them is terminated, so that
     * callers fail promptly instead of waiting for a reply that can never arrive.
     * @param {Error} error Rejection reason handed to every pending request
     */
    rejectPendingRequests(error) {
        const abandoned = [...this.pendingRequests.values()];
        // Clear first so a reject callback that inspects the map sees a consistent state.
        this.pendingRequests.clear();
        for (const request of abandoned) {
            request.reject(error);
        }
    }

    /**
     * (Re)start the worker and initialize the model with the current settings.
     * An existing worker is terminated first, and everything that was still waiting on
     * it (including a previous, unfinished initialization) is rejected.
     * @returns {Promise<boolean>} Resolves with `true` once the worker reports success;
     * rejects when the worker cannot be created, reports failure, fails to load,
     * is superseded/disposed, or does not answer within 600 seconds
     */
    initializeWorker() {
        const attempt = new Promise((resolve, reject) => {
            // The worker owned by THIS attempt; stays null if creation itself fails.
            let worker = null;
            let timeoutId;

            const fail = (logMessage, error) => {
                console.error(logMessage, error);
                // A superseded attempt must not overwrite the state of the worker that replaced it.
                if (this.worker === worker) {
                    this.ready = false;
                    this.updateStatusDisplay();
                }
                reject(error);
            };

            try {
                // Terminate the existing worker if it exists
                if (this.worker) {
                    this.worker.terminate();
                    this.worker = null;
                    $('#kokoro_status_text').text('Initializing...').removeAttr('style');
                }
                // Until the new worker reports in, requests must not be sent to it.
                this.ready = false;
                this.rejectPendingRequests(new Error('TTS worker was restarted'));

                // Create a new worker
                worker = new Worker(new URL('./kokoro-worker.js', import.meta.url), { type: 'module' });
                this.worker = worker;
                worker.onmessage = this.handleWorkerMessage.bind(this);
                // A worker script that fails to load never posts 'initialized'; surface that
                // right away instead of waiting for the timeout.
                worker.onerror = (event) => {
                    const init = this.pendingRequests.get('initialization');
                    if (this.worker === worker && init) {
                        this.pendingRequests.delete('initialization');
                        init.reject(new Error(event?.message || 'Worker failed to load'));
                    }
                };

                const initRequest = {
                    resolve: (success) => {
                        clearTimeout(timeoutId);
                        this.ready = success;
                        this.updateStatusDisplay();
                        resolve(success);
                    },
                    reject: (error) => {
                        clearTimeout(timeoutId);
                        fail('Worker initialization failed:', error);
                    },
                };
                timeoutId = setTimeout(() => {
                    if (this.pendingRequests.get('initialization') === initRequest) {
                        this.pendingRequests.delete('initialization');
                    }
                    fail('Worker initialization failed:', new Error('Worker initialization timed out'));
                }, 600000); // 600 second timeout

                // Register before posting so the reply always finds its callbacks.
                this.pendingRequests.set('initialization', initRequest);

                // Initialize the worker with the current settings
                worker.postMessage({
                    action: 'initialize',
                    data: {
                        modelId: this.settings.modelId,
                        dtype: this.settings.dtype,
                        device: this.settings.device,
                    },
                });
            } catch (error) {
                clearTimeout(timeoutId);
                this.pendingRequests.delete('initialization');
                fail('Failed to create worker:', error);
            }
        });

        // Remember the attempt while it is in flight so other entry points can wait for it
        // instead of restarting the worker.
        this.initPromise = attempt;
        const forget = () => {
            if (this.initPromise === attempt) {
                this.initPromise = null;
            }
        };
        attempt.then(forget, forget);
        return attempt;
    }

    /**
     * Dispatch a message received from the worker.
     * Handles `initialized`, `generatedTts` and `readyStatus`; other actions are ignored.
     * @param {MessageEvent} event Worker message; `event.data` carries `action` and its payload
     */
    handleWorkerMessage(event) {
        const { action, success, ready, error, requestId, blobUrl } = event.data;
        switch (action) {
            case 'initialized': {
                const initRequest = this.pendingRequests.get('initialization');
                if (initRequest) {
                    this.pendingRequests.delete('initialization');
                    if (success) {
                        initRequest.resolve(true);
                    } else {
                        initRequest.reject(new Error(error || 'Initialization failed'));
                    }
                }
                break;
            }
            case 'generatedTts': {
                const request = this.pendingRequests.get(requestId);
                if (!request) {
                    break;
                }
                this.pendingRequests.delete(requestId);
                if (!success) {
                    request.reject(new Error(error || 'TTS generation failed'));
                    break;
                }
                fetch(blobUrl)
                    .then(response => response.blob())
                    .then(audioBlob => {
                        request.resolve(new Response(audioBlob, {
                            headers: {
                                'Content-Type': 'audio/wav',
                            },
                        }));
                    })
                    .catch(fetchError => {
                        request.reject(new Error('Failed to fetch TTS audio blob: ' + fetchError));
                    })
                    // Clean up the blob URL whether or not the fetch succeeded
                    .finally(() => URL.revokeObjectURL(blobUrl));
                break;
            }
            case 'readyStatus':
                this.ready = ready;
                this.updateStatusDisplay();
                break;
        }
    }

    /**
     * Reflect `this.ready` in the status label ("Ready" in green, "Failed" in red).
     */
    updateStatusDisplay() {
        const statusText = this.ready ? 'Ready' : 'Failed';
        const statusColor = this.ready ? 'green' : 'red';
        $('#kokoro_status_text').text(statusText).css('color', statusColor);
    }

    /**
     * Start the worker if there is none; otherwise ask the existing worker for its status.
     * @returns {Promise<boolean>} The initialization result, or the last known `ready` flag
     * (the worker's answer to `checkReady` arrives later through `handleWorkerMessage`)
     */
    async checkReady() {
        if (!this.worker) {
            return await this.initializeWorker();
        }
        this.worker.postMessage({ action: 'checkReady' });
        return this.ready;
    }

    /**
     * Force a restart of the worker.
     * @returns {Promise<boolean>} Result of `initializeWorker`
     */
    async onRefreshClick() {
        return await this.initializeWorker();
    }

    /**
     * Escape a value for interpolation into HTML text or a double-quoted attribute.
     * @param {*} value Value to escape; converted with `String()`
     * @returns {string} Escaped text
     */
    escapeHtml(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/"/g, '&quot;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
    }

    /**
     * Markup of the provider's settings panel, pre-filled from `this.settings`.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        const { dtype, device } = this.settings;
        const selectedIf = (condition) => (condition ? 'selected' : '');
        // The model id is free text, so it is escaped before it lands inside an attribute.
        const modelId = this.escapeHtml(this.settings.modelId);
        const speakingRate = this.escapeHtml(this.settings.speakingRate);
        return `
            <div class="kokoro_tts_settings">
                <label for="kokoro_model_id">Model ID:</label>
                <input id="kokoro_model_id" type="text" class="text_pole" value="${modelId}" />
                <label for="kokoro_dtype">Data Type:</label>
                <select id="kokoro_dtype" class="text_pole">
                    <option value="q8" ${selectedIf(dtype === 'q8')}>q8 (Recommended)</option>
                    <option value="fp32" ${selectedIf(dtype === 'fp32')}>fp32 (High Precision)</option>
                    <option value="fp16" ${selectedIf(dtype === 'fp16')}>fp16</option>
                    <option value="q4" ${selectedIf(dtype === 'q4')}>q4 (Low Memory)</option>
                    <option value="q4f16" ${selectedIf(dtype === 'q4f16')}>q4f16</option>
                </select>
                <label for="kokoro_device">Device:</label>
                <select id="kokoro_device" class="text_pole">
                    <option value="wasm" ${selectedIf(device === 'wasm')}>WebAssembly (CPU)</option>
                    <option value="webgpu" ${selectedIf(device === 'webgpu')}>WebGPU (GPU Acceleration)</option>
                </select>
                <label for="kokoro_speaking_rate">Speaking Rate: <span id="kokoro_speaking_rate_output">${speakingRate}x</span></label>
                <input id="kokoro_speaking_rate" type="range" value="${speakingRate}" min="0.5" max="2.0" step="0.1" />
                <hr>
                <div>
                    Status: <span id="kokoro_status_text">Initializing...</span>
                </div>
            </div>
        `;
    }

    /**
     * Read the settings controls back into `this.settings`, refresh the rate label,
     * schedule a debounced worker restart and persist the settings.
     * @returns {Promise<void>}
     */
    async onSettingsChange() {
        this.settings.modelId = $('#kokoro_model_id').val().toString();
        this.settings.dtype = $('#kokoro_dtype').val().toString();
        this.settings.device = $('#kokoro_device').val().toString();
        this.settings.speakingRate = parseFloat($('#kokoro_speaking_rate').val().toString());
        // Update UI display
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
        // Reinitialize TTS engine with debounce. The restart is intentionally not awaited,
        // so its failure (already logged by initializeWorker) is observed here rather
        // than left as an unhandled rejection.
        Promise.resolve(this.initTtsDebounced()).catch(error => {
            console.warn('Kokoro TTS re-initialization did not complete:', error);
        });
        saveTtsProviderSettings();
    }

    /**
     * List every built-in voice as a voice object.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: null, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        if (!this.ready) {
            await this.checkReady();
        }
        // Every entry of `this.voices` is a known voice, so getVoice returns it unchanged.
        return this.voices.map(voice => this.getVoice(voice));
    }

    /**
     * Speak the preview sentence with the given voice, one generated chunk after another.
     * @param {string} voiceId Voice ID
     * @returns {Promise<void>} Resolves after the last chunk finished playing;
     * rejects when generation or playback fails
     */
    async previewTtsVoice(voiceId) {
        if (!this.ready) {
            await this.checkReady();
        }
        const voice = this.getVoice(voiceId);
        const previewText = getPreviewString(voice.lang);
        for await (const response of this.generateTts(previewText, voiceId)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await new Promise((resolve, reject) => {
                    const audioElement = new Audio();
                    audioElement.src = url;
                    audioElement.onended = () => resolve();
                    // Without these two paths a playback failure would leave the preview
                    // waiting for an 'ended' event that never fires.
                    audioElement.onerror = () => reject(new Error('Failed to play TTS preview audio'));
                    Promise.resolve(audioElement.play()).catch(reject);
                });
            } finally {
                URL.revokeObjectURL(url);
            }
        }
    }

    /**
     * @param {string} voiceId Voice ID
     * @returns {string} The voice ID itself; voices have no separate display name
     */
    getVoiceDisplayName(voiceId) {
        return voiceId;
    }

    /**
     * Build the voice object for a voice name, falling back to the default voice
     * (`settings.defaultVoice`, or `'af_heart'` when that is empty) for unknown names.
     * @param {string} voiceName Requested voice
     * @returns {{name: string, voice_id: string, preview_url: null, lang: string}} Voice object;
     * `lang` is `'en-GB'` for ids starting with `b`, otherwise `'en-US'`
     */
    getVoice(voiceName) {
        const defaultVoice = this.settings.defaultVoice || 'af_heart';
        const actualVoiceName = this.voices.includes(voiceName) ? voiceName : defaultVoice;
        return {
            name: actualVoiceName,
            voice_id: actualVoiceName,
            preview_url: null,
            lang: actualVoiceName.startsWith('b') ? 'en-GB' : 'en-US',
        };
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * The text is split into chunks of at most 400 characters that are sent to the
     * worker one at a time; one response is yielded per chunk.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     * @throws {Error} When the engine cannot be initialized, the text is blank,
     * generation fails, or the worker is restarted/disposed while a chunk is pending
     */
    async* generateTts(text, voiceId) {
        if (!this.ready || !this.worker) {
            console.log('TTS not ready, initializing...');
            // Join an initialization that is already running rather than restarting the worker.
            await (this.initPromise ?? this.initializeWorker());
        }
        if (!this.ready || !this.worker) {
            throw new Error('Failed to initialize TTS engine');
        }
        if (text.trim().length === 0) {
            throw new Error('Empty text');
        }

        const voice = this.getVoice(voiceId);
        const chunkSize = 400;
        const chunks = splitRecursive(text, chunkSize, ['\n\n', '\n', '.', '?', '!', ',', ' ', '']);

        for (const chunk of chunks) {
            // The consumer may pause between chunks; re-check that a worker still exists.
            const worker = this.worker;
            if (!worker) {
                throw new Error('TTS engine was disposed during generation');
            }
            // One id per chunk so a reply can never be matched to the wrong request.
            const requestId = this.nextRequestId++;
            yield await new Promise((resolve, reject) => {
                // Store the promise callbacks
                this.pendingRequests.set(requestId, { resolve, reject });
                try {
                    // Send the request to the worker
                    worker.postMessage({
                        action: 'generateTts',
                        data: {
                            text: chunk,
                            voice: voice.voice_id,
                            // `||` on purpose: 0 and NaN are not usable rates either.
                            speakingRate: this.settings.speakingRate || 1.0,
                            requestId,
                        },
                    });
                } catch (postError) {
                    this.pendingRequests.delete(requestId);
                    reject(postError);
                }
            });
        }
    }

    /**
     * Clean up the worker when the provider is disposed and fail everything that was
     * still waiting for it.
     */
    dispose() {
        if (this.worker) {
            this.worker.terminate();
            this.worker = null;
        }
        this.ready = false;
        this.rejectPendingRequests(new Error('TTS provider was disposed'));
    }
}