Spaces:
Paused
Paused
| import { saveTtsProviderSettings } from './index.js'; | |
| export { ChatterboxTtsProvider }; | |
| class ChatterboxTtsProvider { | |
| //########// | |
| // Config // | |
| //########// | |
| settings = {}; | |
| constructor() { | |
| // Initialize with default settings | |
| this.settings = { | |
| provider_endpoint: this.settings.provider_endpoint || 'http://localhost:8004', | |
| voice_mode: this.settings.voice_mode || 'predefined', | |
| predefined_voice: this.settings.predefined_voice || 'S1', | |
| reference_voice: this.settings.reference_voice || '', | |
| temperature: this.settings.temperature || 0.8, | |
| exaggeration: this.settings.exaggeration || 0.5, | |
| cfg_weight: this.settings.cfg_weight || 0.5, | |
| seed: this.settings.seed || -1, | |
| speed_factor: this.settings.speed_factor || 1.0, | |
| language: this.settings.language || 'en', | |
| split_text: this.settings.split_text || true, | |
| chunk_size: this.settings.chunk_size || 120, | |
| output_format: this.settings.output_format || 'wav', | |
| voiceMap: this.settings.voiceMap || {}, | |
| }; | |
| } | |
| ready = false; | |
| voices = []; | |
| separator = '. '; | |
| audioElement = document.createElement('audio'); | |
| languageLabels = { | |
| 'English': 'en', | |
| 'Spanish': 'es', | |
| 'French': 'fr', | |
| 'German': 'de', | |
| 'Italian': 'it', | |
| 'Portuguese': 'pt', | |
| 'Polish': 'pl', | |
| 'Turkish': 'tr', | |
| 'Russian': 'ru', | |
| 'Dutch': 'nl', | |
| 'Czech': 'cs', | |
| 'Arabic': 'ar', | |
| 'Chinese': 'zh-cn', | |
| 'Japanese': 'ja', | |
| 'Korean': 'ko', | |
| 'Hindi': 'hi', | |
| }; | |
| get settingsHtml() { | |
| let html = `<div class="chatterbox-settings-container"> | |
| <div class="chatterbox-settings-header"> | |
| <h3>Chatterbox TTS Settings</h3> | |
| <div class="status-indicator"> | |
| Status: <span id="chatterbox-status" class="offline">Offline</span> | |
| </div> | |
| </div>`; | |
| // Server endpoint | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-endpoint">Server Endpoint:</label> | |
| <input id="chatterbox-endpoint" type="text" class="text_pole" value="${this.settings.provider_endpoint}" /> | |
| </div>`; | |
| // Language selection | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-language">Language:</label> | |
| <select id="chatterbox-language">`; | |
| for (let language in this.languageLabels) { | |
| html += `<option value="${this.languageLabels[language]}" ${this.languageLabels[language] === this.settings.language ? 'selected' : ''}>${language}</option>`; | |
| } | |
| html += `</select> | |
| </div>`; | |
| // Generation parameters | |
| html += `<div class="chatterbox-params-section"> | |
| <h4>Generation Parameters</h4>`; | |
| // Temperature | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-temperature">Temperature: <span id="chatterbox-temperature-value">${this.settings.temperature}</span></label> | |
| <input id="chatterbox-temperature" type="range" min="0" max="1" step="0.1" value="${this.settings.temperature}" /> | |
| </div>`; | |
| // Exaggeration | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-exaggeration">Exaggeration: <span id="chatterbox-exaggeration-value">${this.settings.exaggeration}</span></label> | |
| <input id="chatterbox-exaggeration" type="range" min="0" max="2" step="0.1" value="${this.settings.exaggeration}" /> | |
| </div>`; | |
| // CFG Weight | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-cfg-weight">CFG Weight: <span id="chatterbox-cfg-weight-value">${this.settings.cfg_weight}</span></label> | |
| <input id="chatterbox-cfg-weight" type="range" min="0" max="1" step="0.1" value="${this.settings.cfg_weight}" /> | |
| </div>`; | |
| // Speed Factor | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-speed">Speed Factor: <span id="chatterbox-speed-value">${this.settings.speed_factor}</span></label> | |
| <input id="chatterbox-speed" type="range" min="0.5" max="2" step="0.1" value="${this.settings.speed_factor}" /> | |
| </div>`; | |
| // Seed | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-seed">Seed (-1 for random):</label> | |
| <input id="chatterbox-seed" class="text_pole" type="number" min="-1" value="${this.settings.seed}" /> | |
| </div>`; | |
| // Text chunking | |
| html += `<div class="chatterbox-setting-row"> | |
| <label class="checkbox_label"> | |
| <input type="checkbox" id="chatterbox-split-text" ${this.settings.split_text ? 'checked' : ''} /> | |
| Split long texts into chunks | |
| </label> | |
| </div>`; | |
| // Chunk size | |
| html += `<div class="chatterbox-setting-row" id="chunk-size-row" ${!this.settings.split_text ? 'style="display: none;"' : ''}> | |
| <label for="chatterbox-chunk-size">Chunk Size:</label> | |
| <input id="chatterbox-chunk-size" class="text_pole" type="number" min="50" max="500" value="${this.settings.chunk_size}" /> | |
| </div>`; | |
| // Output format | |
| html += `<div class="chatterbox-setting-row"> | |
| <label for="chatterbox-format">Output Format:</label> | |
| <select id="chatterbox-format"> | |
| <option value="wav" ${this.settings.output_format === 'wav' ? 'selected' : ''}>WAV</option> | |
| <option value="opus" ${this.settings.output_format === 'opus' ? 'selected' : ''}>Opus</option> | |
| </select> | |
| </div>`; | |
| html += '</div>'; // End params section | |
| // Footer with links | |
| html += `<div class="chatterbox-footer"> | |
| <a href="${this.settings.provider_endpoint}" target="_blank">Chatterbox Web UI</a> | | |
| <a href="https://github.com/devnen/Chatterbox-TTS-Server" target="_blank">Documentation</a> | |
| </div>`; | |
| html += '</div>'; // End container | |
| // Add CSS styles | |
| html += `<style> | |
| .chatterbox-settings-container { | |
| padding: 10px; | |
| } | |
| .chatterbox-settings-header { | |
| display: flex; | |
| justify-content: space-between; | |
| align-items: center; | |
| margin-bottom: 15px; | |
| } | |
| .chatterbox-settings-header h3 { | |
| margin: 0; | |
| } | |
| .chatterbox-settings-container .status-indicator { | |
| font-weight: bold; | |
| } | |
| #chatterbox-status.ready { color: #4CAF50; } | |
| #chatterbox-status.offline { color: #f44336; } | |
| #chatterbox-status.processing { color: #2196F3; } | |
| .chatterbox-setting-row { | |
| margin-bottom: 10px; | |
| display: flex; | |
| align-items: center; | |
| gap: 10px; | |
| } | |
| .chatterbox-setting-row label { | |
| flex: 0 0 150px; | |
| } | |
| .chatterbox-setting-row label.checkbox_label { | |
| flex-basis: auto; | |
| } | |
| .chatterbox-setting-row input[type="text"], | |
| .chatterbox-setting-row input[type="number"], | |
| .chatterbox-setting-row select { | |
| flex: 1; | |
| } | |
| .chatterbox-setting-row input[type="range"] { | |
| flex: 1; | |
| } | |
| .chatterbox-params-section { | |
| margin-top: 15px; | |
| padding-top: 15px; | |
| border-top: 1px solid #ccc; | |
| } | |
| .chatterbox-params-section h4 { | |
| margin-top: 0; | |
| margin-bottom: 10px; | |
| } | |
| .chatterbox-footer { | |
| margin-top: 15px; | |
| padding-top: 15px; | |
| border-top: 1px solid #ccc; | |
| text-align: center; | |
| font-size: 0.9em; | |
| } | |
| </style>`; | |
| return html; | |
| } | |
| //######################// | |
| // Startup & Initialize // | |
| //######################// | |
| async loadSettings(settings) { | |
| this.updateStatus('Offline'); | |
| if (Object.keys(settings).length === 0) { | |
| console.info('Using default Chatterbox TTS Provider settings'); | |
| } else { | |
| // Populate settings with provided values | |
| for (const key in settings) { | |
| if (key in this.settings) { | |
| this.settings[key] = settings[key]; | |
| } | |
| } | |
| } | |
| // Update UI elements | |
| this.updateUIFromSettings(); | |
| console.debug('ChatterboxTTS: Settings loaded'); | |
| try { | |
| // Check if TTS provider is ready | |
| await this.checkReady(); | |
| if (this.ready) { | |
| // Fetch all voice types for the voice map | |
| await this.fetchTtsVoiceObjects(); | |
| this.updateStatus('Ready'); | |
| } | |
| this.setupEventListeners(); | |
| } catch (error) { | |
| console.error('Error loading Chatterbox settings:', error); | |
| this.updateStatus('Offline'); | |
| } | |
| } | |
| updateUIFromSettings() { | |
| $('#chatterbox-endpoint').val(this.settings.provider_endpoint); | |
| $('#chatterbox-language').val(this.settings.language); | |
| $('#chatterbox-temperature').val(this.settings.temperature); | |
| $('#chatterbox-temperature-value').text(this.settings.temperature); | |
| $('#chatterbox-exaggeration').val(this.settings.exaggeration); | |
| $('#chatterbox-exaggeration-value').text(this.settings.exaggeration); | |
| $('#chatterbox-cfg-weight').val(this.settings.cfg_weight); | |
| $('#chatterbox-cfg-weight-value').text(this.settings.cfg_weight); | |
| $('#chatterbox-speed').val(this.settings.speed_factor); | |
| $('#chatterbox-speed-value').text(this.settings.speed_factor); | |
| $('#chatterbox-seed').val(this.settings.seed); | |
| $('#chatterbox-split-text').prop('checked', this.settings.split_text); | |
| $('#chatterbox-chunk-size').val(this.settings.chunk_size); | |
| $('#chatterbox-format').val(this.settings.output_format); | |
| // Show/hide chunk size based on split text | |
| if (this.settings.split_text) { | |
| $('#chunk-size-row').show(); | |
| } else { | |
| $('#chunk-size-row').hide(); | |
| } | |
| } | |
| //##############################// | |
| // Check Server is Available // | |
| //##############################// | |
| async checkReady() { | |
| try { | |
| const response = await fetch(`${this.settings.provider_endpoint}/api/ui/initial-data`); | |
| if (!response.ok) { | |
| throw new Error(`HTTP Error Response: ${response.status} ${response.statusText}`); | |
| } | |
| const data = await response.json(); | |
| // Check if we got valid data | |
| if (data) { | |
| this.ready = true; | |
| console.log('Chatterbox TTS service is ready.'); | |
| } else { | |
| this.ready = false; | |
| console.log('Chatterbox TTS service returned invalid data.'); | |
| } | |
| } catch (error) { | |
| console.error('Error checking Chatterbox TTS service readiness:', error); | |
| this.ready = false; | |
| } | |
| } | |
| //######################// | |
| // Get Available Voices // | |
| //######################// | |
| async fetchTtsVoiceObjects() { | |
| try { | |
| // Always fetch predefined voices | |
| const predefinedResponse = await fetch(`${this.settings.provider_endpoint}/get_predefined_voices`); | |
| if (!predefinedResponse.ok) { | |
| throw new Error(`HTTP ${predefinedResponse.status}: ${predefinedResponse.statusText}`); | |
| } | |
| const predefinedData = await predefinedResponse.json(); | |
| // Transform predefined voices | |
| const predefinedVoices = predefinedData.map(voice => ({ | |
| name: voice.display_name, | |
| voice_id: voice.voice_id || voice.filename, | |
| preview_url: null, | |
| lang: voice.language || 'en', | |
| })); | |
| // Always try to fetch reference voices | |
| let referenceVoices = []; | |
| try { | |
| const refResponse = await fetch(`${this.settings.provider_endpoint}/get_reference_files`); | |
| if (refResponse.ok) { | |
| const refData = await refResponse.json(); | |
| referenceVoices = refData.map(filename => ({ | |
| name: `[Clone] ${filename}`, | |
| voice_id: `ref_${filename}`, | |
| preview_url: null, | |
| lang: 'en', | |
| })); | |
| } | |
| } catch (error) { | |
| console.warn('Failed to fetch reference voices:', error); | |
| } | |
| // Combine all voices | |
| this.voices = [...predefinedVoices, ...referenceVoices]; | |
| console.log(`Loaded ${this.voices.length} voices (${predefinedVoices.length} predefined, ${referenceVoices.length} reference)`); | |
| return this.voices; | |
| } catch (error) { | |
| console.error('Error fetching Chatterbox voices:', error); | |
| this.voices = []; | |
| return []; | |
| } | |
| } | |
| // Alias for internal use | |
| async fetchVoices() { | |
| return this.fetchTtsVoiceObjects(); | |
| } | |
| //###########################// | |
| // Setup Event Listeners // | |
| //###########################// | |
| setupEventListeners() { | |
| // Server endpoint change | |
| $('#chatterbox-endpoint').on('input', () => { | |
| this.settings.provider_endpoint = $('#chatterbox-endpoint').val(); | |
| this.onSettingsChange(); | |
| }); | |
| // Language | |
| $('#chatterbox-language').on('change', (e) => { | |
| this.settings.language = e.target.value; | |
| this.onSettingsChange(); | |
| }); | |
| // Parameter sliders | |
| $('#chatterbox-temperature').on('input', (e) => { | |
| this.settings.temperature = parseFloat(e.target.value); | |
| $('#chatterbox-temperature-value').text(this.settings.temperature); | |
| this.onSettingsChange(); | |
| }); | |
| $('#chatterbox-exaggeration').on('input', (e) => { | |
| this.settings.exaggeration = parseFloat(e.target.value); | |
| $('#chatterbox-exaggeration-value').text(this.settings.exaggeration); | |
| this.onSettingsChange(); | |
| }); | |
| $('#chatterbox-cfg-weight').on('input', (e) => { | |
| this.settings.cfg_weight = parseFloat(e.target.value); | |
| $('#chatterbox-cfg-weight-value').text(this.settings.cfg_weight); | |
| this.onSettingsChange(); | |
| }); | |
| $('#chatterbox-speed').on('input', (e) => { | |
| this.settings.speed_factor = parseFloat(e.target.value); | |
| $('#chatterbox-speed-value').text(this.settings.speed_factor); | |
| this.onSettingsChange(); | |
| }); | |
| // Seed | |
| $('#chatterbox-seed').on('change', (e) => { | |
| this.settings.seed = parseInt(e.target.value); | |
| this.onSettingsChange(); | |
| }); | |
| // Text splitting | |
| $('#chatterbox-split-text').on('change', (e) => { | |
| this.settings.split_text = e.target.checked; | |
| if (e.target.checked) { | |
| $('#chunk-size-row').show(); | |
| } else { | |
| $('#chunk-size-row').hide(); | |
| } | |
| this.onSettingsChange(); | |
| }); | |
| $('#chatterbox-chunk-size').on('change', (e) => { | |
| this.settings.chunk_size = parseInt(e.target.value); | |
| this.onSettingsChange(); | |
| }); | |
| // Output format | |
| $('#chatterbox-format').on('change', (e) => { | |
| this.settings.output_format = e.target.value; | |
| this.onSettingsChange(); | |
| }); | |
| } | |
| //#############################// | |
| // Store ST interface settings // | |
| //#############################// | |
| onSettingsChange() { | |
| // Save the updated settings | |
| saveTtsProviderSettings(); | |
| } | |
| //#########################// | |
| // Handle Reload button // | |
| //#########################// | |
| async onRefreshClick() { | |
| try { | |
| this.updateStatus('Processing'); | |
| await this.checkReady(); | |
| if (this.ready) { | |
| await this.fetchTtsVoiceObjects(); | |
| this.updateStatus('Ready'); | |
| } else { | |
| this.updateStatus('Offline'); | |
| } | |
| } catch (error) { | |
| console.error('Error during refresh:', error); | |
| this.updateStatus('Offline'); | |
| } | |
| } | |
| //##################// | |
| // Preview Voice // | |
| //##################// | |
| async previewTtsVoice(voiceId) { | |
| try { | |
| this.updateStatus('Processing'); | |
| const previewText = 'Hello! This is a preview of the selected voice.'; | |
| // Determine if this is a reference voice | |
| let isReferenceVoice = false; | |
| let actualVoiceId = voiceId; | |
| if (voiceId && voiceId.startsWith('ref_')) { | |
| isReferenceVoice = true; | |
| actualVoiceId = voiceId.substring(4); // Remove 'ref_' prefix | |
| } | |
| // Generate preview using the main TTS endpoint | |
| const requestBody = { | |
| text: previewText, | |
| voice_mode: isReferenceVoice ? 'clone' : 'predefined', | |
| temperature: this.settings.temperature, | |
| exaggeration: this.settings.exaggeration, | |
| cfg_weight: this.settings.cfg_weight, | |
| seed: this.settings.seed >= 0 ? this.settings.seed : Math.floor(Math.random() * 2147483648), // Use random seed if -1 | |
| speed_factor: this.settings.speed_factor, | |
| language: this.settings.language, | |
| split_text: false, // Don't split for preview | |
| output_format: this.settings.output_format, | |
| }; | |
| // Add voice-specific parameters | |
| if (isReferenceVoice) { | |
| requestBody.reference_audio_filename = actualVoiceId; | |
| } else { | |
| requestBody.predefined_voice_id = actualVoiceId; | |
| } | |
| const response = await fetch(`${this.settings.provider_endpoint}/tts`, { | |
| method: 'POST', | |
| headers: { | |
| 'Content-Type': 'application/json', | |
| }, | |
| body: JSON.stringify(requestBody), | |
| }); | |
| if (!response.ok) { | |
| throw new Error(`HTTP ${response.status}: ${response.statusText}`); | |
| } | |
| // Get the audio blob and play it | |
| const audioBlob = await response.blob(); | |
| const audioUrl = URL.createObjectURL(audioBlob); | |
| const audio = new Audio(audioUrl); | |
| audio.addEventListener('ended', () => { | |
| URL.revokeObjectURL(audioUrl); | |
| this.updateStatus('Ready'); | |
| }); | |
| await audio.play(); | |
| } catch (error) { | |
| console.error('Error previewing voice:', error); | |
| this.updateStatus('Ready'); | |
| throw error; | |
| } | |
| } | |
| //#####################// | |
| // Get Voice Object // | |
| //#####################// | |
| async getVoice(voiceName) { | |
| // Ensure voices are loaded | |
| if (this.voices.length === 0) { | |
| await this.fetchTtsVoiceObjects(); | |
| } | |
| // Find the voice object by name or voice_id | |
| let match = this.voices.find(voice => | |
| voice.name === voiceName || | |
| voice.voice_id === voiceName || | |
| voice.display_name === voiceName, | |
| ); | |
| if (!match) { | |
| console.warn(`Voice not found: ${voiceName}`); | |
| // Check if it's a reference voice that wasn't in the list | |
| if (voiceName && voiceName.startsWith('ref_')) { | |
| const filename = voiceName.substring(4); | |
| return { | |
| name: `[Clone] ${filename}`, | |
| voice_id: voiceName, | |
| preview_url: null, | |
| lang: 'en', | |
| }; | |
| } | |
| // Return a default voice object | |
| return { | |
| name: voiceName || 'Default', | |
| voice_id: voiceName || this.settings.predefined_voice || 'S1', | |
| preview_url: null, | |
| lang: 'en', | |
| }; | |
| } | |
| return match; | |
| } | |
| //##################// | |
| // Generate TTS // | |
| //##################// | |
| async generateTts(inputText, voiceId) { | |
| try { | |
| this.updateStatus('Processing'); | |
| // Determine if this is a reference voice | |
| let isReferenceVoice = false; | |
| let actualVoiceId = voiceId; | |
| if (voiceId && voiceId.startsWith('ref_')) { | |
| isReferenceVoice = true; | |
| actualVoiceId = voiceId.substring(4); // Remove 'ref_' prefix | |
| } | |
| // Prepare the request body | |
| const requestBody = { | |
| text: inputText, | |
| voice_mode: isReferenceVoice ? 'clone' : 'predefined', | |
| temperature: this.settings.temperature, | |
| exaggeration: this.settings.exaggeration, | |
| cfg_weight: this.settings.cfg_weight, | |
| seed: this.settings.seed >= 0 ? this.settings.seed : Math.floor(Math.random() * 2147483648), // Use random seed if -1 | |
| speed_factor: this.settings.speed_factor, | |
| language: this.settings.language, | |
| split_text: this.settings.split_text, | |
| chunk_size: this.settings.chunk_size, | |
| output_format: this.settings.output_format, | |
| }; | |
| // Add voice-specific parameters | |
| if (isReferenceVoice) { | |
| requestBody.reference_audio_filename = actualVoiceId; | |
| } else { | |
| requestBody.predefined_voice_id = actualVoiceId || this.settings.predefined_voice; | |
| } | |
| console.log('Generating TTS with params:', requestBody); | |
| const response = await fetch(`${this.settings.provider_endpoint}/tts`, { | |
| method: 'POST', | |
| headers: { | |
| 'Content-Type': 'application/json', | |
| 'Cache-Control': 'no-cache', | |
| }, | |
| body: JSON.stringify(requestBody), | |
| }); | |
| if (!response.ok) { | |
| const errorText = await response.text(); | |
| console.error('TTS generation error:', errorText); | |
| throw new Error(`HTTP ${response.status}: ${errorText}`); | |
| } | |
| this.updateStatus('Ready'); | |
| // Return the response directly - SillyTavern expects a Response object | |
| return response; | |
| } catch (error) { | |
| console.error('Error in generateTts:', error); | |
| this.updateStatus('Ready'); | |
| throw error; | |
| } | |
| } | |
| //######################// | |
| // Update Status // | |
| //######################// | |
| updateStatus(status) { | |
| const statusElement = document.getElementById('chatterbox-status'); | |
| if (statusElement) { | |
| statusElement.textContent = status; | |
| statusElement.className = status.toLowerCase(); | |
| } | |
| } | |
| } | |