File size: 4,698 Bytes
c120a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import { getRequestHeaders } from '../../../script.js';
import { splitRecursive } from '../../utils.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';

export class PollinationsTtsProvider {
    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        // TODO: Make this configurable
        model: 'openai-audio',
        voiceMap: {},
    };

    get settingsHtml() {
        return '';
    }

    onSettingsChange() {
        this.voices = [];
        saveTtsProviderSettings();
    }

    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        try {
            await this.checkReady();
            console.debug('Pollinations TTS: Settings loaded');
        } catch {
            console.debug('Pollinations TTS: Settings loaded, but not ready');
        }
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.filter(
            voice => voice.name == voiceName || voice.voice_id == voiceName,
        )[0];
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     */
    generateTts(text, voiceId) {
        return this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//
    async fetchTtsVoiceObjects() {
        const response = await fetch('/api/speech/pollinations/voices', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ model: this.settings.model }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson
            .sort()
            .map(x => ({ name: x, voice_id: x, preview_url: false, lang: 'en-US' }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        for await (const response of this.generateTts(text, id)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            await new Promise(resolve => {
                const audioElement = new Audio();
                audioElement.src = url;
                audioElement.play();
                audioElement.onended = () => resolve();
            });
            URL.revokeObjectURL(url);
        }
    }

    async* fetchTtsGeneration(text, voiceId) {
        const MAX_LENGTH = 1000;
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const chunks = splitRecursive(text, MAX_LENGTH);
        for (const chunk of chunks) {
            const response = await fetch('/api/speech/pollinations/generate', {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    model: this.settings.model,
                    text: 'Say exactly this and nothing else:' + '\n' + chunk,
                    voice: voiceId,
                }),
            });

            if (!response.ok) {
                toastr.error(response.statusText, 'TTS Generation Failed');
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }

            yield response;
        }
    }
}