File size: 9,666 Bytes
c120a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
import express from 'express';
import fetch from 'node-fetch';
import { readSecret, SECRET_KEYS } from './secrets.js';

// Express router exported by this module; the POST /generate-voice handler below is registered on it.
export const router = express.Router();

// Audio format -> MIME type mapping. Frozen so the shared table cannot be mutated at runtime.
const AUDIO_MIME_TYPES = Object.freeze({
    'mp3': 'audio/mpeg',
    'wav': 'audio/wav',
    'pcm': 'audio/pcm',
    'flac': 'audio/flac',
    'aac': 'audio/aac',
});

// MIME type used when the requested format is missing or not in the table.
const DEFAULT_AUDIO_MIME_TYPE = 'audio/mpeg';

/**
 * Resolves the MIME type to use for a given audio format name.
 *
 * Only the table's own keys are honoured. The format comes from the request body, so a plain
 * `table[format]` lookup would let inherited keys such as `constructor` or `__proto__` return
 * functions/objects instead of a MIME string.
 *
 * @param {string} format Audio format name (e.g. 'mp3', 'wav').
 * @returns {string} The matching MIME type, or 'audio/mpeg' when the format is unknown.
 */
const getAudioMimeType = (format) => {
    const isKnownFormat = Object.prototype.hasOwnProperty.call(AUDIO_MIME_TYPES, format);
    return isKnownFormat ? AUDIO_MIME_TYPES[format] : DEFAULT_AUDIO_MIME_TYPE;
};

// MiniMax `base_resp.status_code` that this module reports as an authentication failure.
const MINIMAX_AUTH_ERROR_CODE = 1004;

// Error bodies longer than this are summarised instead of being echoed back to the client.
const MAX_ERROR_TEXT_LENGTH = 500;

/**
 * Builds the client-facing message for a MiniMax `base_resp` with a non-zero status code.
 * @param {{ status_code?: number, status_msg?: string }} baseResp `base_resp` object from the API payload.
 * @returns {string} Error message to return to the client.
 */
const describeBaseRespError = (baseResp) => {
    if (baseResp.status_code === MINIMAX_AUTH_ERROR_CODE) {
        return 'Authentication failed - Please check your API key and API host';
    }
    return `API Error: ${baseResp.status_msg}`;
};

/**
 * Extracts an error message from a non-OK upstream response.
 *
 * The body is read exactly once as text and then parsed as JSON. A response body can only be
 * consumed once, so calling `.json()` and falling back to `.text()` on the same response
 * never yields the text.
 *
 * @param {{ status: number, text: () => Promise<string> }} apiResponse Failed fetch response.
 * @returns {Promise<string>} Error message to return to the client.
 */
const readUpstreamErrorMessage = async (apiResponse) => {
    const statusMessage = `HTTP ${apiResponse.status}`;

    let errorText;
    try {
        errorText = await apiResponse.text();
    } catch (textError) {
        console.error('MiniMax TTS: Failed to read error response:', textError);
        return `HTTP ${apiResponse.status}: Unable to read error details`;
    }

    /** @type {any} */
    let errorData;
    try {
        errorData = JSON.parse(errorText);
    } catch (jsonError) {
        // Not JSON: fall back to the raw text, unless it is too large to be a useful message.
        console.error('MiniMax TTS API error (Text):', errorText);
        if (errorText.length > MAX_ERROR_TEXT_LENGTH) {
            return `HTTP ${apiResponse.status}: Response too large (${errorText.length} characters)`;
        }
        return errorText || statusMessage;
    }

    console.error('MiniMax TTS API error (JSON):', errorData);

    // Check for MiniMax specific error format first, then generic error shapes.
    const baseResp = errorData?.base_resp;
    if (baseResp && baseResp.status_code !== 0) {
        return describeBaseRespError(baseResp);
    }
    return errorData?.error?.message || errorData?.message || errorData?.detail || statusMessage;
};

/**
 * Sends binary audio to the client with Content-Type and Content-Length set.
 * @param {import('express').Response} response Express response.
 * @param {string} format Requested audio format, used to pick the MIME type.
 * @param {Buffer} audioBuffer Audio bytes to send.
 * @returns {import('express').Response} The response, for `return` chaining.
 */
const sendAudio = (response, format, audioBuffer) => {
    response.setHeader('Content-Type', getAudioMimeType(format));
    response.setHeader('Content-Length', audioBuffer.length);
    return response.send(audioBuffer);
};

/**
 * POST /generate-voice
 *
 * Forwards a text-to-speech request to the MiniMax `/v1/t2a_v2` endpoint and streams the
 * resulting audio back to the client.
 *
 * Request body: `text` and `voiceId` are required; `apiHost`, `model`, `speed`, `volume`,
 * `pitch`, `audioSampleRate`, `bitrate`, `format` and `language` are optional.
 * The API key and group id are read from the user's stored secrets.
 *
 * Responses: audio bytes on success; `400` JSON when required parameters are missing or
 * `apiHost` is not a valid http(s) URL; `500` JSON for upstream or conversion failures.
 */
router.post('/generate-voice', async (request, response) => {
    try {
        const {
            text,
            voiceId,
            apiHost = 'https://api.minimax.io',
            model = 'speech-02-hd',
            speed = 1.0,
            volume = 1.0,
            pitch = 1.0,
            audioSampleRate = 32000,
            bitrate = 128000,
            format = 'mp3',
            language,
        } = request.body ?? {};

        const apiKey = readSecret(request.user.directories, SECRET_KEYS.MINIMAX);
        const groupId = readSecret(request.user.directories, SECRET_KEYS.MINIMAX_GROUP_ID);

        // Validate required parameters
        if (!text || !voiceId || !apiKey || !groupId) {
            console.warn('MiniMax TTS: Missing required parameters');
            return response.status(400).json({ error: 'Missing required parameters: text, voiceId, apiKey, and groupId are required' });
        }

        // NOTE(review): apiHost is client-supplied and the stored API key is sent to it in the
        // Authorization header. Consider restricting it to an allowlist of MiniMax hosts.
        let apiUrl;
        try {
            apiUrl = new URL(`${apiHost}/v1/t2a_v2`);
        } catch (urlError) {
            apiUrl = null;
        }
        if (!apiUrl || (apiUrl.protocol !== 'http:' && apiUrl.protocol !== 'https:')) {
            console.warn('MiniMax TTS: Invalid apiHost');
            return response.status(400).json({ error: 'Invalid apiHost: an absolute http(s) URL is required' });
        }
        // searchParams percent-encodes the value, so special characters cannot corrupt the query.
        apiUrl.searchParams.set('GroupId', groupId);

        const requestBody = {
            model: model,
            text: text,
            stream: false,
            voice_setting: {
                voice_id: voiceId,
                speed: Number(speed),
                vol: Number(volume),
                pitch: Number(pitch),
            },
            audio_setting: {
                sample_rate: Number(audioSampleRate),
                bitrate: Number(bitrate),
                format: format,
                channel: 1,
            },
        };

        // Add language parameter if provided
        if (language) {
            requestBody.lang = language;
        }

        // The query string is left out of the log because GroupId is stored as a secret.
        console.debug('MiniMax TTS Request:', {
            url: `${apiUrl.origin}${apiUrl.pathname}`,
            body: { ...requestBody, voice_setting: { ...requestBody.voice_setting, voice_id: '[REDACTED]' } },
        });

        const apiResponse = await fetch(apiUrl.href, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
                'MM-API-Source': 'SillyTavern-TTS',
            },
            body: JSON.stringify(requestBody),
        });

        if (!apiResponse.ok) {
            const errorMessage = await readUpstreamErrorMessage(apiResponse);
            console.error('MiniMax TTS API request failed:', errorMessage);
            return response.status(500).json({ error: errorMessage });
        }

        // Parse the response
        /** @type {any} */
        let responseData;
        try {
            responseData = await apiResponse.json();
            console.debug('MiniMax TTS Response received');
        } catch (jsonError) {
            console.error('MiniMax TTS: Failed to parse response as JSON:', jsonError);
            return response.status(500).json({ error: 'Invalid response format from MiniMax API' });
        }

        // The API can report failures inside an HTTP 200 payload via base_resp.status_code.
        const baseResp = responseData?.base_resp;
        if (baseResp && baseResp.status_code !== 0) {
            console.error('MiniMax TTS API error:', baseResp);
            return response.status(500).json({ error: describeBaseRespError(baseResp) });
        }

        const hexAudio = responseData?.data?.audio;
        const audioUrl = responseData?.data?.url;

        if (hexAudio) {
            // Process hex-encoded audio data
            if (typeof hexAudio !== 'string') {
                console.error('MiniMax TTS: Invalid audio data format');
                return response.status(500).json({ error: 'Invalid audio data format' });
            }

            // Remove possible prefix and spaces
            const cleanHex = hexAudio.replace(/^0x/, '').replace(/\s/g, '');

            // Validate up front: Buffer.from(..., 'hex') silently truncates at the first non-hex character.
            if (!/^[0-9a-fA-F]*$/.test(cleanHex)) {
                console.error('MiniMax TTS: Invalid hex string format');
                return response.status(500).json({ error: 'Invalid audio data format' });
            }

            if (cleanHex.length === 0) {
                console.error('MiniMax TTS: Failed to parse hex string');
                return response.status(500).json({ error: 'Invalid hex string format' });
            }

            // Ensure hex string length is even
            const paddedHex = cleanHex.length % 2 === 0 ? cleanHex : `0${cleanHex}`;

            try {
                const audioBytes = Buffer.from(paddedHex, 'hex');
                console.debug(`MiniMax TTS: Converted ${paddedHex.length} hex characters to ${audioBytes.length} bytes`);
                return sendAudio(response, format, audioBytes);
            } catch (conversionError) {
                console.error('MiniMax TTS: Audio conversion error:', conversionError);
                return response.status(500).json({ error: `Audio data conversion failed: ${conversionError.message}` });
            }
        }

        if (audioUrl) {
            // Handle URL-based audio response
            console.debug('MiniMax TTS: Received audio URL:', audioUrl);

            try {
                const audioResponse = await fetch(audioUrl);
                if (!audioResponse.ok) {
                    console.error('MiniMax TTS: Failed to fetch audio from URL:', audioResponse.status);
                    return response.status(500).json({ error: `Failed to fetch audio from URL: ${audioResponse.status}` });
                }

                const audioBuffer = Buffer.from(await audioResponse.arrayBuffer());
                return sendAudio(response, format, audioBuffer);
            } catch (urlError) {
                console.error('MiniMax TTS: Error fetching audio from URL:', urlError);
                return response.status(500).json({ error: `Failed to fetch audio: ${urlError.message}` });
            }
        }

        // Neither inline audio nor a URL was returned: report whatever error detail is available.
        const errorMessage = responseData?.base_resp?.status_msg || responseData?.error?.message || 'Unknown error';
        console.error('MiniMax TTS: No valid audio data in response:', responseData);
        return response.status(500).json({ error: `API Error: ${errorMessage}` });
    } catch (error) {
        console.error('MiniMax TTS generation failed:', error);
        return response.status(500).json({ error: 'Internal server error' });
    }
});