File size: 4,671 Bytes
c120a1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import { Buffer } from 'node:buffer';
import express from 'express';
import wavefile from 'wavefile';
import fetch from 'node-fetch';
import { getPipeline } from '../transformers.js';
import { forwardFetchResponse } from '../util.js';
// Express router that the speech routes in this module are registered on.
export const router = express.Router();
/**
 * Decodes a base64 data URI holding WAV audio into a single channel of samples.
 * The audio is converted to 32-bit float at a 16000 Hz sample rate. When the
 * decoded samples come back as several channels, the second channel is mixed
 * into the first and the first channel is returned.
 * @param {string} audio Base64-encoded audio
 * @returns {Float64Array} Audio data
 */
function getWaveFile(audio) {
    const decoded = new wavefile.WaveFile();
    decoded.fromDataURI(audio);
    decoded.toBitDepth('32f');
    decoded.toSampleRate(16000);

    const samples = decoded.getSamples();

    // Not a list of channels: hand the samples back untouched.
    if (!Array.isArray(samples)) {
        return samples;
    }

    const [primary, secondary] = samples;
    if (samples.length > 1) {
        const gain = Math.sqrt(2);
        // Mix in place into the first channel so no extra buffer is allocated.
        for (let idx = 0; idx < primary.length; idx += 1) {
            primary[idx] = gain * (primary[idx] + secondary[idx]) / 2;
        }
    }

    // Only the first channel is returned.
    return primary;
}
// POST /recognize: transcribes the base64-encoded audio from the request body
// with the requested model and responds with `{ text }`. Any failure is logged
// and answered with a 500 status.
router.post('/recognize', async (request, response) => {
    const TASK = 'automatic-speech-recognition';

    try {
        const { model, audio, lang } = request.body;

        // Load the pipeline first, then decode the audio.
        const transcriber = await getPipeline(TASK, model);
        const samples = getWaveFile(audio);
        const options = { language: lang || null, task: 'transcribe' };

        const startedAt = performance.now();
        const transcription = await transcriber(samples, options);
        const finishedAt = performance.now();

        const seconds = (finishedAt - startedAt) / 1000;
        console.info(`Execution duration: ${seconds} seconds`);

        const { text } = transcription;
        console.info('Transcribed audio:', text);
        return response.json({ text });
    } catch (failure) {
        console.error(failure);
        return response.sendStatus(500);
    }
});
// POST /synthesize: runs the text-to-speech pipeline on the request text and
// responds with the generated audio as a WAV file. Any failure is logged and
// answered with a 500 status.
router.post('/synthesize', async (request, response) => {
    const TASK = 'text-to-speech';

    try {
        const { text, model, speaker } = request.body;
        const synthesizer = await getPipeline(TASK, model);

        // Optional speaker embeddings arrive as base64 (optionally wrapped in a
        // data URI) and are reinterpreted as 32-bit floats.
        let speaker_embeddings = null;
        if (speaker) {
            const encoded = speaker.startsWith('data:') ? speaker.split(',')[1] : speaker;
            // Copy into a fresh Uint8Array so the Float32Array view is built on that copy's own buffer.
            const bytes = new Uint8Array(Buffer.from(encoded, 'base64'));
            speaker_embeddings = new Float32Array(bytes.buffer);
        }

        const startedAt = performance.now();
        const output = await synthesizer(text, { speaker_embeddings });
        const finishedAt = performance.now();

        const seconds = (finishedAt - startedAt) / 1000;
        console.debug(`Execution duration: ${seconds} seconds`);

        // Wrap the generated samples in a single-channel 32-bit float WAV container.
        const clip = new wavefile.WaveFile();
        clip.fromScratch(1, output.sampling_rate, '32f', output.audio);
        const encodedClip = clip.toBuffer();

        response.set('Content-Type', 'audio/wav');
        return response.send(Buffer.from(encodedClip));
    } catch (failure) {
        console.error(failure);
        return response.sendStatus(500);
    }
});
// Sub-router holding the Pollinations-backed routes.
const pollinations = express.Router();

// POST /voices: fetches the Pollinations model list and responds with the
// `voices` array of the requested model (default 'openai-audio'). Any failure
// is logged and answered with a 500 status.
pollinations.post('/voices', async (request, response) => {
    try {
        const model = request.body.model || 'openai-audio';

        const upstream = await fetch('https://text.pollinations.ai/models');
        if (!upstream.ok) {
            throw new Error('Failed to fetch Pollinations models');
        }

        const models = await upstream.json();
        if (!Array.isArray(models)) {
            throw new Error('Invalid data format received from Pollinations');
        }

        const match = models.find((entry) => entry.name === model);
        const hasVoices = Boolean(match) && Array.isArray(match.voices);
        if (!hasVoices) {
            throw new Error('No voices found for the specified model');
        }

        return response.json(match.voices);
    } catch (failure) {
        console.error(failure);
        return response.sendStatus(500);
    }
});
// POST /generate: requests speech audio for `text` from Pollinations and hands
// the upstream response to forwardFetchResponse with an audio/mpeg content type.
// Body fields: `text` (required), `model` (default 'openai-audio'),
// `voice` (default 'alloy').
// Responds 400 when `text` is missing or empty; any other failure is logged and
// answered with a 500 status.
pollinations.post('/generate', async (req, res) => {
    try {
        const { text } = req.body;
        const model = req.body.model || 'openai-audio';
        const voice = req.body.voice || 'alloy';

        // Without this guard a missing text would be URL-encoded as the literal
        // string "undefined" and sent upstream as if it were the text to speak.
        if (text === undefined || text === null || text === '') {
            console.warn('Pollinations generate request is missing text');
            return res.sendStatus(400);
        }

        const url = new URL(`https://text.pollinations.ai/generate/${encodeURIComponent(text)}`);
        url.searchParams.set('model', model);
        url.searchParams.set('voice', voice);
        url.searchParams.set('referrer', 'sillytavern');
        console.info('Pollinations request URL:', url.toString());

        const response = await fetch(url);
        if (!response.ok) {
            // Named distinctly so it does not shadow the request's `text`.
            const errorText = await response.text();
            throw new Error(`Failed to generate audio from Pollinations: ${errorText}`);
        }

        res.set('Content-Type', 'audio/mpeg');
        forwardFetchResponse(response, res);
    } catch (error) {
        console.error(error);
        return res.sendStatus(500);
    }
});
// Mount the Pollinations sub-router under the /pollinations path prefix.
router.use('/pollinations', pollinations);
|