# Spaces: Runtime error
| import gradio as gr | |
| from transformers import VitsModel, AutoTokenizer | |
| import torch | |
| import scipy.io.wavfile | |
| import numpy as np | |
| import librosa | |
| import soundfile as sf | |
| import tempfile | |
# Checkpoint shared by the model and its tokenizer. Keeping the id in one
# place guarantees both are always loaded from the same repository.
MODEL_ID = "facebook/mms-tts-eng"

# Load the model and tokenizer once at import time so every request reuses them.
model = VitsModel.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
def pitch_shift_np(audio_np, sampling_rate, pitch_shift):
    """Return ``audio_np`` pitch-shifted via ``librosa.effects.pitch_shift``.

    Args:
        audio_np: Audio samples to shift (the caller passes a NumPy waveform).
        sampling_rate: Sampling rate of ``audio_np``, forwarded as ``sr``.
        pitch_shift: Number of steps to shift, forwarded as ``n_steps``
            (the UI labels this value as semitones).

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for these arguments.
    """
    # ``sr`` and ``n_steps`` are passed by keyword, exactly as librosa expects.
    shift_options = {"sr": sampling_rate, "n_steps": pitch_shift}
    return librosa.effects.pitch_shift(audio_np, **shift_options)
def synthesize_speech(text, pitch_shift):
    """Synthesize ``text`` to speech, pitch-shift it, and save it as a WAV file.

    Args:
        text: Text to convert to speech.
        pitch_shift: Pitch offset forwarded to ``pitch_shift_np``.

    Returns:
        Path of a temporary ``.wav`` file containing the shifted audio. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # There is nothing to synthesize for blank input; fail early with a
    # message the Gradio UI can display instead of running the model on it.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    sampling_rate = model.config.sampling_rate

    # Tokenize the input text into PyTorch tensors.
    inputs = tokenizer(text, return_tensors="pt")

    # Generate the waveform; this is inference only, so skip gradient tracking.
    with torch.no_grad():
        output = model(**inputs).waveform.squeeze().numpy()

    # Apply the requested pitch shift.
    shifted_audio = pitch_shift_np(output, sampling_rate, pitch_shift)

    # Reserve a unique path, then close the handle BEFORE writing: reopening a
    # NamedTemporaryFile by name while it is still open is not portable
    # (it fails on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        temp_file_path = fp.name
    sf.write(temp_file_path, shifted_audio, sampling_rate)

    return temp_file_path
# Create the Gradio interface: a text box and a pitch slider as inputs, and
# an audio player fed by the file path that synthesize_speech returns.
interface = gr.Interface(
    fn=synthesize_speech,
    inputs=[
        gr.components.Textbox(lines=2, placeholder="Type your text here..."),
        gr.components.Slider(
            minimum=-2,
            maximum=2,
            step=0.1,
            # Start at "no shift". Without an explicit value the slider falls
            # back to its minimum, so speech would be lowered by default.
            value=0,
            label="Pitch Shift (Semitones)",
        ),
    ],
    outputs=gr.components.Audio(type="filepath", label="Generated Speech"),
    title="Text to Speech Synthesis",
    description="Type text and convert it to speech using a TTS model. Use the slider to adjust the pitch.",
)
# Launch the application only when this file is run as a script, so that
# importing the module (e.g. to reuse synthesize_speech) does not start the app.
if __name__ == "__main__":
    interface.launch()