Upload usage_example.py with huggingface_hub
Browse files- usage_example.py +43 -0
usage_example.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Saudi TTS Usage Example
|
| 2 |
+
import torch
|
| 3 |
+
import torchaudio
|
| 4 |
+
from TTS.tts.configs.xtts_config import XttsConfig
|
| 5 |
+
from TTS.tts.models.xtts import Xtts
|
| 6 |
+
|
| 7 |
+
def load_model(
    checkpoint_dir="./",
    config_path="config.json",
    vocab_path="vocab.json",
    speaker_wav="speaker.wav",
):
    """Load the Saudi TTS model and compute the speaker conditioning.

    The defaults reproduce the original hard-coded layout (all files in the
    current working directory), so ``load_model()`` behaves as before.

    Args:
        checkpoint_dir: Directory passed to ``Xtts.load_checkpoint`` as
            ``checkpoint_dir``.
        config_path: Path of the JSON file read by ``XttsConfig.load_json``.
        vocab_path: Path passed to ``Xtts.load_checkpoint`` as ``vocab_path``.
        speaker_wav: Reference audio used for conditioning. Either a single
            path or a list/tuple of paths.

    Returns:
        A ``(model, gpt_cond_latent, speaker_embedding)`` tuple, where the
        last two items are whatever ``model.get_conditioning_latents``
        returns for the reference audio.
    """
    config = XttsConfig()
    config.load_json(config_path)

    model = Xtts.init_from_config(config)
    model.load_checkpoint(config, checkpoint_dir=checkpoint_dir, vocab_path=vocab_path)

    # Move to the GPU only when one is present; otherwise stay on the CPU.
    if torch.cuda.is_available():
        model.cuda()

    # The original call passed a list of paths, so a lone path is wrapped.
    if isinstance(speaker_wav, (list, tuple)):
        reference_audio = list(speaker_wav)
    else:
        reference_audio = [speaker_wav]

    gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
        audio_path=reference_audio
    )

    return model, gpt_cond_latent, speaker_embedding
|
| 23 |
+
|
| 24 |
+
def generate_speech(model, gpt_cond_latent, speaker_embedding, text, language="ar", temperature=0.75):
    """Synthesize ``text`` with ``model`` and return the resulting waveform.

    The text, language code and speaker conditioning are forwarded to
    ``model.inference``; the ``"wav"`` entry of its result is returned.
    """
    result = model.inference(
        text,
        language,
        gpt_cond_latent,
        speaker_embedding,
        temperature=temperature,
    )
    waveform = result["wav"]
    return waveform
|
| 28 |
+
|
| 29 |
+
# Example usage
|
| 30 |
+
if __name__ == "__main__":
    model, gpt_cond_latent, speaker_embedding = load_model()

    # One row per demo utterance: (text, language code, output file).
    # Rows are synthesized in order: Arabic first, then English.
    examples = (
        ("مرحباً بكم", "ar", "arabic_output.wav"),
        ("Hello world", "en", "english_output.wav"),
    )

    for sample_text, language_code, output_file in examples:
        audio = generate_speech(model, gpt_cond_latent, speaker_embedding, sample_text, language_code)
        # unsqueeze(0) adds a leading dimension before saving (presumably the
        # channel axis torchaudio expects); 24000 is the sample rate argument.
        torchaudio.save(output_file, torch.tensor(audio).unsqueeze(0), 24000)

    print("Audio files generated!")
|