Update app.py
app.py CHANGED
@@ -48,36 +48,14 @@ def generate_audio(text, model_id, language):
     pipe_dict["language"] = language
     pipe_dict["original_pipe"] = pipeline("text-to-speech", model=default_model_per_language[language], device=0)
 
-    # if pipe_dict["current_model"] != model_id:
-    #     gr.Warning("Model has changed - loading new model")
-    #     pipe_dict["pipe"] = pipeline("text-to-speech", model=model_id, device=0)
-    #     pipe_dict["current_model"] = model_id
-
     num_speakers = pipe_dict["pipe"].model.config.num_speakers
 
     out = []
-
+
     output = pipe_dict["original_pipe"](text)
     output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label=f"Finetuned model prediction {default_model_per_language[language]}", show_label=True,
                       visible=True)
 
-    # out.extend([gr.Audio(visible=False)])
-
-    # if num_speakers>1:
-    #     for i in range(min(num_speakers, max_speakers - 1)):
-    #         forward_params = {"speaker_id": i}
-    #         output = pipe_dict["pipe"](text, forward_params=forward_params)
-
-    #         output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=False, label=f"Generated Audio - speaker {i}", show_label=True,
-    #                 visible=True)
-    #         out.append(output)
-    #     out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
-    # else:
-    #     output = pipe_dict["pipe"](text)
-    #     output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label="Generated Audio - Mono speaker", show_label=True,
-    #                 visible=True)
-    #     out.append(output)
-    #     out.extend([gr.Audio(visible=False)]*(max_speakers-2))
     return output
 
 
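The line kept by this change wraps the Transformers text-to-speech pipeline output directly in a gr.Audio component: the pipeline returns a dict with "audio" (a NumPy waveform) and "sampling_rate", and gr.Audio accepts a (sampling_rate, waveform) tuple when type="numpy". A minimal sketch of that pattern follows; it assumes transformers >= 4.32 and gradio are installed, uses facebook/mms-tts-eng as an illustrative checkpoint rather than the Space's default_model_per_language entry, and the function name generate_audio_sketch is hypothetical.

import gradio as gr
from transformers import pipeline

# Illustrative checkpoint; the Space instead loads default_model_per_language[language] with device=0 (GPU).
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")

def generate_audio_sketch(text):
    # The TTS pipeline returns {"audio": np.ndarray, "sampling_rate": int}.
    output = tts(text)
    # gr.Audio takes a (sampling_rate, waveform) tuple when type="numpy"; squeeze() drops the extra channel dim.
    return gr.Audio(
        value=(output["sampling_rate"], output["audio"].squeeze()),
        type="numpy",
        autoplay=True,
        label="Finetuned model prediction",
        show_label=True,
        visible=True,
    )

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="Input text")
    audio_out = gr.Audio(label="Finetuned model prediction")
    text_in.submit(generate_audio_sketch, inputs=text_in, outputs=audio_out)

if __name__ == "__main__":
    demo.launch()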