Spaces:

Ane4ka
/

422_MTDDP

Running

App Files Community

ASureevaA committed on 9 days ago

Commit

c6a3c71

1 Parent(s): fb68e9f

fix mms

Browse files

Files changed (1) hide show

app.py +14 -19

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import tempfile
 from typing import List, Tuple, Any
 import gradio as gr
-import soundfile as soundfile_module
 import torch
 import torch.nn.functional as torch_functional
 from gtts import gTTS
@@ -202,7 +202,7 @@ def get_mms_tts_components():
     if "mms_tts_pipeline" not in MODEL_STORE:
         tts_pipeline = pipeline(
             task="text-to-speech",
-            model="kakao-enterprise/vits-ljs",
         )
         MODEL_STORE["mms_tts_pipeline"] = tts_pipeline
@@ -279,22 +279,17 @@ def synthesize_speech(text_value: str, model_key: str):
             text_to_speech_engine = gTTS(text=text_value, lang="ru")
             text_to_speech_engine.save(file_object.name)
             return file_object.name
-    if model_key == "vits-ljs":
-        tts_pipeline = get_mms_tts_components()
-        tts_output = tts_pipeline(text_value)
-        audio_array = tts_output["audio"]
-        sampling_rate_value = tts_output["sampling_rate"]
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as file_object:
-            soundfile_module.write(
-                file_object.name,
-                audio_array,
-                sampling_rate_value,
-            )
-            return file_object.name
     raise ValueError(f"Неизвестная модель: {model_key}")
@@ -692,11 +687,11 @@ def build_interface():
                     lines=3,
                 )
                 tts_model_selector = gr.Dropdown(
-                    choices=["vits-ljs", "Google TTS"],
                     label="Выберите модель",
-                    value="vits-ljs",
                     info=(
-                        "kakao-enterprise/vits-ljs\n"
                         "Google TTS"
                     ),
                 )

 from typing import List, Tuple, Any
 import gradio as gr
+import soundfile as sf
 import torch
 import torch.nn.functional as torch_functional
 from gtts import gTTS
     if "mms_tts_pipeline" not in MODEL_STORE:
         tts_pipeline = pipeline(
             task="text-to-speech",
+            model="facebook/mms-tts-rus",
         )
         MODEL_STORE["mms_tts_pipeline"] = tts_pipeline
             text_to_speech_engine = gTTS(text=text_value, lang="ru")
             text_to_speech_engine.save(file_object.name)
             return file_object.name
+    elif model_key == "mms":
+        model = VitsModel.from_pretrained("facebook/mms-tts-rus")
+        tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
+        inputs = tokenizer(text_value, return_tensors="pt")
+        with torch.no_grad():
+            output = model(**inputs).waveform
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            sf.write(f.name, output.numpy().squeeze(), model.config.sampling_rate)
+            return f.name
     raise ValueError(f"Неизвестная модель: {model_key}")
                     lines=3,
                 )
                 tts_model_selector = gr.Dropdown(
+                    choices=["mms", "Google TTS"],
                     label="Выберите модель",
+                    value="mms",
                     info=(
+                        "facebook/mms-tts-rus\n"
                         "Google TTS"
                     ),
                 )