Add piper support via k2-fsa.
Browse files
app.py
CHANGED
|
@@ -50,12 +50,8 @@ type=['wav'])
|
|
| 50 |
base_mms = synth_mms(tts_text, models[tts_lang]['mms'])
|
| 51 |
base_coqui= synth_coqui(tts_text, models[tts_lang]['coqui'])
|
| 52 |
base_espeakng= synth_espeakng(tts_text, models[tts_lang]['espeakng'])
|
| 53 |
-
|
| 54 |
-
#models[tts_lang]['toucan']
|
| 55 |
base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'])
|
| 56 |
-
|
| 57 |
-
#for m in models[tts_lang]['toucan']:
|
| 58 |
-
# base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'][0])
|
| 59 |
|
| 60 |
if tts_lang=="swh":
|
| 61 |
finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-swh-female-1")
|
|
@@ -78,7 +74,7 @@ type=['wav'])
|
|
| 78 |
row3 = st.columns([1,1,2])
|
| 79 |
row4 = st.columns([1,1,2])
|
| 80 |
row5 = st.columns([1,1,2])
|
| 81 |
-
|
| 82 |
|
| 83 |
row1[0].write("**Model**")
|
| 84 |
row1[1].write("**Configuration**")
|
|
@@ -103,10 +99,10 @@ type=['wav'])
|
|
| 103 |
row5[1].write("default")
|
| 104 |
row5[2].audio(base_toucan[0], sample_rate = base_toucan[1])
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
|
| 111 |
#################################################################
|
| 112 |
if tts_lang == "swh":
|
|
@@ -252,7 +248,7 @@ On a case-by-case basis, for different languages of interest, I have added:
|
|
| 252 |
- Specific fine-tuned variants of Meta's MMS (either fine-tuned by [Yoach Lacombe](https://huggingface.co/ylacombe), or fine-tuned by me using his scripts).
|
| 253 |
|
| 254 |
I am in the process of adding support for:
|
| 255 |
-
- [**Piper**](https://github.com/rhasspy/piper), a TTS system that supports multiple voices per language and approximately 30 languages.[^5]
|
| 256 |
- [**African Voices**](https://github.com/neulab/AfricanVoices), a CMU research project that fine-tuned synthesizers for different African languages. The site hosting the synthesizers is deprecated but they can be downloaded from Google's Wayback Machine. [^6]
|
| 257 |
|
| 258 |
|
|
|
|
| 50 |
base_mms = synth_mms(tts_text, models[tts_lang]['mms'])
|
| 51 |
base_coqui= synth_coqui(tts_text, models[tts_lang]['coqui'])
|
| 52 |
base_espeakng= synth_espeakng(tts_text, models[tts_lang]['espeakng'])
|
|
|
|
|
|
|
| 53 |
base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'])
|
| 54 |
+
base_piper = synth_piper(tts_text, models[tts_lang]['piper'])
|
|
|
|
|
|
|
| 55 |
|
| 56 |
if tts_lang=="swh":
|
| 57 |
finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-swh-female-1")
|
|
|
|
| 74 |
row3 = st.columns([1,1,2])
|
| 75 |
row4 = st.columns([1,1,2])
|
| 76 |
row5 = st.columns([1,1,2])
|
| 77 |
+
row6 = st.columns([1,1,2])
|
| 78 |
|
| 79 |
row1[0].write("**Model**")
|
| 80 |
row1[1].write("**Configuration**")
|
|
|
|
| 99 |
row5[1].write("default")
|
| 100 |
row5[2].audio(base_toucan[0], sample_rate = base_toucan[1])
|
| 101 |
|
| 102 |
+
if base_piper is not None:
|
| 103 |
+
row6[0].write(f"[Piper](https://github.com/rhasspy/piper)")
|
| 104 |
+
row6[1].write("default")
|
| 105 |
+
row6[2].audio(base_piper[0], sample_rate = base_piper[1])
|
| 106 |
|
| 107 |
#################################################################
|
| 108 |
if tts_lang == "swh":
|
|
|
|
| 248 |
- Specific fine-tuned variants of Meta's MMS (either fine-tuned by [Yoach Lacombe](https://huggingface.co/ylacombe), or fine-tuned by me using his scripts).
|
| 249 |
|
| 250 |
I am in the process of adding support for:
|
| 251 |
+
- [**Piper**](https://github.com/rhasspy/piper), a TTS system that supports multiple voices per language and approximately 30 languages. To test different voices, please see the [Huggingface demo](https://huggingface.co/spaces/k2-fsa/text-to-speech).[^5]
|
| 252 |
- [**African Voices**](https://github.com/neulab/AfricanVoices), a CMU research project that fine-tuned synthesizers for different African languages. The site hosting the synthesizers is deprecated but they can be downloaded from Google's Wayback Machine. [^6]
|
| 253 |
|
| 254 |
|