Add piper support via k2-fsa.
Browse files
app.py
CHANGED
|
@@ -50,12 +50,8 @@ type=['wav'])
|
|
| 50 |
base_mms = synth_mms(tts_text, models[tts_lang]['mms'])
|
| 51 |
base_coqui= synth_coqui(tts_text, models[tts_lang]['coqui'])
|
| 52 |
base_espeakng= synth_espeakng(tts_text, models[tts_lang]['espeakng'])
|
| 53 |
-
|
| 54 |
-
#models[tts_lang]['toucan']
|
| 55 |
base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'])
|
| 56 |
-
|
| 57 |
-
#for m in models[tts_lang]['toucan']:
|
| 58 |
-
# base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'][0])
|
| 59 |
|
| 60 |
if tts_lang=="swh":
|
| 61 |
finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-swh-female-1")
|
|
@@ -78,7 +74,7 @@ type=['wav'])
|
|
| 78 |
row3 = st.columns([1,1,2])
|
| 79 |
row4 = st.columns([1,1,2])
|
| 80 |
row5 = st.columns([1,1,2])
|
| 81 |
-
|
| 82 |
|
| 83 |
row1[0].write("**Model**")
|
| 84 |
row1[1].write("**Configuration**")
|
|
@@ -103,10 +99,10 @@ type=['wav'])
|
|
| 103 |
row5[1].write("default")
|
| 104 |
row5[2].audio(base_toucan[0], sample_rate = base_toucan[1])
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
|
| 111 |
#################################################################
|
| 112 |
if tts_lang == "swh":
|
|
@@ -252,7 +248,7 @@ On a case-by-case basis, for different languages of interest, I have added:
|
|
| 252 |
- Specific fine-tuned variants of Meta's MMS (either fine-tuned by [Yoach Lacombe](https://huggingface.co/ylacombe), or fine-tuned by me using his scripts).
|
| 253 |
|
| 254 |
I am in the process of adding support for:
|
| 255 |
-
- [**Piper**](https://github.com/rhasspy/piper), a TTS system that supports multiple voices per language and approximately 30 languages.[^5]
|
| 256 |
- [**African Voices**](https://github.com/neulab/AfricanVoices), a CMU research project that fine-tuned synthesizers for different African languages. The site hosting the synthesizers is deprecated but they can be downloaded from Google's Wayback Machine. [^6]
|
| 257 |
|
| 258 |
|
|
|
|
| 50 |
base_mms = synth_mms(tts_text, models[tts_lang]['mms'])
|
| 51 |
base_coqui= synth_coqui(tts_text, models[tts_lang]['coqui'])
|
| 52 |
base_espeakng= synth_espeakng(tts_text, models[tts_lang]['espeakng'])
|
|
|
|
|
|
|
| 53 |
base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'])
|
| 54 |
+
base_piper = synth_piper(tts_text, models[tts_lang]['piper'])
|
|
|
|
|
|
|
| 55 |
|
| 56 |
if tts_lang=="swh":
|
| 57 |
finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-swh-female-1")
|
|
|
|
| 74 |
row3 = st.columns([1,1,2])
|
| 75 |
row4 = st.columns([1,1,2])
|
| 76 |
row5 = st.columns([1,1,2])
|
| 77 |
+
row6 = st.columns([1,1,2])
|
| 78 |
|
| 79 |
row1[0].write("**Model**")
|
| 80 |
row1[1].write("**Configuration**")
|
|
|
|
| 99 |
row5[1].write("default")
|
| 100 |
row5[2].audio(base_toucan[0], sample_rate = base_toucan[1])
|
| 101 |
|
| 102 |
+
if base_piper is not None:
|
| 103 |
+
row6[0].write(f"[Piper](https://github.com/rhasspy/piper)")
|
| 104 |
+
row6[1].write("default")
|
| 105 |
+
row6[2].audio(base_piper[0], sample_rate = base_piper[1])
|
| 106 |
|
| 107 |
#################################################################
|
| 108 |
if tts_lang == "swh":
|
|
|
|
| 248 |
- Specific fine-tuned variants of Meta's MMS (either fine-tuned by [Yoach Lacombe](https://huggingface.co/ylacombe), or fine-tuned by me using his scripts).
|
| 249 |
|
| 250 |
I am in the process of adding support for:
|
| 251 |
+
- [**Piper**](https://github.com/rhasspy/piper), a TTS system that supports multiple voices per language and approximately 30 languages. To test different voices, please see the [Huggingface demo](https://huggingface.co/spaces/k2-fsa/text-to-speech).[^5]
|
| 252 |
- [**African Voices**](https://github.com/neulab/AfricanVoices), a CMU research project that fine-tuned synthesizers for different African languages. The site hosting the synthesizers is deprecated but they can be downloaded from Google's Wayback Machine. [^6]
|
| 253 |
|
| 254 |
|