Added Microsoft Edge TTS HF Space; model basenames
app.py CHANGED
@@ -63,6 +63,9 @@ AVAILABLE_MODELS = {
     'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29.0 4.42.0
     'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29.0 4.42.0
 
+    # Microsoft Edge TTS
+    'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech',
+
     # TTS w issues
     # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
     # 'amphion/Text-to-Speech': '/predict#0', # old running space, takes a whole minute to synthesize
@@ -77,63 +80,63 @@ AVAILABLE_MODELS = {
 HF_SPACES = {
     # XTTS v2
     'coqui/xtts': {
-        'name': '
+        'name': 'XTTS v2',
         'function': '1',
         'text_param_index': 0,
         'return_audio_index': 1,
     },
     # WhisperSpeech
     'collabora/WhisperSpeech': {
-        'name': '
+        'name': 'WhisperSpeech',
         'function': '/whisper_speech_demo',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
     # OpenVoice (MyShell.ai)
     'myshell-ai/OpenVoice': {
-        'name':'
+        'name':'OpenVoice',
         'function': '1',
         'text_param_index': 0,
         'return_audio_index': 1,
     },
     # OpenVoice v2 (MyShell.ai)
     'myshell-ai/OpenVoiceV2': {
-        'name':'
+        'name':'OpenVoice v2',
         'function': '1',
         'text_param_index': 0,
         'return_audio_index': 1,
     },
     # MetaVoice
     'mrfakename/MetaVoice-1B-v0.1': {
-        'name':'
+        'name':'MetaVoice',
         'function': '/tts',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
     # xVASynth (CPU)
     'Pendrokar/xVASynth': {
-        'name': '
+        'name': 'xVASynth v3',
         'function': '/predict',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
     # CoquiTTS (CPU)
     'coqui/CoquiTTS': {
-        'name': '
+        'name': 'CoquiTTS',
         'function': '0',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
     # HierSpeech_TTS
     'LeeSangHoon/HierSpeech_TTS': {
-        'name': '
+        'name': 'HierSpeech++',
         'function': '/predict',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
     # MeloTTS (MyShell.ai)
     'mrfakename/MeloTTS': {
-        'name': '
+        'name': 'MeloTTS',
         'function': '/synthesize',
         'text_param_index': 0,
         'return_audio_index': 0,
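Each HF_SPACES entry now pairs a Space repo id with a short display name ('name') alongside the endpoint ('function') and the parameter/return indices the arena already relied on. A throwaway loop to eyeball that table (illustrative only, not part of app.py):

for repo, meta in HF_SPACES.items():
    print(f"{meta['name']:>15} -> https://huggingface.co/spaces/{repo} "
          f"(endpoint {meta['function']}, audio return #{meta['return_audio_index']})")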
@@ -141,26 +144,33 @@ HF_SPACES = {
 
     # Parler
     'parler-tts/parler_tts': {
-        'name': '
+        'name': 'Parler',
         'function': '/gen_tts',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
     # Parler
     'parler-tts/parler_tts_mini': {
-        'name': '
+        'name': 'Parler Mini',
         'function': '/gen_tts',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
     # Parler, using Expresso dataset
     'parler-tts/parler-tts-expresso': {
-        'name': '
+        'name': 'Parler Expresso',
         'function': '/gen_tts',
         'text_param_index': 0,
         'return_audio_index': 0,
     },
 
+    # Microsoft Edge TTS
+    'innoai/Edge-TTS-Text-to-Speech': {
+        'name': 'Edge TTS',
+        'function': '/predict',
+        'text_param_index': 0,
+        'return_audio_index': 0,
+    },
 
     # TTS w issues
     # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
@@ -239,6 +249,11 @@ OVERRIDE_INPUTS = {
     'parler-tts/parler-tts-expresso': {
         1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
     },
+    'innoai/Edge-TTS-Text-to-Speech': {
+        1: 'en-US-EmmaMultilingualNeural - en-US (Female)', # voice
+        2: 0, # pace rate
+        3: 0, # pitch
+    },
 }
 
 hf_clients = {}
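Taken together with the HF_SPACES entry above, these overrides describe a complete call: the text goes to positional parameter 0 (text_param_index), parameters 1–3 are filled from OVERRIDE_INPUTS, the endpoint is '/predict', and the audio comes back at return index 0. A minimal gradio_client sketch of that call (hand-rolled and illustrative; app.py assembles the argument list generically rather than like this):

from gradio_client import Client

client = Client('innoai/Edge-TTS-Text-to-Speech')
result = client.predict(
    'The quick brown fox jumps over the lazy dog.',   # 0: text_param_index
    'en-US-EmmaMultilingualNeural - en-US (Female)',  # 1: voice (OVERRIDE_INPUTS)
    0,                                                # 2: pace rate (OVERRIDE_INPUTS)
    0,                                                # 3: pitch (OVERRIDE_INPUTS)
    api_name='/predict',                              # 'function' in HF_SPACES
)
# return_audio_index = 0: the audio file path is the first (here only) output
audio_path = result[0] if isinstance(result, tuple) else result
print(audio_path)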
@@ -579,12 +594,24 @@ def get_leaderboard(reveal_prelim = False):
 def make_link_to_space(model_name):
     # create a anchor link if a HF space
     style = 'text-decoration: underline;text-decoration-style: dotted;'
+    title = ''
+
+    # bolden actual name
+    # model_name_split = model_name.split('/')
+    # model_name_split = model_name_split[:-1].join('/') +'/<strong>'+ model_name_split[-1] +'</strong>'
     if model_name in AVAILABLE_MODELS:
         style += 'color: var(--link-text-color);'
+        title = model_name
     else:
         style += 'font-style: italic;'
+        title = 'Disabled for Arena'
+
+    model_basename = model_name
+    if model_name in HF_SPACES:
+        model_basename = HF_SPACES[model_name]['name']
+
     if '/' in model_name:
-        return '🤗 <a style="'
+        return '🤗 <a style="'+ style +'" title="'+ title +'" href="'+ 'https://huggingface.co/spaces/'+ model_name +'">'+ model_basename +'</a>'
 
     # otherwise just return the model name
     return model_name
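The net effect on the leaderboard links: the anchor now carries a title tooltip and shows the short HF_SPACES name instead of the raw repo id. Two illustrative calls (assuming 'coqui/xtts' is listed in AVAILABLE_MODELS, which this diff does not show; the second model name is made up):

print(make_link_to_space('coqui/xtts'))
# 🤗 <a style="text-decoration: underline;text-decoration-style: dotted;color: var(--link-text-color);" title="coqui/xtts" href="https://huggingface.co/spaces/coqui/xtts">XTTS v2</a>

print(make_link_to_space('some-user/unlisted-tts'))
# 🤗 <a style="text-decoration: underline;text-decoration-style: dotted;font-style: italic;" title="Disabled for Arena" href="https://huggingface.co/spaces/some-user/unlisted-tts">some-user/unlisted-tts</a>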
@@ -817,7 +844,6 @@ def synthandreturn(text):
     log_text(text)
     print("[debug] Using", mdl1, mdl2)
     def predict_and_update_result(text, model, result_storage):
-        print(model)
         # 3 attempts
         attempt_count = 0
         while attempt_count < 3:
@@ -829,7 +855,7 @@ def synthandreturn(text):
                 hf_clients[model] = Client(model, hf_token=hf_token)
             mdl_space = hf_clients[model]
 
-            print(f"{model}: Fetching endpoints of HF Space")
+            # print(f"{model}: Fetching endpoints of HF Space")
             # assume the index is one of the first 9 return params
             return_audio_index = int(HF_SPACES[model]['return_audio_index'])
            endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
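This last hunk only silences a log line; the surrounding context shows where return_audio_index comes into play. A tiny self-contained illustration of what that index selects (the path and values below are made up):

return_audio_index = 0                                  # Edge TTS; coqui/xtts uses 1
result = '/tmp/gradio/abc123/audio.wav'                 # pretend prediction result
# predict() returns a single value for one output and a tuple for several;
# normalize, then pick the audio file path by index
outputs = result if isinstance(result, tuple) else (result,)
audio_path = outputs[return_audio_index]
print(audio_path)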