Spaces:
Running
Running
Working spaces: xVASynth, MetaVoice, CoquiTTS
Browse files
app.py
CHANGED
|
@@ -31,12 +31,26 @@ with open('harvard_sentences.txt') as f:
|
|
| 31 |
# Constants
|
| 32 |
####################################
|
| 33 |
AVAILABLE_MODELS = {
|
| 34 |
-
'XTTSv2': 'xtts',
|
| 35 |
-
'WhisperSpeech': 'whisperspeech',
|
| 36 |
-
'ElevenLabs': 'eleven',
|
| 37 |
-
'OpenVoice': 'openvoice',
|
| 38 |
-
'Pheme': 'pheme',
|
| 39 |
-
'MetaVoice': 'metavoice'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
}
|
| 41 |
|
| 42 |
SPACE_ID = os.getenv('SPACE_ID')
|
|
@@ -118,6 +132,7 @@ if not os.path.isfile(DB_PATH):
|
|
| 118 |
# Create DB table (if it doesn't exist)
|
| 119 |
create_db_if_missing()
|
| 120 |
|
|
|
|
| 121 |
# Sync local DB with remote repo every 5 minutes (only if a change is detected)
|
| 122 |
scheduler = CommitScheduler(
|
| 123 |
repo_id=DB_DATASET_ID,
|
|
@@ -133,7 +148,7 @@ scheduler = CommitScheduler(
|
|
| 133 |
####################################
|
| 134 |
# Router API
|
| 135 |
####################################
|
| 136 |
-
router = Client("TTS-AGI/tts-router", hf_token=
|
| 137 |
####################################
|
| 138 |
# Gradio app
|
| 139 |
####################################
|
|
@@ -291,6 +306,9 @@ model_licenses = {
|
|
| 291 |
'metavoice': 'Apache 2.0',
|
| 292 |
'elevenlabs': 'Proprietary',
|
| 293 |
'whisperspeech': 'MIT',
|
|
|
|
|
|
|
|
|
|
| 294 |
}
|
| 295 |
model_links = {
|
| 296 |
'styletts2': 'https://github.com/yl4579/StyleTTS2',
|
|
@@ -564,7 +582,50 @@ def synthandreturn(text):
|
|
| 564 |
def predict_and_update_result(text, model, result_storage):
|
| 565 |
try:
|
| 566 |
if model in AVAILABLE_MODELS:
|
| 567 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
else:
|
| 569 |
result = router.predict(text, model.lower(), api_name="/synthesize")
|
| 570 |
except:
|
|
@@ -593,6 +654,30 @@ def synthandreturn(text):
|
|
| 593 |
# doloudnorm(result)
|
| 594 |
# except:
|
| 595 |
# pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 596 |
results = {}
|
| 597 |
thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1, results))
|
| 598 |
thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2, results))
|
|
|
|
| 31 |
# Constants
|
| 32 |
####################################
|
| 33 |
# Models that can be synthesized, keyed by model identifier.
#
# Keys containing a '/' are treated by predict_and_update_result as public
# Hugging Face Space ids and are called directly through a gradio Client.
# Their value has the form '<endpoint>#<audio-index>':
#   * '<endpoint>' is either a named API endpoint starting with '/'
#     (e.g. '/tts') or a single digit that is used as the fn_index of an
#     unnamed endpoint.
#   * '<audio-index>' is the single trailing digit: the position of the audio
#     output among the values returned by the endpoint.
# Keys without a '/' map to a model name that is sent to the router Space.
AVAILABLE_MODELS = {
    # Router-backed models (currently disabled).
    # 'XTTSv2': 'xtts',
    # 'WhisperSpeech': 'whisperspeech',
    # 'ElevenLabs': 'eleven',
    # 'OpenVoice': 'openvoice',
    # 'Pheme': 'pheme',
    # 'MetaVoice': 'metavoice'

    # Format: '<Space>': '<func>#<return-index-of-audio-param>'
    # 'coqui/xtts': '1#1', #FIXME: Space defaults
    # 'collabora/WhisperSpeech': '/whisper_speech_demo#0', #FIXME: invalid url for third param
    # 'myshell-ai/OpenVoice': '1#1', #FIXME: example audio path
    # 'PolyAI/pheme': 'PolyAI/pheme', #FIXME
    'mrfakename/MetaVoice-1B-v0.1': '/tts#0',

    # xVASynth (CPU)
    'Pendrokar/xVASynth': '/predict#0',

    # CoquiTTS (CPU)
    'coqui/CoquiTTS': '0#0',
    # 'pytorch/Tacotron2': '0#0', #old gradio
}
|
| 55 |
|
| 56 |
SPACE_ID = os.getenv('SPACE_ID')
|
|
|
|
| 132 |
# Create DB table (if it doesn't exist)
|
| 133 |
create_db_if_missing()
|
| 134 |
|
| 135 |
+
hf_token = os.getenv('HF_TOKEN')
|
| 136 |
# Sync local DB with remote repo every 5 minutes (only if a change is detected)
|
| 137 |
scheduler = CommitScheduler(
|
| 138 |
repo_id=DB_DATASET_ID,
|
|
|
|
| 148 |
####################################
|
| 149 |
# Router API
|
| 150 |
####################################
|
| 151 |
+
# router = Client("TTS-AGI/tts-router", hf_token=hf_token)
|
| 152 |
####################################
|
| 153 |
# Gradio app
|
| 154 |
####################################
|
|
|
|
| 306 |
'metavoice': 'Apache 2.0',
|
| 307 |
'elevenlabs': 'Proprietary',
|
| 308 |
'whisperspeech': 'MIT',
|
| 309 |
+
|
| 310 |
+
'Pendrokar/xVASynth': 'GPT3',
|
| 311 |
+
'Pendrokar/xVASynthStreaming': 'GPT3',
|
| 312 |
}
|
| 313 |
model_links = {
|
| 314 |
'styletts2': 'https://github.com/yl4579/StyleTTS2',
|
|
|
|
| 582 |
def predict_and_update_result(text, model, result_storage):
|
| 583 |
try:
|
| 584 |
if model in AVAILABLE_MODELS:
|
| 585 |
+
if '/' in model:
|
| 586 |
+
# Use public HF Space
|
| 587 |
+
mdl_space = Client(model, hf_token=hf_token)
|
| 588 |
+
# assume the index is one of the first 9 return params
|
| 589 |
+
return_audio_index = int(AVAILABLE_MODELS[model][-1])
|
| 590 |
+
endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
| 591 |
+
|
| 592 |
+
# has named endpoint
|
| 593 |
+
if '/' == AVAILABLE_MODELS[model][:1]:
|
| 594 |
+
# assume the index is one of the first 9 params
|
| 595 |
+
api_name = AVAILABLE_MODELS[model][:-2]
|
| 596 |
+
|
| 597 |
+
space_inputs = _get_param_examples(
|
| 598 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
| 599 |
+
)
|
| 600 |
+
|
| 601 |
+
# force text to the text input
|
| 602 |
+
space_inputs[0] = text
|
| 603 |
+
|
| 604 |
+
# print(space_inputs)
|
| 605 |
+
results = mdl_space.predict(*space_inputs, api_name=api_name)
|
| 606 |
+
|
| 607 |
+
# has unnamed endpoint
|
| 608 |
+
else:
|
| 609 |
+
# endpoint index is the first character
|
| 610 |
+
fn_index = int(AVAILABLE_MODELS[model][0])
|
| 611 |
+
|
| 612 |
+
space_inputs = _get_param_examples(
|
| 613 |
+
endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
|
| 614 |
+
)
|
| 615 |
+
|
| 616 |
+
# force text
|
| 617 |
+
space_inputs[0] = text
|
| 618 |
+
|
| 619 |
+
# OpenVoice
|
| 620 |
+
# space_inputs[2] = "examples/speaker2.mp3"
|
| 621 |
+
|
| 622 |
+
results = mdl_space.predict(*space_inputs, fn_index=fn_index)
|
| 623 |
+
|
| 624 |
+
# return path to audio
|
| 625 |
+
result = results[return_audio_index] if (not isinstance(results, str)) else results
|
| 626 |
+
else:
|
| 627 |
+
# Use the private HF Space
|
| 628 |
+
result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
| 629 |
else:
|
| 630 |
result = router.predict(text, model.lower(), api_name="/synthesize")
|
| 631 |
except:
|
|
|
|
| 654 |
# doloudnorm(result)
|
| 655 |
# except:
|
| 656 |
# pass
|
| 657 |
+
|
| 658 |
+
def _get_param_examples(parameters):
|
| 659 |
+
example_inputs = []
|
| 660 |
+
for param_info in parameters:
|
| 661 |
+
if (
|
| 662 |
+
param_info['component'] == 'Radio'
|
| 663 |
+
or param_info['component'] == 'Dropdown'
|
| 664 |
+
or param_info['component'] == 'Audio'
|
| 665 |
+
or param_info['python_type']['type'] == 'str'
|
| 666 |
+
):
|
| 667 |
+
example_inputs.append(str(param_info['example_input']))
|
| 668 |
+
continue
|
| 669 |
+
if param_info['python_type']['type'] == 'int':
|
| 670 |
+
example_inputs.append(int(param_info['example_input']))
|
| 671 |
+
continue
|
| 672 |
+
if param_info['python_type']['type'] == 'float':
|
| 673 |
+
example_inputs.append(float(param_info['example_input']))
|
| 674 |
+
continue
|
| 675 |
+
if param_info['python_type']['type'] == 'bool':
|
| 676 |
+
example_inputs.append(bool(param_info['example_input']))
|
| 677 |
+
continue
|
| 678 |
+
|
| 679 |
+
return example_inputs
|
| 680 |
+
|
| 681 |
results = {}
|
| 682 |
thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1, results))
|
| 683 |
thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2, results))
|