Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import os | |
| import sys | |
| import requests | |
| import json | |
| from huggingface_hub import HfApi | |
| # start xVASynth service (no HTTP) | |
| import resources.app.no_server as xvaserver | |
| from gr_client import BlocksDemo | |
# TODO: move snapshots to common folder & use a models array

# Resolve the local HF-hub snapshot directory for each voice-model repo by
# looking up its latest commit SHA (snapshot dirs are keyed by commit).

# NVIDIA NeMo models
hf_model_name = "Pendrokar/xvapitch_nvidia"
model_repo = HfApi()
commits = model_repo.list_repo_commits(repo_id=hf_model_name)
latest_commit_sha = commits[0].commit_id
hf_cache_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_nvidia/snapshots/{latest_commit_sha}/'
models_path = hf_cache_models_path

# Expresso models
commits = model_repo.list_repo_commits(repo_id='Pendrokar/xvapitch_expresso')
latest_commit_sha = commits[0].commit_id
hf_cache_expresso_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvapitch_expresso/snapshots/{latest_commit_sha}/'

# Lojban model
commits = model_repo.list_repo_commits(repo_id='Pendrokar/xvasynth_lojban')
latest_commit_sha = commits[0].commit_id
hf_cache_lojban_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvasynth_lojban/snapshots/{latest_commit_sha}/'

# Robotic model — best-effort: the repo needs HF_TOKEN, so the app must keep
# running (with an empty path) when access fails.
hf_cache_robotic_models_path = ''
try:
    commits = model_repo.list_repo_commits(repo_id='Pendrokar/xvasynth_cabal', token=os.getenv('HF_TOKEN'))
    latest_commit_sha = commits[0].commit_id
    hf_cache_robotic_models_path = f'/home/user/.cache/huggingface/hub/models--Pendrokar--xvasynth_cabal/snapshots/{latest_commit_sha}/'
except Exception:
    # FIX: was a bare `except:`, which also swallows SystemExit/KeyboardInterrupt;
    # narrowed to Exception to keep the best-effort behavior without masking exits.
    print('Robotic voice not loaded!')

# Mutable module state tracking the currently loaded voice model.
current_voice_model = None
current_voice_type = None
base_speaker_emb = ''
def load_model(voice_model_name):
    """Load a voice model into the embedded xVASynth service and cache its embedding.

    Resolves the snapshot path for the requested voice, asks xVASynth to load
    it, and reads the model's sidecar ``<model>.json`` to pick up the base
    speaker embedding used later by synthesis. Updates the module globals
    ``current_voice_model``, ``current_voice_type`` and ``base_speaker_emb``.

    :param voice_model_name: file-name stem of the voice model (e.g. ``'x_selpahi'``).
    :return: the base speaker embedding for the loaded model (unchanged previous
        value, initially ``''``, if loading failed before it could be read).
    """
    global current_voice_model, current_voice_type, base_speaker_emb

    if voice_model_name == 'x_selpahi':
        # Lojban voice ships as a FastPitch 1.1 model in its own snapshot
        model_path = hf_cache_lojban_models_path + voice_model_name
        model_type = 'FastPitch1.1'
    else:
        model_path = models_path + voice_model_name
        if voice_model_name == 'cnc_cabal':
            model_path = hf_cache_robotic_models_path + voice_model_name
        if voice_model_name[:5] == 'x_ex0':
            model_path = hf_cache_expresso_models_path + voice_model_name
        model_type = 'xVAPitch'

    # seems to have no effect if generated text is for a different language
    language = 'en'

    data = {
        'outputs': None,
        'version': '3.0',
        'model': model_path,
        'modelType': model_type,
        'base_lang': language,
        'pluginsContext': '{}',
    }

    print('Loading voice model...')
    try:
        # FIX: return value was bound to an unused variable; the call is made
        # purely for its side effect of loading the model into the service.
        xvaserver.loadModel(data)
        current_voice_model = voice_model_name
        current_voice_type = model_type

        # The sidecar JSON carries the speaker embedding needed for synthesis;
        # the key differs between the two model formats.
        with open(model_path + '.json', 'r', encoding='utf-8') as f:
            voice_model_json = json.load(f)
        if model_type == 'xVAPitch':
            base_speaker_emb = voice_model_json['games'][0]['base_speaker_emb']
        elif model_type == 'FastPitch1.1':
            base_speaker_emb = voice_model_json['games'][0]['resemblyzer']
    except requests.exceptions.RequestException as err:
        # NOTE(review): xvaserver is called in-process (no HTTP), so a
        # RequestException seems unlikely here; file/JSON errors would
        # propagate to the caller — confirm intended error handling.
        print(f'FAILED to load voice model: {err}')

    return base_speaker_emb
class LocalBlocksDemo(BlocksDemo):
    """Gradio demo that synthesizes speech through the in-process xVASynth service."""

    def predict(
        self,
        input_text,
        voice,
        lang,
        pacing,
        pitch,
        energy,
        anger,
        happy,
        sad,
        surprise,
        use_deepmoji
    ):
        """Synthesize `input_text` with the selected voice.

        Returns a list for the Gradio outputs:
        [audio file path, ARPAbet HTML table, em_angry, em_happy, em_sad,
        em_surprise, raw synthesis JSON]. On failure the audio path is ''.
        Note: `pitch` and `energy` are accepted for interface compatibility
        but are currently unused.
        """
        global current_voice_model, current_voice_type, base_speaker_emb

        # grab only the first 1000 characters
        input_text = input_text[:1000]

        # load voice model if not the current model
        if current_voice_model != voice:
            load_model(voice)

        model_type = current_voice_type
        pace = pacing if pacing else 1.0
        save_path = '/tmp/xvapitch_audio_sample.wav'
        language = lang
        use_sr = 0
        use_cleanup = 0

        # Emotion overrides consumed by the Mantella plugin; negative slider
        # values are clamped to 0.
        pluginsContext = {}
        pluginsContext["mantella_settings"] = {
            "emAngry": (anger if anger > 0 else 0),
            "emHappy": (happy if happy > 0 else 0),
            "emSad": (sad if sad > 0 else 0),
            "emSurprise": (surprise if surprise > 0 else 0),
            "run_model": use_deepmoji
        }

        data = {
            'pluginsContext': json.dumps(pluginsContext),
            'modelType': model_type,
            # pad with whitespaces as a workaround to avoid cutoffs
            'sequence': input_text.center(len(input_text) + 2, ' '),
            'pace': pace,
            'outfile': save_path,
            'vocoder': 'n/a',
            'base_lang': language,
            'base_emb': base_speaker_emb,
            'useSR': use_sr,
            'useCleanup': use_cleanup,
        }

        print('Synthesizing...')
        try:
            json_data = xvaserver.synthesize(data)
            # response = requests.post('http://0.0.0.0:8008/synthesize', json=data, timeout=60)
            # response.raise_for_status()  # If the response contains an HTTP error status code, raise an exception
            # json_data = json.loads(response.text)
        except requests.exceptions.RequestException as err:
            # FIX: message was a plain string, so it printed the literal '{err}';
            # it needed the f-string prefix.
            print(f'FAILED to synthesize: {err}')
            save_path = ''
            # FIX: fallback payload now matches what the code below expects —
            # 'arpabet' must be a string (it gets .split()), 'durations' must be
            # non-zero (used as a division denominator), and em_* must be
            # one-element sequences (indexed with [0] when use_deepmoji is set).
            # Previously this path raised AttributeError/ZeroDivisionError.
            json_data = {
                'arpabet': 'Failed',
                'durations': [1],
                'em_angry': [anger],
                'em_happy': [happy],
                'em_sad': [sad],
                'em_surprise': [surprise],
            }

        # print('server.log contents:')
        # with open('resources/app/server.log', 'r') as f:
        #     print(f.read())

        arpabet_html = ''
        if voice == 'x_selpahi':
            # Lojban FastPitch model returns no ARPAbet/emotion data
            em_angry = 0
            em_happy = 0
            em_sad = 0
            em_surprise = 0
        else:
            # Build a table whose cell widths are proportional to phoneme durations
            arpabet_html = '<h6>ARPAbet & Durations</h6>'
            arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
            arpabet_symbols = json_data['arpabet'].split('|')
            wpad_len = len(arpabet_symbols)

            total_dur_length = 0
            for symb_i in range(wpad_len):
                if arpabet_symbols[symb_i] == '<PAD>':
                    continue
                total_dur_length += float(json_data['durations'][symb_i])

            for symb_i in range(wpad_len):
                if arpabet_symbols[symb_i] == '<PAD>':
                    continue
                arpabet_length = float(json_data['durations'][symb_i])
                cell_width = round(arpabet_length / total_dur_length * 100, 2)
                arpabet_html += '<td class="arpabet" style="width: '\
                    + str(cell_width)\
                    + '%">'\
                    + arpabet_symbols[symb_i]\
                    + '</td> '
            # FIX: row was closed with a literal '<tr>' (malformed HTML); use '</tr>'
            arpabet_html += '</tr></tbody></table>'

            if use_deepmoji:
                # emotion scores predicted by DeepMoji during synthesis
                em_angry = round(json_data['em_angry'][0], 2)
                em_happy = round(json_data['em_happy'][0], 2)
                em_sad = round(json_data['em_sad'][0], 2)
                em_surprise = round(json_data['em_surprise'][0], 2)
            else:
                # echo back the user-supplied slider values
                em_angry = anger
                em_happy = happy
                em_sad = sad
                em_surprise = surprise

        return [
            save_path,
            arpabet_html,
            em_angry,
            em_happy,
            em_sad,
            em_surprise,
            json_data
        ]
| if __name__ == "__main__": | |
| print('running custom Gradio interface') | |
| demo = LocalBlocksDemo(models_path, hf_cache_lojban_models_path, hf_cache_robotic_models_path, hf_cache_expresso_models_path) | |
| demo.block.launch(share=True, show_api=True, show_error=True) | |
| # MCP needs Gradio 5, which needs a newer version of Python that xVASynth does not support | |
| # demo.block.launch(share=True, show_api=True, show_error=True, mcp_server=True) | |