Spaces:

Pendrokar
/

xVASynth-TTS

Running on CPU Upgrade

App Files Files Community

Pendrokar committed on Mar 8, 2024

Commit

11acbd9

1 Parent(s): d4c907a

component inits

Browse files

Files changed (1) hide show

gr_client.py +135 -98

gr_client.py CHANGED Viewed

@@ -8,27 +8,33 @@ import gradio as gr
 from gradio_client import Client
 voice_models = [
-	("Male #6671", "ccby_nvidia_hifi_6671_M"),
-	("Male #6670", "ccby_nvidia_hifi_6670_M"),
 	("Male #9017", "ccby_nvidia_hifi_9017_M"),
 	("Male #6097", "ccby_nvidia_hifi_6097_M"),
-	("Female #92", "ccby_nvidia_hifi_92_F"),
-	("Female #11697", "ccby_nvidia_hifi_11697_F"),
-	("Female #12787", "ccby_nvidia_hifi_12787_F"),
-	("Female #11614", "ccby_nv_hifi_11614_F"),
 	("Female #8051", "ccby_nvidia_hifi_8051_F"),
 	("Female #9136", "ccby_nvidia_hifi_9136_F"),
 ]
 current_voice_model = None
 # Ordered by similarity to English, because xVASynth uses ARPAbet instead of IPA
 languages = [
-	("🇬🇧 EN", "en"),
 	("🇩🇪 DE", "de"),
 	("🇪🇸 ES", "es"),
-	("🇮🇹 IT", "it"),
 	("🇳🇱 NL", "nl"),
-	("🇵🇹 PT", "pt"),
 	("🇵🇱 PL", "pl"),
 	("🇷🇴 RO", "ro"),
 	("🇸🇪 SV", "sv"),
@@ -38,19 +44,17 @@ languages = [
 	("🇬🇷 EL", "el"),
 	("🇫🇷 FR", "fr"),
 	("🇷🇺 RU", "ru"),
-	("🇺🇦 UK", "uk"),
 	("🇹🇷 TR", "tr"),
 	("🇸🇦 AR", "ar"),
-	("🇮🇳 HI", "hi"),
 	("🇯🇵 JP", "jp"),
 	("🇰🇷 KO", "ko"),
-	("🇨🇳 ZH", "zh"),
 	("🇻🇳 VI", "vi"),
 	("🇻🇦 LA", "la"),
-	("HA", "ha"),
-	("SW", "sw"),
 	("🇳🇬 YO", "yo"),
-	("WO", "wo"),
 ]
 # Translated from English by DeepMind's Gemini Pro
@@ -150,47 +154,98 @@ def predict(
 		round(json_data['em_surprise'][0], 2)
 	]
-input_textbox = gr.Textbox(
-	label="Input Text",
-	value="This is what my voice sounds like.",
-	info="Also accepts ARPAbet symbols placed within {} brackets.",
-	lines=1,
-	max_lines=5,
-	autofocus=True
-)
-pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
-pitch_slider = gr.Slider(0, 1.0, value=0.5, step=0.05, label="Pitch", visible=False)
-energy_slider = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Energy", visible=False)
-anger_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😠 Anger", info="Tread lightly beyond 0.9")
-happy_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😃 Happiness", info="Tread lightly beyond 0.7")
-sad_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😭 Sadness", info="Duration increased when beyond 0.2")
-surprise_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😮 Surprise", info="Does not play well with Happiness with either being beyond 0.3")
-voice_radio = gr.Radio(
-	voice_models,
-	value="ccby_nvidia_hifi_6671_M",
-	label="Voice",
-	info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
-)
 def set_default_text(lang, deepmoji_checked):
 	# DeepMoji only works on English Text
 	if lang == 'en':
-		checkbox_enabled = gr.Checkbox(
-			label="Use DeepMoji",
-			info="Auto adjust emotional values",
-			value=deepmoji_checked,
-			interactive=True
-		)
 	else:
-		checkbox_enabled = gr.Checkbox(
-			label="Use DeepMoji",
-			info="Works only with English!",
-			value=False,
-			interactive=False
-		)
 	return default_text[lang], checkbox_enabled  # Return the modified textbox (important for Blocks)
 en_examples = [
 	"This is what my voice sounds like.",
 	"If there is anything else you need, feel free to ask.",
@@ -204,22 +259,26 @@ en_examples = [
 	# ARPAbet example
 	"This { IH1 Z } { W AH1 T } { M AY1 } { V OY1 S } { S AW1 N D Z } like.",
 ]
 def set_example_as_input(example_text):
 	return example_text
 def toggle_example_dropdown(lang):
 	if lang == 'en':
-		return gr.Dropdown(
-			en_examples,
-			value=en_examples[0],
-			label="Example dropdown",
-			show_label=False,
-			info="English Examples",
-			visible=True
-		)
 	else:
-		return gr.Dropdown(visible=False)
 def reset_em_sliders(
 	deepmoji_enabled,
@@ -255,12 +314,13 @@ def toggle_deepmoji(
 			surprise
 		)
-language_radio = gr.Radio(
-	languages,
-	value="en",
-	label="Language",
-	info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
-)
 _DESCRIPTION = '''
 <div>
@@ -277,48 +337,25 @@ with gr.Blocks(css=".arpabet {background-color: gray; border-radius: 5px; font-s
 	with gr.Row():  # Main row for inputs and language selection
 		with gr.Column():  # Input column
-			input_textbox = gr.Textbox(
-				label="Input Text",
-				value="This is what my voice sounds like.",
-				info="Also accepts ARPAbet symbols placed within {} brackets.",
-				lines=1,
-				max_lines=5,
-				autofocus=True
-			)
-			language_radio = gr.Radio(
-				languages,
-				value="en",
-				label="Language",
-				info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
-			)
 			with gr.Row():
 				with gr.Column():
-					en_examples_dropdown = gr.Dropdown(
-						en_examples,
-						value=en_examples[0],
-						label="Example dropdown",
-						show_label=False,
-						info="English Examples"
-					)
 				with gr.Column():
-					pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
 		with gr.Column():  # Control column
-			voice_radio = gr.Radio(
-				voice_models,
-				value="ccby_nvidia_hifi_6671_M",
-				label="Voice",
-				info="NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
-			)
-			pitch_slider = gr.Slider(0, 1.0, value=0.5, step=0.05, label="Pitch", visible=False)
-			energy_slider = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Energy", visible=False)
 			with gr.Row():  # Main row for inputs and language selection
 				with gr.Column():  # Input column
-					anger_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😠 Anger", info="Tread lightly beyond 0.9")
-					sad_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😭 Sadness", info="Duration increased when beyond 0.2")
 				with gr.Column():  # Input column
-					happy_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😃 Happiness", info="Tread lightly beyond 0.7")
-					surprise_slider = gr.Slider(0, 1.0, value=0, step=0.05, label="😮 Surprise", info="Can oversaturate Happiness")
 			deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values", value=True)
 	# Event handling using click
@@ -432,6 +469,6 @@ with gr.Blocks(css=".arpabet {background-color: gray; border-radius: 5px; font-s
 if __name__ == "__main__":
 	print('running Gradio interface')
 	# gradio_app.launch()
-	client = Client("Pendrokar/xVASynth")
 	demo.launch()

 from gradio_client import Client
 voice_models = [  # (label, model id) pairs listed directly in the Voice radio choices
+	("👨‍🦳 #6671", "ccby_nvidia_hifi_6671_M"),
+	("👱‍♀️ 🇬🇧 #92", "ccby_nvidia_hifi_92_F"),
+]
+voice_models_more = [  # extra voices; voice_radio_init uses only this list's length, for its '+N' entry
+	("🧔 #6670", "ccby_nvidia_hifi_6670_M"),
 	("Male #9017", "ccby_nvidia_hifi_9017_M"),
 	("Male #6097", "ccby_nvidia_hifi_6097_M"),
+	("👩‍🦱 #12787", "ccby_nvidia_hifi_12787_F"),
+	("👵 #11614", "ccby_nv_hifi_11614_F"),
 	("Female #8051", "ccby_nvidia_hifi_8051_F"),
+	("👩‍🦳 #11697", "ccby_nvidia_hifi_11697_F"),
 	("Female #9136", "ccby_nvidia_hifi_9136_F"),
 ]
 current_voice_model = None
 # Ordered by similarity to English, because xVASynth uses ARPAbet instead of IPA
 languages = [  # (label, language code) pairs listed directly in the Language radio choices
+	("🇺🇸 EN", "en"),
 	("🇩🇪 DE", "de"),
 	("🇪🇸 ES", "es"),
+	("🇮🇳 HI", "hi"),
+	("🇨🇳 ZH", "zh"),
+]
+languages_more = [  # extra languages; language_radio_init uses only this list's length, for its '+N' entry
 	("🇳🇱 NL", "nl"),
+	("🇧🇷 PT", "pt"),
+	("🇮🇹 IT", "it"),
 	("🇵🇱 PL", "pl"),
 	("🇷🇴 RO", "ro"),
 	("🇸🇪 SV", "sv"),
 	("🇬🇷 EL", "el"),
 	("🇫🇷 FR", "fr"),
 	("🇷🇺 RU", "ru"),
+	("🇺🇦 UA", "uk"),
 	("🇹🇷 TR", "tr"),
 	("🇸🇦 AR", "ar"),
 	("🇯🇵 JP", "jp"),
 	("🇰🇷 KO", "ko"),
 	("🇻🇳 VI", "vi"),
 	("🇻🇦 LA", "la"),
 	("🇳🇬 YO", "yo"),
+	("Swahili", "sw"),
+	("Hausa", "ha"),
+	("Wolof", "wo"),
 ]
 # Translated from English by DeepMind's Gemini Pro
 		round(json_data['em_surprise'][0], 2)
 	]
+# Component defaults: each *_init dict holds the keyword arguments that are unpacked (**) into the matching gr.* constructor
+input_textbox_init = {
+	'label': "Input Text",
+	'value': "This is what my voice sounds like.",
+	'info': "Also accepts ARPAbet symbols placed within {} brackets.",
+	'lines': 1,
+	'max_lines': 5,
+	'autofocus': True
+}
+pacing_slider_init = {
+	'value': 1.0,
+	'minimum': 0.5,
+	'maximum': 2.0,
+	'step': 0.1,
+	'label': "Duration"
+}
+pitch_slider_init = {
+	'minimum': 0,
+	'maximum': 1.0,
+	'value': 0.5,
+	'step': 0.05,
+	'label': "Pitch",
+	'visible': False  # slider is created hidden
+}
+energy_slider_init = {
+	'minimum': 0.1,
+	'maximum': 1.0,
+	'value': 1.0,
+	'step': 0.05,
+	'label': "Energy",
+	'visible': False  # slider is created hidden
+}
+anger_slider_init = {
+	'minimum': 0,
+	'maximum': 1.0,
+	'value': 0,
+	'step': 0.05,
+	'label': "😠 Anger",
+	'info': "Tread lightly beyond 0.9"
+}
+happy_slider_init = {
+	'minimum': 0,
+	'maximum': 1.0,
+	'value': 0,
+	'step': 0.05,
+	'label': "😃 Happiness",
+	'info': "Tread lightly beyond 0.7"
+}
+sad_slider_init = {
+	'minimum': 0,
+	'maximum': 1.0,
+	'value': 0,
+	'step': 0.05,
+	'label': "😭 Sadness",
+	'info': "Duration increased when beyond 0.2"
+}
+surprise_slider_init = {
+	'minimum': 0,
+	'maximum': 1.0,
+	'value': 0,
+	'step': 0.05,
+	'label': "😮 Surprise",
+	'info': "Does not play well with Happiness with either being beyond 0.3"
+}
+voice_radio_init = {
+	'choices': [*voice_models, (f'+{len(voice_models_more)}', 'more')],  # primary voices plus one '+N' entry whose value is 'more'
+	'value': "ccby_nvidia_hifi_6671_M",
+	'label': "Voice",
+	'info': "NVIDIA HIFI CC-BY-4.0 xVAPitch voice model"
+}
+deepmoji_checkbox_init = {  # copied and adjusted per language by set_default_text
+	'label': "Use DeepMoji",
+	'info': "Auto adjust emotional values",
+	'value': True,
+	'interactive': True
+}  # NOTE(review): the layout's deepmoji_checkbox is still built from inline kwargs, not from this dict
 def set_default_text(lang, deepmoji_checked):
 	# DeepMoji only works on English Text
+	checkbox_init = {**deepmoji_checkbox_init}
 	if lang == 'en':
+		checkbox_init['value'] = deepmoji_checked,
+		checkbox_init['interactive'] = True
 	else:
+		checkbox_init['info'] = "Works only with English!",
+		checkbox_init['value'] = False,
+		checkbox_init['interactive'] = False
+	checkbox_enabled = gr.Checkbox(**checkbox_init)
 	return default_text[lang], checkbox_enabled  # Return the modified textbox (important for Blocks)
+# examples component: English example sentences and the kwargs for their gr.Dropdown
 en_examples = [
 	"This is what my voice sounds like.",
 	"If there is anything else you need, feel free to ask.",
 	# ARPAbet example
 	"This { IH1 Z } { W AH1 T } { M AY1 } { V OY1 S } { S AW1 N D Z } like.",
 ]
+en_examples_dropdown_init = {  # copied by toggle_example_dropdown, which overrides 'visible' per language
+	'choices': en_examples,
+	'value': en_examples[0],  # first example is preselected
+	'label': "Example dropdown",
+	'show_label': False,
+	'info': "English Examples",
+	'visible': True
+}
 def set_example_as_input(example_text):  # identity helper: hands the chosen example text back unchanged
 	return example_text
 def toggle_example_dropdown(lang):
+	dropdown_init = {**en_examples_dropdown_init}
 	if lang == 'en':
+		dropdown_init['visible'] = True
 	else:
+		dropdown_init['visible'] = False
+	return gr.Dropdown(**dropdown_init)
 def reset_em_sliders(
 	deepmoji_enabled,
 			surprise
 		)
+# languages component
+language_radio_init = {
+	'choices': [*languages, *[(f'+{len(languages_more)}', 'more')]],
+	'value': "en",
+	'label': "Language",
+	'info': "Will be more monotone and have an English accent. Tested mostly by a native Briton."
+}
 _DESCRIPTION = '''
 <div>
 	with gr.Row():  # Main row for inputs and language selection
 		with gr.Column():  # Input column
+			input_textbox = gr.Textbox(**input_textbox_init)
+			language_radio = gr.Radio(**language_radio_init)
 			with gr.Row():
 				with gr.Column():
+					en_examples_dropdown = gr.Dropdown(**en_examples_dropdown_init)
 				with gr.Column():
+					pacing_slider = gr.Slider(**pacing_slider_init)
 		with gr.Column():  # Control column
+			voice_radio = gr.Radio(**voice_radio_init)
+			pitch_slider = gr.Slider(**pitch_slider_init)
+			energy_slider = gr.Slider(**energy_slider_init)
 			with gr.Row():  # Main row for inputs and language selection
 				with gr.Column():  # Input column
+					anger_slider = gr.Slider(**anger_slider_init)
+					sad_slider = gr.Slider(**sad_slider_init)
 				with gr.Column():  # Input column
+					happy_slider = gr.Slider(**happy_slider_init)
+					surprise_slider = gr.Slider(**surprise_slider_init)
 			deepmoji_checkbox = gr.Checkbox(label="Use DeepMoji", info="Auto adjust emotional values", value=True)
 	# Event handling using click
 if __name__ == "__main__":  # only runs when the file is executed as a script
 	print('running Gradio interface')
 	# gradio_app.launch()
+	# client = Client("Pendrokar/xVASynth")
 	demo.launch()  # `demo` is defined outside the visible lines; presumably the gr.Blocks app (confirm)