Spaces:
Running on CPU Upgrade
fastpitch diff return response
Browse files
- app.py +47 -31
- gr_client.py +22 -9
app.py
CHANGED
|
@@ -26,7 +26,7 @@ current_voice_type = None
|
|
| 26 |
base_speaker_emb = ''
|
| 27 |
|
| 28 |
def load_model(voice_model_name):
|
| 29 |
-
global current_voice_model, current_voice_type
|
| 30 |
|
| 31 |
if voice_model_name == 'x_selpahi':
|
| 32 |
# Lojban
|
|
@@ -47,8 +47,6 @@ def load_model(voice_model_name):
|
|
| 47 |
'pluginsContext': '{}',
|
| 48 |
}
|
| 49 |
|
| 50 |
-
embs = base_speaker_emb
|
| 51 |
-
|
| 52 |
print('Loading voice model...')
|
| 53 |
try:
|
| 54 |
json_data = xvaserver.loadModel(data)
|
|
@@ -59,13 +57,13 @@ def load_model(voice_model_name):
|
|
| 59 |
voice_model_json = json.load(f)
|
| 60 |
|
| 61 |
if model_type == 'xVAPitch':
|
| 62 |
-
|
| 63 |
elif model_type == 'FastPitch1.1':
|
| 64 |
-
|
| 65 |
except requests.exceptions.RequestException as err:
|
| 66 |
print(f'FAILED to load voice model: {err}')
|
| 67 |
|
| 68 |
-
return
|
| 69 |
|
| 70 |
|
| 71 |
class LocalBlocksDemo(BlocksDemo):
|
|
@@ -83,12 +81,14 @@ class LocalBlocksDemo(BlocksDemo):
|
|
| 83 |
surprise,
|
| 84 |
use_deepmoji
|
| 85 |
):
|
|
|
|
|
|
|
| 86 |
# grab only the first 1000 characters
|
| 87 |
input_text = input_text[:1000]
|
| 88 |
|
| 89 |
# load voice model if not the current model
|
| 90 |
if (current_voice_model != voice):
|
| 91 |
-
|
| 92 |
|
| 93 |
model_type = current_voice_type
|
| 94 |
pace = pacing if pacing else 1.0
|
|
@@ -144,34 +144,50 @@ class LocalBlocksDemo(BlocksDemo):
|
|
| 144 |
# with open('resources/app/server.log', 'r') as f:
|
| 145 |
# print(f.read())
|
| 146 |
|
| 147 |
-
arpabet_html = '
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
return [
|
| 169 |
save_path,
|
| 170 |
arpabet_html,
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
json_data
|
| 176 |
]
|
| 177 |
|
|
|
|
| 26 |
base_speaker_emb = ''
|
| 27 |
|
| 28 |
def load_model(voice_model_name):
|
| 29 |
+
global current_voice_model, current_voice_type, base_speaker_emb
|
| 30 |
|
| 31 |
if voice_model_name == 'x_selpahi':
|
| 32 |
# Lojban
|
|
|
|
| 47 |
'pluginsContext': '{}',
|
| 48 |
}
|
| 49 |
|
|
|
|
|
|
|
| 50 |
print('Loading voice model...')
|
| 51 |
try:
|
| 52 |
json_data = xvaserver.loadModel(data)
|
|
|
|
| 57 |
voice_model_json = json.load(f)
|
| 58 |
|
| 59 |
if model_type == 'xVAPitch':
|
| 60 |
+
base_speaker_emb = voice_model_json['games'][0]['base_speaker_emb']
|
| 61 |
elif model_type == 'FastPitch1.1':
|
| 62 |
+
base_speaker_emb = voice_model_json['games'][0]['resemblyzer']
|
| 63 |
except requests.exceptions.RequestException as err:
|
| 64 |
print(f'FAILED to load voice model: {err}')
|
| 65 |
|
| 66 |
+
return base_speaker_emb
|
| 67 |
|
| 68 |
|
| 69 |
class LocalBlocksDemo(BlocksDemo):
|
|
|
|
| 81 |
surprise,
|
| 82 |
use_deepmoji
|
| 83 |
):
|
| 84 |
+
global current_voice_model, current_voice_type, base_speaker_emb
|
| 85 |
+
|
| 86 |
# grab only the first 1000 characters
|
| 87 |
input_text = input_text[:1000]
|
| 88 |
|
| 89 |
# load voice model if not the current model
|
| 90 |
if (current_voice_model != voice):
|
| 91 |
+
load_model(voice)
|
| 92 |
|
| 93 |
model_type = current_voice_type
|
| 94 |
pace = pacing if pacing else 1.0
|
|
|
|
| 144 |
# with open('resources/app/server.log', 'r') as f:
|
| 145 |
# print(f.read())
|
| 146 |
|
| 147 |
+
arpabet_html = ''
|
| 148 |
+
if voice == 'x_selpahi':
|
| 149 |
+
angry = 0
|
| 150 |
+
happy = 0
|
| 151 |
+
sad = 0
|
| 152 |
+
surprise = 0
|
| 153 |
+
else:
|
| 154 |
+
arpabet_html = '<h6>ARPAbet & Durations</h6>'
|
| 155 |
+
arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
|
| 156 |
+
arpabet_nopad = json_data['arpabet'].split('|PAD|')
|
| 157 |
+
arpabet_symbols = json_data['arpabet'].split('|')
|
| 158 |
+
wpad_len = len(arpabet_symbols)
|
| 159 |
+
nopad_len = len(arpabet_nopad)
|
| 160 |
+
total_dur_length = 0
|
| 161 |
+
for symb_i in range(wpad_len):
|
| 162 |
+
if (arpabet_symbols[symb_i] == '<PAD>'):
|
| 163 |
+
continue
|
| 164 |
+
total_dur_length += float(json_data['durations'][symb_i])
|
| 165 |
+
|
| 166 |
+
for symb_i in range(wpad_len):
|
| 167 |
+
if (arpabet_symbols[symb_i] == '<PAD>'):
|
| 168 |
+
continue
|
| 169 |
+
|
| 170 |
+
arpabet_length = float(json_data['durations'][symb_i])
|
| 171 |
+
cell_width = round(arpabet_length / total_dur_length * 100, 2)
|
| 172 |
+
arpabet_html += '<td class="arpabet" style="width: '\
|
| 173 |
+
+ str(cell_width)\
|
| 174 |
+
+'%">'\
|
| 175 |
+
+ arpabet_symbols[symb_i]\
|
| 176 |
+
+ '</td> '
|
| 177 |
+
arpabet_html += '<tr></tbody></table>'
|
| 178 |
+
|
| 179 |
+
angry = round(json_data['em_angry'][0], 2),
|
| 180 |
+
happy = round(json_data['em_happy'][0], 2),
|
| 181 |
+
sad = round(json_data['em_sad'][0], 2),
|
| 182 |
+
surprise = round(json_data['em_surprise'][0], 2),
|
| 183 |
|
| 184 |
return [
|
| 185 |
save_path,
|
| 186 |
arpabet_html,
|
| 187 |
+
angry,
|
| 188 |
+
happy,
|
| 189 |
+
sad,
|
| 190 |
+
surprise,
|
| 191 |
json_data
|
| 192 |
]
|
| 193 |
|
gr_client.py
CHANGED
|
@@ -502,6 +502,7 @@ class BlocksDemo:
|
|
| 502 |
inputs=voice_radio,
|
| 503 |
outputs=output_wav,
|
| 504 |
queue=True,
|
|
|
|
| 505 |
)
|
| 506 |
|
| 507 |
# Switched to Lojban voice
|
|
@@ -546,9 +547,16 @@ class BlocksDemo:
|
|
| 546 |
|
| 547 |
json_data = json.loads(response.replace("'", '"'))
|
| 548 |
|
| 549 |
-
arpabet_html = '
|
| 550 |
-
|
| 551 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
arpabet_symbols = json_data['arpabet'].split('|')
|
| 553 |
wpad_len = len(arpabet_symbols)
|
| 554 |
nopad_len = len(arpabet_nopad)
|
|
@@ -568,16 +576,21 @@ class BlocksDemo:
|
|
| 568 |
+ str(cell_width)\
|
| 569 |
+'%">'\
|
| 570 |
+ arpabet_symbols[symb_i]\
|
| 571 |
-
|
| 572 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
return [
|
| 575 |
wav_path,
|
| 576 |
arpabet_html,
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
response
|
| 582 |
]
|
| 583 |
|
|
|
|
| 502 |
inputs=voice_radio,
|
| 503 |
outputs=output_wav,
|
| 504 |
queue=True,
|
| 505 |
+
trigger_mode='once',
|
| 506 |
)
|
| 507 |
|
| 508 |
# Switched to Lojban voice
|
|
|
|
| 547 |
|
| 548 |
json_data = json.loads(response.replace("'", '"'))
|
| 549 |
|
| 550 |
+
arpabet_html = ''
|
| 551 |
+
if voice == 'x_selpahi':
|
| 552 |
+
angry = 0
|
| 553 |
+
happy = 0
|
| 554 |
+
sad = 0
|
| 555 |
+
surprise = 0
|
| 556 |
+
else:
|
| 557 |
+
arpabet_html = '<h6>ARPAbet & Durations</h6>'
|
| 558 |
+
arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
|
| 559 |
+
arpabet_nopad = json_data['arpabet'].split('|PAD|')
|
| 560 |
arpabet_symbols = json_data['arpabet'].split('|')
|
| 561 |
wpad_len = len(arpabet_symbols)
|
| 562 |
nopad_len = len(arpabet_nopad)
|
|
|
|
| 576 |
+ str(cell_width)\
|
| 577 |
+'%">'\
|
| 578 |
+ arpabet_symbols[symb_i]\
|
| 579 |
+
+ '</td> '
|
| 580 |
+
arpabet_html += '<tr></tbody></table>'
|
| 581 |
+
|
| 582 |
+
angry = round(json_data['em_angry'][0], 2),
|
| 583 |
+
happy = round(json_data['em_happy'][0], 2),
|
| 584 |
+
sad = round(json_data['em_sad'][0], 2),
|
| 585 |
+
surprise = round(json_data['em_surprise'][0], 2),
|
| 586 |
|
| 587 |
return [
|
| 588 |
wav_path,
|
| 589 |
arpabet_html,
|
| 590 |
+
angry,
|
| 591 |
+
happy,
|
| 592 |
+
sad,
|
| 593 |
+
surprise,
|
| 594 |
response
|
| 595 |
]
|
| 596 |
|