Spaces:
Running
Running
tianfengping.tfp
committed on
Commit
·
bd90cd9
1
Parent(s):
26613c2
delete speed
Browse files
app.py
CHANGED
|
@@ -237,9 +237,9 @@ def generate_speech_speakerminus(tts_text, speed, speaker, key, ref_audio, ref_t
|
|
| 237 |
# speaker=speaker,
|
| 238 |
prompt_speech_16k = ref_audio,
|
| 239 |
key = emo.get(key),
|
| 240 |
-
|
| 241 |
# ref_audio = ref_audio,
|
| 242 |
-
speed=speed
|
| 243 |
|
| 244 |
)
|
| 245 |
print("sample_rate:", sample_rate, "full_audio:", full_audio.min(), full_audio.max())
|
|
@@ -257,7 +257,6 @@ def generate_speech_speakerminus(tts_text, speed, speaker, key, ref_audio, ref_t
|
|
| 257 |
|
| 258 |
|
| 259 |
def generate_speech_sft(tts_text, speed, speaker, key, ref_audio, ref_text):
|
| 260 |
-
# import pdb;pdb.set_trace()
|
| 261 |
global tts_sft_global, local_model_path_enhenced
|
| 262 |
# Ensure models are downloaded (this may take time on first use)
|
| 263 |
if local_model_path_enhenced is None:
|
|
@@ -323,9 +322,9 @@ def generate_speech_sft(tts_text, speed, speaker, key, ref_audio, ref_text):
|
|
| 323 |
# speaker=speaker,
|
| 324 |
prompt_speech_16k = ref_audio,
|
| 325 |
key = emo.get(key),
|
| 326 |
-
|
| 327 |
# ref_audio = ref_audio,
|
| 328 |
-
speed=speed
|
| 329 |
|
| 330 |
)
|
| 331 |
print("sample_rate:", sample_rate, "full_audio:", full_audio.min(), full_audio.max())
|
|
|
|
| 237 |
# speaker=speaker,
|
| 238 |
prompt_speech_16k = ref_audio,
|
| 239 |
key = emo.get(key),
|
| 240 |
+
emotion_embedding=emotion_info,
|
| 241 |
# ref_audio = ref_audio,
|
| 242 |
+
# speed=speed
|
| 243 |
|
| 244 |
)
|
| 245 |
print("sample_rate:", sample_rate, "full_audio:", full_audio.min(), full_audio.max())
|
|
|
|
| 257 |
|
| 258 |
|
| 259 |
def generate_speech_sft(tts_text, speed, speaker, key, ref_audio, ref_text):
|
|
|
|
| 260 |
global tts_sft_global, local_model_path_enhenced
|
| 261 |
# Ensure models are downloaded (this may take time on first use)
|
| 262 |
if local_model_path_enhenced is None:
|
|
|
|
| 322 |
# speaker=speaker,
|
| 323 |
prompt_speech_16k = ref_audio,
|
| 324 |
key = emo.get(key),
|
| 325 |
+
emotion_embedding=emotion_info,
|
| 326 |
# ref_audio = ref_audio,
|
| 327 |
+
# speed=speed
|
| 328 |
|
| 329 |
)
|
| 330 |
print("sample_rate:", sample_rate, "full_audio:", full_audio.min(), full_audio.max())
|