Update app.py: pass backend="soundfile" to the torchaudio.save calls and remove stale comments
Browse files
app.py
CHANGED
|
@@ -12,7 +12,6 @@ import subprocess
|
|
| 12 |
import re
|
| 13 |
import spaces
|
| 14 |
|
| 15 |
-
# 创建一个全局变量来跟踪已下载的资源
|
| 16 |
# Create a global variable to track downloaded resources
|
| 17 |
downloaded_resources = {
|
| 18 |
"configs": False,
|
|
@@ -38,10 +37,6 @@ def install_espeak():
|
|
| 38 |
print("espeak-ng and its data packages installed successfully!")
|
| 39 |
else:
|
| 40 |
print("espeak-ng is already installed in the system.")
|
| 41 |
-
# Even if already installed, try to update data to ensure integrity (optional but sometimes helpful)
|
| 42 |
-
# print("Attempting to update espeak-ng data...")
|
| 43 |
-
# subprocess.run(["apt-get", "update"], check=True)
|
| 44 |
-
# subprocess.run(["apt-get", "install", "--only-upgrade", "-y", "espeak-ng-data"], check=True)
|
| 45 |
|
| 46 |
# Verify Chinese support (optional)
|
| 47 |
try:
|
|
@@ -605,8 +600,8 @@ def vevo_style(content_wav, style_wav):
|
|
| 605 |
print(f"Style audio shape: {style_tensor.shape}, sample rate: {style_sr}")
|
| 606 |
|
| 607 |
# Save audio
|
| 608 |
-
torchaudio.save(temp_content_path, content_tensor, content_sr)
|
| 609 |
-
torchaudio.save(temp_style_path, style_tensor, style_sr)
|
| 610 |
|
| 611 |
try:
|
| 612 |
# Get pipeline
|
|
@@ -700,8 +695,8 @@ def vevo_timbre(content_wav, reference_wav):
|
|
| 700 |
print(f"Reference audio shape: {reference_tensor.shape}, sample rate: {reference_sr}")
|
| 701 |
|
| 702 |
# Save uploaded audio
|
| 703 |
-
torchaudio.save(temp_content_path, content_tensor, content_sr)
|
| 704 |
-
torchaudio.save(temp_reference_path, reference_tensor, reference_sr)
|
| 705 |
|
| 706 |
try:
|
| 707 |
# Get pipeline
|
|
@@ -820,9 +815,9 @@ def vevo_voice(content_wav, style_reference_wav, timbre_reference_wav):
|
|
| 820 |
print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
|
| 821 |
|
| 822 |
# Save uploaded audio
|
| 823 |
-
torchaudio.save(temp_content_path, content_tensor, content_sr)
|
| 824 |
-
torchaudio.save(temp_style_path, style_tensor, style_sr)
|
| 825 |
-
torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr)
|
| 826 |
|
| 827 |
try:
|
| 828 |
# Get pipeline
|
|
@@ -893,7 +888,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
|
|
| 893 |
print(f"Style reference text: {style_ref_text}, language: {style_ref_text_language}")
|
| 894 |
|
| 895 |
# Save uploaded audio
|
| 896 |
-
torchaudio.save(temp_ref_path, ref_tensor, ref_sr)
|
| 897 |
|
| 898 |
if timbre_ref_wav is not None:
|
| 899 |
if isinstance(timbre_ref_wav, tuple) and len(timbre_ref_wav) == 2:
|
|
@@ -918,7 +913,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
|
|
| 918 |
timbre_tensor = timbre_tensor / (torch.max(torch.abs(timbre_tensor)) + 1e-6) * 0.95
|
| 919 |
|
| 920 |
print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
|
| 921 |
-
torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr)
|
| 922 |
else:
|
| 923 |
raise ValueError("Invalid timbre reference audio format")
|
| 924 |
else:
|
|
@@ -1046,4 +1041,4 @@ with gr.Blocks(title="Vevo: Controllable Zero-Shot Voice Imitation with Self-Sup
|
|
| 1046 |
""")
|
| 1047 |
|
| 1048 |
# Launch application
|
| 1049 |
-
demo.launch()
|
|
|
|
| 12 |
import re
|
| 13 |
import spaces
|
| 14 |
|
|
|
|
| 15 |
# Create a global variable to track downloaded resources
|
| 16 |
downloaded_resources = {
|
| 17 |
"configs": False,
|
|
|
|
| 37 |
print("espeak-ng and its data packages installed successfully!")
|
| 38 |
else:
|
| 39 |
print("espeak-ng is already installed in the system.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
# Verify Chinese support (optional)
|
| 42 |
try:
|
|
|
|
| 600 |
print(f"Style audio shape: {style_tensor.shape}, sample rate: {style_sr}")
|
| 601 |
|
| 602 |
# Save audio
|
| 603 |
+
torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
|
| 604 |
+
torchaudio.save(temp_style_path, style_tensor, style_sr, backend="soundfile")
|
| 605 |
|
| 606 |
try:
|
| 607 |
# Get pipeline
|
|
|
|
| 695 |
print(f"Reference audio shape: {reference_tensor.shape}, sample rate: {reference_sr}")
|
| 696 |
|
| 697 |
# Save uploaded audio
|
| 698 |
+
torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
|
| 699 |
+
torchaudio.save(temp_reference_path, reference_tensor, reference_sr, backend="soundfile")
|
| 700 |
|
| 701 |
try:
|
| 702 |
# Get pipeline
|
|
|
|
| 815 |
print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
|
| 816 |
|
| 817 |
# Save uploaded audio
|
| 818 |
+
torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
|
| 819 |
+
torchaudio.save(temp_style_path, style_tensor, style_sr, backend="soundfile")
|
| 820 |
+
torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr, backend="soundfile")
|
| 821 |
|
| 822 |
try:
|
| 823 |
# Get pipeline
|
|
|
|
| 888 |
print(f"Style reference text: {style_ref_text}, language: {style_ref_text_language}")
|
| 889 |
|
| 890 |
# Save uploaded audio
|
| 891 |
+
torchaudio.save(temp_ref_path, ref_tensor, ref_sr, backend="soundfile")
|
| 892 |
|
| 893 |
if timbre_ref_wav is not None:
|
| 894 |
if isinstance(timbre_ref_wav, tuple) and len(timbre_ref_wav) == 2:
|
|
|
|
| 913 |
timbre_tensor = timbre_tensor / (torch.max(torch.abs(timbre_tensor)) + 1e-6) * 0.95
|
| 914 |
|
| 915 |
print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
|
| 916 |
+
torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr, backend="soundfile")
|
| 917 |
else:
|
| 918 |
raise ValueError("Invalid timbre reference audio format")
|
| 919 |
else:
|
|
|
|
| 1041 |
""")
|
| 1042 |
|
| 1043 |
# Launch application
|
| 1044 |
+
demo.launch()
|