Spaces:

Opera8
/

Sada

Running on Zero

App Files Community

Opera8 committed 21 days ago

Commit

380e75f

verified ·

1 Parent(s): 8a530c4

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -15

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ import subprocess
 import re
 import spaces
-# 创建一个全局变量来跟踪已下载的资源
 # Create a global variable to track downloaded resources
 downloaded_resources = {
     "configs": False,
@@ -38,10 +37,6 @@ def install_espeak():
             print("espeak-ng and its data packages installed successfully!")
         else:
             print("espeak-ng is already installed in the system.")
-            # Even if already installed, try to update data to ensure integrity (optional but sometimes helpful)
-            # print("Attempting to update espeak-ng data...")
-            # subprocess.run(["apt-get", "update"], check=True)
-            # subprocess.run(["apt-get", "install", "--only-upgrade", "-y", "espeak-ng-data"], check=True)
         # Verify Chinese support (optional)
         try:
@@ -605,8 +600,8 @@ def vevo_style(content_wav, style_wav):
     print(f"Style audio shape: {style_tensor.shape}, sample rate: {style_sr}")
     # Save audio
-    torchaudio.save(temp_content_path, content_tensor, content_sr)
-    torchaudio.save(temp_style_path, style_tensor, style_sr)
     try:
         # Get pipeline
@@ -700,8 +695,8 @@ def vevo_timbre(content_wav, reference_wav):
     print(f"Reference audio shape: {reference_tensor.shape}, sample rate: {reference_sr}")
     # Save uploaded audio
-    torchaudio.save(temp_content_path, content_tensor, content_sr)
-    torchaudio.save(temp_reference_path, reference_tensor, reference_sr)
     try:
         # Get pipeline
@@ -820,9 +815,9 @@ def vevo_voice(content_wav, style_reference_wav, timbre_reference_wav):
     print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
     # Save uploaded audio
-    torchaudio.save(temp_content_path, content_tensor, content_sr)
-    torchaudio.save(temp_style_path, style_tensor, style_sr)
-    torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr)
     try:
         # Get pipeline
@@ -893,7 +888,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
         print(f"Style reference text: {style_ref_text}, language: {style_ref_text_language}")
     # Save uploaded audio
-    torchaudio.save(temp_ref_path, ref_tensor, ref_sr)
     if timbre_ref_wav is not None:
         if isinstance(timbre_ref_wav, tuple) and len(timbre_ref_wav) == 2:
@@ -918,7 +913,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
             timbre_tensor = timbre_tensor / (torch.max(torch.abs(timbre_tensor)) + 1e-6) * 0.95
             print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
-            torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr)
         else:
             raise ValueError("Invalid timbre reference audio format")
     else:
@@ -1046,4 +1041,4 @@ with gr.Blocks(title="Vevo: Controllable Zero-Shot Voice Imitation with Self-Sup
     """)
 # Launch application
-demo.launch()

 import re
 import spaces
 # Create a global variable to track downloaded resources
 downloaded_resources = {
     "configs": False,
             print("espeak-ng and its data packages installed successfully!")
         else:
             print("espeak-ng is already installed in the system.")
         # Verify Chinese support (optional)
         try:
     print(f"Style audio shape: {style_tensor.shape}, sample rate: {style_sr}")
     # Save audio
+    torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
+    torchaudio.save(temp_style_path, style_tensor, style_sr, backend="soundfile")
     try:
         # Get pipeline
     print(f"Reference audio shape: {reference_tensor.shape}, sample rate: {reference_sr}")
     # Save uploaded audio
+    torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
+    torchaudio.save(temp_reference_path, reference_tensor, reference_sr, backend="soundfile")
     try:
         # Get pipeline
     print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
     # Save uploaded audio
+    torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
+    torchaudio.save(temp_style_path, style_tensor, style_sr, backend="soundfile")
+    torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr, backend="soundfile")
     try:
         # Get pipeline
         print(f"Style reference text: {style_ref_text}, language: {style_ref_text_language}")
     # Save uploaded audio
+    torchaudio.save(temp_ref_path, ref_tensor, ref_sr, backend="soundfile")
     if timbre_ref_wav is not None:
         if isinstance(timbre_ref_wav, tuple) and len(timbre_ref_wav) == 2:
             timbre_tensor = timbre_tensor / (torch.max(torch.abs(timbre_tensor)) + 1e-6) * 0.95
             print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
+            torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr, backend="soundfile")
         else:
             raise ValueError("Invalid timbre reference audio format")
     else:
     """)
 # Launch application
+demo.launch()