Opera8 committed on
Commit
380e75f
·
verified ·
1 Parent(s): 8a530c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -12,7 +12,6 @@ import subprocess
12
  import re
13
  import spaces
14
 
15
- # 创建一个全局变量来跟踪已下载的资源
16
  # Create a global variable to track downloaded resources
17
  downloaded_resources = {
18
  "configs": False,
@@ -38,10 +37,6 @@ def install_espeak():
38
  print("espeak-ng and its data packages installed successfully!")
39
  else:
40
  print("espeak-ng is already installed in the system.")
41
- # Even if already installed, try to update data to ensure integrity (optional but sometimes helpful)
42
- # print("Attempting to update espeak-ng data...")
43
- # subprocess.run(["apt-get", "update"], check=True)
44
- # subprocess.run(["apt-get", "install", "--only-upgrade", "-y", "espeak-ng-data"], check=True)
45
 
46
  # Verify Chinese support (optional)
47
  try:
@@ -605,8 +600,8 @@ def vevo_style(content_wav, style_wav):
605
  print(f"Style audio shape: {style_tensor.shape}, sample rate: {style_sr}")
606
 
607
  # Save audio
608
- torchaudio.save(temp_content_path, content_tensor, content_sr)
609
- torchaudio.save(temp_style_path, style_tensor, style_sr)
610
 
611
  try:
612
  # Get pipeline
@@ -700,8 +695,8 @@ def vevo_timbre(content_wav, reference_wav):
700
  print(f"Reference audio shape: {reference_tensor.shape}, sample rate: {reference_sr}")
701
 
702
  # Save uploaded audio
703
- torchaudio.save(temp_content_path, content_tensor, content_sr)
704
- torchaudio.save(temp_reference_path, reference_tensor, reference_sr)
705
 
706
  try:
707
  # Get pipeline
@@ -820,9 +815,9 @@ def vevo_voice(content_wav, style_reference_wav, timbre_reference_wav):
820
  print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
821
 
822
  # Save uploaded audio
823
- torchaudio.save(temp_content_path, content_tensor, content_sr)
824
- torchaudio.save(temp_style_path, style_tensor, style_sr)
825
- torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr)
826
 
827
  try:
828
  # Get pipeline
@@ -893,7 +888,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
893
  print(f"Style reference text: {style_ref_text}, language: {style_ref_text_language}")
894
 
895
  # Save uploaded audio
896
- torchaudio.save(temp_ref_path, ref_tensor, ref_sr)
897
 
898
  if timbre_ref_wav is not None:
899
  if isinstance(timbre_ref_wav, tuple) and len(timbre_ref_wav) == 2:
@@ -918,7 +913,7 @@ def vevo_tts(text, ref_wav, timbre_ref_wav=None, style_ref_text=None, src_langua
918
  timbre_tensor = timbre_tensor / (torch.max(torch.abs(timbre_tensor)) + 1e-6) * 0.95
919
 
920
  print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
921
- torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr)
922
  else:
923
  raise ValueError("Invalid timbre reference audio format")
924
  else:
@@ -1046,4 +1041,4 @@ with gr.Blocks(title="Vevo: Controllable Zero-Shot Voice Imitation with Self-Sup
1046
  """)
1047
 
1048
  # Launch application
1049
- demo.launch()
 
12
  import re
13
  import spaces
14
 
 
15
  # Create a global variable to track downloaded resources
16
  downloaded_resources = {
17
  "configs": False,
 
37
  print("espeak-ng and its data packages installed successfully!")
38
  else:
39
  print("espeak-ng is already installed in the system.")
 
 
 
 
40
 
41
  # Verify Chinese support (optional)
42
  try:
 
600
  print(f"Style audio shape: {style_tensor.shape}, sample rate: {style_sr}")
601
 
602
  # Save audio
603
+ torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
604
+ torchaudio.save(temp_style_path, style_tensor, style_sr, backend="soundfile")
605
 
606
  try:
607
  # Get pipeline
 
695
  print(f"Reference audio shape: {reference_tensor.shape}, sample rate: {reference_sr}")
696
 
697
  # Save uploaded audio
698
+ torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
699
+ torchaudio.save(temp_reference_path, reference_tensor, reference_sr, backend="soundfile")
700
 
701
  try:
702
  # Get pipeline
 
815
  print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
816
 
817
  # Save uploaded audio
818
+ torchaudio.save(temp_content_path, content_tensor, content_sr, backend="soundfile")
819
+ torchaudio.save(temp_style_path, style_tensor, style_sr, backend="soundfile")
820
+ torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr, backend="soundfile")
821
 
822
  try:
823
  # Get pipeline
 
888
  print(f"Style reference text: {style_ref_text}, language: {style_ref_text_language}")
889
 
890
  # Save uploaded audio
891
+ torchaudio.save(temp_ref_path, ref_tensor, ref_sr, backend="soundfile")
892
 
893
  if timbre_ref_wav is not None:
894
  if isinstance(timbre_ref_wav, tuple) and len(timbre_ref_wav) == 2:
 
913
  timbre_tensor = timbre_tensor / (torch.max(torch.abs(timbre_tensor)) + 1e-6) * 0.95
914
 
915
  print(f"Timbre reference audio shape: {timbre_tensor.shape}, sample rate: {timbre_sr}")
916
+ torchaudio.save(temp_timbre_path, timbre_tensor, timbre_sr, backend="soundfile")
917
  else:
918
  raise ValueError("Invalid timbre reference audio format")
919
  else:
 
1041
  """)
1042
 
1043
  # Launch application
1044
+ demo.launch()