Spaces:

atalink
/

TTS-Talker

Runtime error

congcuong-cse committed on Sep 21

Commit

c4568cc

1 Parent(s): 7a6836d

Add logging for infer_batch_process

Files changed (1) hide show

src/f5_tts/infer/utils_infer.py CHANGED Viewed

@@ -456,20 +456,34 @@ def infer_batch_process(
     fix_duration=None,
     device=None,
 ):
     audio, sr = ref_audio
     if audio.shape[0] > 1:
         audio = torch.mean(audio, dim=0, keepdim=True)
     rms = torch.sqrt(torch.mean(torch.square(audio)))
     if rms < target_rms:
         audio = audio * target_rms / rms
     if sr != target_sample_rate:
         resampler = torchaudio.transforms.Resample(sr, target_sample_rate)
         audio = resampler(audio)
     audio = audio.to(device)
     generated_waves = []
     spectrograms = []
     if len(ref_text[-1].encode("utf-8")) == 1:
         ref_text = ref_text + " "

     fix_duration=None,
     device=None,
 ):
+    print("Starting audio preprocessing...")
     audio, sr = ref_audio
+    print(f"Original audio shape: {audio.shape}, sample rate: {sr}")
     if audio.shape[0] > 1:
+        print("Converting multi-channel audio to mono...")
         audio = torch.mean(audio, dim=0, keepdim=True)
+        print(f"Converted audio shape: {audio.shape}")
     rms = torch.sqrt(torch.mean(torch.square(audio)))
+    print(f"Calculated RMS: {rms}")
     if rms < target_rms:
+        print(f"Normalizing audio RMS to target RMS: {target_rms}")
         audio = audio * target_rms / rms
     if sr != target_sample_rate:
+        print(f"Resampling audio from {sr} Hz to {target_sample_rate} Hz...")
         resampler = torchaudio.transforms.Resample(sr, target_sample_rate)
         audio = resampler(audio)
+        print("Resampling complete.")
     audio = audio.to(device)
+    print(f"Audio moved to device: {device}")
     generated_waves = []
     spectrograms = []
+    print("Initialized containers for generated waves and spectrograms.")
     if len(ref_text[-1].encode("utf-8")) == 1:
         ref_text = ref_text + " "