Opera8 committed on
Commit
c8c87cd
·
verified ·
1 Parent(s): 6517d62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -14,7 +14,7 @@ import spaces
14
  import uuid
15
  import soundfile as sf
16
 
17
- # فقط منابع ضروری
18
  downloaded_resources = {
19
  "configs": False,
20
  "tokenizer_vq8192": False,
@@ -86,7 +86,7 @@ os.makedirs("ckpts/Vevo", exist_ok=True)
86
 
87
  from models.vc.vevo.vevo_utils import VevoInferencePipeline
88
 
89
- # تابع ذخیره سازی امن
90
  def my_save_audio(waveform, output_path, sample_rate=24000):
91
  try:
92
  if isinstance(waveform, torch.Tensor):
@@ -169,6 +169,7 @@ def get_pipeline():
169
 
170
  @spaces.GPU()
171
  def vevo_timbre(content_wav, reference_wav):
 
172
  session_id = str(uuid.uuid4())[:8]
173
  temp_content_path = f"wav/c_{session_id}.wav"
174
  temp_reference_path = f"wav/r_{session_id}.wav"
@@ -178,6 +179,7 @@ def vevo_timbre(content_wav, reference_wav):
178
  raise ValueError("Please upload audio files")
179
 
180
  try:
 
181
  if isinstance(content_wav, tuple):
182
  content_sr, content_data = content_wav if isinstance(content_wav[0], int) else (content_wav[1], content_wav[0])
183
  else:
@@ -187,12 +189,15 @@ def vevo_timbre(content_wav, reference_wav):
187
  content_data = np.mean(content_data, axis=1)
188
 
189
  content_tensor = torch.FloatTensor(content_data).unsqueeze(0)
 
 
190
  if content_sr != 24000:
191
  content_tensor = torchaudio.functional.resample(content_tensor, content_sr, 24000)
192
  content_sr = 24000
193
 
194
  content_tensor = content_tensor / (torch.max(torch.abs(content_tensor)) + 1e-6) * 0.95
195
 
 
196
  if isinstance(reference_wav, tuple):
197
  ref_sr, ref_data = reference_wav if isinstance(reference_wav[0], int) else (reference_wav[1], reference_wav[0])
198
  else:
@@ -208,11 +213,11 @@ def vevo_timbre(content_wav, reference_wav):
208
 
209
  ref_tensor = ref_tensor / (torch.max(torch.abs(ref_tensor)) + 1e-6) * 0.95
210
 
211
- # استفاده از soundfile برای ذخیره موقت
212
  sf.write(temp_content_path, content_tensor.squeeze().cpu().numpy(), content_sr)
213
  sf.write(temp_reference_path, ref_tensor.squeeze().cpu().numpy(), ref_sr)
214
 
215
- print(f"[{session_id}] Processing Audio...")
216
 
217
  pipeline = get_pipeline()
218
 
@@ -226,6 +231,7 @@ def vevo_timbre(content_wav, reference_wav):
226
  print("Warning: NaN fixed")
227
  gen_audio = torch.nan_to_num(gen_audio, nan=0.0, posinf=0.95, neginf=-0.95)
228
 
 
229
  my_save_audio(gen_audio, output_path=output_path)
230
  return output_path
231
 
 
14
  import uuid
15
  import soundfile as sf
16
 
17
+ # منابع ضروری
18
  downloaded_resources = {
19
  "configs": False,
20
  "tokenizer_vq8192": False,
 
86
 
87
  from models.vc.vevo.vevo_utils import VevoInferencePipeline
88
 
89
+ # تابع ذخیره سازی امن (جایگزین torchaudio)
90
  def my_save_audio(waveform, output_path, sample_rate=24000):
91
  try:
92
  if isinstance(waveform, torch.Tensor):
 
169
 
170
  @spaces.GPU()
171
  def vevo_timbre(content_wav, reference_wav):
172
+ # تولید نام فایل امن
173
  session_id = str(uuid.uuid4())[:8]
174
  temp_content_path = f"wav/c_{session_id}.wav"
175
  temp_reference_path = f"wav/r_{session_id}.wav"
 
179
  raise ValueError("Please upload audio files")
180
 
181
  try:
182
+ # --- پردازش صدای اصلی ---
183
  if isinstance(content_wav, tuple):
184
  content_sr, content_data = content_wav if isinstance(content_wav[0], int) else (content_wav[1], content_wav[0])
185
  else:
 
189
  content_data = np.mean(content_data, axis=1)
190
 
191
  content_tensor = torch.FloatTensor(content_data).unsqueeze(0)
192
+
193
+ # ریسمپل با torchaudio (اینجا ارور نمیده چون ذخیره نمیکنیم، فقط پردازش میکنیم)
194
  if content_sr != 24000:
195
  content_tensor = torchaudio.functional.resample(content_tensor, content_sr, 24000)
196
  content_sr = 24000
197
 
198
  content_tensor = content_tensor / (torch.max(torch.abs(content_tensor)) + 1e-6) * 0.95
199
 
200
+ # --- پردازش صدای رفرنس ---
201
  if isinstance(reference_wav, tuple):
202
  ref_sr, ref_data = reference_wav if isinstance(reference_wav[0], int) else (reference_wav[1], reference_wav[0])
203
  else:
 
213
 
214
  ref_tensor = ref_tensor / (torch.max(torch.abs(ref_tensor)) + 1e-6) * 0.95
215
 
216
+ # ذخیره موقت با soundfile (برای جلوگیری از ارور TorchCodec)
217
  sf.write(temp_content_path, content_tensor.squeeze().cpu().numpy(), content_sr)
218
  sf.write(temp_reference_path, ref_tensor.squeeze().cpu().numpy(), ref_sr)
219
 
220
+ print(f"[{session_id}] Processing...")
221
 
222
  pipeline = get_pipeline()
223
 
 
231
  print("Warning: NaN fixed")
232
  gen_audio = torch.nan_to_num(gen_audio, nan=0.0, posinf=0.95, neginf=-0.95)
233
 
234
+ # ذخیره نهایی با soundfile
235
  my_save_audio(gen_audio, output_path=output_path)
236
  return output_path
237