yangpeng08 committed on
Commit
4620bde
·
1 Parent(s): 3f373d0

add volume-normalization to avoid audio clipping after multiple edits

Browse files
Files changed (1) hide show
  1. tts.py +6 -0
tts.py CHANGED
@@ -367,6 +367,12 @@ class StepAudioTTS:
367
  prompt_wav, prompt_wav_sr = torchaudio.load(prompt_wav_path)
368
  if prompt_wav.shape[0] > 1:
369
  prompt_wav = prompt_wav.mean(dim=0, keepdim=True) # 将多通道音频转换为单通道
 
 
 
 
 
 
370
  speech_feat, speech_feat_len = self.cosy_model.frontend.extract_speech_feat(
371
  prompt_wav, prompt_wav_sr
372
  )
 
367
  prompt_wav, prompt_wav_sr = torchaudio.load(prompt_wav_path)
368
  if prompt_wav.shape[0] > 1:
369
  prompt_wav = prompt_wav.mean(dim=0, keepdim=True) # 将多通道音频转换为单通道
370
+
371
+ # volume-normalize avoid clipping
372
+ norm = torch.max(torch.abs(prompt_wav), dim=1, keepdim=True)[0]
373
+ if norm > 0.6: # hard code; max absolute value is 0.6
374
+ prompt_wav = prompt_wav / norm * 0.6
375
+
376
  speech_feat, speech_feat_len = self.cosy_model.frontend.extract_speech_feat(
377
  prompt_wav, prompt_wav_sr
378
  )