hugofloresgarcia committed on
Commit
ace23de
·
1 Parent(s): 2760947

Fix audio saving: use soundfile instead of torchaudio to avoid torchcodec dependency

Browse files
Files changed (2) hide show
  1. app.py +12 -5
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import torch
2
- import torchaudio
3
  import gradio as gr
4
  import os
 
 
5
  from stable_audio_tools import get_pretrained_model
6
  from stable_audio_tools.inference.generation import generate_diffusion_cond
7
  from huggingface_hub import login
@@ -77,16 +78,22 @@ def generate_audio(prompt, seconds_total=11):
77
  # Extract single variation: [channels, samples]
78
  audio = output[i] # Shape: [channels, samples]
79
 
80
- # Peak normalize, clip, convert to int16
81
  audio = audio.to(torch.float32)
82
  audio_max = torch.max(torch.abs(audio))
83
  if audio_max > 0:
84
  audio = audio.div(audio_max)
85
- audio = audio.clamp(-1, 1).mul(32767).to(torch.int16).cpu()
86
 
87
- # Save to temporary file
 
 
 
 
 
 
88
  filename = f"output_variation_{i+1}.wav"
89
- torchaudio.save(filename, audio, sample_rate)
90
  audio_files.append(filename)
91
 
92
  return audio_files, f"Generated 4 variations for: '{prompt}'"
 
1
  import torch
 
2
  import gradio as gr
3
  import os
4
+ import soundfile as sf
5
+ import numpy as np
6
  from stable_audio_tools import get_pretrained_model
7
  from stable_audio_tools.inference.generation import generate_diffusion_cond
8
  from huggingface_hub import login
 
78
  # Extract single variation: [channels, samples]
79
  audio = output[i] # Shape: [channels, samples]
80
 
81
+ # Peak normalize, clip, convert to float32 numpy array
82
  audio = audio.to(torch.float32)
83
  audio_max = torch.max(torch.abs(audio))
84
  if audio_max > 0:
85
  audio = audio.div(audio_max)
86
+ audio = audio.clamp(-1, 1).cpu().numpy()
87
 
88
+ # Transpose to [samples, channels] for soundfile
89
+ if audio.ndim == 1:
90
+ audio = audio.reshape(-1, 1)
91
+ else:
92
+ audio = audio.T # [channels, samples] -> [samples, channels]
93
+
94
+ # Save to temporary file using soundfile
95
  filename = f"output_variation_{i+1}.wav"
96
+ sf.write(filename, audio, sample_rate)
97
  audio_files.append(filename)
98
 
99
  return audio_files, f"Generated 4 variations for: '{prompt}'"
requirements.txt CHANGED
@@ -2,6 +2,8 @@
2
  torch>=2.5.1
3
  torchaudio>=2.5.1
4
  gradio>=5.20.0
 
 
5
  einops
6
  einops-exts
7
  safetensors
 
2
  torch>=2.5.1
3
  torchaudio>=2.5.1
4
  gradio>=5.20.0
5
+ soundfile
6
+ numpy
7
  einops
8
  einops-exts
9
  safetensors