Commit
·
ace23de
1
Parent(s):
2760947
Fix audio saving: use soundfile instead of torchaudio to avoid torchcodec dependency
Browse files
- app.py +12 -5
- requirements.txt +2 -0
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import torch
|
| 2 |
-
import torchaudio
|
| 3 |
import gradio as gr
|
| 4 |
import os
|
|
|
|
|
|
|
| 5 |
from stable_audio_tools import get_pretrained_model
|
| 6 |
from stable_audio_tools.inference.generation import generate_diffusion_cond
|
| 7 |
from huggingface_hub import login
|
|
@@ -77,16 +78,22 @@ def generate_audio(prompt, seconds_total=11):
|
|
| 77 |
# Extract single variation: [channels, samples]
|
| 78 |
audio = output[i] # Shape: [channels, samples]
|
| 79 |
|
| 80 |
-
# Peak normalize, clip, convert to
|
| 81 |
audio = audio.to(torch.float32)
|
| 82 |
audio_max = torch.max(torch.abs(audio))
|
| 83 |
if audio_max > 0:
|
| 84 |
audio = audio.div(audio_max)
|
| 85 |
-
audio = audio.clamp(-1, 1).
|
| 86 |
|
| 87 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
filename = f"output_variation_{i+1}.wav"
|
| 89 |
-
|
| 90 |
audio_files.append(filename)
|
| 91 |
|
| 92 |
return audio_files, f"Generated 4 variations for: '{prompt}'"
|
|
|
|
| 1 |
import torch
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import os
|
| 4 |
+
import soundfile as sf
|
| 5 |
+
import numpy as np
|
| 6 |
from stable_audio_tools import get_pretrained_model
|
| 7 |
from stable_audio_tools.inference.generation import generate_diffusion_cond
|
| 8 |
from huggingface_hub import login
|
|
|
|
| 78 |
# Extract single variation: [channels, samples]
|
| 79 |
audio = output[i] # Shape: [channels, samples]
|
| 80 |
|
| 81 |
+
# Peak normalize, clip, convert to float32 numpy array
|
| 82 |
audio = audio.to(torch.float32)
|
| 83 |
audio_max = torch.max(torch.abs(audio))
|
| 84 |
if audio_max > 0:
|
| 85 |
audio = audio.div(audio_max)
|
| 86 |
+
audio = audio.clamp(-1, 1).cpu().numpy()
|
| 87 |
|
| 88 |
+
# Transpose to [samples, channels] for soundfile
|
| 89 |
+
if audio.ndim == 1:
|
| 90 |
+
audio = audio.reshape(-1, 1)
|
| 91 |
+
else:
|
| 92 |
+
audio = audio.T # [channels, samples] -> [samples, channels]
|
| 93 |
+
|
| 94 |
+
# Save to temporary file using soundfile
|
| 95 |
filename = f"output_variation_{i+1}.wav"
|
| 96 |
+
sf.write(filename, audio, sample_rate)
|
| 97 |
audio_files.append(filename)
|
| 98 |
|
| 99 |
return audio_files, f"Generated 4 variations for: '{prompt}'"
|
requirements.txt
CHANGED
|
@@ -2,6 +2,8 @@
|
|
| 2 |
torch>=2.5.1
|
| 3 |
torchaudio>=2.5.1
|
| 4 |
gradio>=5.20.0
|
|
|
|
|
|
|
| 5 |
einops
|
| 6 |
einops-exts
|
| 7 |
safetensors
|
|
|
|
| 2 |
torch>=2.5.1
|
| 3 |
torchaudio>=2.5.1
|
| 4 |
gradio>=5.20.0
|
| 5 |
+
soundfile
|
| 6 |
+
numpy
|
| 7 |
einops
|
| 8 |
einops-exts
|
| 9 |
safetensors
|