Update app.py
app.py CHANGED
@@ -15,24 +15,24 @@ class AIHumanizer:
         try:
             print(f"Loading audio from: {audio_path}")

+            # Load the full song
             y, sr = librosa.load(audio_path, sr=None, mono=False)

+            print(f"Audio loaded: shape={y.shape if hasattr(y, 'shape') else 'mono'}, sr={sr}")

             # If stereo, process both channels
             if len(y.shape) > 1:
                 print("Processing stereo audio...")
                 processed_channels = []
+                for i in range(y.shape[0]):
                     print(f"Processing channel {i+1}...")
+                    processed_channel = self.process_channel(y[i], sr, intensity)
                     processed_channels.append(processed_channel)
                 y_processed = np.array(processed_channels)
             else:
                 print("Processing mono audio...")
                 y_processed = self.process_channel(y, sr, intensity)
+                y_processed = np.array([y_processed])

             print("Audio processing completed successfully")
             return y_processed, sr
@@ -43,232 +43,235 @@ class AIHumanizer:

     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
+        print(f"Processing channel: {len(y)} samples")
+
+        # Store original for blending
+        y_original = y.copy()

         # 1. Reduce robotic frequencies
+        y = self.reduce_ai_artifacts(y, sr, intensity)

         # 2. Add timing variations
+        y = self.add_timing_variations(y, sr, intensity)

         # 3. Add pitch variations
+        y = self.add_pitch_variations(y, sr, intensity)

         # 4. Add room ambiance
+        y = self.add_room_ambiance(y, sr, intensity)

         # 5. Add analog warmth
+        y = self.add_analog_warmth(y, sr, intensity)

         # 6. Reduce perfect quantization
+        y = self.reduce_perfect_quantization(y, sr, intensity)

+        return y

     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
+        if sr > 4000 and intensity > 0.1:
+            try:
+                # Reduce harsh frequencies in the 2kHz-6kHz range
+                sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
+                y_filtered = signal.sosfilt(sos, y)
+
+                # Blend with original
+                blend_factor = 0.3 * intensity
+                return y * (1 - blend_factor) + y_filtered * blend_factor
+            except:
+                return y
         return y

     def add_timing_variations(self, y, sr, intensity):
         """Add subtle timing variations"""
+        if intensity < 0.2:
             return y

+        try:
+            # Simple approach: small random stretches
+            segment_size = int(sr * 1.0)  # 1-second segments
+            if len(y) < segment_size * 2:
+                return y

+            segments = []
+            for i in range(0, len(y), segment_size):
+                segment = y[i:i+segment_size]
-                if len(segment) > 100:  # Only process if segment is long enough
-                    # Small speed variation
-                    speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
-                    new_length = int(len(segment) / speed_var)
+                if len(segment) == segment_size:
+                    # Small random stretch
+                    stretch = 1.0 + np.random.uniform(-0.01, 0.01) * intensity
+                    new_len = int(segment_size * stretch)
+
+                    # Resample
+                    x_old = np.linspace(0, 1, segment_size)
+                    x_new = np.linspace(0, 1, new_len)
+                    segment_stretched = np.interp(x_new, x_old, segment)

-                    segment_varied = np.pad(segment_varied, (0, len(segment) - len(segment_varied)))
+                    # Trim or pad to original length
+                    if len(segment_stretched) > segment_size:
+                        segment_stretched = segment_stretched[:segment_size]
+                    else:
+                        segment_stretched = np.pad(segment_stretched, (0, segment_size - len(segment_stretched)))

+                    segments.append(segment_stretched)
                 else:
                     segments.append(segment)
-            segments.append(segment)
-            if segments:
+
             return np.concatenate(segments)
+        except:
+            return y

     def add_pitch_variations(self, y, sr, intensity):
         """Add subtle pitch variations"""
+        if intensity < 0.3:
             return y

         try:
+            # Small random pitch shifts
+            n_steps = np.random.uniform(-0.2, 0.2) * intensity
+            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

+            # Blend
+            blend_factor = 0.2 * intensity
             return y * (1 - blend_factor) + y_shifted * blend_factor
         except:
             return y

     def add_room_ambiance(self, y, sr, intensity):
         """Add natural room reverb"""
+        if intensity < 0.2:
             return y

-        # Simple impulse response for natural room
-        impulse_length = int(0.2 * sr)  # 200ms reverb
-        if impulse_length < 10:
-            return y
-
-        impulse = np.zeros(impulse_length)
-
-        # Early reflections
-        early_reflections = int(0.01 * sr)  # 10ms
-        if early_reflections < len(impulse):
-            impulse[early_reflections] = 0.6
-
-        # Late reverb tail
-        reverb_start = min(early_reflections + 1, len(impulse))
-        if reverb_start < len(impulse):
-            tail_length = len(impulse) - reverb_start
-            decay = np.exp(-np.linspace(0, 8, tail_length))
-            impulse[reverb_start:] = decay * 0.3
-
-        # Normalize impulse
-        if np.max(np.abs(impulse)) > 0:
-            impulse = impulse / np.max(np.abs(impulse))
-
-        # Apply convolution
         try:
+            # Simple reverb impulse
+            impulse_len = int(0.15 * sr)
+            if impulse_len < 10:
+                return y
+
+            impulse = np.zeros(impulse_len)
+            # Early reflection
+            early = int(0.01 * sr)
+            if early < impulse_len:
+                impulse[early] = 0.8
+            # Reverb tail
+            tail_start = min(early + 1, impulse_len)
+            if tail_start < impulse_len:
+                tail_len = impulse_len - tail_start
+                decay = np.exp(-np.linspace(0, 6, tail_len))
+                impulse[tail_start:] = decay * 0.4
+
+            # Apply convolution
             y_reverb = signal.convolve(y, impulse, mode='same')
+            # Normalize
             if np.max(np.abs(y_reverb)) > 0:
                 y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))

+            # Blend
+            blend_factor = 0.1 * intensity
             return y * (1 - blend_factor) + y_reverb * blend_factor
         except:
             return y

     def add_analog_warmth(self, y, sr, intensity):
         """Add analog-style warmth"""
-        # Add subtle warmth with EQ
+        if intensity < 0.1:
+            return y
+
         try:
+            # Soft clipping
+            saturation = 1.0 + 0.4 * intensity
+            y_warm = np.tanh(y * saturation) / saturation

+            # Gentle low boost
+            if sr > 1000:
+                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
+                y_warm = signal.sosfilt(sos, y_warm)
+
+            blend_factor = 0.15 * intensity
             return y * (1 - blend_factor) + y_warm * blend_factor
         except:
+            return y

     def reduce_perfect_quantization(self, y, sr, intensity):
+        """Reduce perfectly quantized timing"""
+        if intensity < 0.1:
+            return y
+
+        # Add subtle amplitude variations
+        t = np.arange(len(y)) / sr
+        # Slow LFO for natural dynamics
+        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
+        # Faster LFO for micro-variations
+        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
+        # Random noise
+        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))
-        # Combine variations
-        total_variation = amplitude_variation * random_variation

+        combined = lfo1 * lfo2 * noise
+        return y * combined

+def humanize_song(input_audio, intensity):
     """Main humanization function"""
+    if input_audio is None:
         return None, "Please upload an audio file"

     humanizer = AIHumanizer()

     try:
+        print("Starting humanization...")

+        # Get the file path from the audio input
+        audio_path = input_audio

+        # Process the audio
+        audio_data, sr = humanizer.humanize_audio(audio_path, intensity)

+        print(f"Processing complete. Saving audio...")

+        # Save as WAV
+        output_path = tempfile.mktemp(suffix='_humanized.wav')

-        audio_data = audio_data.T  # Transpose for soundfile
+        # Handle stereo/mono properly
+        if audio_data.shape[0] == 1:
+            # Mono
+            sf.write(output_path, audio_data[0], sr)
+        else:
+            # Stereo - transpose for soundfile
+            sf.write(output_path, audio_data.T, sr)

+        print(f"Saved to: {output_path}")
+        return output_path, "✅ Success! Your song now sounds human-made. Download below."

     except Exception as e:
         error_msg = f"❌ Error: {str(e)}"
-        print(error_msg)
+        print(f"Error details: {error_msg}")
         return None, error_msg

+# Simple and compatible interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
+    **Remove AI Detection - Make Songs Sound Human-Made**

+    *Upload AI song → Remove robotic sound → Download human version*
     """)

     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 1. Upload AI Song")
             input_audio = gr.Audio(
+                sources=["upload"],
                 type="filepath",
-                editable=True
+                label="Upload your AI-generated song"
             )

             gr.Markdown("### 2. Humanization Strength")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
-                info="Lower = subtle, Higher = more natural/organic"
+                label="How much human feel to add"
             )

             process_btn = gr.Button(
+                "✨ Humanize Song",
                 variant="primary",
                 size="lg"
             )
@@ -276,43 +279,27 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
         with gr.Column(scale=1):
             gr.Markdown("### 3. Download Result")
             output_audio = gr.Audio(
-                interactive=False
+                label="Human-Sounding Version",
+                type="filepath"
             )

             status = gr.Textbox(
                 label="Status",
-                max_lines=3
+                interactive=False
             )

+    with gr.Accordion("📖 How It Works", open=False):
         gr.Markdown("""
-        - Everything you created
-
-        🎙️ **Removes AI Artifacts:**
-        - Robotic/metallic frequencies
-        - Perfect digital quantization
-        - Sterile, artificial sound
-        - AI-generated frequency patterns
-
-        🎵 **Adds Human Elements:**
-        - Natural timing variations
-        - Subtle pitch fluctuations
-        - Room ambiance and warmth
-        - Analog-style character
+        **This tool:**
+        - Takes your complete AI-generated song
+        - Removes robotic/AI artifacts
+        - Adds natural human performance variations
+        - Keeps your original music intact
+        - Makes it sound like humans performed it

+        **Perfect for making AI music undetectable!**
         """)

-    # Processing function
     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
@@ -320,4 +307,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     )

 if __name__ == "__main__":
-    demo.launch(
+    demo.launch()