# Hugging Face Space source file (Space status at capture time: Sleeping).
# File size: 10,755 bytes.
# NOTE: the per-line commit-hash column and the 1-310 line-number gutter of the
# web file viewer were page chrome, not program text, and are omitted here.
import gradio as gr
import numpy as np
import tempfile
import librosa
import soundfile as sf
from scipy import signal
import os
class AIHumanizer:
    """Chain of subtle DSP effects applied to an audio file.

    Every effect takes a 1-D sample array ``y``, the sample rate ``sr`` and an
    ``intensity`` factor, and returns an array of the same length.  Effects are
    best-effort: if one fails, the failure is logged and the signal is passed
    through unchanged so the remaining stages still run.
    """

    def __init__(self):
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Load an audio file and run every channel through the effect chain.

        Args:
            audio_path: Path of the audio file, passed to ``librosa.load``.
            intensity: Strength factor forwarded to every effect.

        Returns:
            ``(y_processed, sr)`` where ``y_processed`` is a 2-D array with one
            row per channel (mono input yields a single row) and ``sr`` is the
            file's native sample rate.

        Raises:
            Exception: "Humanization failed: ..." wrapping (and chained to)
                any error raised while loading or processing.
        """
        try:
            print(f"Loading audio from: {audio_path}")
            # sr=None keeps the native sample rate; mono=False keeps channels.
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            print(f"Audio loaded: shape={y.shape}, sr={sr}")
            if y.ndim > 1:
                print("Processing stereo audio...")
                processed_channels = []
                for i, channel in enumerate(y):
                    print(f"Processing channel {i+1}...")
                    processed_channels.append(
                        self.process_channel(channel, sr, intensity)
                    )
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                # Wrap in a list so the result is always (channels, samples).
                y_processed = np.array([self.process_channel(y, sr, intensity)])
            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {e}")
            # Chain the cause so the original traceback is not lost.
            raise Exception(f"Humanization failed: {e}") from e

    def process_channel(self, y, sr, intensity):
        """Run one channel through all six effects, in a fixed order."""
        print(f"Processing channel: {len(y)} samples")
        # 1. Attenuate the 1.9-6.1 kHz band
        y = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y = self.add_timing_variations(y, sr, intensity)
        # 3. Add pitch variations
        y = self.add_pitch_variations(y, sr, intensity)
        # 4. Add room ambiance
        y = self.add_room_ambiance(y, sr, intensity)
        # 5. Add analog warmth
        y = self.add_analog_warmth(y, sr, intensity)
        # 6. Add slow amplitude fluctuations
        y = self.reduce_perfect_quantization(y, sr, intensity)
        return y

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Blend in a copy of ``y`` with the 1.9-6.1 kHz band notched out.

        The signal is returned unchanged when ``intensity <= 0.1`` or when the
        sample rate is too low for the filter: the upper band edge must lie
        strictly below the Nyquist frequency ``sr / 2``.
        """
        low_hz, high_hz = 1900, 6100
        if intensity <= 0.1 or sr <= 2 * high_hz:
            return y
        try:
            sos = signal.butter(
                4, [low_hz, high_hz], 'bandstop', fs=sr, output='sos'
            )
            y_filtered = signal.sosfilt(sos, y)
            # Blend with original
            blend_factor = 0.3 * intensity
            return y * (1 - blend_factor) + y_filtered * blend_factor
        except Exception as e:
            print(f"reduce_ai_artifacts skipped: {e}")
            return y

    def add_timing_variations(self, y, sr, intensity):
        """Apply a small random time warp inside each full 1-second segment.

        Each segment is resampled along ``t + depth * sin(pi * t)`` with
        ``depth`` drawn from ``[-0.01, 0.01] * intensity``.  The warp is zero
        at both segment ends, so segment boundaries stay sample-continuous and
        the output has exactly the input length (no zero padding or
        truncation).  A trailing partial segment is left untouched.

        Returns ``y`` unchanged when ``intensity < 0.2`` or when the signal is
        shorter than two segments.
        """
        if intensity < 0.2:
            return y
        try:
            segment_size = int(sr * 1.0)  # 1-second segments
            if len(y) < segment_size * 2:
                return y
            positions = np.linspace(0.0, 1.0, segment_size)
            bump = np.sin(np.pi * positions)  # 0 at both ends, 1 mid-segment
            segments = []
            for start in range(0, len(y), segment_size):
                segment = y[start:start + segment_size]
                if len(segment) == segment_size:
                    depth = np.random.uniform(-0.01, 0.01) * intensity
                    segment = np.interp(
                        positions + depth * bump, positions, segment
                    )
                segments.append(segment)
            return np.concatenate(segments)
        except Exception as e:
            print(f"add_timing_variations skipped: {e}")
            return y

    def add_pitch_variations(self, y, sr, intensity):
        """Blend in a copy of ``y`` pitch-shifted by a small random amount.

        The shift is drawn from ``[-0.2, 0.2] * intensity`` semitones and mixed
        in at ``0.2 * intensity``.  Skipped when ``intensity < 0.3``.
        """
        if intensity < 0.3:
            return y
        try:
            n_steps = np.random.uniform(-0.2, 0.2) * intensity
            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
            # Blend
            blend_factor = 0.2 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        except Exception as e:
            print(f"add_pitch_variations skipped: {e}")
            return y

    def add_room_ambiance(self, y, sr, intensity):
        """Blend in a short synthetic reverb (150 ms impulse response).

        The impulse response has one early reflection at 10 ms followed by an
        exponentially decaying tail.  The wet signal is rescaled to the dry
        signal's peak and mixed in at ``0.1 * intensity``.  Skipped when
        ``intensity < 0.2``, when the signal is empty, or when the impulse
        would be shorter than 10 samples.
        """
        if intensity < 0.2 or len(y) == 0:
            return y
        try:
            impulse_len = int(0.15 * sr)
            if impulse_len < 10:
                return y
            impulse = np.zeros(impulse_len)
            # Early reflection
            early = int(0.01 * sr)
            if early < impulse_len:
                impulse[early] = 0.8
            # Reverb tail
            tail_start = min(early + 1, impulse_len)
            if tail_start < impulse_len:
                tail_len = impulse_len - tail_start
                impulse[tail_start:] = np.exp(-np.linspace(0, 6, tail_len)) * 0.4
            # Causal convolution: keep the first len(y) samples of the full
            # result.  mode='same' would centre the output and move the reverb
            # ahead of the sound that causes it (audible pre-echo).
            y_reverb = signal.convolve(y, impulse, mode='full')[:len(y)]
            # Match the wet peak to the dry peak
            wet_peak = np.max(np.abs(y_reverb))
            if wet_peak > 0:
                y_reverb = y_reverb / wet_peak * np.max(np.abs(y))
            # Blend
            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        except Exception as e:
            print(f"add_room_ambiance skipped: {e}")
            return y

    def add_analog_warmth(self, y, sr, intensity):
        """Blend in a soft-clipped (tanh-saturated) copy of ``y``.

        The saturated copy is mixed in at ``0.15 * intensity``.  Skipped when
        ``intensity < 0.1``.
        """
        if intensity < 0.1:
            return y
        try:
            # Soft clipping
            saturation = 1.0 + 0.4 * intensity
            y_warm = np.tanh(y * saturation) / saturation
            # NOTE: this is a 100 Hz high-pass on the saturated copy (it
            # removes content below 100 Hz); it does not boost lows.
            if sr > 1000:
                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
                y_warm = signal.sosfilt(sos, y_warm)
            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        except Exception as e:
            print(f"add_analog_warmth skipped: {e}")
            return y

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Modulate the amplitude of ``y`` with two slow LFOs and fine noise.

        Despite its name this stage does not move samples in time; it only
        multiplies them by a gain that hovers around 1.0.  Skipped when
        ``intensity < 0.1``.
        """
        if intensity < 0.1:
            return y
        t = np.arange(len(y)) / sr
        # Slow LFO (0.3 Hz) for natural dynamics
        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
        # Faster LFO (2 Hz) for micro-variations
        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
        # Per-sample random gain jitter
        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))
        return y * (lfo1 * lfo2 * noise)
def humanize_song(input_audio, intensity):
    """Gradio callback: process an uploaded song and write the result as WAV.

    Args:
        input_audio: File path of the uploaded audio, or ``None`` when nothing
            was uploaded.
        intensity: Strength factor forwarded to ``AIHumanizer.humanize_audio``.

    Returns:
        ``(output_path, status_message)``.  ``output_path`` is the path of the
        written WAV file, or ``None`` when no input was given or processing
        failed; errors are reported through the message, never raised.
    """
    if input_audio is None:
        return None, "Please upload an audio file"
    humanizer = AIHumanizer()
    try:
        print("Starting humanization...")
        audio_data, sr = humanizer.humanize_audio(input_audio, intensity)
        print("Processing complete. Saving audio...")
        # Reserve the output file atomically.  tempfile.mktemp() only returns
        # a name, so another process could create that path first.
        with tempfile.NamedTemporaryFile(
            suffix='_humanized.wav', delete=False
        ) as tmp:
            output_path = tmp.name
        # audio_data is (channels, samples)
        if audio_data.shape[0] == 1:
            # Mono
            sf.write(output_path, audio_data[0], sr)
        else:
            # Multi-channel - transpose to (samples, channels) for soundfile
            sf.write(output_path, audio_data.T, sr)
        print(f"Saved to: {output_path}")
        return output_path, "✅ Success! Your song now sounds human-made. Download below."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        error_msg = f"❌ Error: {e}"
        print(f"Error details: {error_msg}")
        return None, error_msg
# Gradio UI: upload + strength slider on the left, result + status on the
# right.  The button runs humanize_song(input_audio, intensity) and routes its
# two return values to the output player and the status box.
# The emoji and arrows in the labels below were restored from mis-decoded
# (mojibake) text.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
# 🎵 AI Song Humanizer
**Remove AI Detection - Make Songs Sound Human-Made**
*Upload AI song → Remove robotic sound → Download human version*
""")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="Upload your AI-generated song"
            )
            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                0.1, 1.0, value=0.7,
                label="How much human feel to add"
            )
            process_btn = gr.Button(
                "✨ Humanize Song",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            output_audio = gr.Audio(
                label="Human-Sounding Version",
                type="filepath"
            )
            status = gr.Textbox(
                label="Status",
                interactive=False
            )
    # NOTE(review): this label originally began with an emoji that was
    # corrupted beyond recovery; it is omitted here - restore it if known.
    with gr.Accordion("How It Works", open=False):
        gr.Markdown("""
**This tool:**
- Takes your complete AI-generated song
- Removes robotic/AI artifacts
- Adds natural human performance variations
- Keeps your original music intact
- Makes it sound like humans performed it
**Perfect for making AI music undetectable!**
""")
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )

if __name__ == "__main__":
    demo.launch()