import gradio as gr
import numpy as np
import tempfile
import librosa
import soundfile as sf
from scipy import signal


class AIHumanizer:
    def __init__(self):
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Remove AI artifacts and make audio sound human-made."""
        try:
            print(f"Loading audio from: {audio_path}")

            # Load the full song at its native sample rate, preserving channels
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            print(f"Audio loaded: shape={y.shape}, sr={sr}")

            # If stereo, process each channel independently
            if y.ndim > 1:
                print("Processing stereo audio...")
                processed_channels = []
                for i in range(y.shape[0]):
                    print(f"Processing channel {i + 1}...")
                    processed_channels.append(self.process_channel(y[i], sr, intensity))
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                y_processed = np.array([self.process_channel(y, sr, intensity)])

            print("Audio processing completed successfully")
            return y_processed, sr

        except Exception as e:
            print(f"Error in humanize_audio: {e}")
            raise RuntimeError(f"Humanization failed: {e}") from e

    def process_channel(self, y, sr, intensity):
        """Run the full artifact-removal chain on a single audio channel."""
        print(f"Processing channel: {len(y)} samples")

        # 1. Reduce robotic frequencies
        y = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y = self.add_timing_variations(y, sr, intensity)
        # 3. Add pitch variations
        y = self.add_pitch_variations(y, sr, intensity)
        # 4. Add room ambiance
        y = self.add_room_ambiance(y, sr, intensity)
        # 5. Add analog warmth
        y = self.add_analog_warmth(y, sr, intensity)
        # 6. Reduce perfect quantization
        y = self.reduce_perfect_quantization(y, sr, intensity)

        return y

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Attenuate the harsh ~2-6 kHz band where synthesis artifacts cluster."""
        # The upper band-stop edge (6100 Hz) must sit below Nyquist (sr / 2)
        if sr > 12200 and intensity > 0.1:
            try:
                sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
                y_filtered = signal.sosfilt(sos, y)

                # Blend the filtered signal back into the original
                blend_factor = 0.3 * intensity
                return y * (1 - blend_factor) + y_filtered * blend_factor
            except Exception:
                return y
        return y

    def add_timing_variations(self, y, sr, intensity):
        """Add subtle timing variations by stretching one-second segments."""
        if intensity < 0.2:
            return y
        try:
            segment_size = int(sr * 1.0)  # 1-second segments
            if len(y) < segment_size * 2:
                return y

            segments = []
            for i in range(0, len(y), segment_size):
                segment = y[i:i + segment_size]
                if len(segment) == segment_size:
                    # Randomly stretch the segment by up to +/-1%
                    stretch = 1.0 + np.random.uniform(-0.01, 0.01) * intensity
                    new_len = int(segment_size * stretch)

                    # Resample via linear interpolation
                    x_old = np.linspace(0, 1, segment_size)
                    x_new = np.linspace(0, 1, new_len)
                    segment_stretched = np.interp(x_new, x_old, segment)

                    # Trim or zero-pad back to the original segment length
                    if len(segment_stretched) > segment_size:
                        segment_stretched = segment_stretched[:segment_size]
                    else:
                        segment_stretched = np.pad(
                            segment_stretched,
                            (0, segment_size - len(segment_stretched))
                        )
                    segments.append(segment_stretched)
                else:
                    segments.append(segment)

            return np.concatenate(segments)
        except Exception:
            return y

    def add_pitch_variations(self, y, sr, intensity):
        """Add a subtle random pitch shift, blended with the original."""
        if intensity < 0.3:
            return y
        try:
            # Shift by up to +/-0.2 semitones, scaled by intensity
            n_steps = np.random.uniform(-0.2, 0.2) * intensity
            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

            blend_factor = 0.2 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        except Exception:
            return y
    def add_room_ambiance(self, y, sr, intensity):
        """Add natural room reverb via a short synthetic impulse response."""
        if intensity < 0.2:
            return y
        try:
            # Build a 150 ms impulse response
            impulse_len = int(0.15 * sr)
            if impulse_len < 10:
                return y
            impulse = np.zeros(impulse_len)

            # Single early reflection at 10 ms
            early = int(0.01 * sr)
            if early < impulse_len:
                impulse[early] = 0.8

            # Exponentially decaying reverb tail
            tail_start = min(early + 1, impulse_len)
            if tail_start < impulse_len:
                tail_len = impulse_len - tail_start
                decay = np.exp(-np.linspace(0, 6, tail_len))
                impulse[tail_start:] = decay * 0.4

            # Convolve, then renormalize to the dry signal's peak level
            y_reverb = signal.convolve(y, impulse, mode='same')
            if np.max(np.abs(y_reverb)) > 0:
                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))

            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        except Exception:
            return y

    def add_analog_warmth(self, y, sr, intensity):
        """Add analog-style warmth via soft saturation."""
        if intensity < 0.1:
            return y
        try:
            # Soft clipping (tanh saturation)
            saturation = 1.0 + 0.4 * intensity
            y_warm = np.tanh(y * saturation) / saturation

            # High-pass at 100 Hz so the saturated sub-bass does not muddy the mix
            if sr > 1000:
                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
                y_warm = signal.sosfilt(sos, y_warm)

            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        except Exception:
            return y

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Break up perfectly uniform dynamics with slow LFOs and low-level noise."""
        if intensity < 0.1:
            return y

        t = np.arange(len(y)) / sr
        # Slow LFO for natural dynamics
        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
        # Faster LFO for micro-variations
        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
        # Low-level random gain noise
        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))

        return y * lfo1 * lfo2 * noise


def humanize_song(input_audio, intensity):
    """Main humanization function wired to the Gradio interface."""
    if input_audio is None:
        return None, "Please upload an audio file"

    humanizer = AIHumanizer()
    try:
        print("Starting humanization...")

        # The Audio component is configured with type="filepath"
        audio_path = input_audio
        audio_data, sr = humanizer.humanize_audio(audio_path, intensity)
        print("Processing complete. Saving audio...")

        # Save as WAV to a uniquely named temporary file
        # (NamedTemporaryFile avoids the race condition of the deprecated tempfile.mktemp)
        with tempfile.NamedTemporaryFile(suffix='_humanized.wav', delete=False) as tmp:
            output_path = tmp.name

        if audio_data.shape[0] == 1:
            # Mono
            sf.write(output_path, audio_data[0], sr)
        else:
            # Stereo: soundfile expects (frames, channels), so transpose
            sf.write(output_path, audio_data.T, sr)

        print(f"Saved to: {output_path}")
        return output_path, "✅ Success! Your song now sounds human-made. Download below."

    except Exception as e:
        error_msg = f"❌ Error: {e}"
        print(f"Error details: {error_msg}")
        return None, error_msg


# Simple, broadly compatible interface
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
    # 🎵 AI Song Humanizer
    **Remove AI Detection - Make Songs Sound Human-Made**

    *Upload AI song → Remove robotic sound → Download human version*
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="Upload your AI-generated song"
            )

            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                0.1, 1.0,
                value=0.7,
                label="How much human feel to add"
            )

            process_btn = gr.Button(
                "✨ Humanize Song",
                variant="primary",
                size="lg"
            )

        with gr.Column(scale=1):
Download Result") output_audio = gr.Audio( label="Human-Sounding Version", type="filepath" ) status = gr.Textbox( label="Status", interactive=False ) with gr.Accordion("📖 How It Works", open=False): gr.Markdown(""" **This tool:** - Takes your complete AI-generated song - Removes robotic/AI artifacts - Adds natural human performance variations - Keeps your original music intact - Makes it sound like humans performed it **Perfect for making AI music undetectable!** """) process_btn.click( fn=humanize_song, inputs=[input_audio, intensity], outputs=[output_audio, status] ) if __name__ == "__main__": demo.launch()