import gradio as gr
import numpy as np
import tempfile
import librosa
import soundfile as sf
from scipy import signal
import os
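
# Runtime dependencies implied by the imports above (versions not pinned here):
#   pip install gradio numpy librosa soundfile scipy
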
class AIHumanizer:
    def __init__(self):
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Remove AI artifacts and make audio sound human-made"""
        try:
            print(f"Loading audio from: {audio_path}")
            # Load the full song at its native sample rate, preserving stereo
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            print(f"Audio loaded: shape={y.shape if hasattr(y, 'shape') else 'mono'}, sr={sr}")
            # If stereo, process each channel independently
            if len(y.shape) > 1:
                print("Processing stereo audio...")
                processed_channels = []
                for i in range(y.shape[0]):
                    print(f"Processing channel {i+1}...")
                    processed_channel = self.process_channel(y[i], sr, intensity)
                    processed_channels.append(processed_channel)
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                y_processed = self.process_channel(y, sr, intensity)
                y_processed = np.array([y_processed])
            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {str(e)}")
            raise Exception(f"Humanization failed: {str(e)}")
    def process_channel(self, y, sr, intensity):
        """Process a single audio channel to remove AI artifacts"""
        print(f"Processing channel: {len(y)} samples")
        # Keep a copy of the original (currently unused; each stage blends internally)
        y_original = y.copy()
        # 1. Reduce robotic frequencies
        y = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y = self.add_timing_variations(y, sr, intensity)
        # 3. Add pitch variations
        y = self.add_pitch_variations(y, sr, intensity)
        # 4. Add room ambiance
        y = self.add_room_ambiance(y, sr, intensity)
        # 5. Add analog warmth
        y = self.add_analog_warmth(y, sr, intensity)
        # 6. Reduce perfect quantization
        y = self.reduce_perfect_quantization(y, sr, intensity)
        return y
    def reduce_ai_artifacts(self, y, sr, intensity):
        """Reduce common AI audio artifacts"""
        # The upper band edge (6100 Hz) must sit below Nyquist (sr / 2)
        if sr > 12200 and intensity > 0.1:
            try:
                # Attenuate harsh frequencies around the 2 kHz - 6 kHz range
                sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
                y_filtered = signal.sosfilt(sos, y)
                # Blend with original
                blend_factor = 0.3 * intensity
                return y * (1 - blend_factor) + y_filtered * blend_factor
            except Exception:
                return y
        return y
    def add_timing_variations(self, y, sr, intensity):
        """Add subtle timing variations"""
        if intensity < 0.2:
            return y
        try:
            # Simple approach: small random stretch per segment
            segment_size = int(sr * 1.0)  # 1-second segments
            if len(y) < segment_size * 2:
                return y
            segments = []
            for i in range(0, len(y), segment_size):
                segment = y[i:i+segment_size]
                if len(segment) == segment_size:
                    # Small random stretch (at most +/-1% at full intensity)
                    stretch = 1.0 + np.random.uniform(-0.01, 0.01) * intensity
                    new_len = int(segment_size * stretch)
                    # Resample by linear interpolation
                    x_old = np.linspace(0, 1, segment_size)
                    x_new = np.linspace(0, 1, new_len)
                    segment_stretched = np.interp(x_new, x_old, segment)
                    # Trim or pad back to the original segment length
                    if len(segment_stretched) > segment_size:
                        segment_stretched = segment_stretched[:segment_size]
                    else:
                        segment_stretched = np.pad(segment_stretched, (0, segment_size - len(segment_stretched)))
                    segments.append(segment_stretched)
                else:
                    segments.append(segment)
            return np.concatenate(segments)
        except Exception:
            return y
    def add_pitch_variations(self, y, sr, intensity):
        """Add subtle pitch variations"""
        if intensity < 0.3:
            return y
        try:
            # Small random pitch shift (at most +/-0.2 semitones)
            n_steps = np.random.uniform(-0.2, 0.2) * intensity
            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
            # Blend with original
            blend_factor = 0.2 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        except Exception:
            return y
    def add_room_ambiance(self, y, sr, intensity):
        """Add natural room reverb"""
        if intensity < 0.2:
            return y
        try:
            # Build a simple 150 ms reverb impulse response
            impulse_len = int(0.15 * sr)
            if impulse_len < 10:
                return y
            impulse = np.zeros(impulse_len)
            # Early reflection at ~10 ms
            early = int(0.01 * sr)
            if early < impulse_len:
                impulse[early] = 0.8
            # Exponentially decaying reverb tail
            tail_start = min(early + 1, impulse_len)
            if tail_start < impulse_len:
                tail_len = impulse_len - tail_start
                decay = np.exp(-np.linspace(0, 6, tail_len))
                impulse[tail_start:] = decay * 0.4
            # Apply the impulse response by convolution
            y_reverb = signal.convolve(y, impulse, mode='same')
            # Normalize back to the original peak level
            if np.max(np.abs(y_reverb)) > 0:
                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))
            # Blend with original
            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        except Exception:
            return y
    def add_analog_warmth(self, y, sr, intensity):
        """Add analog-style warmth"""
        if intensity < 0.1:
            return y
        try:
            # Soft clipping via tanh saturation
            saturation = 1.0 + 0.4 * intensity
            y_warm = np.tanh(y * saturation) / saturation
            # High-pass at 100 Hz to clear sub-bass rumble from the saturated signal
            if sr > 1000:
                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
                y_warm = signal.sosfilt(sos, y_warm)
            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        except Exception:
            return y
    def reduce_perfect_quantization(self, y, sr, intensity):
        """Break up perfectly flat dynamics with subtle amplitude modulation"""
        if intensity < 0.1:
            return y
        # Add subtle amplitude variations
        t = np.arange(len(y)) / sr
        # Slow LFO (0.3 Hz) for natural dynamics
        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
        # Faster LFO (2 Hz) for micro-variations
        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
        # Low-level random gain noise
        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))
        combined = lfo1 * lfo2 * noise
        return y * combined
def humanize_song(input_audio, intensity):
    """Main humanization function"""
    if input_audio is None:
        return None, "Please upload an audio file"
    humanizer = AIHumanizer()
    try:
        print("Starting humanization...")
        # Gradio passes the uploaded file as a filepath string
        audio_path = input_audio
        # Process the audio
        audio_data, sr = humanizer.humanize_audio(audio_path, intensity)
        print("Processing complete. Saving audio...")
        # Save as WAV to a temporary file (mkstemp avoids the race condition of mktemp)
        fd, output_path = tempfile.mkstemp(suffix='_humanized.wav')
        os.close(fd)
        # Handle mono/stereo properly
        if audio_data.shape[0] == 1:
            # Mono
            sf.write(output_path, audio_data[0], sr)
        else:
            # Stereo - transpose to (samples, channels) for soundfile
            sf.write(output_path, audio_data.T, sr)
        print(f"Saved to: {output_path}")
        return output_path, "✅ Success! Your song now sounds human-made. Download below."
    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        print(f"Error details: {error_msg}")
        return None, error_msg
# Simple and compatible interface
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
    # 🎵 AI Song Humanizer
    **Remove AI Detection - Make Songs Sound Human-Made**
    *Upload AI song → Remove robotic sound → Download human version*
    """)
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="Upload your AI-generated song"
            )
            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                0.1, 1.0, value=0.7,
                label="How much human feel to add"
            )
            process_btn = gr.Button(
                "✨ Humanize Song",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            output_audio = gr.Audio(
                label="Human-Sounding Version",
                type="filepath"
            )
            status = gr.Textbox(
                label="Status",
                interactive=False
            )
    with gr.Accordion("📋 How It Works", open=False):
        gr.Markdown("""
        **This tool:**
        - Takes your complete AI-generated song
        - Removes robotic/AI artifacts
        - Adds natural human performance variations
        - Keeps your original music intact
        - Makes it sound like humans performed it

        **Perfect for making AI music undetectable!**
        """)
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )

if __name__ == "__main__":
    demo.launch()
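
# Example (a sketch, not part of the original app): calling AIHumanizer directly,
# bypassing the Gradio UI. The file names below are placeholders.
#
#     humanizer = AIHumanizer()
#     audio, sr = humanizer.humanize_audio("my_ai_song.wav", intensity=0.7)
#     # audio has shape (channels, samples); soundfile expects (samples, channels)
#     sf.write("my_ai_song_humanized.wav", audio.T, sr)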