Spaces:

FILMITO
/

HumanizeBot

Sleeping

App Files Files Community

FILMITO committed 20 days ago

Commit

20ef8ef

verified ·

1 Parent(s): 49a971e

Update app.py

Browse files

Files changed (1) hide show

app.py +182 -139

app.py CHANGED Viewed

@@ -3,8 +3,8 @@ import numpy as np
 import tempfile
 import librosa
 import soundfile as sf
-import noisereduce as nr
 from scipy import signal
 class AIHumanizer:
     def __init__(self):
@@ -13,39 +13,51 @@ class AIHumanizer:
     def humanize_audio(self, audio_path, intensity=0.7):
         """Remove AI artifacts and make audio sound human-made"""
         try:
-            # Load the full song
-            y, sr = librosa.load(audio_path, sr=44100, mono=False)
             # If stereo, process both channels
             if len(y.shape) > 1:
                 processed_channels = []
-                for channel in y:
                     processed_channel = self.process_channel(channel, sr, intensity)
                     processed_channels.append(processed_channel)
-                y_processed = np.vstack(processed_channels)
             else:
                 y_processed = self.process_channel(y, sr, intensity)
             return y_processed, sr
         except Exception as e:
             raise Exception(f"Humanization failed: {str(e)}")
     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
-        # 1. Reduce robotic/metallic frequencies (common in AI audio)
         y_processed = self.reduce_ai_artifacts(y, sr, intensity)
-        # 2. Add natural human timing variations
         y_processed = self.add_timing_variations(y_processed, sr, intensity)
-        # 3. Add subtle pitch variations (like human singers/instruments)
         y_processed = self.add_pitch_variations(y_processed, sr, intensity)
-        # 4. Add natural room ambiance
         y_processed = self.add_room_ambiance(y_processed, sr, intensity)
-        # 5. Add subtle analog warmth
         y_processed = self.add_analog_warmth(y_processed, sr, intensity)
         # 6. Reduce perfect quantization
@@ -55,116 +67,142 @@ class AIHumanizer:
     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
-        # Reduce metallic/robotic frequencies (common in AI vocals)
-        # These are often in the 2kHz-6kHz range
-        sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
-        y_filtered = signal.sosfilt(sos, y)
-        # Blend with original based on intensity
-        y_processed = y * (1 - intensity*0.3) + y_filtered * (intensity*0.3)
-        return y_processed
     def add_timing_variations(self, y, sr, intensity):
-        """Add subtle timing variations like human musicians"""
-        # Create a time-stretching effect with small random variations
-        original_length = len(y)
-        # Small random speed variations (like human timing)
-        speed_variation = 1.0 + np.random.normal(0, 0.005 * intensity)
-        # Resample to create timing variations
-        new_length = int(original_length / speed_variation)
-        indices = np.linspace(0, original_length-1, new_length)
-        y_varied = np.interp(indices, np.arange(original_length), y)
-        # Resample back to original length
-        if len(y_varied) != original_length:
-            indices_back = np.linspace(0, len(y_varied)-1, original_length)
-            y_varied = np.interp(indices_back, np.arange(len(y_varied)), y_varied)
-        # Blend with original
-        blend_factor = 0.1 * intensity
-        return y * (1 - blend_factor) + y_varied * blend_factor
     def add_pitch_variations(self, y, sr, intensity):
-        """Add subtle pitch variations like human performance"""
-        # Small pitch variations (vibrato effect)
-        t = np.linspace(0, len(y)/sr, len(y))
-        # Create subtle vibrato
-        vibrato_rate = 5.0  # Hz
-        vibrato_depth = 0.3 * intensity  # Semitones
-        pitch_variation = np.sin(2 * np.pi * vibrato_rate * t) * vibrato_depth
-        # Apply pitch shifting using phase vocoder
-        y_pitched = self.pitch_shift_pv(y, sr, pitch_variation)
-        # Blend with original
-        blend_factor = 0.15 * intensity
-        return y * (1 - blend_factor) + y_pitched * blend_factor
-    def pitch_shift_pv(self, y, sr, pitch_variation):
-        """Simple phase vocoder pitch shifting"""
-        # Simple implementation - in practice you'd use librosa's phase_vocoder
-        # For now, we'll use a simplified version
         try:
-            import librosa
-            # Use small constant pitch shift for simplicity
-            n_steps = 0.1 * np.mean(np.abs(pitch_variation))
-            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
-            return y_shifted
         except:
             return y
     def add_room_ambiance(self, y, sr, intensity):
-        """Add natural room reverb to remove sterile AI sound"""
-        # Create simple impulse response for natural room
-        impulse_length = int(0.3 * sr)  # 300ms reverb
         impulse = np.zeros(impulse_length)
         # Early reflections
-        early_reflections = int(0.02 * sr)  # 20ms
-        impulse[early_reflections] = 0.7
         # Late reverb tail
-        decay = np.exp(-np.linspace(0, 10, impulse_length - early_reflections))
-        impulse[early_reflections:] = decay * 0.4
-        # Apply convolution reverb
-        y_reverb = signal.convolve(y, impulse, mode='same') / np.max(np.abs(impulse))
-        # Blend with original
-        blend_factor = 0.08 * intensity
-        return y * (1 - blend_factor) + y_reverb * blend_factor
     def add_analog_warmth(self, y, sr, intensity):
-        """Add analog-style warmth and saturation"""
         # Soft clipping saturation
-        saturation_amount = 1.0 + 0.5 * intensity
         y_saturated = np.tanh(y * saturation_amount) / saturation_amount
-        # Add subtle tape character: 150 Hz high-pass on the saturated signal (trims lows; not a low-end boost)
-        sos_warmth = signal.butter(2, 150, 'highpass', fs=sr, output='sos')
-        y_warmth = signal.sosfilt(sos_warmth, y_saturated)
-        # Blend
-        blend_factor = 0.2 * intensity
-        return y * (1 - blend_factor) + y_warmth * blend_factor
     def reduce_perfect_quantization(self, y, sr, intensity):
-        """Reduce perfectly quantized timing"""
         # Add subtle random amplitude variations
         t = np.linspace(0, len(y)/sr, len(y))
-        # Low-frequency amplitude modulation (like human dynamics)
-        lfo_rate = 0.5  # Hz
-        lfo_depth = 0.05 * intensity
         amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
         # Random micro-variations
-        random_variation = 1.0 + np.random.normal(0, 0.02 * intensity, len(y))
         # Combine variations
         total_variation = amplitude_variation * random_variation
@@ -174,102 +212,107 @@ class AIHumanizer:
 def humanize_song(input_mp3, intensity):
     """Main humanization function"""
     if input_mp3 is None:
-        return None, "Please upload an MP3 file"
     humanizer = AIHumanizer()
     try:
         # Process the entire song to remove AI artifacts
         audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
-        # Save as MP3
-        output_path = tempfile.mktemp(suffix='_humanized.mp3')
         sf.write(output_path, audio_data, sr)
         return output_path, "✅ Song humanized! AI artifacts removed and human feel added."
     except Exception as e:
-        return None, f"❌ Error: {str(e)}"
-# Simple interface focused on humanization
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
-    **Remove AI Detection from Your Music - Make it Sound Human-Made**
-    *Upload AI-generated songs → Remove robotic artifacts → Download human-sounding music*
     """)
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### 1. Upload AI-Generated Song")
             input_audio = gr.Audio(
-                sources=["upload"],
                 type="filepath",
-                label="Upload your complete AI song (MP3, WAV, etc.)"
             )
-            gr.Markdown("### 2. Adjust Humanization")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
-                label="Humanization Strength",
-                info="How much to remove AI artifacts and add human feel"
             )
             process_btn = gr.Button(
-                "✨ Remove AI Artifacts",
                 variant="primary",
                 size="lg"
             )
         with gr.Column(scale=1):
-            gr.Markdown("### 3. Download Human Version")
             output_audio = gr.Audio(
-                label="Human-Sounding Song",
                 type="filepath",
                 interactive=False
             )
             status = gr.Textbox(
-                label="Processing Status",
-                interactive=False
             )
-    with gr.Accordion("🔍 What This Actually Does", open=True):
         gr.Markdown("""
-        **This tool processes your EXISTING complete song to:**
-        🎵 **Remove AI Audio Artifacts:**
-        - Reduces metallic/robotic frequencies
-        - Removes perfect quantization
-        - Eliminates sterile digital sound
-        🎤 **Add Human Performance Elements:**
-        - Subtle timing variations (like human musicians)
-        - Natural pitch fluctuations (vibrato, human imperfection)
-        - Dynamic amplitude changes
-        🎛️ **Add Analog Character:**
-        - Natural room ambiance and reverb
-        - Analog-style warmth and saturation
-        - Tape-like characteristics
-        **You keep:**
-        - Your original melody
-        - Your original arrangement
-        - Your original vocals/instruments
         - Your complete song structure
-        **What changes:**
-        - Sounds like humans performed it
-        - No more "AI sound"
-        - Natural imperfections added
-        - Warmer, more organic sound
-        **Perfect for:** Making AI-generated songs undetectable as AI!
         """)
-    # Processing
     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
@@ -277,4 +320,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     )
 if __name__ == "__main__":
-    demo.launch()

 import tempfile
 import librosa
 import soundfile as sf
 from scipy import signal
+import os
 class AIHumanizer:
     def __init__(self):
     def humanize_audio(self, audio_path, intensity=0.7):
         """Remove AI artifacts and make audio sound human-made"""
         try:
+            print(f"Loading audio from: {audio_path}")
+            # Load the full song - handle both mono and stereo
+            y, sr = librosa.load(audio_path, sr=None, mono=False)
+            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={len(y)/sr:.2f}s")
             # If stereo, process both channels
             if len(y.shape) > 1:
+                print("Processing stereo audio...")
                 processed_channels = []
+                for i, channel in enumerate(y):
+                    print(f"Processing channel {i+1}...")
                     processed_channel = self.process_channel(channel, sr, intensity)
                     processed_channels.append(processed_channel)
+                y_processed = np.array(processed_channels)
             else:
+                print("Processing mono audio...")
                 y_processed = self.process_channel(y, sr, intensity)
+                y_processed = np.array([y_processed])  # Make it 2D for consistency
+            print("Audio processing completed successfully")
             return y_processed, sr
         except Exception as e:
+            print(f"Error in humanize_audio: {str(e)}")
             raise Exception(f"Humanization failed: {str(e)}")
     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
+        print(f"Processing channel: {len(y)} samples, intensity={intensity}")
+        # 1. Reduce robotic frequencies
         y_processed = self.reduce_ai_artifacts(y, sr, intensity)
+        # 2. Add timing variations
         y_processed = self.add_timing_variations(y_processed, sr, intensity)
+        # 3. Add pitch variations
         y_processed = self.add_pitch_variations(y_processed, sr, intensity)
+        # 4. Add room ambiance
         y_processed = self.add_room_ambiance(y_processed, sr, intensity)
+        # 5. Add analog warmth
         y_processed = self.add_analog_warmth(y_processed, sr, intensity)
         # 6. Reduce perfect quantization
     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
+        # Reduce harsh frequencies in the 2kHz-6kHz range (common AI artifacts)
+        if sr > 4000:  # Only if sample rate is high enough
+            sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
+            y_filtered = signal.sosfilt(sos, y)
+            # Blend with original based on intensity
+            y_processed = y * (1 - intensity*0.3) + y_filtered * (intensity*0.3)
+            return y_processed
+        return y
     def add_timing_variations(self, y, sr, intensity):
+        """Add subtle timing variations"""
+        if intensity < 0.1:
+            return y
+        # Create small random speed variations
+        segment_size = int(sr * 2.0)  # 2-second segments
+        segments = []
+        for i in range(0, len(y), segment_size):
+            segment = y[i:i+segment_size]
+            if len(segment) > 100:  # Only process if segment is long enough
+                # Small speed variation
+                speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
+                new_length = int(len(segment) / speed_var)
+                if new_length > 0 and len(segment) > 0:
+                    # Simple resampling for timing variation
+                    original_indices = np.arange(len(segment))
+                    new_indices = np.linspace(0, len(segment)-1, new_length)
+                    segment_varied = np.interp(new_indices, original_indices, segment)
+                    # Restore the original segment length by truncating or zero-padding
+                    if len(segment_varied) != len(segment):
+                        if len(segment_varied) > len(segment):
+                            segment_varied = segment_varied[:len(segment)]
+                        else:
+                            segment_varied = np.pad(segment_varied, (0, len(segment) - len(segment_varied)))
+                    segments.append(segment_varied)
+                else:
+                    segments.append(segment)
+            else:
+                segments.append(segment)
+        if segments:
+            return np.concatenate(segments)
+        return y
     def add_pitch_variations(self, y, sr, intensity):
+        """Add subtle pitch variations"""
+        if intensity < 0.2:
+            return y
         try:
+            # Use librosa for pitch shifting (more reliable)
+            n_steps = np.random.normal(0, 0.1 * intensity)
+            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps, bins_per_octave=24)
+            # Blend with original
+            blend_factor = 0.15 * intensity
+            return y * (1 - blend_factor) + y_shifted * blend_factor
         except:
             return y
     def add_room_ambiance(self, y, sr, intensity):
+        """Add natural room reverb"""
+        if intensity < 0.1:
+            return y
+        # Simple impulse response for natural room
+        impulse_length = int(0.2 * sr)  # 200ms reverb
+        if impulse_length < 10:
+            return y
         impulse = np.zeros(impulse_length)
         # Early reflections
+        early_reflections = int(0.01 * sr)  # 10ms
+        if early_reflections < len(impulse):
+            impulse[early_reflections] = 0.6
         # Late reverb tail
+        reverb_start = min(early_reflections + 1, len(impulse))
+        if reverb_start < len(impulse):
+            tail_length = len(impulse) - reverb_start
+            decay = np.exp(-np.linspace(0, 8, tail_length))
+            impulse[reverb_start:] = decay * 0.3
+        # Normalize impulse
+        if np.max(np.abs(impulse)) > 0:
+            impulse = impulse / np.max(np.abs(impulse))
+        # Apply convolution
+        try:
+            y_reverb = signal.convolve(y, impulse, mode='same')
+            # Normalize to prevent clipping
+            if np.max(np.abs(y_reverb)) > 0:
+                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))
+            # Blend with original
+            blend_factor = 0.08 * intensity
+            return y * (1 - blend_factor) + y_reverb * blend_factor
+        except:
+            return y
     def add_analog_warmth(self, y, sr, intensity):
+        """Add analog-style warmth"""
         # Soft clipping saturation
+        saturation_amount = 1.0 + 0.3 * intensity
         y_saturated = np.tanh(y * saturation_amount) / saturation_amount
+        # Add subtle warmth with EQ
+        try:
+            # Gentle 80 Hz high-pass (attenuates sub-bass; not a low-end boost)
+            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
+            y_warm = signal.sosfilt(sos, y_saturated)
+            # Blend
+            blend_factor = 0.1 * intensity
+            return y * (1 - blend_factor) + y_warm * blend_factor
+        except:
+            return y_saturated
     def reduce_perfect_quantization(self, y, sr, intensity):
+        """Reduce perfectly quantized timing with amplitude variations"""
         # Add subtle random amplitude variations
         t = np.linspace(0, len(y)/sr, len(y))
+        # Low-frequency amplitude modulation
+        lfo_rate = 0.3 + 0.4 * intensity  # Hz
+        lfo_depth = 0.03 * intensity
         amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
         # Random micro-variations
+        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, len(y))
         # Combine variations
         total_variation = amplitude_variation * random_variation
 def humanize_song(input_mp3, intensity):
     """Main humanization function"""
     if input_mp3 is None:
+        return None, "Please upload an audio file"
     humanizer = AIHumanizer()
     try:
+        print("Starting humanization process...")
         # Process the entire song to remove AI artifacts
         audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
+        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")
+        # Save as WAV (more reliable than MP3)
+        output_path = tempfile.mktemp(suffix='_humanized.wav')
+        # Ensure data is in correct format
+        if len(audio_data.shape) > 1:
+            audio_data = audio_data.T  # Transpose for soundfile
         sf.write(output_path, audio_data, sr)
+        print(f"Audio saved successfully to: {output_path}")
         return output_path, "✅ Song humanized! AI artifacts removed and human feel added."
     except Exception as e:
+        error_msg = f"❌ Error: {str(e)}"
+        print(error_msg)
+        return None, error_msg
+# Simple and reliable interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
+    **Remove AI Detection - Make Your Songs Sound Human-Made**
+    *Upload your AI-generated song → Remove robotic artifacts → Download natural-sounding version*
     """)
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown("### 1. Upload AI Song")
             input_audio = gr.Audio(
+                sources=["upload", "microphone"],
                 type="filepath",
+                label="Upload your complete AI-generated song",
+                editable=True
             )
+            gr.Markdown("### 2. Humanization Strength")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
+                label="How much human feel to add",
+                info="Lower = subtle, Higher = more natural/organic"
             )
             process_btn = gr.Button(
+                "🎹 Humanize This Song",
                 variant="primary",
                 size="lg"
             )
         with gr.Column(scale=1):
+            gr.Markdown("### 3. Download Result")
             output_audio = gr.Audio(
+                label="Your Human-Sounding Song",
                 type="filepath",
                 interactive=False
             )
             status = gr.Textbox(
+                label="Status",
+                interactive=False,
+                max_lines=3
             )
+    with gr.Accordion("💡 How It Works", open=True):
         gr.Markdown("""
+        **This tool processes your EXISTING song to remove AI characteristics:**
+        ✅ **Keeps Everything Original:**
         - Your complete song structure
+        - All vocals and instruments
+        - Melody and arrangement
+        - Everything you created
+        🎛️ **Removes AI Artifacts:**
+        - Robotic/metallic frequencies
+        - Perfect digital quantization
+        - Sterile, artificial sound
+        - AI-generated frequency patterns
+        🎵 **Adds Human Elements:**
+        - Natural timing variations
+        - Subtle pitch fluctuations
+        - Room ambiance and warmth
+        - Analog-style character
+        **Result:** Your same song, but it sounds like humans performed it!
         """)
+    # Processing function
     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
     )
 if __name__ == "__main__":
+    demo.launch(debug=True)