FILMITO committed
Commit 058c00b · verified · 1 Parent(s): 20ef8ef

Update app.py

Files changed (1)
  app.py +148 -161
app.py CHANGED
@@ -15,24 +15,24 @@ class AIHumanizer:
         try:
             print(f"Loading audio from: {audio_path}")

-            # Load the full song - handle both mono and stereo
             y, sr = librosa.load(audio_path, sr=None, mono=False)

-            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={len(y)/sr:.2f}s")

             # If stereo, process both channels
             if len(y.shape) > 1:
                 print("Processing stereo audio...")
                 processed_channels = []
-                for i, channel in enumerate(y):
                     print(f"Processing channel {i+1}...")
-                    processed_channel = self.process_channel(channel, sr, intensity)
                     processed_channels.append(processed_channel)
                 y_processed = np.array(processed_channels)
             else:
                 print("Processing mono audio...")
                 y_processed = self.process_channel(y, sr, intensity)
-                y_processed = np.array([y_processed])  # Make it 2D for consistency

             print("Audio processing completed successfully")
             return y_processed, sr
@@ -43,232 +43,235 @@ class AIHumanizer:

     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
-        print(f"Processing channel: {len(y)} samples, intensity={intensity}")

         # 1. Reduce robotic frequencies
-        y_processed = self.reduce_ai_artifacts(y, sr, intensity)

         # 2. Add timing variations
-        y_processed = self.add_timing_variations(y_processed, sr, intensity)

         # 3. Add pitch variations
-        y_processed = self.add_pitch_variations(y_processed, sr, intensity)

         # 4. Add room ambiance
-        y_processed = self.add_room_ambiance(y_processed, sr, intensity)

         # 5. Add analog warmth
-        y_processed = self.add_analog_warmth(y_processed, sr, intensity)

         # 6. Reduce perfect quantization
-        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)

-        return y_processed

     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
-        # Reduce harsh frequencies in the 2kHz-6kHz range (common AI artifacts)
-        if sr > 4000:  # Only if sample rate is high enough
-            sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
-            y_filtered = signal.sosfilt(sos, y)
-
-            # Blend with original based on intensity
-            y_processed = y * (1 - intensity*0.3) + y_filtered * (intensity*0.3)
-            return y_processed
         return y

     def add_timing_variations(self, y, sr, intensity):
         """Add subtle timing variations"""
-        if intensity < 0.1:
             return y

-        # Create small random speed variations
-        segment_size = int(sr * 2.0)  # 2-second segments
-        segments = []
-
-        for i in range(0, len(y), segment_size):
-            segment = y[i:i+segment_size]
-            if len(segment) > 100:  # Only process if segment is long enough
-                # Small speed variation
-                speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
-                new_length = int(len(segment) / speed_var)

-                if new_length > 0 and len(segment) > 0:
-                    # Simple resampling for timing variation
-                    original_indices = np.arange(len(segment))
-                    new_indices = np.linspace(0, len(segment)-1, new_length)
-                    segment_varied = np.interp(new_indices, original_indices, segment)

-                    # Resample back to original length if needed
-                    if len(segment_varied) != len(segment):
-                        if len(segment_varied) > len(segment):
-                            segment_varied = segment_varied[:len(segment)]
-                        else:
-                            segment_varied = np.pad(segment_varied, (0, len(segment) - len(segment_varied)))

-                    segments.append(segment_varied)
                 else:
                     segments.append(segment)
-            else:
-                segments.append(segment)
-
-        if segments:
             return np.concatenate(segments)
-        return y

     def add_pitch_variations(self, y, sr, intensity):
         """Add subtle pitch variations"""
-        if intensity < 0.2:
             return y

         try:
-            # Use librosa for pitch shifting (more reliable)
-            n_steps = np.random.normal(0, 0.1 * intensity)
-            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps, bins_per_octave=24)

-            # Blend with original
-            blend_factor = 0.15 * intensity
             return y * (1 - blend_factor) + y_shifted * blend_factor
         except:
             return y

     def add_room_ambiance(self, y, sr, intensity):
         """Add natural room reverb"""
-        if intensity < 0.1:
-            return y
-
-        # Simple impulse response for natural room
-        impulse_length = int(0.2 * sr)  # 200ms reverb
-        if impulse_length < 10:
             return y

-        impulse = np.zeros(impulse_length)
-
-        # Early reflections
-        early_reflections = int(0.01 * sr)  # 10ms
-        if early_reflections < len(impulse):
-            impulse[early_reflections] = 0.6
-
-        # Late reverb tail
-        reverb_start = min(early_reflections + 1, len(impulse))
-        if reverb_start < len(impulse):
-            tail_length = len(impulse) - reverb_start
-            decay = np.exp(-np.linspace(0, 8, tail_length))
-            impulse[reverb_start:] = decay * 0.3
-
-        # Normalize impulse
-        if np.max(np.abs(impulse)) > 0:
-            impulse = impulse / np.max(np.abs(impulse))
-
-        # Apply convolution
         try:
             y_reverb = signal.convolve(y, impulse, mode='same')
-            # Normalize to prevent clipping
             if np.max(np.abs(y_reverb)) > 0:
                 y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))

-            # Blend with original
-            blend_factor = 0.08 * intensity
             return y * (1 - blend_factor) + y_reverb * blend_factor
         except:
             return y

     def add_analog_warmth(self, y, sr, intensity):
         """Add analog-style warmth"""
-        # Soft clipping saturation
-        saturation_amount = 1.0 + 0.3 * intensity
-        y_saturated = np.tanh(y * saturation_amount) / saturation_amount
-
-        # Add subtle warmth with EQ
         try:
-            # Gentle low-end boost
-            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
-            y_warm = signal.sosfilt(sos, y_saturated)

-            # Blend
-            blend_factor = 0.1 * intensity
             return y * (1 - blend_factor) + y_warm * blend_factor
         except:
-            return y_saturated

     def reduce_perfect_quantization(self, y, sr, intensity):
-        """Reduce perfectly quantized timing with amplitude variations"""
-        # Add subtle random amplitude variations
-        t = np.linspace(0, len(y)/sr, len(y))
-
-        # Low-frequency amplitude modulation
-        lfo_rate = 0.3 + 0.4 * intensity  # Hz
-        lfo_depth = 0.03 * intensity
-        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
-
-        # Random micro-variations
-        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, len(y))
-
-        # Combine variations
-        total_variation = amplitude_variation * random_variation

-        return y * total_variation

-def humanize_song(input_mp3, intensity):
     """Main humanization function"""
-    if input_mp3 is None:
         return None, "Please upload an audio file"

     humanizer = AIHumanizer()

     try:
-        print("Starting humanization process...")

-        # Process the entire song to remove AI artifacts
-        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)

-        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")

-        # Save as WAV (more reliable than MP3)
-        output_path = tempfile.mktemp(suffix='_humanized.wav')

-        # Ensure data is in correct format
-        if len(audio_data.shape) > 1:
-            audio_data = audio_data.T  # Transpose for soundfile

-        sf.write(output_path, audio_data, sr)
-        print(f"Audio saved successfully to: {output_path}")

-        return output_path, "✅ Song humanized! AI artifacts removed and human feel added."

     except Exception as e:
         error_msg = f"❌ Error: {str(e)}"
-        print(error_msg)
         return None, error_msg

-# Simple and reliable interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
-    **Remove AI Detection - Make Your Songs Sound Human-Made**

-    *Upload your AI-generated song → Remove robotic artifacts → Download natural-sounding version*
     """)

     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 1. Upload AI Song")
             input_audio = gr.Audio(
-                sources=["upload", "microphone"],
                 type="filepath",
-                label="Upload your complete AI-generated song",
-                editable=True
             )

             gr.Markdown("### 2. Humanization Strength")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
-                label="How much human feel to add",
-                info="Lower = subtle, Higher = more natural/organic"
             )

             process_btn = gr.Button(
-                "🎹 Humanize This Song",
                 variant="primary",
                 size="lg"
             )
@@ -276,43 +279,27 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:

         with gr.Column(scale=1):
             gr.Markdown("### 3. Download Result")
             output_audio = gr.Audio(
-                label="Your Human-Sounding Song",
-                type="filepath",
-                interactive=False
             )

             status = gr.Textbox(
                 label="Status",
-                interactive=False,
-                max_lines=3
             )

-    with gr.Accordion("💡 How It Works", open=True):
         gr.Markdown("""
-        **This tool processes your EXISTING song to remove AI characteristics:**
-
-        ✅ **Keeps Everything Original:**
-        - Your complete song structure
-        - All vocals and instruments
-        - Melody and arrangement
-        - Everything you created
-
-        🎛️ **Removes AI Artifacts:**
-        - Robotic/metallic frequencies
-        - Perfect digital quantization
-        - Sterile, artificial sound
-        - AI-generated frequency patterns
-
-        🎵 **Adds Human Elements:**
-        - Natural timing variations
-        - Subtle pitch fluctuations
-        - Room ambiance and warmth
-        - Analog-style character

-        **Result:** Your same song, but it sounds like humans performed it!
         """)

-    # Processing function
     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
@@ -320,4 +307,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     )

 if __name__ == "__main__":
-    demo.launch(debug=True)

         try:
             print(f"Loading audio from: {audio_path}")

+            # Load the full song
             y, sr = librosa.load(audio_path, sr=None, mono=False)

+            print(f"Audio loaded: shape={y.shape if hasattr(y, 'shape') else 'mono'}, sr={sr}")

             # If stereo, process both channels
             if len(y.shape) > 1:
                 print("Processing stereo audio...")
                 processed_channels = []
+                for i in range(y.shape[0]):
                     print(f"Processing channel {i+1}...")
+                    processed_channel = self.process_channel(y[i], sr, intensity)
                     processed_channels.append(processed_channel)
                 y_processed = np.array(processed_channels)
             else:
                 print("Processing mono audio...")
                 y_processed = self.process_channel(y, sr, intensity)
+                y_processed = np.array([y_processed])

             print("Audio processing completed successfully")
             return y_processed, sr
 
     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
+        print(f"Processing channel: {len(y)} samples")
+
+        # Store original for blending
+        y_original = y.copy()

         # 1. Reduce robotic frequencies
+        y = self.reduce_ai_artifacts(y, sr, intensity)

         # 2. Add timing variations
+        y = self.add_timing_variations(y, sr, intensity)

         # 3. Add pitch variations
+        y = self.add_pitch_variations(y, sr, intensity)

         # 4. Add room ambiance
+        y = self.add_room_ambiance(y, sr, intensity)

         # 5. Add analog warmth
+        y = self.add_analog_warmth(y, sr, intensity)

         # 6. Reduce perfect quantization
+        y = self.reduce_perfect_quantization(y, sr, intensity)

+        return y
 
     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
+        if sr > 4000 and intensity > 0.1:
+            try:
+                # Reduce harsh frequencies in the 2kHz-6kHz range
+                sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
+                y_filtered = signal.sosfilt(sos, y)
+
+                # Blend with original
+                blend_factor = 0.3 * intensity
+                return y * (1 - blend_factor) + y_filtered * blend_factor
+            except:
+                return y
         return y
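The rewritten reduce_ai_artifacts keeps a 4th-order Butterworth band-stop over 1.9-6.1 kHz and mixes at most 30% of the filtered signal back in (blend_factor = 0.3 * intensity), so in-band content is attenuated rather than removed. A minimal standalone check of that filter, not part of the commit, using scipy's sosfreqz:

# Standalone check of the band-stop used above (assumes scipy is installed).
import numpy as np
from scipy import signal

sr = 44100
sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')

# Probe the frequency response at a few points.
freqs = [440, 1000, 1900, 4000, 6100, 10000]
w, h = signal.sosfreqz(sos, worN=freqs, fs=sr)
for f, gain in zip(w, np.abs(h)):
    print(f"{f:7.0f} Hz: gain = {gain:.3f}")
# Expect ~1.0 well outside the band, ~0.71 at the -3 dB edges, near 0 mid-band.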
 
     def add_timing_variations(self, y, sr, intensity):
         """Add subtle timing variations"""
+        if intensity < 0.2:
             return y

+        try:
+            # Simple approach: small random stretches
+            segment_size = int(sr * 1.0)  # 1-second segments
+            if len(y) < segment_size * 2:
+                return y

+            segments = []
+            for i in range(0, len(y), segment_size):
+                segment = y[i:i+segment_size]
+                if len(segment) == segment_size:
+                    # Small random stretch
+                    stretch = 1.0 + np.random.uniform(-0.01, 0.01) * intensity
+                    new_len = int(segment_size * stretch)
+
+                    # Resample
+                    x_old = np.linspace(0, 1, segment_size)
+                    x_new = np.linspace(0, 1, new_len)
+                    segment_stretched = np.interp(x_new, x_old, segment)

+                    # Trim or pad to original length
+                    if len(segment_stretched) > segment_size:
+                        segment_stretched = segment_stretched[:segment_size]
+                    else:
+                        segment_stretched = np.pad(segment_stretched, (0, segment_size - len(segment_stretched)))

+                    segments.append(segment_stretched)
                 else:
                     segments.append(segment)
+
             return np.concatenate(segments)
+        except:
+            return y
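The new timing pass stretches each 1-second segment by up to +/-1% with np.interp, then trims or zero-pads back so segment boundaries stay aligned. A self-contained sketch of that step on a synthetic tone (all names hypothetical):

# Per-segment stretch, isolated from the class for illustration.
import numpy as np

sr = 22050
segment = np.sin(2 * np.pi * 220 * np.arange(sr) / sr)  # one 1-second segment

stretch = 1.0 + np.random.uniform(-0.01, 0.01)          # up to +/-1% speed change
new_len = int(len(segment) * stretch)

x_old = np.linspace(0, 1, len(segment))
x_new = np.linspace(0, 1, new_len)
stretched = np.interp(x_new, x_old, segment)

# Trim or zero-pad back so the segment keeps its original length.
if len(stretched) > len(segment):
    stretched = stretched[:len(segment)]
else:
    stretched = np.pad(stretched, (0, len(segment) - len(stretched)))

print(len(segment), "->", len(stretched))  # always 22050 after trim/pad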
 
     def add_pitch_variations(self, y, sr, intensity):
         """Add subtle pitch variations"""
+        if intensity < 0.3:
             return y

         try:
+            # Small random pitch shifts
+            n_steps = np.random.uniform(-0.2, 0.2) * intensity
+            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

+            # Blend
+            blend_factor = 0.2 * intensity
             return y * (1 - blend_factor) + y_shifted * blend_factor
         except:
             return y
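pitch_shift here is librosa.effects.pitch_shift with its standard (y, sr=..., n_steps=...) signature; blending the shifted copy at 0.2 * intensity gives a subtle detune rather than a full transposition. A small sketch on a test tone, assuming librosa is installed:

# Blended micro pitch shift on a synthetic tone.
import numpy as np
import librosa

sr = 22050
y = librosa.tone(440, sr=sr, duration=1.0)   # 1 s test tone

n_steps = np.random.uniform(-0.2, 0.2)       # fraction of a semitone
y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

blend = 0.2                                  # as above with intensity = 1.0
y_out = y * (1 - blend) + y_shifted * blend
print(f"shifted by {n_steps:+.3f} semitones, output RMS {np.sqrt(np.mean(y_out**2)):.3f}")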
 
     def add_room_ambiance(self, y, sr, intensity):
         """Add natural room reverb"""
+        if intensity < 0.2:
             return y

         try:
+            # Simple reverb impulse
+            impulse_len = int(0.15 * sr)
+            if impulse_len < 10:
+                return y
+
+            impulse = np.zeros(impulse_len)
+            # Early reflection
+            early = int(0.01 * sr)
+            if early < impulse_len:
+                impulse[early] = 0.8
+            # Reverb tail
+            tail_start = min(early + 1, impulse_len)
+            if tail_start < impulse_len:
+                tail_len = impulse_len - tail_start
+                decay = np.exp(-np.linspace(0, 6, tail_len))
+                impulse[tail_start:] = decay * 0.4
+
+            # Apply convolution
             y_reverb = signal.convolve(y, impulse, mode='same')
+            # Normalize
             if np.max(np.abs(y_reverb)) > 0:
                 y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))

+            # Blend
+            blend_factor = 0.1 * intensity
             return y * (1 - blend_factor) + y_reverb * blend_factor
         except:
             return y
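The reverb is a plain FIR convolution with a 150 ms impulse (one early reflection plus an exponential tail); mode='same' keeps the output the same length as the input. For minutes-long songs, scipy.signal.fftconvolve (which convolve's default method='auto' also selects when faster) does this in FFT time. A standalone sketch with stand-in audio:

# Convolution reverb step in isolation (names hypothetical).
import numpy as np
from scipy import signal

sr = 22050
y = np.random.randn(sr * 5) * 0.1         # 5 s of stand-in audio

impulse = np.zeros(int(0.15 * sr))
early = int(0.01 * sr)
impulse[early] = 0.8                      # single early reflection at 10 ms
tail = np.exp(-np.linspace(0, 6, len(impulse) - early - 1))
impulse[early + 1:] = tail * 0.4

# mode='same' keeps the output aligned with (and as long as) the input.
y_reverb = signal.fftconvolve(y, impulse, mode='same')
print(y.shape, y_reverb.shape)            # identical lengths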
 
     def add_analog_warmth(self, y, sr, intensity):
         """Add analog-style warmth"""
+        if intensity < 0.1:
+            return y
+
         try:
+            # Soft clipping
+            saturation = 1.0 + 0.4 * intensity
+            y_warm = np.tanh(y * saturation) / saturation

+            # Gentle low boost
+            if sr > 1000:
+                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
+                y_warm = signal.sosfilt(sos, y_warm)
+
+            blend_factor = 0.15 * intensity
             return y * (1 - blend_factor) + y_warm * blend_factor
         except:
+            return y
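tanh(x * s) / s is nearly the identity for quiet samples and compresses only the peaks, which is what makes this a soft clipper rather than a hard one. A quick numeric check at intensity = 1.0:

# Shape of the tanh soft-clip curve used above.
import numpy as np

saturation = 1.4                          # 1.0 + 0.4 * intensity at intensity = 1.0
for x in [0.05, 0.2, 0.5, 0.9, 1.0]:
    y = np.tanh(x * saturation) / saturation
    print(f"in {x:.2f} -> out {y:.3f}")
# Small samples pass almost unchanged; peaks are pulled down smoothly,
# so the stage adds harmonics without hard digital clipping.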
 
     def reduce_perfect_quantization(self, y, sr, intensity):
+        """Reduce perfectly quantized timing"""
+        if intensity < 0.1:
+            return y
+
+        # Add subtle amplitude variations
+        t = np.arange(len(y)) / sr
+        # Slow LFO for natural dynamics
+        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
+        # Faster LFO for micro-variations
+        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
+        # Random noise
+        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))

+        combined = lfo1 * lfo2 * noise
+        return y * combined
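At intensity = 1.0 the two LFOs contribute about +/-3% deterministic gain variation and the noise roughly 0.5% sigma, so the modulation stays close to unity. A bound check:

# Depth of the combined amplitude modulation (intensity = 1.0).
import numpy as np

sr = 22050
t = np.arange(sr * 10) / sr               # 10 s
lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02
lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01
noise = 1.0 + np.random.normal(0, 0.005, len(t))

combined = lfo1 * lfo2 * noise
print(f"min {combined.min():.3f}, max {combined.max():.3f}")
# The gain riding stays within a few percent of 1.0, so it reads as
# subtle "breathing" rather than audible tremolo.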
 
+def humanize_song(input_audio, intensity):
     """Main humanization function"""
+    if input_audio is None:
         return None, "Please upload an audio file"

     humanizer = AIHumanizer()

     try:
+        print("Starting humanization...")

+        # Get the file path from the audio input
+        audio_path = input_audio

+        # Process the audio
+        audio_data, sr = humanizer.humanize_audio(audio_path, intensity)

+        print(f"Processing complete. Saving audio...")

+        # Save as WAV
+        output_path = tempfile.mktemp(suffix='_humanized.wav')

+        # Handle stereo/mono properly
+        if audio_data.shape[0] == 1:
+            # Mono
+            sf.write(output_path, audio_data[0], sr)
+        else:
+            # Stereo - transpose for soundfile
+            sf.write(output_path, audio_data.T, sr)

+        print(f"Saved to: {output_path}")
+        return output_path, "✅ Success! Your song now sounds human-made. Download below."

     except Exception as e:
         error_msg = f"❌ Error: {str(e)}"
+        print(f"Error details: {error_msg}")
         return None, error_msg
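soundfile writes 2-D arrays as (frames, channels), while this pipeline carries (channels, frames), hence the transpose for stereo. Worth noting too that tempfile.mktemp is deprecated; tempfile.NamedTemporaryFile(delete=False).name is the usual substitute. A sketch of the layout convention (output path hypothetical):

# Why the transpose: soundfile expects (frames, channels).
import numpy as np
import soundfile as sf

sr = 22050
stereo = np.random.randn(2, sr) * 0.1     # pipeline layout: (channels, frames)

sf.write('/tmp/example_humanized.wav', stereo.T, sr)   # -> (frames, channels)

data, sr_read = sf.read('/tmp/example_humanized.wav')
print(data.shape, sr_read)                # (22050, 2) 22050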
 
+# Simple and compatible interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
+    **Remove AI Detection - Make Songs Sound Human-Made**

+    *Upload AI song → Remove robotic sound → Download human version*
     """)

     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 1. Upload AI Song")
             input_audio = gr.Audio(
+                sources=["upload"],
                 type="filepath",
+                label="Upload your AI-generated song"
             )

             gr.Markdown("### 2. Humanization Strength")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
+                label="How much human feel to add"
             )

             process_btn = gr.Button(
+                "✨ Humanize Song",
                 variant="primary",
                 size="lg"
             )

         with gr.Column(scale=1):
             gr.Markdown("### 3. Download Result")
             output_audio = gr.Audio(
+                label="Human-Sounding Version",
+                type="filepath"
             )

             status = gr.Textbox(
                 label="Status",
+                interactive=False
             )

+    with gr.Accordion("📖 How It Works", open=False):
         gr.Markdown("""
+        **This tool:**
+        - Takes your complete AI-generated song
+        - Removes robotic/AI artifacts
+        - Adds natural human performance variations
+        - Keeps your original music intact
+        - Makes it sound like humans performed it

+        **Perfect for making AI music undetectable!**
         """)

     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
     )

 if __name__ == "__main__":
+    demo.launch()
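One way to smoke-test the committed app locally: launch it with python app.py, or call humanize_song directly, since the launch is guarded by __main__. The input path below is hypothetical:

# Possible local smoke test for this commit (input path hypothetical).
from app import humanize_song

output_path, status = humanize_song('my_ai_track.mp3', intensity=0.7)
print(status)
if output_path:
    print(f"Humanized file written to: {output_path}")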