Spaces:

FILMITO
/

HumanizeBot

Sleeping

App Files Files Community

FILMITO committed 20 days ago

Commit

14849b4

verified ·

1 Parent(s): feb9588

Update app.py

Browse files

Files changed (1) hide show

app.py +209 -391

app.py CHANGED Viewed

@@ -1,460 +1,278 @@
 import gradio as gr
-import pretty_midi
 import numpy as np
 import tempfile
 import librosa
 import soundfile as sf
-import os
-class CompleteSongHumanizer:
     def __init__(self):
-        # Complete band setups for each style
-        self.band_setups = {
-            "pop": {
-                "drums": {"program": 0, "name": "Pop Drums"},
-                "bass": {"program": 33, "name": "Bass Guitar"},
-                "rhythm": {"program": 25, "name": "Acoustic Guitar"},
-                "piano": {"program": 1, "name": "Piano"},
-                "strings": {"program": 49, "name": "String Ensemble"},
-                "synth": {"program": 81, "name": "Lead Synth"}
-            },
-            "electronic": {
-                "drums": {"program": 0, "name": "EDM Drums"},
-                "bass": {"program": 39, "name": "Synth Bass"},
-                "lead": {"program": 81, "name": "Lead Synth"},
-                "pad": {"program": 89, "name": "Warm Pad"},
-                "fx": {"program": 103, "name": "FX"},
-                "chords": {"program": 5, "name": "Electric Piano"}
-            },
-            "rock": {
-                "drums": {"program": 0, "name": "Rock Drums"},
-                "bass": {"program": 33, "name": "Bass Guitar"},
-                "guitar1": {"program": 30, "name": "Distortion Guitar"},
-                "guitar2": {"program": 27, "name": "Clean Guitar"},
-                "keys": {"program": 5, "name": "Electric Piano"},
-                "strings": {"program": 48, "name": "String Ensemble"}
-            },
-            "cinematic": {
-                "drums": {"program": 0, "name": "Orchestral Percussion"},
-                "strings1": {"program": 48, "name": "String Ensemble"},
-                "strings2": {"program": 49, "name": "Slow Strings"},
-                "brass": {"program": 61, "name": "French Horn"},
-                "woodwinds": {"program": 68, "name": "Oboe"},
-                "harp": {"program": 46, "name": "Harp"}
-            }
-        }
-        # Song structures
-        self.song_structures = {
-            "pop": ["intro", "verse", "chorus", "verse", "chorus", "bridge", "chorus", "outro"],
-            "electronic": ["intro", "build", "drop", "break", "build", "drop", "outro"],
-            "rock": ["intro", "verse", "chorus", "verse", "chorus", "solo", "chorus", "outro"],
-            "cinematic": ["intro", "theme", "build", "climax", "resolution", "outro"]
-        }
-    def create_complete_song(self, mp3_path, style="pop", intensity=0.7):
-        """Create a complete humanized song from MP3"""
         try:
-            # Load and analyze the audio
-            y, sr = librosa.load(mp3_path, sr=22050, mono=True)
-            duration = len(y) / sr
-            # Create MIDI object
-            midi = pretty_midi.PrettyMIDI()
-            # Setup complete band
-            band = self.setup_band(midi, style)
-            # Analyze audio to get musical content
-            audio_features = self.analyze_audio_features(y, sr, duration)
-            # Create complete song structure
-            self.build_complete_song(midi, band, style, audio_features, duration, intensity)
-            # Apply advanced humanization
-            self.apply_complete_humanization(midi, intensity)
-            # Synthesize to audio
-            audio_data = midi.synthesize()
-            return audio_data, sr
         except Exception as e:
-            raise Exception(f"Song creation failed: {str(e)}")
-    def setup_band(self, midi, style):
-        """Setup complete band instruments"""
-        band = {}
-        for role, inst_info in self.band_setups[style].items():
-            instrument = pretty_midi.Instrument(
-                program=inst_info["program"],
-                is_drum=(role == "drums"),
-                name=inst_info["name"]
-            )
-            midi.instruments.append(instrument)
-            band[role] = instrument
-        return band
-    def analyze_audio_features(self, y, sr, duration):
-        """Extract musical features from audio"""
-        # Get tempo and beats
-        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='time')
-        # Detect onsets for melody extraction
-        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, delta=0.08)
-        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
-        # Extract melody contour
-        melody_contour = []
-        for onset_time in onset_times[:50]:  # Analyze first 50 onsets
-            start_idx = int(onset_time * sr)
-            end_idx = min(start_idx + int(0.4 * sr), len(y))
-            if end_idx > start_idx:
-                segment = y[start_idx:end_idx]
-                freq = self.detect_pitch(segment, sr)
-                if 100 < freq < 1000:
-                    melody_contour.append((onset_time, freq))
-        # Detect energy changes for song sections
-        energy = librosa.feature.rms(y=y)[0]
-        energy_times = librosa.times_like(energy, sr=sr)
-        return {
-            'tempo': tempo if tempo else 120,
-            'beats': beat_frames if len(beat_frames) > 0 else np.linspace(0, duration, 32),
-            'melody_contour': melody_contour,
-            'energy': list(zip(energy_times, energy)),
-            'duration': duration
-        }
-    def detect_pitch(self, segment, sr):
-        """Detect pitch in audio segment"""
-        try:
-            # Simple FFT-based pitch detection
-            fft = np.fft.rfft(segment * np.hanning(len(segment)))
-            freqs = np.fft.rfftfreq(len(segment), 1/sr)
-            mags = np.abs(fft)
-            # Find strongest frequency in reasonable range
-            mask = (freqs > 80) & (freqs < 1200)
-            if np.any(mask):
-                peak_idx = np.argmax(mags[mask])
-                return freqs[mask][peak_idx]
-        except:
-            pass
-        return 440
-    def build_complete_song(self, midi, band, style, features, duration, intensity):
-        """Build complete song with structure"""
-        structure = self.song_structures[style]
-        section_duration = duration / len(structure)
-        for section_idx, section_name in enumerate(structure):
-            start_time = section_idx * section_duration
-            end_time = (section_idx + 1) * section_duration
-            # Create section-specific music
-            self.create_section(
-                band, style, section_name, section_idx,
-                start_time, end_time, features, intensity
-            )
-    def create_section(self, band, style, section_name, section_idx, start_time, end_time, features, intensity):
-        """Create music for a specific song section"""
-        section_duration = end_time - start_time
-        # Get beats for this section
-        section_beats = [t for t in features['beats'] if start_time <= t < end_time]
-        if not section_beats:
-            section_beats = np.linspace(start_time, end_time, 8)
-        # Section-specific arrangements
-        if section_name in ["intro", "outro"]:
-            self.create_intro_outro(band, style, section_name, start_time, end_time, section_beats)
-        elif section_name in ["verse", "theme"]:
-            self.create_verse(band, style, start_time, end_time, section_beats, features)
-        elif section_name in ["chorus", "drop", "climax"]:
-            self.create_chorus(band, style, start_time, end_time, section_beats, features)
-        elif section_name in ["bridge", "break", "solo"]:
-            self.create_bridge(band, style, start_time, end_time, section_beats, features)
-        elif section_name in ["build"]:
-            self.create_build(band, style, start_time, end_time, section_beats, features)
-    def create_intro_outro(self, band, style, section_name, start_time, end_time, beats):
-        """Create intro/outro sections"""
-        # Drums - simple pattern
-        if 'drums' in band:
-            for i, beat_time in enumerate(beats):
-                if i % 4 == 0:  # Kick on downbeat
-                    band['drums'].notes.append(self.create_note(36, 80, beat_time, 0.3))
-                if section_name == "intro" and i % 2 == 0:  # Hi-hat in intro
-                    band['drums'].notes.append(self.create_note(42, 60, beat_time, 0.1))
-        # Bass - simple root notes
-        if 'bass' in band:
-            root_note = 36 if style != "cinematic" else 48
-            for i, beat_time in enumerate(beats[::2]):
-                band['bass'].notes.append(self.create_note(root_note, 70, beat_time, 0.8))
-        # Pad/strings - atmospheric
-        pad_instrument = next((inst for role, inst in band.items() if 'pad' in role or 'string' in role), None)
-        if pad_instrument:
-            chord_notes = self.get_chord_for_section(style, section_name, 0)
-            for note_pitch in chord_notes:
-                pad_instrument.notes.append(self.create_note(note_pitch, 50, start_time, end_time - start_time))
-    def create_verse(self, band, style, start_time, end_time, beats, features):
-        """Create verse section"""
-        # Full drum pattern
-        if 'drums' in band:
-            for i, beat_time in enumerate(beats):
-                # Kick on 1 and 3
-                if i % 4 in [0, 2]:
-                    band['drums'].notes.append(self.create_note(36, 85, beat_time, 0.3))
-                # Snare on 2 and 4
-                if i % 4 in [1, 3]:
-                    band['drums'].notes.append(self.create_note(38, 80, beat_time, 0.25))
-                # Hi-hats
-                if style in ["pop", "electronic"]:
-                    band['drums'].notes.append(self.create_note(42, 65, beat_time, 0.1))
-        # Bass line
-        if 'bass' in band:
-            bass_line = self.create_bass_line(beats, style)
-            for note in bass_line:
-                band['bass'].notes.append(note)
-        # Melody from audio analysis
-        if 'lead' in band or 'guitar1' in band:
-            lead_instrument = band.get('lead') or band.get('guitar1')
-            if lead_instrument:
-                melody = self.extract_melody_for_section(features['melody_contour'], start_time, end_time)
-                for time, freq in melody:
-                    midi_note = self.freq_to_midi(freq)
-                    if 48 <= midi_note <= 84:
-                        lead_instrument.notes.append(self.create_note(midi_note, 80, time, 0.4))
-    def create_chorus(self, band, style, start_time, end_time, beats, features):
-        """Create chorus/drop section - more intense"""
-        # Energetic drums
-        if 'drums' in band:
-            for i, beat_time in enumerate(beats):
-                # Stronger kicks
-                if i % 4 in [0, 2]:
-                    band['drums'].notes.append(self.create_note(36, 95, beat_time, 0.4))
-                # Louder snares
-                if i % 4 in [1, 3]:
-                    band['drums'].notes.append(self.create_note(38, 90, beat_time, 0.3))
-                # More hi-hats
-                if style in ["pop", "electronic"]:
-                    band['drums'].notes.append(self.create_note(42, 75, beat_time, 0.15))
-                # Crash cymbal on first beat
-                if i == 0:
-                    band['drums'].notes.append(self.create_note(49, 100, beat_time, 1.0))
-        # More active bass
-        if 'bass' in band:
-            for i, beat_time in enumerate(beats):
-                bass_note = 36 + (i % 4) * 2
-                band['bass'].notes.append(self.create_note(bass_note, 85, beat_time, 0.6))
-        # All instruments play
-        for role, instrument in band.items():
-            if role not in ['drums', 'bass'] and not instrument.is_drum:
-                chord_notes = self.get_chord_for_section(style, "chorus", 0)
-                for note_pitch in chord_notes:
-                    instrument.notes.append(self.create_note(note_pitch, 70, start_time, end_time - start_time))
-    def create_bass_line(self, beats, style):
-        """Create melodic bass line"""
-        bass_notes = []
-        if style == "pop":
-            pattern = [36, 38, 41, 43, 41, 38, 36, 35]
-        elif style == "electronic":
-            pattern = [36, 36, 39, 39, 41, 41, 39, 36]
-        elif style == "rock":
-            pattern = [36, 38, 36, 41, 36, 38, 36, 43]
-        else:  # cinematic
-            pattern = [36, 39, 43, 46, 43, 39, 36, 34]
-        for i, beat_time in enumerate(beats[::2]):  # Every other beat
-            note_pitch = pattern[i % len(pattern)]
-            bass_notes.append(self.create_note(note_pitch, 80, beat_time, 0.9))
-        return bass_notes
-    def extract_melody_for_section(self, melody_contour, start_time, end_time):
-        """Extract melody notes for a specific section"""
-        section_melody = []
-        for time, freq in melody_contour:
-            if start_time <= time < end_time:
-                section_melody.append((time, freq))
-        # If no melody detected, create one
-        if not section_melody:
-            section_duration = end_time - start_time
-            for i in range(8):
-                time = start_time + (i / 8) * section_duration
-                freq = 440 * (2 ** (i / 12))  # Rising pattern
-                section_melody.append((time, freq))
-        return section_melody
-    def get_chord_for_section(self, style, section_name, section_idx):
-        """Get appropriate chords for section"""
-        if style == "pop":
-            chords = [[60, 64, 67], [65, 69, 72], [67, 71, 74], [62, 65, 69]]  # C, F, G, Am
-        elif style == "electronic":
-            chords = [[65, 69, 72], [70, 74, 77], [72, 76, 79], [67, 70, 74]]  # F, Bb, C, Gm
-        elif style == "rock":
-            chords = [[59, 62, 65], [64, 67, 71], [65, 69, 72], [62, 65, 69]]  # Bm, Em, F, Am
-        else:  # cinematic
-            chords = [[60, 63, 67], [65, 68, 72], [67, 70, 74], [62, 65, 69]]  # Cm, Fm, Gm, Ab
-        return chords[section_idx % len(chords)]
-    def freq_to_midi(self, frequency):
-        """Convert frequency to MIDI note number"""
-        return int(69 + 12 * np.log2(frequency / 440.0))
-    def create_note(self, pitch, velocity, start, duration):
-        """Helper to create a note"""
-        return pretty_midi.Note(
-            velocity=velocity,
-            pitch=pitch,
-            start=start,
-            end=start + duration
-        )
-    def apply_complete_humanization(self, midi, intensity):
-        """Apply realistic humanization to entire song"""
-        for instrument in midi.instruments:
-            # Different humanization for different instrument types
-            if instrument.is_drum:
-                timing_variance = 0.01  # Drums are tighter
-                velocity_variance = 8
-            elif any(role in instrument.name.lower() for role in ['bass', 'pad', 'string']):
-                timing_variance = 0.02  # Background instruments
-                velocity_variance = 10
-            else:
-                timing_variance = 0.03  # Lead instruments have more feel
-                velocity_variance = 15
-            for note in instrument.notes:
-                # Timing variations
-                note.start += np.random.normal(0, timing_variance * intensity)
-                note.start = max(0, note.start)
-                # Velocity variations
-                vel_change = int(np.random.normal(0, velocity_variance * intensity))
-                note.velocity = max(30, min(127, note.velocity + vel_change))
-                # Duration variations (except drums)
-                if not instrument.is_drum:
-                    dur_change = np.random.normal(0, 0.05 * intensity)
-                    note.end = max(note.start + 0.1, note.end + dur_change)
-def create_complete_song(input_mp3, style, intensity):
-    """Main function to create complete song"""
     if input_mp3 is None:
         return None, "Please upload an MP3 file"
-    humanizer = CompleteSongHumanizer()
     try:
-        # Create complete humanized song
-        audio_data, sr = humanizer.create_complete_song(input_mp3, style, intensity)
         # Save as MP3
-        output_path = tempfile.mktemp(suffix='_complete_song.mp3')
         sf.write(output_path, audio_data, sr)
-        return output_path, "🎵 Complete song created! Your humanized masterpiece is ready!"
     except Exception as e:
         return None, f"❌ Error: {str(e)}"
-# Professional interface
-with gr.Blocks(theme=gr.themes.Soft(), title="Complete Song Humanizer") as demo:
     gr.Markdown("""
-    # 🎵 Complete Song Humanizer
-    **Transform AI Music into Full, Professional Human Performances**
-    *Creates complete songs with verses, choruses, and full band arrangements*
     """)
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### 1. Upload Your AI Song")
             input_audio = gr.Audio(
                 sources=["upload"],
                 type="filepath",
-                label="Upload MP3 File"
-            )
-            gr.Markdown("### 2. Choose Song Style")
-            style = gr.Radio(
-                ["pop", "electronic", "rock", "cinematic"],
-                value="pop",
-                label="Music Genre",
-                info="Each style creates different band arrangements"
             )
             intensity = gr.Slider(
-                0.1, 1.0, value=0.8,
-                label="Human Feel Intensity",
-                info="How natural and human-like the performance sounds"
             )
-            create_btn = gr.Button(
-                "🎹 Create Complete Song",
                 variant="primary",
                 size="lg"
             )
         with gr.Column(scale=1):
-            gr.Markdown("### 3. Your Complete Song")
             output_audio = gr.Audio(
-                label="Your Humanized Masterpiece",
                 type="filepath",
                 interactive=False
             )
             status = gr.Textbox(
-                label="Creation Status",
                 interactive=False
             )
-    # Song details
-    with gr.Accordion("🎼 What You're Getting", open=True):
         gr.Markdown("""
-        **Each song includes:**
-        **🎵 Complete Song Structure:**
-        - Intro, Verses, Choruses, Bridge, Outro
-        - Professional arrangement with buildup and climax
-        - Dynamic changes between sections
-        **🎸 Full Band Arrangement:**
-        - **Pop**: Drums, Bass, Guitar, Piano, Strings, Synth (6 instruments)
-        - **Electronic**: EDM Drums, Synth Bass, Lead, Pad, FX, Electric Piano (6 instruments)
-        - **Rock**: Rock Drums, Bass, 2 Guitars, Keys, Strings (6 instruments)
-        - **Cinematic**: Orchestral Drums, 2 String sections, Brass, Woodwinds, Harp (6 instruments)
-        **🎛️ Professional Production:**
-        - Realistic human timing variations
-        - Dynamic velocity changes
-        - Section-specific arrangements
-        - Melodic development
-        - Harmonic progression
-        **⏱️ Processing Time:** ~15-30 seconds for a complete song
         """)
-    # Creation process
-    create_btn.click(
-        fn=create_complete_song,
-        inputs=[input_audio, style, intensity],
         outputs=[output_audio, status]
     )

 import gradio as gr
 import numpy as np
 import tempfile
 import librosa
 import soundfile as sf
+import noisereduce as nr
+from scipy import signal
class AIHumanizer:
    """Apply a chain of subtle audio effects to an existing recording.

    Every stage takes a 1-D sample array, a sample rate and an ``intensity``
    scalar, and returns an array of the same length in which the processed
    signal has been blended with the dry input. ``intensity`` scales both the
    strength of each effect and its blend factor.
    """

    def __init__(self):
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Load an audio file and run every channel through the effect chain.

        Args:
            audio_path: Path to the audio file, passed to ``librosa.load``.
            intensity: Effect strength forwarded to every stage.

        Returns:
            ``(samples, sr)``. The file is loaded at 44100 Hz. Multi-channel
            input yields a channel-major 2-D array (one row per channel);
            mono input yields a 1-D array.

        Raises:
            Exception: wraps any failure as "Humanization failed: ...", with
                the original exception attached as ``__cause__``.
        """
        try:
            y, sr = librosa.load(audio_path, sr=44100, mono=False)
            if y.ndim > 1:
                # Each row is one channel; process them independently.
                y_processed = np.vstack(
                    [self.process_channel(channel, sr, intensity) for channel in y]
                )
            else:
                y_processed = self.process_channel(y, sr, intensity)
            return y_processed, sr
        except Exception as e:
            # Same exception type and message as before; chaining keeps the
            # root cause visible in tracebacks.
            raise Exception(f"Humanization failed: {str(e)}") from e

    def process_channel(self, y, sr, intensity):
        """Run one channel through the six stages in a fixed order."""
        # 1. Attenuate the 1.9-6.1 kHz band
        y_processed = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Resample-based "timing" stage
        y_processed = self.add_timing_variations(y_processed, sr, intensity)
        # 3. Very small pitch shift
        y_processed = self.add_pitch_variations(y_processed, sr, intensity)
        # 4. Short synthetic room reverb
        y_processed = self.add_room_ambiance(y_processed, sr, intensity)
        # 5. Soft saturation plus low-end lift
        y_processed = self.add_analog_warmth(y_processed, sr, intensity)
        # 6. Slow and per-sample gain modulation
        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)
        return y_processed

    def reduce_ai_artifacts(self, y, sr, intensity):
        """Blend in a band-stopped (1.9-6.1 kHz) copy of the signal.

        The wet share is ``0.3 * intensity``. ``sr`` must exceed 12.2 kHz for
        the upper band edge to be a valid filter frequency.
        """
        sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
        y_filtered = signal.sosfilt(sos, y)
        wet = intensity * 0.3
        return y * (1 - wet) + y_filtered * wet

    def add_timing_variations(self, y, sr, intensity):
        """Stretch the signal by a random factor, map it back, and blend.

        A single speed factor is drawn from ``N(1, 0.005 * intensity)``. The
        signal is linearly resampled to ``len(y) / speed`` samples and then
        resampled back to the original length.

        NOTE: both resampling steps span the full signal, so the round trip
        mostly amounts to slight interpolation smoothing rather than a real
        timing offset.

        Inputs with fewer than two samples are returned unchanged, because
        there is nothing to interpolate.
        """
        original_length = len(y)
        if original_length < 2:
            return y

        speed_variation = 1.0 + np.random.normal(0, 0.005 * intensity)
        new_length = int(original_length / speed_variation)

        # Forward resample to the stretched length.
        indices = np.linspace(0, original_length - 1, new_length)
        y_varied = np.interp(indices, np.arange(original_length), y)

        # Map back so the result can be mixed sample-for-sample with the input.
        if len(y_varied) != original_length:
            indices_back = np.linspace(0, len(y_varied) - 1, original_length)
            y_varied = np.interp(indices_back, np.arange(len(y_varied)), y_varied)

        blend_factor = 0.1 * intensity
        return y * (1 - blend_factor) + y_varied * blend_factor

    def add_pitch_variations(self, y, sr, intensity):
        """Blend in a slightly pitch-shifted copy of the signal.

        A 5 Hz sine of depth ``0.3 * intensity`` semitones is built as the
        modulation curve and handed to ``pitch_shift_pv``. The wet share is
        ``0.15 * intensity``.
        """
        t = np.linspace(0, len(y) / sr, len(y))
        vibrato_rate = 5.0  # Hz
        vibrato_depth = 0.3 * intensity  # semitones
        pitch_variation = np.sin(2 * np.pi * vibrato_rate * t) * vibrato_depth

        y_pitched = self.pitch_shift_pv(y, sr, pitch_variation)

        blend_factor = 0.15 * intensity
        return y * (1 - blend_factor) + y_pitched * blend_factor

    def pitch_shift_pv(self, y, sr, pitch_variation):
        """Shift ``y`` by one constant amount derived from ``pitch_variation``.

        Despite the name, no time-varying vibrato is applied. The curve is
        reduced to ``0.1 * mean(|pitch_variation|)`` semitones and passed to
        ``librosa.effects.pitch_shift``.

        This stage is best-effort: if librosa is unavailable or the shift
        fails, the input is returned unchanged.
        """
        try:
            import librosa
            n_steps = 0.1 * np.mean(np.abs(pitch_variation))
            return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
        except Exception:
            # Deliberate fallback; narrowed from a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return y

    def add_room_ambiance(self, y, sr, intensity):
        """Blend in a short synthetic reverb.

        The impulse response is 300 ms long: silence for 20 ms, an early
        reflection of amplitude 0.7 at 20 ms, then an exponentially decaying
        tail scaled by 0.4. The wet signal is normalised by the impulse peak
        and mixed in with a share of ``0.08 * intensity``.
        """
        impulse_length = int(0.3 * sr)  # 300 ms reverb
        impulse = np.zeros(impulse_length)

        early_reflections = int(0.02 * sr)  # 20 ms

        # Write the tail first, then the early reflection, so the 0.7 tap is
        # not overwritten by the first tail sample.
        decay = np.exp(-np.linspace(0, 10, impulse_length - early_reflections))
        impulse[early_reflections:] = decay * 0.4
        impulse[early_reflections] = 0.7

        # Causal convolution: take the head of the 'full' result. With
        # mode='same' the output is centred on the kernel, which moves the
        # reverb ahead of the dry signal and produces an audible pre-echo.
        y_reverb = signal.convolve(y, impulse, mode='full')[:len(y)]
        y_reverb = y_reverb / np.max(np.abs(impulse))

        blend_factor = 0.08 * intensity
        return y * (1 - blend_factor) + y_reverb * blend_factor

    def add_analog_warmth(self, y, sr, intensity):
        """Blend in a soft-saturated copy with a gentle low-end lift.

        Saturation is ``tanh(y * a) / a`` with ``a = 1 + 0.5 * intensity``.
        The content below 150 Hz of the saturated signal is then added back
        at a gain of 0.25, which acts as a low-shelf boost. The wet share is
        ``0.2 * intensity``.
        """
        saturation_amount = 1.0 + 0.5 * intensity
        y_saturated = np.tanh(y * saturation_amount) / saturation_amount

        # Low-end boost: isolate the band below 150 Hz and add part of it
        # back. A high-pass here would remove the low end, which is the
        # opposite of the warmth this stage is meant to add.
        low_shelf_gain = 0.25
        sos_low = signal.butter(2, 150, 'lowpass', fs=sr, output='sos')
        y_warmth = y_saturated + low_shelf_gain * signal.sosfilt(sos_low, y_saturated)

        blend_factor = 0.2 * intensity
        return y * (1 - blend_factor) + y_warmth * blend_factor

    def reduce_perfect_quantization(self, y, sr, intensity):
        """Apply slow and per-sample random gain modulation.

        This stage changes amplitude only, not timing. The gain is the
        product of a 0.5 Hz sine LFO of depth ``0.05 * intensity`` and
        independent per-sample Gaussian noise with sigma ``0.02 * intensity``.
        """
        t = np.linspace(0, len(y) / sr, len(y))

        lfo_rate = 0.5  # Hz
        lfo_depth = 0.05 * intensity
        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth

        random_variation = 1.0 + np.random.normal(0, 0.02 * intensity, len(y))

        return y * (amplitude_variation * random_variation)
def humanize_song(input_mp3, intensity):
    """Gradio callback: process an uploaded file and write the result to disk.

    Args:
        input_mp3: File path of the uploaded audio, or ``None`` if nothing
            was uploaded.
        intensity: Effect strength forwarded to ``AIHumanizer.humanize_audio``.

    Returns:
        ``(output_path, status_message)``. ``output_path`` is ``None`` when no
        file was supplied or when processing failed; the message then explains
        why.
    """
    if input_mp3 is None:
        return None, "Please upload an MP3 file"

    humanizer = AIHumanizer()
    try:
        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)

        # humanize_audio returns multi-channel audio as (channels, samples),
        # while soundfile.write expects (frames, channels).
        if np.ndim(audio_data) == 2:
            audio_data = np.transpose(audio_data)

        # Reserve the output path by actually creating the file.
        # tempfile.mktemp only returns a name, which is deprecated and racy.
        with tempfile.NamedTemporaryFile(suffix='_humanized.mp3', delete=False) as tmp:
            output_path = tmp.name

        # NOTE(review): writing ".mp3" through soundfile presumably needs a
        # libsndfile build with MP3 support; confirm on the deployment image.
        sf.write(output_path, audio_data, sr)
        return output_path, "✅ Song humanized! AI artifacts removed and human feel added."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"❌ Error: {str(e)}"
# Gradio front end: upload and controls on the left, result and status on the right
with gr.Blocks(title="AI Song Humanizer", theme=gr.themes.Soft()) as demo:
    # Page header
    gr.Markdown(value="""
    # 🎵 AI Song Humanizer
    **Remove AI Detection from Your Music - Make it Sound Human-Made**
    *Upload AI-generated songs → Remove robotic artifacts → Download human-sounding music*
    """)

    with gr.Row():
        # Left column: input file, strength slider, action button
        with gr.Column(scale=1):
            gr.Markdown(value="### 1. Upload AI-Generated Song")
            input_audio = gr.Audio(
                label="Upload your complete AI song (MP3, WAV, etc.)",
                type="filepath",
                sources=["upload"],
            )
            gr.Markdown(value="### 2. Adjust Humanization")
            intensity = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                label="Humanization Strength",
                info="How much to remove AI artifacts and add human feel",
            )
            process_btn = gr.Button(value="✨ Remove AI Artifacts", size="lg", variant="primary")

        # Right column: processed audio and status text (both read-only)
        with gr.Column(scale=1):
            gr.Markdown(value="### 3. Download Human Version")
            output_audio = gr.Audio(interactive=False, type="filepath", label="Human-Sounding Song")
            status = gr.Textbox(interactive=False, label="Processing Status")

    # Explanatory panel, expanded by default
    with gr.Accordion(label="🔍 What This Actually Does", open=True):
        gr.Markdown(value="""
        **This tool processes your EXISTING complete song to:**
        🎵 **Remove AI Audio Artifacts:**
        - Reduces metallic/robotic frequencies
        - Removes perfect quantization
        - Eliminates sterile digital sound
        🎤 **Add Human Performance Elements:**
        - Subtle timing variations (like human musicians)
        - Natural pitch fluctuations (vibrato, human imperfection)
        - Dynamic amplitude changes
        🎛️ **Add Analog Character:**
        - Natural room ambiance and reverb
        - Analog-style warmth and saturation
        - Tape-like characteristics
        **You keep:**
        - Your original melody
        - Your original arrangement
        - Your original vocals/instruments
        - Your complete song structure
        **What changes:**
        - Sounds like humans performed it
        - No more "AI sound"
        - Natural imperfections added
        - Warmer, more organic sound
        **Perfect for:** Making AI-generated songs undetectable as AI!
        """)

    # Wire the button to the processing callback
    process_btn.click(fn=humanize_song, outputs=[output_audio, status], inputs=[input_audio, intensity])