Spaces:

FILMITO
/

HumanizeBot

Sleeping

App Files Community

FILMITO committed 22 days ago

Commit

71c15a0

verified ·

1 Parent(s): b7dc0ef

Update app.py

Browse files

Files changed (1) hide show

app.py +181 -200

app.py CHANGED Viewed

@@ -4,180 +4,137 @@ import numpy as np
 import tempfile
 import os
 import librosa
-import torch
-import torchaudio
-from pathlib import Path
 import soundfile as sf
 import io
 class MP3ToHumanized:
     def __init__(self):
         self.groove_profiles = {
-            "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
-            "melody": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
-            "bass": {"timing_var": 0.015, "velocity_var": 12, "swing_factor": 0.07},
-            "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
-            "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05}
         }
-    def audio_to_midi(self, audio_path, conversion_method="basic"):
-        """Convert audio file to MIDI using different methods"""
         try:
-            # Load audio file
-            y, sr = librosa.load(audio_path, sr=22050)
-            if conversion_method == "basic":
-                return self.basic_audio_to_midi(y, sr)
-            elif conversion_method == "melody":
-                return self.melody_extraction_to_midi(y, sr)
-            else:
-                return self.rhythm_based_midi(y, sr)
         except Exception as e:
-            raise Exception(f"Audio to MIDI conversion failed: {str(e)}")
-    def basic_audio_to_midi(self, y, sr):
-        """Basic onset detection and pitch estimation"""
-        # Create a pretty_midi object
-        midi = pretty_midi.PrettyMIDI()
-        # Create instrument
-        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
-        instrument = pretty_midi.Instrument(program=piano_program)
-        # Detect onsets (when notes start)
-        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
-        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
-        # Estimate pitch for each onset
-        for onset_time in onset_times:
-            # Extract a segment around the onset
-            start_sample = int(onset_time * sr)
-            end_sample = start_sample + int(0.5 * sr)  # 500ms segment
-            if end_sample < len(y):
-                segment = y[start_sample:end_sample]
-                # Estimate fundamental frequency
-                f0 = self.estimate_pitch(segment, sr)
-                if f0 > 0:
-                    # Convert frequency to MIDI note number
-                    midi_note = int(69 + 12 * np.log2(f0 / 440.0))
-                    # Only add if it's a valid MIDI note
-                    if 0 <= midi_note <= 127:
-                        # Create note
-                        note = pretty_midi.Note(
-                            velocity=np.random.randint(60, 100),
-                            pitch=midi_note,
-                            start=onset_time,
-                            end=onset_time + 0.5  # 500ms duration
-                        )
-                        instrument.notes.append(note)
-        midi.instruments.append(instrument)
-        return midi
-    def melody_extraction_to_midi(self, y, sr):
-        """Extract melody and convert to MIDI"""
-        midi = pretty_midi.PrettyMIDI()
-        instrument = pretty_midi.Instrument(program=0)  # Piano
-        # Use librosa's melody extraction
-        f0, voiced_flag, voiced_probs = librosa.pyin(
-            y,
-            fmin=librosa.note_to_hz('C2'),
-            fmax=librosa.note_to_hz('C7'),
-            sr=sr
-        )
-        times = librosa.times_like(f0, sr=sr, hop_length=512)
-        current_note = None
-        note_start = 0
-        for time, freq, voiced in zip(times, f0, voiced_flag):
-            if voiced and not np.isnan(freq):
-                midi_note = int(69 + 12 * np.log2(freq / 440.0))
-                if 0 <= midi_note <= 127:
-                    if current_note != midi_note:
-                        if current_note is not None:
-                            # End previous note
-                            note = pretty_midi.Note(
-                                velocity=80,
-                                pitch=current_note,
-                                start=note_start,
-                                end=time
-                            )
-                            instrument.notes.append(note)
-                        # Start new note
-                        current_note = midi_note
-                        note_start = time
-            else:
-                if current_note is not None:
-                    # End current note
                     note = pretty_midi.Note(
                         velocity=80,
-                        pitch=current_note,
-                        start=note_start,
-                        end=time
                     )
-                    instrument.notes.append(note)
-                    current_note = None
-        midi.instruments.append(instrument)
-        return midi
-    def rhythm_based_midi(self, y, sr):
-        """Create rhythm-based MIDI from percussive elements"""
-        midi = pretty_midi.PrettyMIDI()
-        # Drum instrument
-        drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
-        # Detect strong beats and onsets
-        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
-        beat_times = librosa.frames_to_time(beats, sr=sr)
-        # Add drum hits on beats
-        for beat_time in beat_times:
-            # Kick drum on strong beats
-            note = pretty_midi.Note(
-                velocity=100,
-                pitch=36,  # Kick drum
-                start=beat_time,
-                end=beat_time + 0.1
-            )
-            drum_instrument.notes.append(note)
-        midi.instruments.append(drum_instrument)
-        return midi
-    def estimate_pitch(self, segment, sr):
-        """Estimate fundamental frequency from audio segment"""
-        try:
-            # Use autocorrelation for pitch detection
-            corr = np.correlate(segment, segment, mode='full')
-            corr = corr[len(corr)//2:]
-            # Find the first peak after zero lag (fundamental frequency)
-            d = np.diff(corr)
-            start = np.where(d > 0)[0]
-            if len(start) > 0:
-                start = start[0]
-                peak = np.argmax(corr[start:]) + start
-                freq = sr / peak if peak > 0 else 0
-                return freq
-        except:
-            pass
-        return 0
-    def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
-        """Humanize the MIDI data"""
-        tempo = midi_data.estimate_tempo() if len(midi_data.instruments) > 0 else 120
         for instrument in midi_data.instruments:
             inst_type = "drums" if instrument.is_drum else "melody"
             profile = self.groove_profiles[inst_type]
@@ -187,32 +144,43 @@ class MP3ToHumanized:
                 timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
                 note.start = max(0, note.start + timing_shift)
-                # Humanize duration (except drums)
                 if not instrument.is_drum:
-                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
                     note.end = max(note.start + 0.1, note.end + duration_shift)
                 # Humanize velocity
                 vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
                 new_velocity = note.velocity + int(vel_shift * intensity)
-                note.velocity = max(20, min(127, new_velocity))
         return midi_data
-def process_audio_files(files, intensity, style, conversion_method):
     if not files:
         return None, None, "Please upload audio files (MP3, WAV, etc.)"
     converter = MP3ToHumanized()
     processed_files = []
     for file in files:
         try:
             # Convert audio to MIDI
-            midi_data = converter.audio_to_midi(file.name, conversion_method)
             # Humanize the MIDI
-            humanized_midi = converter.humanize_midi(midi_data, intensity, style)
             # Save humanized MIDI
             output_path = tempfile.mktemp(suffix='_humanized.mid')
@@ -220,32 +188,35 @@ def process_audio_files(files, intensity, style, conversion_method):
             processed_files.append(output_path)
         except Exception as e:
-            return None, None, f"Error processing {file.name}: {str(e)}"
     if processed_files:
-        # Create audio preview from first file
         preview_audio = None
         try:
-            # Convert MIDI back to audio for preview
             midi_data = pretty_midi.PrettyMIDI(processed_files[0])
             audio_data = midi_data.synthesize()
             preview_path = tempfile.mktemp(suffix='_preview.wav')
             sf.write(preview_path, audio_data, 44100)
             preview_audio = preview_path
-        except:
             preview_audio = None
-        return processed_files, preview_audio, f"✅ Successfully processed {len(processed_files)} files!"
     else:
         return None, None, "❌ No files were processed successfully."
 # Create Gradio interface
-with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
     gr.Markdown("""
-    # 🎵 MP3 HumanizeBot
-    **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
-    Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
     """)
     with gr.Row():
@@ -254,35 +225,33 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
             file_input = gr.File(
                 file_count="multiple",
-                file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
-                label="Upload Audio Files",
                 type="filepath"
             )
-            conversion_method = gr.Radio(
-                ["basic", "melody", "rhythm"],
-                value="basic",
-                label="🎵 Conversion Method",
-                info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
-            )
             intensity = gr.Slider(
                 0.1, 1.0,
                 value=0.7,
-                label="🎚️ Humanization Intensity"
-            )
-            style = gr.Radio(
-                ["organic", "groovy", "gentle"],
-                value="organic",
-                label="🎸 Humanization Style"
             )
             process_btn = gr.Button(
-                "✨ Convert & Humanize!",
                 variant="primary",
                 size="lg"
             )
         with gr.Column(scale=1):
             gr.Markdown("### 📥 Download Results")
@@ -294,37 +263,49 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
             audio_output = gr.Audio(
                 label="MIDI Audio Preview",
-                interactive=False
             )
             status = gr.Textbox(
                 label="Status",
                 interactive=False,
-                max_lines=4
             )
-    with gr.Accordion("ℹ️ How It Works", open=False):
         gr.Markdown("""
-        **Process:**
-        1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
-        2. **Convert** to MIDI using pitch and rhythm detection
-        3. **Humanize** with timing and velocity variations
-        4. **Download** humanized MIDI files
-        **Conversion Methods:**
-        - **Basic**: General purpose conversion for most music
-        - **Melody**: Focuses on extracting melodic content
-        - **Rhythm**: Focuses on drum patterns and beats
-        **Note**: Audio-to-MIDI conversion is challenging and works best with:
-        - Clear melodic lines
-        - Good audio quality
-        - Not too much reverb/effects
         """)
     process_btn.click(
         fn=process_audio_files,
-        inputs=[file_input, intensity, style, conversion_method],
         outputs=[file_output, audio_output, status]
     )

 import tempfile
 import os
 import librosa
 import soundfile as sf
+from pathlib import Path
+import subprocess
 import io
 class MP3ToHumanized:
     def __init__(self):
         self.groove_profiles = {
+            "drums": {"timing_var": 0.02, "velocity_var": 15},
+            "melody": {"timing_var": 0.01, "velocity_var": 10},
+            "bass": {"timing_var": 0.015, "velocity_var": 12},
+            "chords": {"timing_var": 0.008, "velocity_var": 8},
+            "other": {"timing_var": 0.01, "velocity_var": 10}
         }
+    def convert_to_wav(self, audio_path):
+        """Convert any audio format to WAV using librosa"""
         try:
+            # Load audio with librosa (handles MP3, WAV, etc.)
+            y, sr = librosa.load(audio_path, sr=22050, mono=True)
+            # Save as temporary WAV file
+            wav_path = tempfile.mktemp(suffix='.wav')
+            sf.write(wav_path, y, sr)
+            return wav_path, sr
         except Exception as e:
+            raise Exception(f"Audio conversion failed: {str(e)}")
+    def simple_audio_to_midi(self, audio_path):
+        """Simple but effective audio to MIDI conversion"""
+        try:
+            # Convert to WAV first
+            wav_path, sr = self.convert_to_wav(audio_path)
+            # Load the converted audio
+            y, sr = librosa.load(wav_path, sr=sr)
+            # Create MIDI object
+            midi = pretty_midi.PrettyMIDI()
+            instrument = pretty_midi.Instrument(program=0)  # Acoustic Grand Piano
+            # Method 1: Onset detection with pitch estimation
+            onset_frames = librosa.onset.onset_detect(
+                y=y, sr=sr,
+                hop_length=512,
+                backtrack=True,
+                delta=0.2
+            )
+            onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
+            # Get tempo for musical timing
+            tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='time')
+            notes_added = 0
+            for i, onset_time in enumerate(onset_times):
+                if notes_added >= 50:  # Limit notes to avoid clutter
+                    break
+                # Extract a segment around the onset for pitch detection
+                start_idx = int(onset_time * sr)
+                end_idx = min(start_idx + int(0.3 * sr), len(y))  # 300ms window
+                if end_idx > start_idx:
+                    segment = y[start_idx:end_idx]
+                    # Simple pitch detection using FFT
+                    frequencies, magnitudes = self.simple_pitch_detection(segment, sr)
+                    if len(frequencies) > 0:
+                        # Take the strongest frequency
+                        main_freq = frequencies[np.argmax(magnitudes)]
+                        if 80 < main_freq < 1000:  # Reasonable frequency range
+                            midi_note = self.freq_to_midi(main_freq)
+                            if 48 <= midi_note <= 84:  # C3 to C6 range
+                                # Create note
+                                note = pretty_midi.Note(
+                                    velocity=np.random.randint(70, 100),
+                                    pitch=midi_note,
+                                    start=onset_time,
+                                    end=onset_time + 0.4  # 400ms duration
+                                )
+                                instrument.notes.append(note)
+                                notes_added += 1
+            # If we didn't get enough notes, add some rhythmic elements
+            if notes_added < 10 and len(beat_frames) > 0:
+                drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
+                for beat_time in beat_frames[:8]:  # First 8 beats
                     note = pretty_midi.Note(
                         velocity=80,
+                        pitch=36,  # Kick drum
+                        start=beat_time,
+                        end=beat_time + 0.2
                     )
+                    drum_instrument.notes.append(note)
+                midi.instruments.append(drum_instrument)
+            if len(instrument.notes) > 0:
+                midi.instruments.append(instrument)
+            return midi
+        except Exception as e:
+            raise Exception(f"MIDI conversion failed: {str(e)}")
+    def simple_pitch_detection(self, segment, sr):
+        """Simple FFT-based pitch detection"""
+        # Apply windowing
+        window = np.hanning(len(segment))
+        segment = segment * window
+        # FFT
+        fft = np.fft.rfft(segment)
+        magnitudes = np.abs(fft)
+        frequencies = np.fft.rfftfreq(len(segment), 1/sr)
+        # Filter reasonable frequencies
+        mask = (frequencies > 80) & (frequencies < 1000)
+        return frequencies[mask], magnitudes[mask]
+    def freq_to_midi(self, frequency):
+        """Convert frequency to MIDI note number"""
+        return int(69 + 12 * np.log2(frequency / 440.0))
+    def humanize_midi(self, midi_data, intensity=0.7):
+        """Apply humanization to MIDI"""
+        if len(midi_data.instruments) == 0:
+            return midi_data
         for instrument in midi_data.instruments:
             inst_type = "drums" if instrument.is_drum else "melody"
             profile = self.groove_profiles[inst_type]
                 timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
                 note.start = max(0, note.start + timing_shift)
+                # Humanize duration
                 if not instrument.is_drum:
+                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.5 * intensity)
                     note.end = max(note.start + 0.1, note.end + duration_shift)
                 # Humanize velocity
                 vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
                 new_velocity = note.velocity + int(vel_shift * intensity)
+                note.velocity = max(40, min(127, new_velocity))
         return midi_data
+def process_audio_files(files, intensity):
     if not files:
         return None, None, "Please upload audio files (MP3, WAV, etc.)"
+    # Show what files we received
+    file_info = f"Received {len(files)} files: {[f.name for f in files]}"
+    print(file_info)
     converter = MP3ToHumanized()
     processed_files = []
     for file in files:
         try:
+            # Check file type
+            file_ext = Path(file.name).suffix.lower()
+            print(f"Processing {file.name} (extension: {file_ext})")
             # Convert audio to MIDI
+            midi_data = converter.simple_audio_to_midi(file.name)
+            if len(midi_data.instruments) == 0 or sum(len(instr.notes) for instr in midi_data.instruments) == 0:
+                return None, None, f"❌ Could not extract musical content from {file.name}. Try a different audio file with clear melody."
             # Humanize the MIDI
+            humanized_midi = converter.humanize_midi(midi_data, intensity)
             # Save humanized MIDI
             output_path = tempfile.mktemp(suffix='_humanized.mid')
             processed_files.append(output_path)
         except Exception as e:
+            error_msg = f"Error processing {file.name}: {str(e)}"
+            print(error_msg)
+            return None, None, error_msg
     if processed_files:
+        # Create audio preview
         preview_audio = None
         try:
             midi_data = pretty_midi.PrettyMIDI(processed_files[0])
             audio_data = midi_data.synthesize()
             preview_path = tempfile.mktemp(suffix='_preview.wav')
             sf.write(preview_path, audio_data, 44100)
             preview_audio = preview_path
+        except Exception as e:
+            print(f"Preview generation failed: {e}")
             preview_audio = None
+        success_msg = f"✅ Successfully processed {len(processed_files)} files! Converted audio to MIDI and applied humanization."
+        return processed_files, preview_audio, success_msg
     else:
         return None, None, "❌ No files were processed successfully."
 # Create Gradio interface
+with gr.Blocks(theme=gr.themes.Soft(), title="Audio HumanizeBot") as demo:
     gr.Markdown("""
+    # 🎵 Audio HumanizeBot
+    **Convert MP3/Audio to humanized MIDI - Remove AI traces from your music!**
+    Upload audio files from AI music generators and get humanized MIDI back.
     """)
     with gr.Row():
             file_input = gr.File(
                 file_count="multiple",
+                file_types=[".mp3", ".wav", ".m4a", ".ogg", ".flac"],
+                label="Upload your AI-generated audio files",
                 type="filepath"
             )
             intensity = gr.Slider(
                 0.1, 1.0,
                 value=0.7,
+                label="🎚️ Humanization Intensity",
+                info="How much human feel to add"
             )
             process_btn = gr.Button(
+                "✨ Convert & Humanize Audio!",
                 variant="primary",
                 size="lg"
             )
+            gr.Markdown("""
+            **Supported formats:** MP3, WAV, M4A, OGG, FLAC
+            **Works best with:**
+            - Clear melodic content
+            - AI-generated music
+            - Not too much reverb/effects
+            - 10-30 second clips
+            """)
         with gr.Column(scale=1):
             gr.Markdown("### 📥 Download Results")
             audio_output = gr.Audio(
                 label="MIDI Audio Preview",
+                interactive=False,
+                type="filepath"
             )
             status = gr.Textbox(
                 label="Status",
                 interactive=False,
+                max_lines=5
             )
+    with gr.Accordion("🎯 Tips for Best Results", open=True):
         gr.Markdown("""
+        **For best conversion:**
+        - Use clear AI-generated music with obvious melodies
+        - Avoid heavily processed/remixed tracks
+        - 10-30 second clips work better than full songs
+        - Instrumental music converts better than vocal-heavy tracks
+        **What to expect:**
+        - The MIDI will capture the main melodic and rhythmic ideas
+        - You can import the MIDI into any DAW (FL Studio, Ableton, etc.)
+        - Use high-quality instrument sounds in your DAW for best results
+        - The humanization adds natural timing and velocity variations
+        **Limitations:**
+        - Complex arrangements may not convert perfectly
+        - Audio-to-MIDI is an approximation
+        - Very ambient or effect-heavy music may not work well
+        """)
+    # Examples section
+    with gr.Accordion("🎵 Try These Example Files", open=False):
+        gr.Markdown("""
+        **Test with these types of audio:**
+        - AI piano melodies
+        - Simple electronic beats
+        - Clear synth lines
+        - Drum patterns from AI generators
         """)
     process_btn.click(
         fn=process_audio_files,
+        inputs=[file_input, intensity],
         outputs=[file_output, audio_output, status]
     )