FILMITO committed on
Commit 6f55663 · verified · 1 Parent(s): ace3e17

Update app.py

Files changed (1)
  1. app.py +246 -152
app.py CHANGED
@@ -3,14 +3,14 @@ import pretty_midi
 import numpy as np
 import tempfile
 import os
-import scipy
-from scipy import signal
 import librosa
-import io
-import base64
+import torch
+import torchaudio
 from pathlib import Path
+import soundfile as sf
+import io
 
-class HumanizeBot:
+class MP3ToHumanized:
     def __init__(self):
         self.groove_profiles = {
             "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
@@ -19,169 +19,267 @@ class HumanizeBot:
             "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
             "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05}
         }
-
-    def classify_instrument(self, instrument):
-        """Classify instrument type for appropriate humanization"""
-        if instrument.is_drum:
-            return "drums"
-        elif 32 <= instrument.program <= 39:  # Bass
-            return "bass"
-        elif 0 <= instrument.program <= 7:  # Piano
-            return "chords"
-        elif 40 <= instrument.program <= 55:  # Strings, orchestra
-            return "chords"
-        elif 80 <= instrument.program <= 104:  # Synth leads, pads
-            return "melody"
-        else:
-            return "melody"
-
-    def apply_swing(self, notes, swing_factor, tempo):
-        """Apply swing/groove to notes"""
-        swung_notes = []
-        for note in notes:
-            # Simple swing: push even 8th notes slightly later
-            beat_position = (note.start * tempo / 60) % 1
-            if 0.25 < beat_position < 0.75:  # Off-beat positions
-                note.start += 0.01 * swing_factor
-                note.end += 0.01 * swing_factor
-            swung_notes.append(note)
-        return swung_notes
-
-    def humanize_midi(self, midi_file, intensity=0.7, style="organic", add_swing=True):
-        """Main humanization function"""
+
+    def audio_to_midi(self, audio_path, conversion_method="basic"):
+        """Convert audio file to MIDI using different methods"""
         try:
-            # Load MIDI file
-            midi_data = pretty_midi.PrettyMIDI(midi_file.name)
-            tempo = midi_data.estimate_tempo()
-
-            # Process each instrument
-            for instrument in midi_data.instruments:
-                inst_type = self.classify_instrument(instrument)
-                profile = self.groove_profiles[inst_type]
+            # Load audio file
+            y, sr = librosa.load(audio_path, sr=22050)
+
+            if conversion_method == "basic":
+                return self.basic_audio_to_midi(y, sr)
+            elif conversion_method == "melody":
+                return self.melody_extraction_to_midi(y, sr)
+            else:
+                return self.rhythm_based_midi(y, sr)
 
-                # Apply swing if requested
-                if add_swing and inst_type in ["drums", "bass"]:
-                    instrument.notes = self.apply_swing(
-                        instrument.notes,
-                        profile["swing_factor"] * intensity,
-                        tempo
-                    )
+        except Exception as e:
+            raise Exception(f"Audio to MIDI conversion failed: {str(e)}")
+
+    def basic_audio_to_midi(self, y, sr):
+        """Basic onset detection and pitch estimation"""
+        # Create a pretty_midi object
+        midi = pretty_midi.PrettyMIDI()
+
+        # Create instrument
+        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
+        instrument = pretty_midi.Instrument(program=piano_program)
+
+        # Detect onsets (when notes start)
+        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
+        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
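+        # backtrack=True rolls each detected onset back to the preceding energy
+        # minimum, so note starts are not clipped mid-attack; hop_length must
+        # match between onset_detect and frames_to_time for times to line up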
+
+        # Estimate pitch for each onset
+        for onset_time in onset_times:
+            # Extract a segment around the onset
+            start_sample = int(onset_time * sr)
+            end_sample = start_sample + int(0.5 * sr)  # 500ms segment
+
+            if end_sample < len(y):
+                segment = y[start_sample:end_sample]
 
-                # Humanize timing and velocity
-                for note in instrument.notes:
-                    # Humanize timing (more variation for drums)
-                    timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
-                    note.start = max(0, note.start + timing_shift)
-
-                    # Humanize note duration (except for drums)
-                    if not instrument.is_drum:
-                        duration_shift = np.random.normal(0, profile["timing_var"] * 0.5 * intensity)
-                        note.end = max(note.start + 0.05, note.end + duration_shift)
+                # Estimate fundamental frequency
+                f0 = self.estimate_pitch(segment, sr)
+
+                if f0 > 0:
+                    # Convert frequency to MIDI note number
+                    midi_note = int(69 + 12 * np.log2(f0 / 440.0))
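+                    # standard MIDI mapping: A4 = 440 Hz = note 69, 12 notes per octave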
 
-                    # Humanize velocity
-                    vel_pattern = self.get_velocity_pattern(note, instrument, style)
-                    vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
-                    new_velocity = note.velocity + int(vel_shift * intensity * vel_pattern)
-                    note.velocity = max(20, min(127, new_velocity))
-
-            # Save humanized MIDI
-            output_path = tempfile.mktemp(suffix='_humanized.mid')
-            midi_data.write(output_path)
-            return output_path, "✅ Humanization successful! File is ready for download."
-
-        except Exception as e:
-            return None, f"❌ Error processing file: {str(e)}"
+                    # Only add if it's a valid MIDI note
+                    if 0 <= midi_note <= 127:
+                        # Create note
+                        note = pretty_midi.Note(
+                            velocity=np.random.randint(60, 100),
+                            pitch=midi_note,
+                            start=onset_time,
+                            end=onset_time + 0.5  # 500ms duration
+                        )
+                        instrument.notes.append(note)
+
+        midi.instruments.append(instrument)
+        return midi
 
-    def get_velocity_pattern(self, note, instrument, style):
-        """Get velocity multiplier based on style and musical context"""
-        if style == "organic":
-            return 1.0
-        elif style == "groovy":
-            # Accentuate beats more
-            beat_position = (note.start * 2) % 1  # Simple beat detection
-            if beat_position < 0.1:  # On strong beats
-                return 1.2
+    def melody_extraction_to_midi(self, y, sr):
+        """Extract melody and convert to MIDI"""
+        midi = pretty_midi.PrettyMIDI()
+        instrument = pretty_midi.Instrument(program=0)  # Piano
+
+        # Use librosa's melody extraction
+        f0, voiced_flag, voiced_probs = librosa.pyin(
+            y,
+            fmin=librosa.note_to_hz('C2'),
+            fmax=librosa.note_to_hz('C7'),
+            sr=sr
+        )
+
+        times = librosa.times_like(f0, sr=sr, hop_length=512)
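+        # pyin yields one f0 estimate per analysis frame; times_like converts
+        # frame indices to seconds (hop_length=512 is pyin's default hop)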
+
+        current_note = None
+        note_start = 0
+
+        for time, freq, voiced in zip(times, f0, voiced_flag):
+            if voiced and not np.isnan(freq):
+                midi_note = int(69 + 12 * np.log2(freq / 440.0))
+                if 0 <= midi_note <= 127:
+                    if current_note != midi_note:
+                        if current_note is not None:
+                            # End previous note
+                            note = pretty_midi.Note(
+                                velocity=80,
+                                pitch=current_note,
+                                start=note_start,
+                                end=time
+                            )
+                            instrument.notes.append(note)
+
+                        # Start new note
+                        current_note = midi_note
+                        note_start = time
             else:
-                return 0.9
-        elif style == "gentle":
-            return 0.8
-        return 1.0
-
-def create_audio_preview(midi_path):
-    """Create a simple audio preview from MIDI"""
-    try:
-        midi_data = pretty_midi.PrettyMIDI(midi_path)
-        # Generate audio using fluidsynth (simplified)
-        audio_data = midi_data.synthesize()
-        return 44100, audio_data.astype(np.float32)
-    except:
-        return None, None
+                if current_note is not None:
+                    # End current note
+                    note = pretty_midi.Note(
+                        velocity=80,
+                        pitch=current_note,
+                        start=note_start,
+                        end=time
+                    )
+                    instrument.notes.append(note)
+                current_note = None
+
+        midi.instruments.append(instrument)
+        return midi
+
+    def rhythm_based_midi(self, y, sr):
+        """Create rhythm-based MIDI from percussive elements"""
+        midi = pretty_midi.PrettyMIDI()
+
+        # Drum instrument
+        drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
+
+        # Detect strong beats and onsets
+        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
+        beat_times = librosa.frames_to_time(beats, sr=sr)
+
+        # Add drum hits on beats
+        for beat_time in beat_times:
+            # Kick drum on strong beats
+            note = pretty_midi.Note(
+                velocity=100,
+                pitch=36,  # Kick drum
+                start=beat_time,
+                end=beat_time + 0.1
+            )
+            drum_instrument.notes.append(note)
+
+        midi.instruments.append(drum_instrument)
+        return midi
+
+    def estimate_pitch(self, segment, sr):
+        """Estimate fundamental frequency from audio segment"""
+        try:
+            # Use autocorrelation for pitch detection
+            corr = np.correlate(segment, segment, mode='full')
+            corr = corr[len(corr)//2:]
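+            # keep non-negative lags only; the lag of the strongest peak past
+            # zero approximates the period in samples, so f0 = sr / lag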
+
+            # Find the first peak after zero lag (fundamental frequency)
+            d = np.diff(corr)
+            start = np.where(d > 0)[0]
+            if len(start) > 0:
+                start = start[0]
+                peak = np.argmax(corr[start:]) + start
+                freq = sr / peak if peak > 0 else 0
+                return freq
+        except:
+            pass
+        return 0
+
+    def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
+        """Humanize the MIDI data"""
+        tempo = midi_data.estimate_tempo() if len(midi_data.instruments) > 0 else 120
+
+        for instrument in midi_data.instruments:
+            inst_type = "drums" if instrument.is_drum else "melody"
+            profile = self.groove_profiles[inst_type]
+
+            for note in instrument.notes:
+                # Humanize timing
+                timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
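+                # timing_var is a standard deviation in seconds, scaled by intensity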
+                note.start = max(0, note.start + timing_shift)
+
+                # Humanize duration (except drums)
+                if not instrument.is_drum:
+                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
+                    note.end = max(note.start + 0.1, note.end + duration_shift)
+
+                # Humanize velocity
+                vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
+                new_velocity = note.velocity + int(vel_shift * intensity)
+                note.velocity = max(20, min(127, new_velocity))
+
+        return midi_data
 
-def process_files(files, intensity, style, add_swing):
+def process_audio_files(files, intensity, style, conversion_method):
     if not files:
-        return None, None, "Please upload MIDI files to begin."
+        return None, None, "Please upload audio files (MP3, WAV, etc.)"
 
-    bot = HumanizeBot()
+    converter = MP3ToHumanized()
     processed_files = []
-    audio_previews = []
 
     for file in files:
-        humanized_path, message = bot.humanize_midi(file, intensity, style, add_swing)
-        if humanized_path:
-            processed_files.append(humanized_path)
-
-            # Create audio preview
-            sr, audio = create_audio_preview(humanized_path)
-            if audio is not None:
-                audio_previews.append((sr, audio))
+        try:
+            # Convert audio to MIDI
+            midi_data = converter.audio_to_midi(file.name, conversion_method)
+
+            # Humanize the MIDI
+            humanized_midi = converter.humanize_midi(midi_data, intensity, style)
+
+            # Save humanized MIDI
+            output_path = tempfile.mktemp(suffix='_humanized.mid')
+            humanized_midi.write(output_path)
+            processed_files.append(output_path)
+
+        except Exception as e:
+            return None, None, f"Error processing {file.name}: {str(e)}"
 
     if processed_files:
-        return processed_files, audio_previews[0] if audio_previews else None, f"✅ Successfully processed {len(processed_files)} files!"
+        # Create audio preview from first file
+        preview_audio = None
+        try:
+            # Convert MIDI back to audio for preview
+            midi_data = pretty_midi.PrettyMIDI(processed_files[0])
+            audio_data = midi_data.synthesize()
+            preview_path = tempfile.mktemp(suffix='_preview.wav')
+            sf.write(preview_path, audio_data, 44100)
+            preview_audio = preview_path
+        except:
+            preview_audio = None
+
+        return processed_files, preview_audio, f"✅ Successfully processed {len(processed_files)} files!"
     else:
         return None, None, "❌ No files were processed successfully."
 
-# Create the Gradio interface
-with gr.Blocks(theme=gr.themes.Soft(), title="HumanizeBot") as demo:
+# Create Gradio interface
+with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
     gr.Markdown("""
-    # 🎵 HumanizeBot
-    **Remove AI traces from your music and make it sound human-made!**
+    # 🎵 MP3 HumanizeBot
+    **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
 
-    Upload MIDI files from AI music generators to apply natural humanization: subtle timing variations, velocity changes, and musical feel.
+    Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
     """)
 
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### 📁 Upload & Settings")
+            gr.Markdown("### 📁 Upload Audio Files")
 
             file_input = gr.File(
                 file_count="multiple",
-                file_types=[".mid", ".midi"],
-                label="Upload MIDI Files",
+                file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
+                label="Upload Audio Files",
                 type="filepath"
            )
 
+            conversion_method = gr.Radio(
+                ["basic", "melody", "rhythm"],
+                value="basic",
+                label="🎵 Conversion Method",
+                info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
+            )
+
             intensity = gr.Slider(
                 0.1, 1.0,
                 value=0.7,
-                label="🎚️ Humanization Intensity",
-                info="Low = subtle, High = very human"
+                label="🎚️ Humanization Intensity"
             )
 
             style = gr.Radio(
                 ["organic", "groovy", "gentle"],
                 value="organic",
-                label="🎸 Humanization Style",
-                info="Organic = natural, Groovy = rhythmic, Gentle = subtle"
-            )
-
-            add_swing = gr.Checkbox(
-                value=True,
-                label="🔄 Add Swing/Groove",
-                info="Add rhythmic push and pull"
+                label="🎸 Humanization Style"
             )
 
             process_btn = gr.Button(
-                "✨ Humanize My Music!",
+                "✨ Convert & Humanize!",
                 variant="primary",
                 size="lg"
             )
@@ -195,44 +293,40 @@ with gr.Blocks(theme=gr.themes.Soft(), title="HumanizeBot") as demo:
             )
 
             audio_output = gr.Audio(
-                label="Audio Preview (First File)",
+                label="MIDI Audio Preview",
                 interactive=False
             )
 
             status = gr.Textbox(
                 label="Status",
                 interactive=False,
-                max_lines=3
+                max_lines=4
             )
 
-    # Examples section
-    with gr.Accordion("🎯 Examples & Tips", open=False):
+    with gr.Accordion("ℹ️ How It Works", open=False):
         gr.Markdown("""
-        **Best used with:**
-        - AI-generated MIDI from Soundraw, AIVA, MuseNet, etc.
-        - Robotic-sounding drum patterns
-        - Static piano or synth sequences
-
-        **How it works:**
-        - Adds subtle timing variations (like a human player)
-        - Adjusts velocity (note strength) dynamically
-        - Can add swing/groove for rhythmic parts
-        - Preserves the original musical content
-
-        **Pro tip:** Start with intensity 0.7 for balanced results!
+        **Process:**
+        1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
+        2. **Convert** to MIDI using pitch and rhythm detection
+        3. **Humanize** with timing and velocity variations
+        4. **Download** humanized MIDI files
+
+        **Conversion Methods:**
+        - **Basic**: General purpose conversion for most music
+        - **Melody**: Focuses on extracting melodic content
+        - **Rhythm**: Focuses on drum patterns and beats
+
+        **Note**: Audio-to-MIDI conversion is challenging and works best with:
+        - Clear melodic lines
+        - Good audio quality
+        - Not too much reverb/effects
         """)
 
-    # Connect the processing function
     process_btn.click(
-        fn=process_files,
-        inputs=[file_input, intensity, style, add_swing],
+        fn=process_audio_files,
+        inputs=[file_input, intensity, style, conversion_method],
         outputs=[file_output, audio_output, status]
     )
-
-    gr.Markdown("""
-    ---
-    *Built with ❤️ using Gradio and PrettyMIDI. Works best with MIDI files from AI music generators.*
-    """)
 
 if __name__ == "__main__":
     demo.launch(debug=True)