FILMITO committed on
Commit
0e91831
Β·
verified Β·
1 Parent(s): 802e2f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -216
app.py CHANGED
@@ -6,308 +6,292 @@ import os
6
  import librosa
7
  import soundfile as sf
8
  from pathlib import Path
9
- import subprocess
10
- import io
11
 
12
- class MP3ToHumanized:
13
  def __init__(self):
14
- self.groove_profiles = {
15
- "drums": {"timing_var": 0.02, "velocity_var": 15},
16
- "melody": {"timing_var": 0.01, "velocity_var": 10},
17
- "bass": {"timing_var": 0.015, "velocity_var": 12},
18
- "chords": {"timing_var": 0.008, "velocity_var": 8},
19
- "other": {"timing_var": 0.01, "velocity_var": 10}
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
21
 
22
  def convert_to_wav(self, audio_path):
23
- """Convert any audio format to WAV using librosa"""
24
  try:
25
- # Load audio with librosa (handles MP3, WAV, etc.)
26
  y, sr = librosa.load(audio_path, sr=22050, mono=True)
27
-
28
- # Save as temporary WAV file
29
  wav_path = tempfile.mktemp(suffix='.wav')
30
  sf.write(wav_path, y, sr)
31
  return wav_path, sr
32
  except Exception as e:
33
  raise Exception(f"Audio conversion failed: {str(e)}")
34
 
35
- def simple_audio_to_midi(self, audio_path):
36
- """Simple but effective audio to MIDI conversion"""
37
  try:
38
- # Convert to WAV first
39
  wav_path, sr = self.convert_to_wav(audio_path)
40
-
41
- # Load the converted audio
42
  y, sr = librosa.load(wav_path, sr=sr)
43
 
44
- # Create MIDI object
45
  midi = pretty_midi.PrettyMIDI()
46
- instrument = pretty_midi.Instrument(program=0) # Acoustic Grand Piano
47
 
48
- # Method 1: Onset detection with pitch estimation
49
- onset_frames = librosa.onset.onset_detect(
50
- y=y, sr=sr,
51
- hop_length=512,
52
- backtrack=True,
53
- delta=0.2
54
- )
55
- onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
 
56
 
57
- # Get tempo for musical timing
58
- tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='time')
59
 
60
- notes_added = 0
61
- for i, onset_time in enumerate(onset_times):
62
- if notes_added >= 50: # Limit notes to avoid clutter
63
- break
64
-
65
- # Extract a segment around the onset for pitch detection
66
- start_idx = int(onset_time * sr)
67
- end_idx = min(start_idx + int(0.3 * sr), len(y)) # 300ms window
68
-
69
- if end_idx > start_idx:
70
- segment = y[start_idx:end_idx]
71
-
72
- # Simple pitch detection using FFT
73
- frequencies, magnitudes = self.simple_pitch_detection(segment, sr)
74
-
75
- if len(frequencies) > 0:
76
- # Take the strongest frequency
77
- main_freq = frequencies[np.argmax(magnitudes)]
78
-
79
- if 80 < main_freq < 1000: # Reasonable frequency range
80
- midi_note = self.freq_to_midi(main_freq)
81
-
82
- if 48 <= midi_note <= 84: # C3 to C6 range
83
- # Create note
84
- note = pretty_midi.Note(
85
- velocity=np.random.randint(70, 100),
86
- pitch=midi_note,
87
- start=onset_time,
88
- end=onset_time + 0.4 # 400ms duration
89
- )
90
- instrument.notes.append(note)
91
- notes_added += 1
92
 
93
- # If we didn't get enough notes, add some rhythmic elements
94
- if notes_added < 10 and len(beat_frames) > 0:
95
- drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
96
- for beat_time in beat_frames[:8]: # First 8 beats
97
- note = pretty_midi.Note(
98
- velocity=80,
99
- pitch=36, # Kick drum
100
- start=beat_time,
101
- end=beat_time + 0.2
102
- )
103
- drum_instrument.notes.append(note)
104
- midi.instruments.append(drum_instrument)
105
 
106
- if len(instrument.notes) > 0:
107
- midi.instruments.append(instrument)
 
 
108
 
109
  return midi
110
 
111
  except Exception as e:
112
- raise Exception(f"MIDI conversion failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- def simple_pitch_detection(self, segment, sr):
115
- """Simple FFT-based pitch detection"""
116
- # Apply windowing
117
- window = np.hanning(len(segment))
118
- segment = segment * window
119
 
120
- # FFT
121
- fft = np.fft.rfft(segment)
122
- magnitudes = np.abs(fft)
123
- frequencies = np.fft.rfftfreq(len(segment), 1/sr)
124
 
125
- # Filter reasonable frequencies
126
- mask = (frequencies > 80) & (frequencies < 1000)
127
- return frequencies[mask], magnitudes[mask]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- def freq_to_midi(self, frequency):
130
- """Convert frequency to MIDI note number"""
131
- return int(69 + 12 * np.log2(frequency / 440.0))
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- def humanize_midi(self, midi_data, intensity=0.7):
134
- """Apply humanization to MIDI"""
135
- if len(midi_data.instruments) == 0:
136
- return midi_data
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  for instrument in midi_data.instruments:
139
- inst_type = "drums" if instrument.is_drum else "melody"
140
- profile = self.groove_profiles[inst_type]
141
-
142
  for note in instrument.notes:
143
  # Humanize timing
144
- timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
145
  note.start = max(0, note.start + timing_shift)
146
 
147
- # Humanize duration
148
- if not instrument.is_drum:
149
- duration_shift = np.random.normal(0, profile["timing_var"] * 0.5 * intensity)
150
- note.end = max(note.start + 0.1, note.end + duration_shift)
151
-
152
  # Humanize velocity
153
- vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
154
- new_velocity = note.velocity + int(vel_shift * intensity)
155
- note.velocity = max(40, min(127, new_velocity))
156
 
157
  return midi_data
158
 
159
- def process_audio_files(files, intensity):
 
160
  if not files:
161
- return None, None, "Please upload audio files (MP3, WAV, etc.)"
162
-
163
- # Show what files we received
164
- file_info = f"Received {len(files)} files: {[f.name for f in files]}"
165
- print(file_info)
166
 
167
- converter = MP3ToHumanized()
168
  processed_files = []
169
 
170
  for file in files:
171
  try:
172
- # Check file type
173
- file_ext = Path(file.name).suffix.lower()
174
- print(f"Processing {file.name} (extension: {file_ext})")
175
 
176
- # Convert audio to MIDI
177
- midi_data = converter.simple_audio_to_midi(file.name)
178
 
179
- if len(midi_data.instruments) == 0 or sum(len(instr.notes) for instr in midi_data.instruments) == 0:
180
- return None, None, f"❌ Could not extract musical content from {file.name}. Try a different audio file with clear melody."
 
181
 
182
- # Humanize the MIDI
183
- humanized_midi = converter.humanize_midi(midi_data, intensity)
 
 
184
 
185
- # Save humanized MIDI
186
- output_path = tempfile.mktemp(suffix='_humanized.mid')
187
- humanized_midi.write(output_path)
188
- processed_files.append(output_path)
189
 
190
  except Exception as e:
191
- error_msg = f"Error processing {file.name}: {str(e)}"
192
- print(error_msg)
193
- return None, None, error_msg
194
 
195
  if processed_files:
196
- # Create audio preview
197
- preview_audio = None
198
- try:
199
- midi_data = pretty_midi.PrettyMIDI(processed_files[0])
200
- audio_data = midi_data.synthesize()
201
- preview_path = tempfile.mktemp(suffix='_preview.wav')
202
- sf.write(preview_path, audio_data, 44100)
203
- preview_audio = preview_path
204
- except Exception as e:
205
- print(f"Preview generation failed: {e}")
206
- preview_audio = None
207
-
208
- success_msg = f"βœ… Successfully processed {len(processed_files)} files! Converted audio to MIDI and applied humanization."
209
- return processed_files, preview_audio, success_msg
210
  else:
211
- return None, None, "❌ No files were processed successfully."
212
 
213
- # Create Gradio interface
214
- with gr.Blocks(theme=gr.themes.Soft(), title="Audio HumanizeBot") as demo:
215
  gr.Markdown("""
216
- # 🎡 Audio HumanizeBot
217
- **Convert MP3/Audio to humanized MIDI - Remove AI traces from your music!**
218
 
219
- Upload audio files from AI music generators and get humanized MIDI back.
220
  """)
221
 
222
  with gr.Row():
223
- with gr.Column(scale=1):
224
- gr.Markdown("### πŸ“ Upload Audio Files")
225
-
226
  file_input = gr.File(
227
  file_count="multiple",
228
- file_types=[".mp3", ".wav", ".m4a", ".ogg", ".flac"],
229
- label="Upload your AI-generated audio files",
230
- type="filepath"
231
- )
232
-
233
- intensity = gr.Slider(
234
- 0.1, 1.0,
235
- value=0.7,
236
- label="🎚️ Humanization Intensity",
237
- info="How much human feel to add"
238
  )
239
 
240
- process_btn = gr.Button(
241
- "✨ Convert & Humanize Audio!",
242
- variant="primary",
243
- size="lg"
 
244
  )
245
 
246
- gr.Markdown("""
247
- **Supported formats:** MP3, WAV, M4A, OGG, FLAC
248
 
249
- **Works best with:**
250
- - Clear melodic content
251
- - AI-generated music
252
- - Not too much reverb/effects
253
- - 10-30 second clips
254
- """)
255
 
256
- with gr.Column(scale=1):
257
- gr.Markdown("### πŸ“₯ Download Results")
258
-
259
  file_output = gr.File(
260
  file_count="multiple",
261
- label="Download Humanized MIDI Files"
262
  )
263
 
264
  audio_output = gr.Audio(
265
- label="MIDI Audio Preview",
266
- interactive=False,
267
  type="filepath"
268
  )
269
 
270
- status = gr.Textbox(
271
- label="Status",
272
- interactive=False,
273
- max_lines=5
274
- )
275
 
276
- with gr.Accordion("🎯 Tips for Best Results", open=True):
277
  gr.Markdown("""
278
- **For best conversion:**
279
- - Use clear AI-generated music with obvious melodies
280
- - Avoid heavily processed/remixed tracks
281
- - 10-30 second clips work better than full songs
282
- - Instrumental music converts better than vocal-heavy tracks
283
-
284
- **What to expect:**
285
- - The MIDI will capture the main melodic and rhythmic ideas
286
- - You can import the MIDI into any DAW (FL Studio, Ableton, etc.)
287
- - Use high-quality instrument sounds in your DAW for best results
288
- - The humanization adds natural timing and velocity variations
289
 
290
- **Limitations:**
291
- - Complex arrangements may not convert perfectly
292
- - Audio-to-MIDI is an approximation
293
- - Very ambient or effect-heavy music may not work well
294
- """)
295
-
296
- # Examples section
297
- with gr.Accordion("🎡 Try These Example Files", open=False):
298
- gr.Markdown("""
299
- **Test with these types of audio:**
300
- - AI piano melodies
301
- - Simple electronic beats
302
- - Clear synth lines
303
- - Drum patterns from AI generators
304
  """)
305
 
306
  process_btn.click(
307
- fn=process_audio_files,
308
- inputs=[file_input, intensity],
309
  outputs=[file_output, audio_output, status]
310
  )
311
 
312
  if __name__ == "__main__":
313
- demo.launch(debug=True)
 
6
  import librosa
7
  import soundfile as sf
8
  from pathlib import Path
 
 
9
 
10
class MP3ToBetterMusic:
    """Convert audio files to MIDI with varied (non-piano) instrument sounds."""

    # Style categories that are not literal keys of instrument_map; they
    # borrow an existing category's instruments.  Without this, the
    # "electronic"/"acoustic" styles requested "synth"/"guitar", got no
    # melody instrument back, and silently dropped every melody note.
    _CATEGORY_ALIASES = {"synth": "melody", "guitar": "melody"}

    def __init__(self):
        # General MIDI program numbers grouped by musical role; one entry per
        # category is picked at random when instruments are created.
        self.instrument_map = {
            "melody": [
                {"name": "Warm Synth", "program": 81},       # Lead synth
                {"name": "Electric Guitar", "program": 27},  # Clean guitar
                {"name": "Violin", "program": 40},           # Strings
                {"name": "Saxophone", "program": 66},        # Sax
            ],
            "chords": [
                {"name": "Electric Piano", "program": 5},    # Rhodes
                {"name": "Pad", "program": 89},              # Warm pad
                {"name": "Strings", "program": 49},          # String ensemble
            ],
            "bass": [
                {"name": "Electric Bass", "program": 33},    # Bass guitar
                {"name": "Synth Bass", "program": 39},       # Synth bass
            ],
            "drums": [
                {"name": "Drum Kit", "program": 0, "is_drum": True},
            ],
        }

    def convert_to_wav(self, audio_path):
        """Convert any supported audio file to a temporary mono 22.05 kHz WAV.

        Returns (wav_path, sample_rate); raises Exception on failure.
        """
        try:
            y, sr = librosa.load(audio_path, sr=22050, mono=True)
            # mkstemp instead of the deprecated, race-prone mktemp.
            fd, wav_path = tempfile.mkstemp(suffix='.wav')
            os.close(fd)
            sf.write(wav_path, y, sr)
            return wav_path, sr
        except Exception as e:
            raise Exception(f"Audio conversion failed: {str(e)}")

    def extract_melody_to_midi(self, audio_path, style="melodic"):
        """Convert audio to a PrettyMIDI object using style-based instruments.

        style selects which instrument categories are created; only
        instruments that end up with notes are attached to the MIDI object.
        Raises Exception (with context) on any failure.
        """
        try:
            wav_path, sr = self.convert_to_wav(audio_path)
            y, sr = librosa.load(wav_path, sr=sr)

            midi = pretty_midi.PrettyMIDI()

            # Choose instrument categories based on the requested style.
            if style == "melodic":
                instruments = self.create_instruments(["melody", "chords", "bass"])
            elif style == "electronic":
                instruments = self.create_instruments(["synth", "bass", "drums"])
            elif style == "acoustic":
                instruments = self.create_instruments(["guitar", "bass", "drums"])
            else:  # balanced
                instruments = self.create_instruments(["melody", "chords", "bass", "drums"])

            # Extract note events and split them into melody vs. chords.
            melody_notes, chord_notes = self.analyze_audio(y, sr, style)

            if melody_notes and instruments.get('melody'):
                instruments['melody'].notes.extend(melody_notes[:20])  # Limit notes

            if chord_notes and instruments.get('chords'):
                instruments['chords'].notes.extend(chord_notes[:15])

            # Add a basic rhythm track when the style includes drums.
            if instruments.get('drums'):
                self.add_basic_drums(instruments['drums'], y, sr)

            # Only keep instruments that actually received notes.
            for instrument in instruments.values():
                if instrument.notes:
                    midi.instruments.append(instrument)

            return midi

        except Exception as e:
            raise Exception(f"Music extraction failed: {str(e)}")

    def create_instruments(self, types):
        """Create one pretty_midi.Instrument per requested category.

        Categories with an alias (e.g. "synth", "guitar") are resolved to a
        real instrument_map key and stored under that resolved key, so the
        downstream lookups ('melody', 'chords', ...) still find them.
        Unknown categories are skipped.
        """
        instruments = {}
        for inst_type in types:
            key = self._CATEGORY_ALIASES.get(inst_type, inst_type)
            options = self.instrument_map.get(key)
            if not options:
                continue
            # Random pick via numpy (already imported) instead of importing
            # `random` inside the loop.
            inst_info = options[np.random.randint(len(options))]
            instruments[key] = pretty_midi.Instrument(
                program=inst_info['program'],
                is_drum=inst_info.get('is_drum', False),
                name=inst_info['name'],
            )
        return instruments

    def analyze_audio(self, y, sr, style):
        """Detect note events in y and return (melody_notes, chord_notes).

        `style` is currently unused but kept for interface stability.
        """
        melody_notes = []
        chord_notes = []

        # Detect onsets (moments where new notes start).
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, delta=0.1)
        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)

        # Simple pitch detection for each onset, capped at 30 notes.
        for i, onset_time in enumerate(onset_times[:30]):
            start_idx = int(onset_time * sr)
            end_idx = min(start_idx + int(0.4 * sr), len(y))  # 400 ms window

            if end_idx > start_idx:
                segment = y[start_idx:end_idx]
                freq = self.detect_pitch(segment, sr)

                if 100 < freq < 800:  # Reasonable pitch range
                    midi_note = int(69 + 12 * np.log2(freq / 440.0))

                    if 48 <= midi_note <= 84:  # Good MIDI range (C3..C6)
                        note = pretty_midi.Note(
                            velocity=np.random.randint(70, 100),
                            pitch=midi_note,
                            start=onset_time,
                            end=onset_time + 0.5,
                        )
                        # Every 3rd detected note becomes melody, the rest chords.
                        if i % 3 == 0:
                            melody_notes.append(note)
                        else:
                            chord_notes.append(note)

        return melody_notes, chord_notes

    def detect_pitch(self, segment, sr):
        """Return the dominant frequency (Hz) of segment, or 440.0 on failure."""
        try:
            # FFT of the Hann-windowed segment to find the dominant frequency.
            fft = np.fft.rfft(segment * np.hanning(len(segment)))
            freqs = np.fft.rfftfreq(len(segment), 1 / sr)
            mags = np.abs(fft)

            # Search only a musically reasonable band.
            mask = (freqs > 80) & (freqs < 1000)
            if np.any(mask):
                peak_idx = np.argmax(mags[mask])
                return freqs[mask][peak_idx]
        except Exception:
            pass  # fall through to the default pitch
        return 440  # Default to A4

    def add_basic_drums(self, drum_instrument, y, sr):
        """Append a simple kick/snare pattern aligned to the tracked beats."""
        try:
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
            beat_times = librosa.frames_to_time(beats, sr=sr)

            for i, beat_time in enumerate(beat_times[:16]):  # First 16 beats
                # Kick drum (GM note 36) on beats 1 and 3.
                if i % 4 in (0, 2):
                    drum_instrument.notes.append(pretty_midi.Note(
                        velocity=90, pitch=36, start=beat_time, end=beat_time + 0.2
                    ))
                # Snare (GM note 38) on beats 2 and 4.
                if i % 4 in (1, 3):
                    drum_instrument.notes.append(pretty_midi.Note(
                        velocity=80, pitch=38, start=beat_time, end=beat_time + 0.2
                    ))
        except Exception:
            # Drums are optional decoration; never fail the conversion for them.
            pass

    def humanize_midi(self, midi_data, intensity=0.7):
        """Jitter note timing and velocity in place to add human feel.

        Returns the same PrettyMIDI object, mutated.
        """
        for instrument in midi_data.instruments:
            for note in instrument.notes:
                # Small Gaussian timing jitter; notes never start before 0.
                timing_shift = np.random.normal(0, 0.01 * intensity)
                note.start = max(0, note.start + timing_shift)

                # Random velocity jitter clamped to a musical 40..127 range.
                vel_shift = np.random.randint(-10, 10)
                note.velocity = max(40, min(127, note.velocity + vel_shift))

        return midi_data
194
 
195
def process_audio_to_music(files, style, intensity):
    """Convert each uploaded audio file to humanized MIDI plus a WAV preview.

    Returns (midi_paths, preview_audio_path, status_message); the first two
    are None when nothing was processed or an error occurred.
    """
    if not files:
        return None, None, "Please upload audio files"

    converter = MP3ToBetterMusic()
    processed_files = []

    for file in files:
        # Newer Gradio versions pass plain filepath strings; older ones pass
        # objects exposing a .name attribute — accept both.
        path = getattr(file, "name", file)
        try:
            # Convert to MIDI with style-chosen instruments, then humanize.
            midi_data = converter.extract_melody_to_midi(path, style)
            humanized_midi = converter.humanize_midi(midi_data, intensity)

            # mkstemp instead of the deprecated, race-prone mktemp.
            fd, midi_path = tempfile.mkstemp(suffix='_music.mid')
            os.close(fd)
            humanized_midi.write(midi_path)

            # Preview is best-effort: a synthesis failure must not discard an
            # otherwise good MIDI file.
            audio_path = None
            try:
                fd, audio_path = tempfile.mkstemp(suffix='_preview.wav')
                os.close(fd)
                sf.write(audio_path, humanized_midi.synthesize(), 44100)
            except Exception as e:
                print(f"Preview generation failed: {e}")
                audio_path = None

            processed_files.append((midi_path, audio_path))

        except Exception as e:
            return None, None, f"Error: {str(e)}"

    if processed_files:
        # All MIDI paths for download; first file's audio as the preview.
        midi_files = [f[0] for f in processed_files]
        audio_preview = processed_files[0][1]
        return midi_files, audio_preview, f"✅ Created {len(processed_files)} tracks with better sounds!"
    else:
        return None, None, "❌ Processing failed"
232
 
233
# Simple Gradio interface: upload column on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="MP3 to Better Music") as demo:
    gr.Markdown("""
    # 🎵 MP3 to Better Music
    **Convert your MP3 to MIDI with real instrument sounds - no extra software needed!**

    Upload MP3 → Get back MIDI with guitars, synths, drums, etc.
    """)

    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                file_count="multiple",
                file_types=[".mp3", ".wav", ".m4a"],
                label="Upload your MP3 files"
            )

            style = gr.Radio(
                ["melodic", "electronic", "acoustic", "balanced"],
                value="balanced",
                label="Music Style",
                info="Choose the instrument sound you want"
            )

            intensity = gr.Slider(0.1, 1.0, value=0.7, label="Human Feel")

            process_btn = gr.Button("🎵 Create Better Music!", variant="primary")

        with gr.Column():
            file_output = gr.File(
                file_count="multiple",
                label="Download MIDI Files with Better Sounds"
            )

            # Output-only widgets: mark them non-interactive.
            audio_output = gr.Audio(
                label="Music Preview (Hear the better sounds!)",
                type="filepath",
                interactive=False
            )

            status = gr.Textbox(label="Status", interactive=False)

    with gr.Accordion("🎸 What Instruments You Get", open=True):
        gr.Markdown("""
        **Instead of boring piano, you get:**
        - 🎸 **Electric guitars & basses**
        - 🎹 **Synths & electric pianos**
        - 🥁 **Drum kits** (kick, snare, hi-hats)
        - 🎻 **Strings & orchestral sounds**
        - 🎷 **Saxophones & brass**

        **Styles:**
        - **Melodic**: Focus on leads and strings
        - **Electronic**: Synths and electronic drums
        - **Acoustic**: Guitars and real instruments
        - **Balanced**: Mix of everything
        """)

    # Wire the button to the processing function.
    process_btn.click(
        fn=process_audio_to_music,
        inputs=[file_input, style, intensity],
        outputs=[file_output, audio_output, status]
    )

if __name__ == "__main__":
    demo.launch()