Spaces:

FILMITO
/

HumanizeBot

Sleeping

App Files Files Community

FILMITO committed on Nov 23

Commit

3ce0be1

verified ·

1 Parent(s): ea6c345

Update app.py

Browse files

Files changed (1) hide show

app.py +233 -201

app.py CHANGED Viewed

@@ -7,290 +7,322 @@ import librosa
 import soundfile as sf
 from pathlib import Path
-class MP3ToBetterMusic:
     def __init__(self):
-        self.instrument_map = {
-            "melody": [
-                {"name": "Warm Synth", "program": 81},      # Lead synth
-                {"name": "Electric Guitar", "program": 27}, # Clean guitar
-                {"name": "Violin", "program": 40},          # Strings
-                {"name": "Saxophone", "program": 66},       # Sax
             ],
-            "chords": [
-                {"name": "Electric Piano", "program": 5},   # Rhodes
-                {"name": "Pad", "program": 89},            # Warm pad
-                {"name": "Strings", "program": 49},        # String ensemble
             ],
-            "bass": [
-                {"name": "Electric Bass", "program": 33},   # Bass guitar
-                {"name": "Synth Bass", "program": 39},      # Synth bass
             ],
-            "drums": [
-                {"name": "Drum Kit", "program": 0, "is_drum": True},
             ]
         }
-    def convert_to_wav(self, audio_path):
-        """Convert MP3 to WAV for processing"""
         try:
             y, sr = librosa.load(audio_path, sr=22050, mono=True)
-            wav_path = tempfile.mktemp(suffix='.wav')
-            sf.write(wav_path, y, sr)
-            return wav_path, sr
-        except Exception as e:
-            raise Exception(f"Audio conversion failed: {str(e)}")
-    def extract_melody_to_midi(self, audio_path, style="melodic"):
-        """Convert audio to MIDI with style-based extraction"""
-        try:
-            wav_path, sr = self.convert_to_wav(audio_path)
-            y, sr = librosa.load(wav_path, sr=sr)
             midi = pretty_midi.PrettyMIDI()
-            # Choose instruments based on style
-            if style == "melodic":
-                instruments = self.create_instruments(["melody", "chords", "bass"])
-            elif style == "electronic":
-                instruments = self.create_instruments(["synth", "bass", "drums"])
-            elif style == "acoustic":
-                instruments = self.create_instruments(["guitar", "bass", "drums"])
-            else:  # balanced
-                instruments = self.create_instruments(["melody", "chords", "bass", "drums"])
-            # Extract melody and chords
-            melody_notes, chord_notes = self.analyze_audio(y, sr, style)
-            # Assign notes to instruments
-            if melody_notes and instruments.get('melody'):
-                instruments['melody'].notes.extend(melody_notes[:20])  # Limit notes
-            if chord_notes and instruments.get('chords'):
-                instruments['chords'].notes.extend(chord_notes[:15])
-            # Add some basic rhythm if we have drums
-            if instruments.get('drums'):
-                self.add_basic_drums(instruments['drums'], y, sr)
-            # Add instruments to MIDI
-            for instrument in instruments.values():
-                if instrument.notes:
-                    midi.instruments.append(instrument)
-            return midi
         except Exception as e:
-            raise Exception(f"Music extraction failed: {str(e)}")
-    def create_instruments(self, types):
-        """Create instrument objects based on types"""
-        instruments = {}
-        for inst_type in types:
-            if inst_type in self.instrument_map:
-                # Pick a random instrument from the category
-                import random
-                inst_info = random.choice(self.instrument_map[inst_type])
-                instrument = pretty_midi.Instrument(
-                    program=inst_info['program'],
-                    is_drum=inst_info.get('is_drum', False),
-                    name=inst_info['name']
-                )
-                instruments[inst_type] = instrument
-        return instruments
-    def analyze_audio(self, y, sr, style):
-        """Extract melody and chords from audio"""
-        melody_notes = []
-        chord_notes = []
-        # Detect onsets (when notes happen)
-        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, delta=0.1)
-        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
-        # Simple pitch detection for each onset
-        for i, onset_time in enumerate(onset_times[:30]):  # Limit to 30 notes
-            start_idx = int(onset_time * sr)
-            end_idx = min(start_idx + int(0.4 * sr), len(y))
-            if end_idx > start_idx:
-                segment = y[start_idx:end_idx]
-                freq = self.detect_pitch(segment, sr)
-                if 100 < freq < 800:  # Reasonable pitch range
-                    midi_note = int(69 + 12 * np.log2(freq / 440.0))
-                    if 48 <= midi_note <= 84:  # Good MIDI range
-                        velocity = np.random.randint(70, 100)
-                        note = pretty_midi.Note(
-                            velocity=velocity,
-                            pitch=midi_note,
-                            start=onset_time,
-                            end=onset_time + 0.5
-                        )
-                        # Separate melody and chords
-                        if i % 3 == 0:  # Every 3rd note as melody
-                            melody_notes.append(note)
-                        else:  # Others as chords
-                            chord_notes.append(note)
-        return melody_notes, chord_notes
-    def detect_pitch(self, segment, sr):
-        """Simple pitch detection"""
-        try:
-            # Use FFT to find dominant frequency
-            fft = np.fft.rfft(segment * np.hanning(len(segment)))
-            freqs = np.fft.rfftfreq(len(segment), 1/sr)
-            mags = np.abs(fft)
-            # Find peak in reasonable frequency range
-            mask = (freqs > 80) & (freqs < 1000)
-            if np.any(mask):
-                peak_idx = np.argmax(mags[mask])
-                return freqs[mask][peak_idx]
-        except:
-            pass
-        return 440  # Default to A4
-    def add_basic_drums(self, drum_instrument, y, sr):
-        """Add simple drum pattern"""
-        try:
-            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
-            beat_times = librosa.frames_to_time(beats, sr=sr)
-            # Add kick on strong beats, snare on off-beats
-            for i, beat_time in enumerate(beat_times[:16]):  # First 16 beats
-                # Kick drum on beats 1 and 3
-                if i % 4 in [0, 2]:
                     note = pretty_midi.Note(
-                        velocity=90, pitch=36, start=beat_time, end=beat_time + 0.2
                     )
-                    drum_instrument.notes.append(note)
-                # Snare on beats 2 and 4
-                if i % 4 in [1, 3]:
                     note = pretty_midi.Note(
-                        velocity=80, pitch=38, start=beat_time, end=beat_time + 0.2
                     )
-                    drum_instrument.notes.append(note)
-        except:
-            pass
-    def humanize_midi(self, midi_data, intensity=0.7):
-        """Add human feel to the MIDI"""
-        for instrument in midi_data.instruments:
             for note in instrument.notes:
                 # Humanize timing
-                timing_shift = np.random.normal(0, 0.01 * intensity)
                 note.start = max(0, note.start + timing_shift)
                 # Humanize velocity
-                vel_shift = np.random.randint(-10, 10)
-                note.velocity = max(40, min(127, note.velocity + vel_shift))
-        return midi_data
-def process_audio_to_music(files, style, intensity):
-    """Main processing function"""
     if not files:
-        return None, None, "Please upload audio files"
-    converter = MP3ToBetterMusic()
-    processed_files = []
     for file in files:
         try:
-            # Convert to MIDI with better sounds
-            midi_data = converter.extract_melody_to_midi(file.name, style)
-            # Humanize
-            humanized_midi = converter.humanize_midi(midi_data, intensity)
-            # Save MIDI
-            midi_path = tempfile.mktemp(suffix='_music.mid')
-            humanized_midi.write(midi_path)
-            # Create audio preview with better sounds
-            audio_path = tempfile.mktemp(suffix='_preview.wav')
-            audio_data = humanized_midi.synthesize()
-            sf.write(audio_path, audio_data, 44100)
-            processed_files.append((midi_path, audio_path))
         except Exception as e:
-            return None, None, f"Error: {str(e)}"
-    if processed_files:
-        # Return first file's audio and all MIDI files
-        midi_files = [f[0] for f in processed_files]
-        audio_preview = processed_files[0][1]
-        return midi_files, audio_preview, f"✅ Created {len(processed_files)} tracks with better sounds!"
     else:
-        return None, None, "❌ Processing failed"
-# Simple interface
-with gr.Blocks(theme=gr.themes.Soft(), title="MP3 to Better Music") as demo:
     gr.Markdown("""
-    # 🎵 MP3 to Better Music
-    **Convert your MP3 to MIDI with real instrument sounds - no extra software needed!**
-    Upload MP3 → Get back MIDI with guitars, synths, drums, etc.
     """)
     with gr.Row():
         with gr.Column():
             file_input = gr.File(
                 file_count="multiple",
                 file_types=[".mp3", ".wav", ".m4a"],
-                label="Upload your MP3 files"
             )
             style = gr.Radio(
-                ["melodic", "electronic", "acoustic", "balanced"],
-                value="balanced",
-                label="Music Style",
-                info="Choose the instrument sound you want"
             )
-            intensity = gr.Slider(0.1, 1.0, value=0.7, label="Human Feel")
-            process_btn = gr.Button("🎵 Create Better Music!", variant="primary")
         with gr.Column():
             file_output = gr.File(
                 file_count="multiple",
-                label="Download MIDI Files with Better Sounds"
             )
             audio_output = gr.Audio(
-                label="Music Preview (Hear the better sounds!)",
                 type="filepath"
             )
             status = gr.Textbox(label="Status")
-    with gr.Accordion("🎸 What Instruments You Get", open=True):
         gr.Markdown("""
-        **Instead of boring piano, you get:**
-        - 🎸 **Electric guitars & basses**
-        - 🎹 **Synths & electric pianos**
-        - 🥁 **Drum kits** (kick, snare, hi-hats)
-        - 🎻 **Strings & orchestral sounds**
-        - 🎷 **Saxophones & brass**
-        **Styles:**
-        - **Melodic**: Focus on leads and strings
-        - **Electronic**: Synths and electronic drums
-        - **Acoustic**: Guitars and real instruments
-        - **Balanced**: Mix of everything
         """)
     process_btn.click(
-        fn=process_audio_to_music,
         inputs=[file_input, style, intensity],
-        outputs=[file_output, audio_output, status]
     )
 if __name__ == "__main__":

 import soundfile as sf
 from pathlib import Path
+class MP3ToHumanizedMP3:
     def __init__(self):
+        self.instrument_sets = {
+            "pop": [
+                {"program": 0, "name": "Drums", "is_drum": True, "volume": 0.8},
+                {"program": 33, "name": "Bass Guitar", "is_drum": False, "volume": 0.7},
+                {"program": 25, "name": "Acoustic Guitar", "is_drum": False, "volume": 0.6},
+                {"program": 1, "name": "Piano", "is_drum": False, "volume": 0.5},
             ],
+            "electronic": [
+                {"program": 0, "name": "Drums", "is_drum": True, "volume": 0.9},
+                {"program": 39, "name": "Synth Bass", "is_drum": False, "volume": 0.8},
+                {"program": 81, "name": "Lead Synth", "is_drum": False, "volume": 0.7},
+                {"program": 89, "name": "Pad", "is_drum": False, "volume": 0.4},
             ],
+            "rock": [
+                {"program": 0, "name": "Drums", "is_drum": True, "volume": 0.9},
+                {"program": 33, "name": "Bass", "is_drum": False, "volume": 0.7},
+                {"program": 30, "name": "Distortion Guitar", "is_drum": False, "volume": 0.8},
+                {"program": 27, "name": "Clean Guitar", "is_drum": False, "volume": 0.6},
             ],
+            "cinematic": [
+                {"program": 0, "name": "Drums", "is_drum": True, "volume": 0.6},
+                {"program": 48, "name": "String Ensemble", "is_drum": False, "volume": 0.8},
+                {"program": 61, "name": "French Horn", "is_drum": False, "volume": 0.7},
+                {"program": 5, "name": "Electric Piano", "is_drum": False, "volume": 0.5},
             ]
         }
+    def create_full_song(self, audio_path, style="pop", intensity=0.7):
+        """Convert MP3 to a complete humanized song"""
         try:
+            # Load and analyze original audio
             y, sr = librosa.load(audio_path, sr=22050, mono=True)
+            # Create MIDI structure
             midi = pretty_midi.PrettyMIDI()
+            # Add instruments based on style
+            for inst_info in self.instrument_sets[style]:
+                instrument = pretty_midi.Instrument(
+                    program=inst_info["program"],
+                    is_drum=inst_info["is_drum"],
+                    name=inst_info["name"]
+                )
+                midi.instruments.append(instrument)
+            # Extract musical elements from audio
+            self.extract_music_to_instruments(midi, y, sr, style, intensity)
+            # Humanize the performance
+            self.humanize_performance(midi, intensity)
+            # Synthesize to audio
+            audio_data = midi.synthesize()
+            return audio_data, sr
         except Exception as e:
+            raise Exception(f"Song creation failed: {str(e)}")
+    def extract_music_to_instruments(self, midi, y, sr, style, intensity):
+        """Extract different musical parts and assign to instruments"""
+        # Get tempo and beats
+        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
+        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
+        # Detect melody/pitch content
+        f0, voiced_flag, voiced_probs = librosa.pyin(
+            y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C6'), sr=sr
+        )
+        times = librosa.times_like(f0, sr=sr)
+        # Assign notes to instruments based on style
+        instruments = midi.instruments
+        # Add drum pattern
+        drum_instrument = next((inst for inst in instruments if inst.is_drum), None)
+        if drum_instrument and len(beat_times) > 0:
+            self.add_drum_pattern(drum_instrument, beat_times, style)
+        # Add bass line to bass instrument
+        bass_instrument = next((inst for inst in instruments if not inst.is_drum and 32 <= inst.program <= 39), None)
+        if bass_instrument and len(beat_times) > 0:
+            self.add_bass_line(bass_instrument, beat_times, f0, times, voiced_flag)
+        # Add melody to lead instrument
+        lead_instrument = next((inst for inst in instruments if not inst.is_drum and inst.program not in range(32, 40)), None)
+        if lead_instrument:
+            self.add_melody(lead_instrument, f0, times, voiced_flag, intensity)
+        # Add chords/pads to remaining instruments
+        other_instruments = [inst for inst in instruments if not inst.is_drum and inst != bass_instrument and inst != lead_instrument]
+        for inst in other_instruments:
+            self.add_harmony(inst, beat_times, f0, times, style)
+    def add_drum_pattern(self, drums, beat_times, style):
+        """Add style-appropriate drum pattern"""
+        for i, beat_time in enumerate(beat_times[:32]):  # First 32 beats
+            # Kick on strong beats
+            if i % 4 == 0:
+                note = pretty_midi.Note(
+                    velocity=90, pitch=36, start=beat_time, end=beat_time + 0.3
+                )
+                drums.notes.append(note)
+            # Snare on the third beat of each 4-beat group (i % 4 == 2)
+            if i % 4 in [2]:
+                note = pretty_midi.Note(
+                    velocity=80, pitch=38, start=beat_time, end=beat_time + 0.2
+                )
+                drums.notes.append(note)
+            # Hi-hats
+            if style in ["electronic", "pop"]:
+                note = pretty_midi.Note(
+                    velocity=70, pitch=42, start=beat_time, end=beat_time + 0.1
+                )
+                drums.notes.append(note)
+    def add_bass_line(self, bass, beat_times, f0, times, voiced_flag):
+        """Add simple bass line"""
+        if len(f0) == 0:
+            return
+        for i, beat_time in enumerate(beat_times[:16]):
+            if i % 2 == 0:  # Every other beat
+                # Find a pitch around this time
+                time_idx = min(int(beat_time * 100), len(f0) - 1)
+                if voiced_flag[time_idx] and not np.isnan(f0[time_idx]):
+                    midi_note = int(69 + 12 * np.log2(f0[time_idx] / 440.0))
+                    # Put in bass range
+                    bass_note = max(36, min(55, midi_note - 12))
                     note = pretty_midi.Note(
+                        velocity=80,
+                        pitch=bass_note,
+                        start=beat_time,
+                        end=beat_time + 0.8
                     )
+                    bass.notes.append(note)
+    def add_melody(self, lead, f0, times, voiced_flag, intensity):
+        """Extract and add melody"""
+        if len(f0) == 0:
+            return
+        note_start = None
+        current_pitch = None
+        for i, (time, freq, voiced) in enumerate(zip(times, f0, voiced_flag)):
+            if voiced and not np.isnan(freq):
+                midi_note = int(69 + 12 * np.log2(freq / 440.0))
+                if 60 <= midi_note <= 84:  # Good melody range
+                    if current_pitch != midi_note:
+                        if current_pitch is not None and note_start is not None:
+                            # End previous note
+                            note = pretty_midi.Note(
+                                velocity=np.random.randint(70, 90),
+                                pitch=current_pitch,
+                                start=note_start,
+                                end=time
+                            )
+                            lead.notes.append(note)
+                        # Start new note
+                        current_pitch = midi_note
+                        note_start = time
+            else:
+                if current_pitch is not None and note_start is not None:
+                    # End current note
+                    note = pretty_midi.Note(
+                        velocity=np.random.randint(70, 90),
+                        pitch=current_pitch,
+                        start=note_start,
+                        end=time
+                    )
+                    lead.notes.append(note)
+                    current_pitch = None
+                    note_start = None
+    def add_harmony(self, instrument, beat_times, f0, times, style):
+        """Add chordal harmony"""
+        for i, beat_time in enumerate(beat_times[:8]):
+            if i % 2 == 0:  # Every 2 beats
+                # Simple chord based on style
+                if style == "pop":
+                    chord_notes = [60, 64, 67]  # C Major
+                elif style == "electronic":
+                    chord_notes = [65, 69, 72]  # F Major
+                elif style == "rock":
+                    chord_notes = [59, 62, 65]  # B diminished (B-D-F); B minor would use 66 (F#)
+                else:
+                    chord_notes = [60, 64, 67]  # C Major
+                for note_pitch in chord_notes:
                     note = pretty_midi.Note(
+                        velocity=60,
+                        pitch=note_pitch,
+                        start=beat_time,
+                        end=beat_time + 1.0
                     )
+                    instrument.notes.append(note)
+    def humanize_performance(self, midi, intensity):
+        """Add human feel to all instruments"""
+        for instrument in midi.instruments:
             for note in instrument.notes:
                 # Humanize timing
+                timing_shift = np.random.normal(0, 0.02 * intensity)
                 note.start = max(0, note.start + timing_shift)
                 # Humanize velocity
+                vel_shift = np.random.randint(-15, 15)
+                note.velocity = max(40, min(127, note.velocity + int(vel_shift * intensity)))
+                # Humanize duration (except drums)
+                if not instrument.is_drum:
+                    duration_shift = np.random.normal(0, 0.1 * intensity)
+                    note.end = max(note.start + 0.1, note.end + duration_shift)
+def process_to_mp3(files, style, intensity):
+    """Process audio files and return MP3 results"""
     if not files:
+        return None, "Please upload audio files"
+    converter = MP3ToHumanizedMP3()
+    output_files = []
     for file in files:
         try:
+            # Create full humanized song
+            audio_data, sr = converter.create_full_song(file.name, style, intensity)
+            # Save as MP3
+            mp3_path = tempfile.mktemp(suffix='_humanized.mp3')
+            sf.write(mp3_path, audio_data, sr)
+            output_files.append(mp3_path)
         except Exception as e:
+            return None, f"Error processing {file.name}: {str(e)}"
+    if output_files:
+        return output_files, f"✅ Created {len(output_files)} humanized songs!"
     else:
+        return None, "❌ No files were processed successfully"
+# Simple interface focused on MP3 output
+with gr.Blocks(theme=gr.themes.Soft(), title="MP3 Humanizer") as demo:
     gr.Markdown("""
+    # 🎵 MP3 Humanizer
+    **Convert your AI music to human-sounding MP3 songs!**
+    Upload MP3 → Get back humanized MP3 with full instrumentation
     """)
     with gr.Row():
         with gr.Column():
+            gr.Markdown("### 📁 Upload Your AI Music")
             file_input = gr.File(
                 file_count="multiple",
                 file_types=[".mp3", ".wav", ".m4a"],
+                label="Upload your AI-generated music"
             )
             style = gr.Radio(
+                ["pop", "electronic", "rock", "cinematic"],
+                value="pop",
+                label="🎵 Music Style",
+                info="Choose the style for your humanized song"
             )
+            intensity = gr.Slider(
+                0.1, 1.0, value=0.7,
+                label="🎛️ Humanization Intensity",
+                info="How much human feel to add"
+            )
+            process_btn = gr.Button("✨ Create Humanized MP3", variant="primary")
         with gr.Column():
+            gr.Markdown("### 📥 Download Humanized Songs")
             file_output = gr.File(
                 file_count="multiple",
+                label="Download Your Humanized MP3 Files"
             )
             audio_output = gr.Audio(
+                label="Preview Your Humanized Song",
                 type="filepath"
             )
             status = gr.Textbox(label="Status")
+    with gr.Accordion("🎸 What You'll Get", open=True):
         gr.Markdown("""
+        **Instead of just piano, you get full songs with:**
+        **Pop Style:** Drums, Bass Guitar, Acoustic Guitar, Piano
+        **Electronic:** Electronic Drums, Synth Bass, Lead Synth, Pads
+        **Rock:** Drums, Bass, Electric Guitars
+        **Cinematic:** Orchestral Drums, Strings, Horns, Piano
+        **Output:** Complete MP3 files ready to use!
         """)
     process_btn.click(
+        fn=process_to_mp3,
         inputs=[file_input, style, intensity],
+        outputs=[file_output, status]
+    ).then(
+        lambda files: files[0] if files else None,
+        inputs=[file_output],
+        outputs=[audio_output]
     )
 if __name__ == "__main__":