FILMITO committed on
Commit
0e91831
Β·
verified Β·
1 Parent(s): 802e2f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -216
app.py CHANGED
@@ -6,308 +6,292 @@ import os
6
  import librosa
7
  import soundfile as sf
8
  from pathlib import Path
9
- import subprocess
10
- import io
11
 
12
- class MP3ToHumanized:
13
  def __init__(self):
14
- self.groove_profiles = {
15
- "drums": {"timing_var": 0.02, "velocity_var": 15},
16
- "melody": {"timing_var": 0.01, "velocity_var": 10},
17
- "bass": {"timing_var": 0.015, "velocity_var": 12},
18
- "chords": {"timing_var": 0.008, "velocity_var": 8},
19
- "other": {"timing_var": 0.01, "velocity_var": 10}
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
21
 
22
  def convert_to_wav(self, audio_path):
23
- """Convert any audio format to WAV using librosa"""
24
  try:
25
- # Load audio with librosa (handles MP3, WAV, etc.)
26
  y, sr = librosa.load(audio_path, sr=22050, mono=True)
27
-
28
- # Save as temporary WAV file
29
  wav_path = tempfile.mktemp(suffix='.wav')
30
  sf.write(wav_path, y, sr)
31
  return wav_path, sr
32
  except Exception as e:
33
  raise Exception(f"Audio conversion failed: {str(e)}")
34
 
35
- def simple_audio_to_midi(self, audio_path):
36
- """Simple but effective audio to MIDI conversion"""
37
  try:
38
- # Convert to WAV first
39
  wav_path, sr = self.convert_to_wav(audio_path)
40
-
41
- # Load the converted audio
42
  y, sr = librosa.load(wav_path, sr=sr)
43
 
44
- # Create MIDI object
45
  midi = pretty_midi.PrettyMIDI()
46
- instrument = pretty_midi.Instrument(program=0) # Acoustic Grand Piano
47
 
48
- # Method 1: Onset detection with pitch estimation
49
- onset_frames = librosa.onset.onset_detect(
50
- y=y, sr=sr,
51
- hop_length=512,
52
- backtrack=True,
53
- delta=0.2
54
- )
55
- onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
 
56
 
57
- # Get tempo for musical timing
58
- tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='time')
59
 
60
- notes_added = 0
61
- for i, onset_time in enumerate(onset_times):
62
- if notes_added >= 50: # Limit notes to avoid clutter
63
- break
64
-
65
- # Extract a segment around the onset for pitch detection
66
- start_idx = int(onset_time * sr)
67
- end_idx = min(start_idx + int(0.3 * sr), len(y)) # 300ms window
68
-
69
- if end_idx > start_idx:
70
- segment = y[start_idx:end_idx]
71
-
72
- # Simple pitch detection using FFT
73
- frequencies, magnitudes = self.simple_pitch_detection(segment, sr)
74
-
75
- if len(frequencies) > 0:
76
- # Take the strongest frequency
77
- main_freq = frequencies[np.argmax(magnitudes)]
78
-
79
- if 80 < main_freq < 1000: # Reasonable frequency range
80
- midi_note = self.freq_to_midi(main_freq)
81
-
82
- if 48 <= midi_note <= 84: # C3 to C6 range
83
- # Create note
84
- note = pretty_midi.Note(
85
- velocity=np.random.randint(70, 100),
86
- pitch=midi_note,
87
- start=onset_time,
88
- end=onset_time + 0.4 # 400ms duration
89
- )
90
- instrument.notes.append(note)
91
- notes_added += 1
92
 
93
- # If we didn't get enough notes, add some rhythmic elements
94
- if notes_added < 10 and len(beat_frames) > 0:
95
- drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
96
- for beat_time in beat_frames[:8]: # First 8 beats
97
- note = pretty_midi.Note(
98
- velocity=80,
99
- pitch=36, # Kick drum
100
- start=beat_time,
101
- end=beat_time + 0.2
102
- )
103
- drum_instrument.notes.append(note)
104
- midi.instruments.append(drum_instrument)
105
 
106
- if len(instrument.notes) > 0:
107
- midi.instruments.append(instrument)
 
 
108
 
109
  return midi
110
 
111
  except Exception as e:
112
- raise Exception(f"MIDI conversion failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- def simple_pitch_detection(self, segment, sr):
115
- """Simple FFT-based pitch detection"""
116
- # Apply windowing
117
- window = np.hanning(len(segment))
118
- segment = segment * window
119
 
120
- # FFT
121
- fft = np.fft.rfft(segment)
122
- magnitudes = np.abs(fft)
123
- frequencies = np.fft.rfftfreq(len(segment), 1/sr)
124
 
125
- # Filter reasonable frequencies
126
- mask = (frequencies > 80) & (frequencies < 1000)
127
- return frequencies[mask], magnitudes[mask]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- def freq_to_midi(self, frequency):
130
- """Convert frequency to MIDI note number"""
131
- return int(69 + 12 * np.log2(frequency / 440.0))
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- def humanize_midi(self, midi_data, intensity=0.7):
134
- """Apply humanization to MIDI"""
135
- if len(midi_data.instruments) == 0:
136
- return midi_data
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  for instrument in midi_data.instruments:
139
- inst_type = "drums" if instrument.is_drum else "melody"
140
- profile = self.groove_profiles[inst_type]
141
-
142
  for note in instrument.notes:
143
  # Humanize timing
144
- timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
145
  note.start = max(0, note.start + timing_shift)
146
 
147
- # Humanize duration
148
- if not instrument.is_drum:
149
- duration_shift = np.random.normal(0, profile["timing_var"] * 0.5 * intensity)
150
- note.end = max(note.start + 0.1, note.end + duration_shift)
151
-
152
  # Humanize velocity
153
- vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
154
- new_velocity = note.velocity + int(vel_shift * intensity)
155
- note.velocity = max(40, min(127, new_velocity))
156
 
157
  return midi_data
158
 
159
- def process_audio_files(files, intensity):
 
160
  if not files:
161
- return None, None, "Please upload audio files (MP3, WAV, etc.)"
162
-
163
- # Show what files we received
164
- file_info = f"Received {len(files)} files: {[f.name for f in files]}"
165
- print(file_info)
166
 
167
- converter = MP3ToHumanized()
168
  processed_files = []
169
 
170
  for file in files:
171
  try:
172
- # Check file type
173
- file_ext = Path(file.name).suffix.lower()
174
- print(f"Processing {file.name} (extension: {file_ext})")
175
 
176
- # Convert audio to MIDI
177
- midi_data = converter.simple_audio_to_midi(file.name)
178
 
179
- if len(midi_data.instruments) == 0 or sum(len(instr.notes) for instr in midi_data.instruments) == 0:
180
- return None, None, f"❌ Could not extract musical content from {file.name}. Try a different audio file with clear melody."
 
181
 
182
- # Humanize the MIDI
183
- humanized_midi = converter.humanize_midi(midi_data, intensity)
 
 
184
 
185
- # Save humanized MIDI
186
- output_path = tempfile.mktemp(suffix='_humanized.mid')
187
- humanized_midi.write(output_path)
188
- processed_files.append(output_path)
189
 
190
  except Exception as e:
191
- error_msg = f"Error processing {file.name}: {str(e)}"
192
- print(error_msg)
193
- return None, None, error_msg
194
 
195
  if processed_files:
196
- # Create audio preview
197
- preview_audio = None
198
- try:
199
- midi_data = pretty_midi.PrettyMIDI(processed_files[0])
200
- audio_data = midi_data.synthesize()
201
- preview_path = tempfile.mktemp(suffix='_preview.wav')
202
- sf.write(preview_path, audio_data, 44100)
203
- preview_audio = preview_path
204
- except Exception as e:
205
- print(f"Preview generation failed: {e}")
206
- preview_audio = None
207
-
208
- success_msg = f"βœ… Successfully processed {len(processed_files)} files! Converted audio to MIDI and applied humanization."
209
- return processed_files, preview_audio, success_msg
210
  else:
211
- return None, None, "❌ No files were processed successfully."
212
 
213
- # Create Gradio interface
214
- with gr.Blocks(theme=gr.themes.Soft(), title="Audio HumanizeBot") as demo:
215
  gr.Markdown("""
216
- # 🎡 Audio HumanizeBot
217
- **Convert MP3/Audio to humanized MIDI - Remove AI traces from your music!**
218
 
219
- Upload audio files from AI music generators and get humanized MIDI back.
220
  """)
221
 
222
  with gr.Row():
223
- with gr.Column(scale=1):
224
- gr.Markdown("### πŸ“ Upload Audio Files")
225
-
226
  file_input = gr.File(
227
  file_count="multiple",
228
- file_types=[".mp3", ".wav", ".m4a", ".ogg", ".flac"],
229
- label="Upload your AI-generated audio files",
230
- type="filepath"
231
- )
232
-
233
- intensity = gr.Slider(
234
- 0.1, 1.0,
235
- value=0.7,
236
- label="🎚️ Humanization Intensity",
237
- info="How much human feel to add"
238
  )
239
 
240
- process_btn = gr.Button(
241
- "✨ Convert & Humanize Audio!",
242
- variant="primary",
243
- size="lg"
 
244
  )
245
 
246
- gr.Markdown("""
247
- **Supported formats:** MP3, WAV, M4A, OGG, FLAC
248
 
249
- **Works best with:**
250
- - Clear melodic content
251
- - AI-generated music
252
- - Not too much reverb/effects
253
- - 10-30 second clips
254
- """)
255
 
256
- with gr.Column(scale=1):
257
- gr.Markdown("### πŸ“₯ Download Results")
258
-
259
  file_output = gr.File(
260
  file_count="multiple",
261
- label="Download Humanized MIDI Files"
262
  )
263
 
264
  audio_output = gr.Audio(
265
- label="MIDI Audio Preview",
266
- interactive=False,
267
  type="filepath"
268
  )
269
 
270
- status = gr.Textbox(
271
- label="Status",
272
- interactive=False,
273
- max_lines=5
274
- )
275
 
276
- with gr.Accordion("🎯 Tips for Best Results", open=True):
277
  gr.Markdown("""
278
- **For best conversion:**
279
- - Use clear AI-generated music with obvious melodies
280
- - Avoid heavily processed/remixed tracks
281
- - 10-30 second clips work better than full songs
282
- - Instrumental music converts better than vocal-heavy tracks
283
-
284
- **What to expect:**
285
- - The MIDI will capture the main melodic and rhythmic ideas
286
- - You can import the MIDI into any DAW (FL Studio, Ableton, etc.)
287
- - Use high-quality instrument sounds in your DAW for best results
288
- - The humanization adds natural timing and velocity variations
289
 
290
- **Limitations:**
291
- - Complex arrangements may not convert perfectly
292
- - Audio-to-MIDI is an approximation
293
- - Very ambient or effect-heavy music may not work well
294
- """)
295
-
296
- # Examples section
297
- with gr.Accordion("🎡 Try These Example Files", open=False):
298
- gr.Markdown("""
299
- **Test with these types of audio:**
300
- - AI piano melodies
301
- - Simple electronic beats
302
- - Clear synth lines
303
- - Drum patterns from AI generators
304
  """)
305
 
306
  process_btn.click(
307
- fn=process_audio_files,
308
- inputs=[file_input, intensity],
309
  outputs=[file_output, audio_output, status]
310
  )
311
 
312
  if __name__ == "__main__":
313
- demo.launch(debug=True)
 
6
  import librosa
7
  import soundfile as sf
8
  from pathlib import Path
 
 
9
 
10
class MP3ToBetterMusic:
    """Convert audio files to MIDI with varied (non-piano) instrument sounds."""

    # Style categories that are not literal keys of instrument_map; they
    # borrow an existing category's instruments.  Without this, the
    # "electronic"/"acoustic" styles requested "synth"/"guitar", got no
    # melody instrument back, and silently dropped every melody note.
    _CATEGORY_ALIASES = {"synth": "melody", "guitar": "melody"}

    def __init__(self):
        # General MIDI program numbers grouped by musical role; one entry per
        # category is picked at random when instruments are created.
        self.instrument_map = {
            "melody": [
                {"name": "Warm Synth", "program": 81},       # Lead synth
                {"name": "Electric Guitar", "program": 27},  # Clean guitar
                {"name": "Violin", "program": 40},           # Strings
                {"name": "Saxophone", "program": 66},        # Sax
            ],
            "chords": [
                {"name": "Electric Piano", "program": 5},    # Rhodes
                {"name": "Pad", "program": 89},              # Warm pad
                {"name": "Strings", "program": 49},          # String ensemble
            ],
            "bass": [
                {"name": "Electric Bass", "program": 33},    # Bass guitar
                {"name": "Synth Bass", "program": 39},       # Synth bass
            ],
            "drums": [
                {"name": "Drum Kit", "program": 0, "is_drum": True},
            ],
        }

    def convert_to_wav(self, audio_path):
        """Convert any supported audio file to a temporary mono 22.05 kHz WAV.

        Returns (wav_path, sample_rate); raises Exception on failure.
        """
        try:
            y, sr = librosa.load(audio_path, sr=22050, mono=True)
            # mkstemp instead of the deprecated, race-prone mktemp.
            fd, wav_path = tempfile.mkstemp(suffix='.wav')
            os.close(fd)
            sf.write(wav_path, y, sr)
            return wav_path, sr
        except Exception as e:
            raise Exception(f"Audio conversion failed: {str(e)}")

    def extract_melody_to_midi(self, audio_path, style="melodic"):
        """Convert audio to a PrettyMIDI object using style-based instruments.

        style selects which instrument categories are created; only
        instruments that end up with notes are attached to the MIDI object.
        Raises Exception (with context) on any failure.
        """
        try:
            wav_path, sr = self.convert_to_wav(audio_path)
            y, sr = librosa.load(wav_path, sr=sr)

            midi = pretty_midi.PrettyMIDI()

            # Choose instrument categories based on the requested style.
            if style == "melodic":
                instruments = self.create_instruments(["melody", "chords", "bass"])
            elif style == "electronic":
                instruments = self.create_instruments(["synth", "bass", "drums"])
            elif style == "acoustic":
                instruments = self.create_instruments(["guitar", "bass", "drums"])
            else:  # balanced
                instruments = self.create_instruments(["melody", "chords", "bass", "drums"])

            # Extract note events and split them into melody vs. chords.
            melody_notes, chord_notes = self.analyze_audio(y, sr, style)

            if melody_notes and instruments.get('melody'):
                instruments['melody'].notes.extend(melody_notes[:20])  # Limit notes

            if chord_notes and instruments.get('chords'):
                instruments['chords'].notes.extend(chord_notes[:15])

            # Add a basic rhythm track when the style includes drums.
            if instruments.get('drums'):
                self.add_basic_drums(instruments['drums'], y, sr)

            # Only keep instruments that actually received notes.
            for instrument in instruments.values():
                if instrument.notes:
                    midi.instruments.append(instrument)

            return midi

        except Exception as e:
            raise Exception(f"Music extraction failed: {str(e)}")

    def create_instruments(self, types):
        """Create one pretty_midi.Instrument per requested category.

        Categories with an alias (e.g. "synth", "guitar") are resolved to a
        real instrument_map key and stored under that resolved key, so the
        downstream lookups ('melody', 'chords', ...) still find them.
        Unknown categories are skipped.
        """
        instruments = {}
        for inst_type in types:
            key = self._CATEGORY_ALIASES.get(inst_type, inst_type)
            options = self.instrument_map.get(key)
            if not options:
                continue
            # Random pick via numpy (already imported) instead of importing
            # `random` inside the loop.
            inst_info = options[np.random.randint(len(options))]
            instruments[key] = pretty_midi.Instrument(
                program=inst_info['program'],
                is_drum=inst_info.get('is_drum', False),
                name=inst_info['name'],
            )
        return instruments

    def analyze_audio(self, y, sr, style):
        """Detect note events in y and return (melody_notes, chord_notes).

        `style` is currently unused but kept for interface stability.
        """
        melody_notes = []
        chord_notes = []

        # Detect onsets (moments where new notes start).
        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, delta=0.1)
        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)

        # Simple pitch detection for each onset, capped at 30 notes.
        for i, onset_time in enumerate(onset_times[:30]):
            start_idx = int(onset_time * sr)
            end_idx = min(start_idx + int(0.4 * sr), len(y))  # 400 ms window

            if end_idx > start_idx:
                segment = y[start_idx:end_idx]
                freq = self.detect_pitch(segment, sr)

                if 100 < freq < 800:  # Reasonable pitch range
                    midi_note = int(69 + 12 * np.log2(freq / 440.0))

                    if 48 <= midi_note <= 84:  # Good MIDI range (C3..C6)
                        note = pretty_midi.Note(
                            velocity=np.random.randint(70, 100),
                            pitch=midi_note,
                            start=onset_time,
                            end=onset_time + 0.5,
                        )
                        # Every 3rd detected note becomes melody, the rest chords.
                        if i % 3 == 0:
                            melody_notes.append(note)
                        else:
                            chord_notes.append(note)

        return melody_notes, chord_notes

    def detect_pitch(self, segment, sr):
        """Return the dominant frequency (Hz) of segment, or 440.0 on failure."""
        try:
            # FFT of the Hann-windowed segment to find the dominant frequency.
            fft = np.fft.rfft(segment * np.hanning(len(segment)))
            freqs = np.fft.rfftfreq(len(segment), 1 / sr)
            mags = np.abs(fft)

            # Search only a musically reasonable band.
            mask = (freqs > 80) & (freqs < 1000)
            if np.any(mask):
                peak_idx = np.argmax(mags[mask])
                return freqs[mask][peak_idx]
        except Exception:
            pass  # fall through to the default pitch
        return 440  # Default to A4

    def add_basic_drums(self, drum_instrument, y, sr):
        """Append a simple kick/snare pattern aligned to the tracked beats."""
        try:
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
            beat_times = librosa.frames_to_time(beats, sr=sr)

            for i, beat_time in enumerate(beat_times[:16]):  # First 16 beats
                # Kick drum (GM note 36) on beats 1 and 3.
                if i % 4 in (0, 2):
                    drum_instrument.notes.append(pretty_midi.Note(
                        velocity=90, pitch=36, start=beat_time, end=beat_time + 0.2
                    ))
                # Snare (GM note 38) on beats 2 and 4.
                if i % 4 in (1, 3):
                    drum_instrument.notes.append(pretty_midi.Note(
                        velocity=80, pitch=38, start=beat_time, end=beat_time + 0.2
                    ))
        except Exception:
            # Drums are optional decoration; never fail the conversion for them.
            pass

    def humanize_midi(self, midi_data, intensity=0.7):
        """Jitter note timing and velocity in place to add human feel.

        Returns the same PrettyMIDI object, mutated.
        """
        for instrument in midi_data.instruments:
            for note in instrument.notes:
                # Small Gaussian timing jitter; notes never start before 0.
                timing_shift = np.random.normal(0, 0.01 * intensity)
                note.start = max(0, note.start + timing_shift)

                # Random velocity jitter clamped to a musical 40..127 range.
                vel_shift = np.random.randint(-10, 10)
                note.velocity = max(40, min(127, note.velocity + vel_shift))

        return midi_data
194
 
195
def process_audio_to_music(files, style, intensity):
    """Convert each uploaded audio file to humanized MIDI plus a WAV preview.

    Returns (midi_paths, preview_audio_path, status_message); the first two
    are None when nothing was processed or an error occurred.
    """
    if not files:
        return None, None, "Please upload audio files"

    converter = MP3ToBetterMusic()
    processed_files = []

    for file in files:
        # Newer Gradio versions pass plain filepath strings; older ones pass
        # objects exposing a .name attribute — accept both.
        path = getattr(file, "name", file)
        try:
            # Convert to MIDI with style-chosen instruments, then humanize.
            midi_data = converter.extract_melody_to_midi(path, style)
            humanized_midi = converter.humanize_midi(midi_data, intensity)

            # mkstemp instead of the deprecated, race-prone mktemp.
            fd, midi_path = tempfile.mkstemp(suffix='_music.mid')
            os.close(fd)
            humanized_midi.write(midi_path)

            # Preview is best-effort: a synthesis failure must not discard an
            # otherwise good MIDI file.
            audio_path = None
            try:
                fd, audio_path = tempfile.mkstemp(suffix='_preview.wav')
                os.close(fd)
                sf.write(audio_path, humanized_midi.synthesize(), 44100)
            except Exception as e:
                print(f"Preview generation failed: {e}")
                audio_path = None

            processed_files.append((midi_path, audio_path))

        except Exception as e:
            return None, None, f"Error: {str(e)}"

    if processed_files:
        # All MIDI paths for download; first file's audio as the preview.
        midi_files = [f[0] for f in processed_files]
        audio_preview = processed_files[0][1]
        return midi_files, audio_preview, f"✅ Created {len(processed_files)} tracks with better sounds!"
    else:
        return None, None, "❌ Processing failed"
232
 
233
# Simple Gradio interface: upload column on the left, results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="MP3 to Better Music") as demo:
    gr.Markdown("""
    # 🎵 MP3 to Better Music
    **Convert your MP3 to MIDI with real instrument sounds - no extra software needed!**

    Upload MP3 → Get back MIDI with guitars, synths, drums, etc.
    """)

    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                file_count="multiple",
                file_types=[".mp3", ".wav", ".m4a"],
                label="Upload your MP3 files"
            )

            style = gr.Radio(
                ["melodic", "electronic", "acoustic", "balanced"],
                value="balanced",
                label="Music Style",
                info="Choose the instrument sound you want"
            )

            intensity = gr.Slider(0.1, 1.0, value=0.7, label="Human Feel")

            process_btn = gr.Button("🎵 Create Better Music!", variant="primary")

        with gr.Column():
            file_output = gr.File(
                file_count="multiple",
                label="Download MIDI Files with Better Sounds"
            )

            # Output-only widgets: mark them non-interactive.
            audio_output = gr.Audio(
                label="Music Preview (Hear the better sounds!)",
                type="filepath",
                interactive=False
            )

            status = gr.Textbox(label="Status", interactive=False)

    with gr.Accordion("🎸 What Instruments You Get", open=True):
        gr.Markdown("""
        **Instead of boring piano, you get:**
        - 🎸 **Electric guitars & basses**
        - 🎹 **Synths & electric pianos**
        - 🥁 **Drum kits** (kick, snare, hi-hats)
        - 🎻 **Strings & orchestral sounds**
        - 🎷 **Saxophones & brass**

        **Styles:**
        - **Melodic**: Focus on leads and strings
        - **Electronic**: Synths and electronic drums
        - **Acoustic**: Guitars and real instruments
        - **Balanced**: Mix of everything
        """)

    # Wire the button to the processing function.
    process_btn.click(
        fn=process_audio_to_music,
        inputs=[file_input, style, intensity],
        outputs=[file_output, audio_output, status]
    )

if __name__ == "__main__":
    demo.launch()