FILMITO committed on
Commit
71c15a0
·
verified ·
1 Parent(s): b7dc0ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -200
app.py CHANGED
@@ -4,180 +4,137 @@ import numpy as np
4
  import tempfile
5
  import os
6
  import librosa
7
- import torch
8
- import torchaudio
9
- from pathlib import Path
10
  import soundfile as sf
 
 
11
  import io
12
 
13
  class MP3ToHumanized:
14
  def __init__(self):
15
  self.groove_profiles = {
16
- "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
17
- "melody": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05},
18
- "bass": {"timing_var": 0.015, "velocity_var": 12, "swing_factor": 0.07},
19
- "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
20
- "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05}
21
  }
22
-
23
- def audio_to_midi(self, audio_path, conversion_method="basic"):
24
- """Convert audio file to MIDI using different methods"""
25
  try:
26
- # Load audio file
27
- y, sr = librosa.load(audio_path, sr=22050)
28
 
29
- if conversion_method == "basic":
30
- return self.basic_audio_to_midi(y, sr)
31
- elif conversion_method == "melody":
32
- return self.melody_extraction_to_midi(y, sr)
33
- else:
34
- return self.rhythm_based_midi(y, sr)
35
-
36
  except Exception as e:
37
- raise Exception(f"Audio to MIDI conversion failed: {str(e)}")
38
 
39
- def basic_audio_to_midi(self, y, sr):
40
- """Basic onset detection and pitch estimation"""
41
- # Create a pretty_midi object
42
- midi = pretty_midi.PrettyMIDI()
43
-
44
- # Create instrument
45
- piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
46
- instrument = pretty_midi.Instrument(program=piano_program)
47
-
48
- # Detect onsets (when notes start)
49
- onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
50
- onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
51
-
52
- # Estimate pitch for each onset
53
- for onset_time in onset_times:
54
- # Extract a segment around the onset
55
- start_sample = int(onset_time * sr)
56
- end_sample = start_sample + int(0.5 * sr) # 500ms segment
57
 
58
- if end_sample < len(y):
59
- segment = y[start_sample:end_sample]
60
-
61
- # Estimate fundamental frequency
62
- f0 = self.estimate_pitch(segment, sr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- if f0 > 0:
65
- # Convert frequency to MIDI note number
66
- midi_note = int(69 + 12 * np.log2(f0 / 440.0))
67
 
68
- # Only add if it's a valid MIDI note
69
- if 0 <= midi_note <= 127:
70
- # Create note
71
- note = pretty_midi.Note(
72
- velocity=np.random.randint(60, 100),
73
- pitch=midi_note,
74
- start=onset_time,
75
- end=onset_time + 0.5 # 500ms duration
76
- )
77
- instrument.notes.append(note)
78
-
79
- midi.instruments.append(instrument)
80
- return midi
81
-
82
- def melody_extraction_to_midi(self, y, sr):
83
- """Extract melody and convert to MIDI"""
84
- midi = pretty_midi.PrettyMIDI()
85
- instrument = pretty_midi.Instrument(program=0) # Piano
86
-
87
- # Use librosa's melody extraction
88
- f0, voiced_flag, voiced_probs = librosa.pyin(
89
- y,
90
- fmin=librosa.note_to_hz('C2'),
91
- fmax=librosa.note_to_hz('C7'),
92
- sr=sr
93
- )
94
-
95
- times = librosa.times_like(f0, sr=sr, hop_length=512)
96
-
97
- current_note = None
98
- note_start = 0
99
-
100
- for time, freq, voiced in zip(times, f0, voiced_flag):
101
- if voiced and not np.isnan(freq):
102
- midi_note = int(69 + 12 * np.log2(freq / 440.0))
103
- if 0 <= midi_note <= 127:
104
- if current_note != midi_note:
105
- if current_note is not None:
106
- # End previous note
107
- note = pretty_midi.Note(
108
- velocity=80,
109
- pitch=current_note,
110
- start=note_start,
111
- end=time
112
- )
113
- instrument.notes.append(note)
114
 
115
- # Start new note
116
- current_note = midi_note
117
- note_start = time
118
- else:
119
- if current_note is not None:
120
- # End current note
 
 
 
 
 
 
 
 
 
 
 
 
121
  note = pretty_midi.Note(
122
  velocity=80,
123
- pitch=current_note,
124
- start=note_start,
125
- end=time
126
  )
127
- instrument.notes.append(note)
128
- current_note = None
129
-
130
- midi.instruments.append(instrument)
131
- return midi
 
 
 
 
 
132
 
133
- def rhythm_based_midi(self, y, sr):
134
- """Create rhythm-based MIDI from percussive elements"""
135
- midi = pretty_midi.PrettyMIDI()
136
-
137
- # Drum instrument
138
- drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
139
 
140
- # Detect strong beats and onsets
141
- tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
142
- beat_times = librosa.frames_to_time(beats, sr=sr)
 
143
 
144
- # Add drum hits on beats
145
- for beat_time in beat_times:
146
- # Kick drum on strong beats
147
- note = pretty_midi.Note(
148
- velocity=100,
149
- pitch=36, # Kick drum
150
- start=beat_time,
151
- end=beat_time + 0.1
152
- )
153
- drum_instrument.notes.append(note)
154
-
155
- midi.instruments.append(drum_instrument)
156
- return midi
157
 
158
- def estimate_pitch(self, segment, sr):
159
- """Estimate fundamental frequency from audio segment"""
160
- try:
161
- # Use autocorrelation for pitch detection
162
- corr = np.correlate(segment, segment, mode='full')
163
- corr = corr[len(corr)//2:]
164
-
165
- # Find the first peak after zero lag (fundamental frequency)
166
- d = np.diff(corr)
167
- start = np.where(d > 0)[0]
168
- if len(start) > 0:
169
- start = start[0]
170
- peak = np.argmax(corr[start:]) + start
171
- freq = sr / peak if peak > 0 else 0
172
- return freq
173
- except:
174
- pass
175
- return 0
176
 
177
- def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
178
- """Humanize the MIDI data"""
179
- tempo = midi_data.estimate_tempo() if len(midi_data.instruments) > 0 else 120
180
-
 
181
  for instrument in midi_data.instruments:
182
  inst_type = "drums" if instrument.is_drum else "melody"
183
  profile = self.groove_profiles[inst_type]
@@ -187,32 +144,43 @@ class MP3ToHumanized:
187
  timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
188
  note.start = max(0, note.start + timing_shift)
189
 
190
- # Humanize duration (except drums)
191
  if not instrument.is_drum:
192
- duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
193
  note.end = max(note.start + 0.1, note.end + duration_shift)
194
 
195
  # Humanize velocity
196
  vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
197
  new_velocity = note.velocity + int(vel_shift * intensity)
198
- note.velocity = max(20, min(127, new_velocity))
199
 
200
  return midi_data
201
 
202
- def process_audio_files(files, intensity, style, conversion_method):
203
  if not files:
204
  return None, None, "Please upload audio files (MP3, WAV, etc.)"
205
 
 
 
 
 
206
  converter = MP3ToHumanized()
207
  processed_files = []
208
 
209
  for file in files:
210
  try:
 
 
 
 
211
  # Convert audio to MIDI
212
- midi_data = converter.audio_to_midi(file.name, conversion_method)
 
 
 
213
 
214
  # Humanize the MIDI
215
- humanized_midi = converter.humanize_midi(midi_data, intensity, style)
216
 
217
  # Save humanized MIDI
218
  output_path = tempfile.mktemp(suffix='_humanized.mid')
@@ -220,32 +188,35 @@ def process_audio_files(files, intensity, style, conversion_method):
220
  processed_files.append(output_path)
221
 
222
  except Exception as e:
223
- return None, None, f"Error processing {file.name}: {str(e)}"
 
 
224
 
225
  if processed_files:
226
- # Create audio preview from first file
227
  preview_audio = None
228
  try:
229
- # Convert MIDI back to audio for preview
230
  midi_data = pretty_midi.PrettyMIDI(processed_files[0])
231
  audio_data = midi_data.synthesize()
232
  preview_path = tempfile.mktemp(suffix='_preview.wav')
233
  sf.write(preview_path, audio_data, 44100)
234
  preview_audio = preview_path
235
- except:
 
236
  preview_audio = None
237
 
238
- return processed_files, preview_audio, f"βœ… Successfully processed {len(processed_files)} files!"
 
239
  else:
240
  return None, None, "❌ No files were processed successfully."
241
 
242
  # Create Gradio interface
243
- with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
244
  gr.Markdown("""
245
- # 🎡 MP3 HumanizeBot
246
- **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
247
 
248
- Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
249
  """)
250
 
251
  with gr.Row():
@@ -254,35 +225,33 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
254
 
255
  file_input = gr.File(
256
  file_count="multiple",
257
- file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
258
- label="Upload Audio Files",
259
  type="filepath"
260
  )
261
 
262
- conversion_method = gr.Radio(
263
- ["basic", "melody", "rhythm"],
264
- value="basic",
265
- label="🎡 Conversion Method",
266
- info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
267
- )
268
-
269
  intensity = gr.Slider(
270
  0.1, 1.0,
271
  value=0.7,
272
- label="🎚️ Humanization Intensity"
273
- )
274
-
275
- style = gr.Radio(
276
- ["organic", "groovy", "gentle"],
277
- value="organic",
278
- label="🎸 Humanization Style"
279
  )
280
 
281
  process_btn = gr.Button(
282
- "✨ Convert & Humanize!",
283
  variant="primary",
284
  size="lg"
285
  )
 
 
 
 
 
 
 
 
 
 
286
 
287
  with gr.Column(scale=1):
288
  gr.Markdown("### πŸ“₯ Download Results")
@@ -294,37 +263,49 @@ with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
294
 
295
  audio_output = gr.Audio(
296
  label="MIDI Audio Preview",
297
- interactive=False
 
298
  )
299
 
300
  status = gr.Textbox(
301
  label="Status",
302
  interactive=False,
303
- max_lines=4
304
  )
305
 
306
- with gr.Accordion("ℹ️ How It Works", open=False):
307
  gr.Markdown("""
308
- **Process:**
309
- 1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
310
- 2. **Convert** to MIDI using pitch and rhythm detection
311
- 3. **Humanize** with timing and velocity variations
312
- 4. **Download** humanized MIDI files
313
 
314
- **Conversion Methods:**
315
- - **Basic**: General purpose conversion for most music
316
- - **Melody**: Focuses on extracting melodic content
317
- - **Rhythm**: Focuses on drum patterns and beats
 
318
 
319
- **Note**: Audio-to-MIDI conversion is challenging and works best with:
320
- - Clear melodic lines
321
- - Good audio quality
322
- - Not too much reverb/effects
 
 
 
 
 
 
 
 
 
 
323
  """)
324
 
325
  process_btn.click(
326
  fn=process_audio_files,
327
- inputs=[file_input, intensity, style, conversion_method],
328
  outputs=[file_output, audio_output, status]
329
  )
330
 
 
4
  import tempfile
5
  import os
6
  import librosa
 
 
 
7
  import soundfile as sf
8
+ from pathlib import Path
9
+ import subprocess
10
  import io
11
 
12
  class MP3ToHumanized:
13
  def __init__(self):
14
  self.groove_profiles = {
15
+ "drums": {"timing_var": 0.02, "velocity_var": 15},
16
+ "melody": {"timing_var": 0.01, "velocity_var": 10},
17
+ "bass": {"timing_var": 0.015, "velocity_var": 12},
18
+ "chords": {"timing_var": 0.008, "velocity_var": 8},
19
+ "other": {"timing_var": 0.01, "velocity_var": 10}
20
  }
21
+
22
+ def convert_to_wav(self, audio_path):
23
+ """Convert any audio format to WAV using librosa"""
24
  try:
25
+ # Load audio with librosa (handles MP3, WAV, etc.)
26
+ y, sr = librosa.load(audio_path, sr=22050, mono=True)
27
 
28
+ # Save as temporary WAV file
29
+ wav_path = tempfile.mktemp(suffix='.wav')
30
+ sf.write(wav_path, y, sr)
31
+ return wav_path, sr
 
 
 
32
  except Exception as e:
33
+ raise Exception(f"Audio conversion failed: {str(e)}")
34
 
35
+ def simple_audio_to_midi(self, audio_path):
36
+ """Simple but effective audio to MIDI conversion"""
37
+ try:
38
+ # Convert to WAV first
39
+ wav_path, sr = self.convert_to_wav(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # Load the converted audio
42
+ y, sr = librosa.load(wav_path, sr=sr)
43
+
44
+ # Create MIDI object
45
+ midi = pretty_midi.PrettyMIDI()
46
+ instrument = pretty_midi.Instrument(program=0) # Acoustic Grand Piano
47
+
48
+ # Method 1: Onset detection with pitch estimation
49
+ onset_frames = librosa.onset.onset_detect(
50
+ y=y, sr=sr,
51
+ hop_length=512,
52
+ backtrack=True,
53
+ delta=0.2
54
+ )
55
+ onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
56
+
57
+ # Get tempo for musical timing
58
+ tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='time')
59
+
60
+ notes_added = 0
61
+ for i, onset_time in enumerate(onset_times):
62
+ if notes_added >= 50: # Limit notes to avoid clutter
63
+ break
64
+
65
+ # Extract a segment around the onset for pitch detection
66
+ start_idx = int(onset_time * sr)
67
+ end_idx = min(start_idx + int(0.3 * sr), len(y)) # 300ms window
68
 
69
+ if end_idx > start_idx:
70
+ segment = y[start_idx:end_idx]
 
71
 
72
+ # Simple pitch detection using FFT
73
+ frequencies, magnitudes = self.simple_pitch_detection(segment, sr)
74
+
75
+ if len(frequencies) > 0:
76
+ # Take the strongest frequency
77
+ main_freq = frequencies[np.argmax(magnitudes)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ if 80 < main_freq < 1000: # Reasonable frequency range
80
+ midi_note = self.freq_to_midi(main_freq)
81
+
82
+ if 48 <= midi_note <= 84: # C3 to C6 range
83
+ # Create note
84
+ note = pretty_midi.Note(
85
+ velocity=np.random.randint(70, 100),
86
+ pitch=midi_note,
87
+ start=onset_time,
88
+ end=onset_time + 0.4 # 400ms duration
89
+ )
90
+ instrument.notes.append(note)
91
+ notes_added += 1
92
+
93
+ # If we didn't get enough notes, add some rhythmic elements
94
+ if notes_added < 10 and len(beat_frames) > 0:
95
+ drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
96
+ for beat_time in beat_frames[:8]: # First 8 beats
97
  note = pretty_midi.Note(
98
  velocity=80,
99
+ pitch=36, # Kick drum
100
+ start=beat_time,
101
+ end=beat_time + 0.2
102
  )
103
+ drum_instrument.notes.append(note)
104
+ midi.instruments.append(drum_instrument)
105
+
106
+ if len(instrument.notes) > 0:
107
+ midi.instruments.append(instrument)
108
+
109
+ return midi
110
+
111
+ except Exception as e:
112
+ raise Exception(f"MIDI conversion failed: {str(e)}")
113
 
114
+ def simple_pitch_detection(self, segment, sr):
115
+ """Simple FFT-based pitch detection"""
116
+ # Apply windowing
117
+ window = np.hanning(len(segment))
118
+ segment = segment * window
 
119
 
120
+ # FFT
121
+ fft = np.fft.rfft(segment)
122
+ magnitudes = np.abs(fft)
123
+ frequencies = np.fft.rfftfreq(len(segment), 1/sr)
124
 
125
+ # Filter reasonable frequencies
126
+ mask = (frequencies > 80) & (frequencies < 1000)
127
+ return frequencies[mask], magnitudes[mask]
 
 
 
 
 
 
 
 
 
 
128
 
129
+ def freq_to_midi(self, frequency):
130
+ """Convert frequency to MIDI note number"""
131
+ return int(69 + 12 * np.log2(frequency / 440.0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
+ def humanize_midi(self, midi_data, intensity=0.7):
134
+ """Apply humanization to MIDI"""
135
+ if len(midi_data.instruments) == 0:
136
+ return midi_data
137
+
138
  for instrument in midi_data.instruments:
139
  inst_type = "drums" if instrument.is_drum else "melody"
140
  profile = self.groove_profiles[inst_type]
 
144
  timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
145
  note.start = max(0, note.start + timing_shift)
146
 
147
+ # Humanize duration
148
  if not instrument.is_drum:
149
+ duration_shift = np.random.normal(0, profile["timing_var"] * 0.5 * intensity)
150
  note.end = max(note.start + 0.1, note.end + duration_shift)
151
 
152
  # Humanize velocity
153
  vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
154
  new_velocity = note.velocity + int(vel_shift * intensity)
155
+ note.velocity = max(40, min(127, new_velocity))
156
 
157
  return midi_data
158
 
159
+ def process_audio_files(files, intensity):
160
  if not files:
161
  return None, None, "Please upload audio files (MP3, WAV, etc.)"
162
 
163
+ # Show what files we received
164
+ file_info = f"Received {len(files)} files: {[f.name for f in files]}"
165
+ print(file_info)
166
+
167
  converter = MP3ToHumanized()
168
  processed_files = []
169
 
170
  for file in files:
171
  try:
172
+ # Check file type
173
+ file_ext = Path(file.name).suffix.lower()
174
+ print(f"Processing {file.name} (extension: {file_ext})")
175
+
176
  # Convert audio to MIDI
177
+ midi_data = converter.simple_audio_to_midi(file.name)
178
+
179
+ if len(midi_data.instruments) == 0 or sum(len(instr.notes) for instr in midi_data.instruments) == 0:
180
+ return None, None, f"❌ Could not extract musical content from {file.name}. Try a different audio file with clear melody."
181
 
182
  # Humanize the MIDI
183
+ humanized_midi = converter.humanize_midi(midi_data, intensity)
184
 
185
  # Save humanized MIDI
186
  output_path = tempfile.mktemp(suffix='_humanized.mid')
 
188
  processed_files.append(output_path)
189
 
190
  except Exception as e:
191
+ error_msg = f"Error processing {file.name}: {str(e)}"
192
+ print(error_msg)
193
+ return None, None, error_msg
194
 
195
  if processed_files:
196
+ # Create audio preview
197
  preview_audio = None
198
  try:
 
199
  midi_data = pretty_midi.PrettyMIDI(processed_files[0])
200
  audio_data = midi_data.synthesize()
201
  preview_path = tempfile.mktemp(suffix='_preview.wav')
202
  sf.write(preview_path, audio_data, 44100)
203
  preview_audio = preview_path
204
+ except Exception as e:
205
+ print(f"Preview generation failed: {e}")
206
  preview_audio = None
207
 
208
+ success_msg = f"βœ… Successfully processed {len(processed_files)} files! Converted audio to MIDI and applied humanization."
209
+ return processed_files, preview_audio, success_msg
210
  else:
211
  return None, None, "❌ No files were processed successfully."
212
 
213
  # Create Gradio interface
214
+ with gr.Blocks(theme=gr.themes.Soft(), title="Audio HumanizeBot") as demo:
215
  gr.Markdown("""
216
+ # 🎡 Audio HumanizeBot
217
+ **Convert MP3/Audio to humanized MIDI - Remove AI traces from your music!**
218
 
219
+ Upload audio files from AI music generators and get humanized MIDI back.
220
  """)
221
 
222
  with gr.Row():
 
225
 
226
  file_input = gr.File(
227
  file_count="multiple",
228
+ file_types=[".mp3", ".wav", ".m4a", ".ogg", ".flac"],
229
+ label="Upload your AI-generated audio files",
230
  type="filepath"
231
  )
232
 
 
 
 
 
 
 
 
233
  intensity = gr.Slider(
234
  0.1, 1.0,
235
  value=0.7,
236
+ label="🎚️ Humanization Intensity",
237
+ info="How much human feel to add"
 
 
 
 
 
238
  )
239
 
240
  process_btn = gr.Button(
241
+ "✨ Convert & Humanize Audio!",
242
  variant="primary",
243
  size="lg"
244
  )
245
+
246
+ gr.Markdown("""
247
+ **Supported formats:** MP3, WAV, M4A, OGG, FLAC
248
+
249
+ **Works best with:**
250
+ - Clear melodic content
251
+ - AI-generated music
252
+ - Not too much reverb/effects
253
+ - 10-30 second clips
254
+ """)
255
 
256
  with gr.Column(scale=1):
257
  gr.Markdown("### πŸ“₯ Download Results")
 
263
 
264
  audio_output = gr.Audio(
265
  label="MIDI Audio Preview",
266
+ interactive=False,
267
+ type="filepath"
268
  )
269
 
270
  status = gr.Textbox(
271
  label="Status",
272
  interactive=False,
273
+ max_lines=5
274
  )
275
 
276
+ with gr.Accordion("🎯 Tips for Best Results", open=True):
277
  gr.Markdown("""
278
+ **For best conversion:**
279
+ - Use clear AI-generated music with obvious melodies
280
+ - Avoid heavily processed/remixed tracks
281
+ - 10-30 second clips work better than full songs
282
+ - Instrumental music converts better than vocal-heavy tracks
283
 
284
+ **What to expect:**
285
+ - The MIDI will capture the main melodic and rhythmic ideas
286
+ - You can import the MIDI into any DAW (FL Studio, Ableton, etc.)
287
+ - Use high-quality instrument sounds in your DAW for best results
288
+ - The humanization adds natural timing and velocity variations
289
 
290
+ **Limitations:**
291
+ - Complex arrangements may not convert perfectly
292
+ - Audio-to-MIDI is an approximation
293
+ - Very ambient or effect-heavy music may not work well
294
+ """)
295
+
296
+ # Examples section
297
+ with gr.Accordion("🎡 Try These Example Files", open=False):
298
+ gr.Markdown("""
299
+ **Test with these types of audio:**
300
+ - AI piano melodies
301
+ - Simple electronic beats
302
+ - Clear synth lines
303
+ - Drum patterns from AI generators
304
  """)
305
 
306
  process_btn.click(
307
  fn=process_audio_files,
308
+ inputs=[file_input, intensity],
309
  outputs=[file_output, audio_output, status]
310
  )
311