FILMITO committed on
Commit 6f55663 · verified · 1 Parent(s): ace3e17

Update app.py

Files changed (1)
  1. app.py +246 -152
app.py CHANGED
@@ -3,14 +3,14 @@ import pretty_midi
 import numpy as np
 import tempfile
 import os
-import scipy
-from scipy import signal
 import librosa
-import io
-import base64
+import torch
+import torchaudio
 from pathlib import Path
+import soundfile as sf
+import io
 
-class HumanizeBot:
+class MP3ToHumanized:
     def __init__(self):
         self.groove_profiles = {
             "drums": {"timing_var": 0.02, "velocity_var": 15, "swing_factor": 0.1},
@@ -19,169 +19,267 @@ class HumanizeBot:
             "chords": {"timing_var": 0.008, "velocity_var": 8, "swing_factor": 0.03},
             "other": {"timing_var": 0.01, "velocity_var": 10, "swing_factor": 0.05}
         }
-
-    def classify_instrument(self, instrument):
-        """Classify instrument type for appropriate humanization"""
-        if instrument.is_drum:
-            return "drums"
-        elif 32 <= instrument.program <= 39:  # Bass
-            return "bass"
-        elif 0 <= instrument.program <= 7:  # Piano
-            return "chords"
-        elif 40 <= instrument.program <= 55:  # Strings, orchestra
-            return "chords"
-        elif 80 <= instrument.program <= 104:  # Synth leads, pads
-            return "melody"
-        else:
-            return "melody"
-
-    def apply_swing(self, notes, swing_factor, tempo):
-        """Apply swing/groove to notes"""
-        swung_notes = []
-        for note in notes:
-            # Simple swing: push even 8th notes slightly later
-            beat_position = (note.start * tempo / 60) % 1
-            if 0.25 < beat_position < 0.75:  # Off-beat positions
-                note.start += 0.01 * swing_factor
-                note.end += 0.01 * swing_factor
-            swung_notes.append(note)
-        return swung_notes
-
-    def humanize_midi(self, midi_file, intensity=0.7, style="organic", add_swing=True):
-        """Main humanization function"""
+
+    def audio_to_midi(self, audio_path, conversion_method="basic"):
+        """Convert audio file to MIDI using different methods"""
         try:
-            # Load MIDI file
-            midi_data = pretty_midi.PrettyMIDI(midi_file.name)
-            tempo = midi_data.estimate_tempo()
-
-            # Process each instrument
-            for instrument in midi_data.instruments:
-                inst_type = self.classify_instrument(instrument)
-                profile = self.groove_profiles[inst_type]
+            # Load audio file
+            y, sr = librosa.load(audio_path, sr=22050)
+
+            if conversion_method == "basic":
+                return self.basic_audio_to_midi(y, sr)
+            elif conversion_method == "melody":
+                return self.melody_extraction_to_midi(y, sr)
+            else:
+                return self.rhythm_based_midi(y, sr)
 
-                # Apply swing if requested
-                if add_swing and inst_type in ["drums", "bass"]:
-                    instrument.notes = self.apply_swing(
-                        instrument.notes,
-                        profile["swing_factor"] * intensity,
-                        tempo
-                    )
+        except Exception as e:
+            raise Exception(f"Audio to MIDI conversion failed: {str(e)}")
+
+    def basic_audio_to_midi(self, y, sr):
+        """Basic onset detection and pitch estimation"""
+        # Create a pretty_midi object
+        midi = pretty_midi.PrettyMIDI()
+
+        # Create instrument
+        piano_program = pretty_midi.instrument_name_to_program('Acoustic Grand Piano')
+        instrument = pretty_midi.Instrument(program=piano_program)
+
+        # Detect onsets (when notes start)
+        onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, backtrack=True)
+        onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
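+        # backtrack=True rolls each detected onset back to the preceding energy
+        # minimum, so note starts are not clipped mid-attack; hop_length must
+        # match between onset_detect and frames_to_time for times to line up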
+
+        # Estimate pitch for each onset
+        for onset_time in onset_times:
+            # Extract a segment around the onset
+            start_sample = int(onset_time * sr)
+            end_sample = start_sample + int(0.5 * sr)  # 500ms segment
+
+            if end_sample < len(y):
+                segment = y[start_sample:end_sample]
 
-                # Humanize timing and velocity
-                for note in instrument.notes:
-                    # Humanize timing (more variation for drums)
-                    timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
-                    note.start = max(0, note.start + timing_shift)
-
-                    # Humanize note duration (except for drums)
-                    if not instrument.is_drum:
-                        duration_shift = np.random.normal(0, profile["timing_var"] * 0.5 * intensity)
-                        note.end = max(note.start + 0.05, note.end + duration_shift)
+                # Estimate fundamental frequency
+                f0 = self.estimate_pitch(segment, sr)
+
+                if f0 > 0:
+                    # Convert frequency to MIDI note number
+                    midi_note = int(69 + 12 * np.log2(f0 / 440.0))
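+                    # standard MIDI mapping: A4 = 440 Hz = note 69, 12 notes per octave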
 
-                    # Humanize velocity
-                    vel_pattern = self.get_velocity_pattern(note, instrument, style)
-                    vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
-                    new_velocity = note.velocity + int(vel_shift * intensity * vel_pattern)
-                    note.velocity = max(20, min(127, new_velocity))
-
-            # Save humanized MIDI
-            output_path = tempfile.mktemp(suffix='_humanized.mid')
-            midi_data.write(output_path)
-            return output_path, "✅ Humanization successful! File is ready for download."
-
-        except Exception as e:
-            return None, f"❌ Error processing file: {str(e)}"
+                    # Only add if it's a valid MIDI note
+                    if 0 <= midi_note <= 127:
+                        # Create note
+                        note = pretty_midi.Note(
+                            velocity=np.random.randint(60, 100),
+                            pitch=midi_note,
+                            start=onset_time,
+                            end=onset_time + 0.5  # 500ms duration
+                        )
+                        instrument.notes.append(note)
+
+        midi.instruments.append(instrument)
+        return midi
 
-    def get_velocity_pattern(self, note, instrument, style):
-        """Get velocity multiplier based on style and musical context"""
-        if style == "organic":
-            return 1.0
-        elif style == "groovy":
-            # Accentuate beats more
-            beat_position = (note.start * 2) % 1  # Simple beat detection
-            if beat_position < 0.1:  # On strong beats
-                return 1.2
+    def melody_extraction_to_midi(self, y, sr):
+        """Extract melody and convert to MIDI"""
+        midi = pretty_midi.PrettyMIDI()
+        instrument = pretty_midi.Instrument(program=0)  # Piano
+
+        # Use librosa's melody extraction
+        f0, voiced_flag, voiced_probs = librosa.pyin(
+            y,
+            fmin=librosa.note_to_hz('C2'),
+            fmax=librosa.note_to_hz('C7'),
+            sr=sr
+        )
+
+        times = librosa.times_like(f0, sr=sr, hop_length=512)
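+        # pyin yields one f0 estimate per analysis frame; times_like converts
+        # frame indices to seconds (hop_length=512 is pyin's default hop)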
+
+        current_note = None
+        note_start = 0
+
+        for time, freq, voiced in zip(times, f0, voiced_flag):
+            if voiced and not np.isnan(freq):
+                midi_note = int(69 + 12 * np.log2(freq / 440.0))
+                if 0 <= midi_note <= 127:
+                    if current_note != midi_note:
+                        if current_note is not None:
+                            # End previous note
+                            note = pretty_midi.Note(
+                                velocity=80,
+                                pitch=current_note,
+                                start=note_start,
+                                end=time
+                            )
+                            instrument.notes.append(note)
+
+                        # Start new note
+                        current_note = midi_note
+                        note_start = time
             else:
-                return 0.9
-        elif style == "gentle":
-            return 0.8
-        return 1.0
-
-def create_audio_preview(midi_path):
-    """Create a simple audio preview from MIDI"""
-    try:
-        midi_data = pretty_midi.PrettyMIDI(midi_path)
-        # Generate audio using fluidsynth (simplified)
-        audio_data = midi_data.synthesize()
-        return 44100, audio_data.astype(np.float32)
-    except:
-        return None, None
+                if current_note is not None:
+                    # End current note
+                    note = pretty_midi.Note(
+                        velocity=80,
+                        pitch=current_note,
+                        start=note_start,
+                        end=time
+                    )
+                    instrument.notes.append(note)
+                current_note = None
+
+        midi.instruments.append(instrument)
+        return midi
+
+    def rhythm_based_midi(self, y, sr):
+        """Create rhythm-based MIDI from percussive elements"""
+        midi = pretty_midi.PrettyMIDI()
+
+        # Drum instrument
+        drum_instrument = pretty_midi.Instrument(program=0, is_drum=True)
+
+        # Detect strong beats and onsets
+        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
+        beat_times = librosa.frames_to_time(beats, sr=sr)
+
+        # Add drum hits on beats
+        for beat_time in beat_times:
+            # Kick drum on strong beats
+            note = pretty_midi.Note(
+                velocity=100,
+                pitch=36,  # Kick drum
+                start=beat_time,
+                end=beat_time + 0.1
+            )
+            drum_instrument.notes.append(note)
+
+        midi.instruments.append(drum_instrument)
+        return midi
+
+    def estimate_pitch(self, segment, sr):
+        """Estimate fundamental frequency from audio segment"""
+        try:
+            # Use autocorrelation for pitch detection
+            corr = np.correlate(segment, segment, mode='full')
+            corr = corr[len(corr)//2:]
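+            # keep non-negative lags only; the lag of the strongest peak past
+            # zero approximates the period in samples, so f0 = sr / lag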
+
+            # Find the first peak after zero lag (fundamental frequency)
+            d = np.diff(corr)
+            start = np.where(d > 0)[0]
+            if len(start) > 0:
+                start = start[0]
+                peak = np.argmax(corr[start:]) + start
+                freq = sr / peak if peak > 0 else 0
+                return freq
+        except:
+            pass
+        return 0
+
+    def humanize_midi(self, midi_data, intensity=0.7, style="organic"):
+        """Humanize the MIDI data"""
+        tempo = midi_data.estimate_tempo() if len(midi_data.instruments) > 0 else 120
+
+        for instrument in midi_data.instruments:
+            inst_type = "drums" if instrument.is_drum else "melody"
+            profile = self.groove_profiles[inst_type]
+
+            for note in instrument.notes:
+                # Humanize timing
+                timing_shift = np.random.normal(0, profile["timing_var"] * intensity)
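+                # timing_var is a standard deviation in seconds, scaled by intensity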
+                note.start = max(0, note.start + timing_shift)
+
+                # Humanize duration (except drums)
+                if not instrument.is_drum:
+                    duration_shift = np.random.normal(0, profile["timing_var"] * 0.3 * intensity)
+                    note.end = max(note.start + 0.1, note.end + duration_shift)
+
+                # Humanize velocity
+                vel_shift = np.random.randint(-profile["velocity_var"], profile["velocity_var"])
+                new_velocity = note.velocity + int(vel_shift * intensity)
+                note.velocity = max(20, min(127, new_velocity))
+
+        return midi_data
 
-def process_files(files, intensity, style, add_swing):
+def process_audio_files(files, intensity, style, conversion_method):
     if not files:
-        return None, None, "Please upload MIDI files to begin."
+        return None, None, "Please upload audio files (MP3, WAV, etc.)"
 
-    bot = HumanizeBot()
+    converter = MP3ToHumanized()
     processed_files = []
-    audio_previews = []
 
     for file in files:
-        humanized_path, message = bot.humanize_midi(file, intensity, style, add_swing)
-        if humanized_path:
-            processed_files.append(humanized_path)
-
-            # Create audio preview
-            sr, audio = create_audio_preview(humanized_path)
-            if audio is not None:
-                audio_previews.append((sr, audio))
+        try:
+            # Convert audio to MIDI
+            midi_data = converter.audio_to_midi(file.name, conversion_method)
+
+            # Humanize the MIDI
+            humanized_midi = converter.humanize_midi(midi_data, intensity, style)
+
+            # Save humanized MIDI
+            output_path = tempfile.mktemp(suffix='_humanized.mid')
+            humanized_midi.write(output_path)
+            processed_files.append(output_path)
+
+        except Exception as e:
+            return None, None, f"Error processing {file.name}: {str(e)}"
 
     if processed_files:
-        return processed_files, audio_previews[0] if audio_previews else None, f"✅ Successfully processed {len(processed_files)} files!"
+        # Create audio preview from first file
+        preview_audio = None
+        try:
+            # Convert MIDI back to audio for preview
+            midi_data = pretty_midi.PrettyMIDI(processed_files[0])
+            audio_data = midi_data.synthesize()
+            preview_path = tempfile.mktemp(suffix='_preview.wav')
+            sf.write(preview_path, audio_data, 44100)
+            preview_audio = preview_path
+        except:
+            preview_audio = None
+
+        return processed_files, preview_audio, f"✅ Successfully processed {len(processed_files)} files!"
     else:
         return None, None, "❌ No files were processed successfully."
 
-# Create the Gradio interface
-with gr.Blocks(theme=gr.themes.Soft(), title="HumanizeBot") as demo:
+# Create Gradio interface
+with gr.Blocks(theme=gr.themes.Soft(), title="MP3 HumanizeBot") as demo:
     gr.Markdown("""
-    # 🎵 HumanizeBot
-    **Remove AI traces from your music and make it sound human-made!**
+    # 🎵 MP3 HumanizeBot
+    **Convert MP3/Audio to MIDI and remove AI traces to sound human-made!**
 
-    Upload MIDI files from AI music generators to apply natural humanization: subtle timing variations, velocity changes, and musical feel.
+    Upload audio files from AI music generators, convert to MIDI, and apply natural humanization.
     """)
 
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### 📁 Upload & Settings")
+            gr.Markdown("### 📁 Upload Audio Files")
 
             file_input = gr.File(
                 file_count="multiple",
-                file_types=[".mid", ".midi"],
-                label="Upload MIDI Files",
+                file_types=[".mp3", ".wav", ".ogg", ".m4a", ".flac"],
+                label="Upload Audio Files",
                 type="filepath"
            )
 
+            conversion_method = gr.Radio(
+                ["basic", "melody", "rhythm"],
+                value="basic",
+                label="🎵 Conversion Method",
+                info="Basic = general purpose, Melody = focus on tunes, Rhythm = focus on beats"
+            )
+
             intensity = gr.Slider(
                 0.1, 1.0,
                 value=0.7,
-                label="🎚️ Humanization Intensity",
-                info="Low = subtle, High = very human"
+                label="🎚️ Humanization Intensity"
             )
 
             style = gr.Radio(
                 ["organic", "groovy", "gentle"],
                 value="organic",
-                label="🎸 Humanization Style",
-                info="Organic = natural, Groovy = rhythmic, Gentle = subtle"
-            )
-
-            add_swing = gr.Checkbox(
-                value=True,
-                label="🔄 Add Swing/Groove",
-                info="Add rhythmic push and pull"
+                label="🎸 Humanization Style"
             )
 
             process_btn = gr.Button(
-                "✨ Humanize My Music!",
+                "✨ Convert & Humanize!",
                 variant="primary",
                 size="lg"
             )
@@ -195,44 +293,40 @@ with gr.Blocks(theme=gr.themes.Soft(), title="HumanizeBot") as demo:
             )
 
             audio_output = gr.Audio(
-                label="Audio Preview (First File)",
+                label="MIDI Audio Preview",
                 interactive=False
             )
 
             status = gr.Textbox(
                 label="Status",
                 interactive=False,
-                max_lines=3
+                max_lines=4
             )
 
-    # Examples section
-    with gr.Accordion("🎯 Examples & Tips", open=False):
+    with gr.Accordion("ℹ️ How It Works", open=False):
         gr.Markdown("""
-        **Best used with:**
-        - AI-generated MIDI from Soundraw, AIVA, MuseNet, etc.
-        - Robotic-sounding drum patterns
-        - Static piano or synth sequences
-
-        **How it works:**
-        - Adds subtle timing variations (like a human player)
-        - Adjusts velocity (note strength) dynamically
-        - Can add swing/groove for rhythmic parts
-        - Preserves the original musical content
-
-        **Pro tip:** Start with intensity 0.7 for balanced results!
+        **Process:**
+        1. **Upload** your AI-generated audio files (MP3, WAV, etc.)
+        2. **Convert** to MIDI using pitch and rhythm detection
+        3. **Humanize** with timing and velocity variations
+        4. **Download** humanized MIDI files
+
+        **Conversion Methods:**
+        - **Basic**: General purpose conversion for most music
+        - **Melody**: Focuses on extracting melodic content
+        - **Rhythm**: Focuses on drum patterns and beats
+
+        **Note**: Audio-to-MIDI conversion is challenging and works best with:
+        - Clear melodic lines
+        - Good audio quality
+        - Not too much reverb/effects
         """)
 
-    # Connect the processing function
     process_btn.click(
-        fn=process_files,
-        inputs=[file_input, intensity, style, add_swing],
+        fn=process_audio_files,
+        inputs=[file_input, intensity, style, conversion_method],
         outputs=[file_output, audio_output, status]
     )
-
-    gr.Markdown("""
-    ---
-    *Built with ❤️ using Gradio and PrettyMIDI. Works best with MIDI files from AI music generators.*
-    """)
 
 if __name__ == "__main__":
     demo.launch(debug=True)