FILMITO committed
Commit 14849b4 · verified · 1 Parent(s): feb9588

Update app.py

Files changed (1)
  1. app.py +209 -391
app.py CHANGED
@@ -1,460 +1,278 @@
  import gradio as gr
- import pretty_midi
  import numpy as np
  import tempfile
  import librosa
  import soundfile as sf
- import os

- class CompleteSongHumanizer:
      def __init__(self):
-         # Complete band setups for each style
-         self.band_setups = {
-             "pop": {
-                 "drums": {"program": 0, "name": "Pop Drums"},
-                 "bass": {"program": 33, "name": "Bass Guitar"},
-                 "rhythm": {"program": 25, "name": "Acoustic Guitar"},
-                 "piano": {"program": 1, "name": "Piano"},
-                 "strings": {"program": 49, "name": "String Ensemble"},
-                 "synth": {"program": 81, "name": "Lead Synth"}
-             },
-             "electronic": {
-                 "drums": {"program": 0, "name": "EDM Drums"},
-                 "bass": {"program": 39, "name": "Synth Bass"},
-                 "lead": {"program": 81, "name": "Lead Synth"},
-                 "pad": {"program": 89, "name": "Warm Pad"},
-                 "fx": {"program": 103, "name": "FX"},
-                 "chords": {"program": 5, "name": "Electric Piano"}
-             },
-             "rock": {
-                 "drums": {"program": 0, "name": "Rock Drums"},
-                 "bass": {"program": 33, "name": "Bass Guitar"},
-                 "guitar1": {"program": 30, "name": "Distortion Guitar"},
-                 "guitar2": {"program": 27, "name": "Clean Guitar"},
-                 "keys": {"program": 5, "name": "Electric Piano"},
-                 "strings": {"program": 48, "name": "String Ensemble"}
-             },
-             "cinematic": {
-                 "drums": {"program": 0, "name": "Orchestral Percussion"},
-                 "strings1": {"program": 48, "name": "String Ensemble"},
-                 "strings2": {"program": 49, "name": "Slow Strings"},
-                 "brass": {"program": 61, "name": "French Horn"},
-                 "woodwinds": {"program": 68, "name": "Oboe"},
-                 "harp": {"program": 46, "name": "Harp"}
-             }
-         }
-
-         # Song structures
-         self.song_structures = {
-             "pop": ["intro", "verse", "chorus", "verse", "chorus", "bridge", "chorus", "outro"],
-             "electronic": ["intro", "build", "drop", "break", "build", "drop", "outro"],
-             "rock": ["intro", "verse", "chorus", "verse", "chorus", "solo", "chorus", "outro"],
-             "cinematic": ["intro", "theme", "build", "climax", "resolution", "outro"]
-         }

-     def create_complete_song(self, mp3_path, style="pop", intensity=0.7):
-         """Create a complete humanized song from MP3"""
          try:
-             # Load and analyze the audio
-             y, sr = librosa.load(mp3_path, sr=22050, mono=True)
-             duration = len(y) / sr

-             # Create MIDI object
-             midi = pretty_midi.PrettyMIDI()
-
-             # Setup complete band
-             band = self.setup_band(midi, style)
-
-             # Analyze audio to get musical content
-             audio_features = self.analyze_audio_features(y, sr, duration)
-
-             # Create complete song structure
-             self.build_complete_song(midi, band, style, audio_features, duration, intensity)
-
-             # Apply advanced humanization
-             self.apply_complete_humanization(midi, intensity)
-
-             # Synthesize to audio
-             audio_data = midi.synthesize()

-             return audio_data, sr

          except Exception as e:
-             raise Exception(f"Song creation failed: {str(e)}")
-
-     def setup_band(self, midi, style):
-         """Setup complete band instruments"""
-         band = {}
-         for role, inst_info in self.band_setups[style].items():
-             instrument = pretty_midi.Instrument(
-                 program=inst_info["program"],
-                 is_drum=(role == "drums"),
-                 name=inst_info["name"]
-             )
-             midi.instruments.append(instrument)
-             band[role] = instrument
-         return band
-
-     def analyze_audio_features(self, y, sr, duration):
-         """Extract musical features from audio"""
-         # Get tempo and beats
-         tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, units='time')
-
-         # Detect onsets for melody extraction
-         onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=512, delta=0.08)
-         onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=512)
-
-         # Extract melody contour
-         melody_contour = []
-         for onset_time in onset_times[:50]: # Analyze first 50 onsets
-             start_idx = int(onset_time * sr)
-             end_idx = min(start_idx + int(0.4 * sr), len(y))
-             if end_idx > start_idx:
-                 segment = y[start_idx:end_idx]
-                 freq = self.detect_pitch(segment, sr)
-                 if 100 < freq < 1000:
-                     melody_contour.append((onset_time, freq))
-
-         # Detect energy changes for song sections
-         energy = librosa.feature.rms(y=y)[0]
-         energy_times = librosa.times_like(energy, sr=sr)
-
-         return {
-             'tempo': tempo if tempo else 120,
-             'beats': beat_frames if len(beat_frames) > 0 else np.linspace(0, duration, 32),
-             'melody_contour': melody_contour,
-             'energy': list(zip(energy_times, energy)),
-             'duration': duration
-         }
-
-     def detect_pitch(self, segment, sr):
-         """Detect pitch in audio segment"""
-         try:
-             # Simple FFT-based pitch detection
-             fft = np.fft.rfft(segment * np.hanning(len(segment)))
-             freqs = np.fft.rfftfreq(len(segment), 1/sr)
-             mags = np.abs(fft)
-
-             # Find strongest frequency in reasonable range
-             mask = (freqs > 80) & (freqs < 1200)
-             if np.any(mask):
-                 peak_idx = np.argmax(mags[mask])
-                 return freqs[mask][peak_idx]
-         except:
-             pass
-         return 440
-
-     def build_complete_song(self, midi, band, style, features, duration, intensity):
-         """Build complete song with structure"""
-         structure = self.song_structures[style]
-         section_duration = duration / len(structure)
-
-         for section_idx, section_name in enumerate(structure):
-             start_time = section_idx * section_duration
-             end_time = (section_idx + 1) * section_duration
-
-             # Create section-specific music
-             self.create_section(
-                 band, style, section_name, section_idx,
-                 start_time, end_time, features, intensity
-             )
-
-     def create_section(self, band, style, section_name, section_idx, start_time, end_time, features, intensity):
-         """Create music for a specific song section"""
-         section_duration = end_time - start_time
-
-         # Get beats for this section
-         section_beats = [t for t in features['beats'] if start_time <= t < end_time]
-         if not section_beats:
-             section_beats = np.linspace(start_time, end_time, 8)
-
-         # Section-specific arrangements
-         if section_name in ["intro", "outro"]:
-             self.create_intro_outro(band, style, section_name, start_time, end_time, section_beats)
-         elif section_name in ["verse", "theme"]:
-             self.create_verse(band, style, start_time, end_time, section_beats, features)
-         elif section_name in ["chorus", "drop", "climax"]:
-             self.create_chorus(band, style, start_time, end_time, section_beats, features)
-         elif section_name in ["bridge", "break", "solo"]:
-             self.create_bridge(band, style, start_time, end_time, section_beats, features)
-         elif section_name in ["build"]:
-             self.create_build(band, style, start_time, end_time, section_beats, features)
-
-     def create_intro_outro(self, band, style, section_name, start_time, end_time, beats):
-         """Create intro/outro sections"""
-         # Drums - simple pattern
-         if 'drums' in band:
-             for i, beat_time in enumerate(beats):
-                 if i % 4 == 0: # Kick on downbeat
-                     band['drums'].notes.append(self.create_note(36, 80, beat_time, 0.3))
-                 if section_name == "intro" and i % 2 == 0: # Hi-hat in intro
-                     band['drums'].notes.append(self.create_note(42, 60, beat_time, 0.1))
-
-         # Bass - simple root notes
-         if 'bass' in band:
-             root_note = 36 if style != "cinematic" else 48
-             for i, beat_time in enumerate(beats[::2]):
-                 band['bass'].notes.append(self.create_note(root_note, 70, beat_time, 0.8))
-
-         # Pad/strings - atmospheric
-         pad_instrument = next((inst for role, inst in band.items() if 'pad' in role or 'string' in role), None)
-         if pad_instrument:
-             chord_notes = self.get_chord_for_section(style, section_name, 0)
-             for note_pitch in chord_notes:
-                 pad_instrument.notes.append(self.create_note(note_pitch, 50, start_time, end_time - start_time))

-     def create_verse(self, band, style, start_time, end_time, beats, features):
-         """Create verse section"""
-         # Full drum pattern
-         if 'drums' in band:
-             for i, beat_time in enumerate(beats):
-                 # Kick on 1 and 3
-                 if i % 4 in [0, 2]:
-                     band['drums'].notes.append(self.create_note(36, 85, beat_time, 0.3))
-                 # Snare on 2 and 4
-                 if i % 4 in [1, 3]:
-                     band['drums'].notes.append(self.create_note(38, 80, beat_time, 0.25))
-                 # Hi-hats
-                 if style in ["pop", "electronic"]:
-                     band['drums'].notes.append(self.create_note(42, 65, beat_time, 0.1))
-
-         # Bass line
-         if 'bass' in band:
-             bass_line = self.create_bass_line(beats, style)
-             for note in bass_line:
-                 band['bass'].notes.append(note)
-
-         # Melody from audio analysis
-         if 'lead' in band or 'guitar1' in band:
-             lead_instrument = band.get('lead') or band.get('guitar1')
-             if lead_instrument:
-                 melody = self.extract_melody_for_section(features['melody_contour'], start_time, end_time)
-                 for time, freq in melody:
-                     midi_note = self.freq_to_midi(freq)
-                     if 48 <= midi_note <= 84:
-                         lead_instrument.notes.append(self.create_note(midi_note, 80, time, 0.4))

-     def create_chorus(self, band, style, start_time, end_time, beats, features):
-         """Create chorus/drop section - more intense"""
-         # Energetic drums
-         if 'drums' in band:
-             for i, beat_time in enumerate(beats):
-                 # Stronger kicks
-                 if i % 4 in [0, 2]:
-                     band['drums'].notes.append(self.create_note(36, 95, beat_time, 0.4))
-                 # Louder snares
-                 if i % 4 in [1, 3]:
-                     band['drums'].notes.append(self.create_note(38, 90, beat_time, 0.3))
-                 # More hi-hats
-                 if style in ["pop", "electronic"]:
-                     band['drums'].notes.append(self.create_note(42, 75, beat_time, 0.15))
-                 # Crash cymbal on first beat
-                 if i == 0:
-                     band['drums'].notes.append(self.create_note(49, 100, beat_time, 1.0))
-
-         # More active bass
-         if 'bass' in band:
-             for i, beat_time in enumerate(beats):
-                 bass_note = 36 + (i % 4) * 2
-                 band['bass'].notes.append(self.create_note(bass_note, 85, beat_time, 0.6))
-
-         # All instruments play
-         for role, instrument in band.items():
-             if role not in ['drums', 'bass'] and not instrument.is_drum:
-                 chord_notes = self.get_chord_for_section(style, "chorus", 0)
-                 for note_pitch in chord_notes:
-                     instrument.notes.append(self.create_note(note_pitch, 70, start_time, end_time - start_time))

-     def create_bass_line(self, beats, style):
-         """Create melodic bass line"""
-         bass_notes = []
-         if style == "pop":
-             pattern = [36, 38, 41, 43, 41, 38, 36, 35]
-         elif style == "electronic":
-             pattern = [36, 36, 39, 39, 41, 41, 39, 36]
-         elif style == "rock":
-             pattern = [36, 38, 36, 41, 36, 38, 36, 43]
-         else: # cinematic
-             pattern = [36, 39, 43, 46, 43, 39, 36, 34]
-
-         for i, beat_time in enumerate(beats[::2]): # Every other beat
-             note_pitch = pattern[i % len(pattern)]
-             bass_notes.append(self.create_note(note_pitch, 80, beat_time, 0.9))
-
-         return bass_notes

-     def extract_melody_for_section(self, melody_contour, start_time, end_time):
-         """Extract melody notes for a specific section"""
-         section_melody = []
-         for time, freq in melody_contour:
-             if start_time <= time < end_time:
-                 section_melody.append((time, freq))
-
-         # If no melody detected, create one
-         if not section_melody:
-             section_duration = end_time - start_time
-             for i in range(8):
-                 time = start_time + (i / 8) * section_duration
-                 freq = 440 * (2 ** (i / 12)) # Rising pattern
-                 section_melody.append((time, freq))
-
-         return section_melody

-     def get_chord_for_section(self, style, section_name, section_idx):
-         """Get appropriate chords for section"""
-         if style == "pop":
-             chords = [[60, 64, 67], [65, 69, 72], [67, 71, 74], [62, 65, 69]] # C, F, G, Am
-         elif style == "electronic":
-             chords = [[65, 69, 72], [70, 74, 77], [72, 76, 79], [67, 70, 74]] # F, Bb, C, Gm
-         elif style == "rock":
-             chords = [[59, 62, 65], [64, 67, 71], [65, 69, 72], [62, 65, 69]] # Bm, Em, F, Am
-         else: # cinematic
-             chords = [[60, 63, 67], [65, 68, 72], [67, 70, 74], [62, 65, 69]] # Cm, Fm, Gm, Ab
-
-         return chords[section_idx % len(chords)]

-     def freq_to_midi(self, frequency):
-         """Convert frequency to MIDI note number"""
-         return int(69 + 12 * np.log2(frequency / 440.0))

-     def create_note(self, pitch, velocity, start, duration):
-         """Helper to create a note"""
-         return pretty_midi.Note(
-             velocity=velocity,
-             pitch=pitch,
-             start=start,
-             end=start + duration
-         )

-     def apply_complete_humanization(self, midi, intensity):
-         """Apply realistic humanization to entire song"""
-         for instrument in midi.instruments:
-             # Different humanization for different instrument types
-             if instrument.is_drum:
-                 timing_variance = 0.01 # Drums are tighter
-                 velocity_variance = 8
-             elif any(role in instrument.name.lower() for role in ['bass', 'pad', 'string']):
-                 timing_variance = 0.02 # Background instruments
-                 velocity_variance = 10
-             else:
-                 timing_variance = 0.03 # Lead instruments have more feel
-                 velocity_variance = 15
-
-             for note in instrument.notes:
-                 # Timing variations
-                 note.start += np.random.normal(0, timing_variance * intensity)
-                 note.start = max(0, note.start)
-
-                 # Velocity variations
-                 vel_change = int(np.random.normal(0, velocity_variance * intensity))
-                 note.velocity = max(30, min(127, note.velocity + vel_change))
-
-                 # Duration variations (except drums)
-                 if not instrument.is_drum:
-                     dur_change = np.random.normal(0, 0.05 * intensity)
-                     note.end = max(note.start + 0.1, note.end + dur_change)

- def create_complete_song(input_mp3, style, intensity):
-     """Main function to create complete song"""
      if input_mp3 is None:
          return None, "Please upload an MP3 file"

-     humanizer = CompleteSongHumanizer()

      try:
-         # Create complete humanized song
-         audio_data, sr = humanizer.create_complete_song(input_mp3, style, intensity)

          # Save as MP3
-         output_path = tempfile.mktemp(suffix='_complete_song.mp3')
          sf.write(output_path, audio_data, sr)

-         return output_path, "🎵 Complete song created! Your humanized masterpiece is ready!"

      except Exception as e:
          return None, f"❌ Error: {str(e)}"

- # Professional interface
- with gr.Blocks(theme=gr.themes.Soft(), title="Complete Song Humanizer") as demo:
      gr.Markdown("""
- # 🎵 Complete Song Humanizer
- **Transform AI Music into Full, Professional Human Performances**

- *Creates complete songs with verses, choruses, and full band arrangements*
      """)

      with gr.Row():
          with gr.Column(scale=1):
-             gr.Markdown("### 1. Upload Your AI Song")
              input_audio = gr.Audio(
                  sources=["upload"],
                  type="filepath",
-                 label="Upload MP3 File"
-             )
-
-             gr.Markdown("### 2. Choose Song Style")
-             style = gr.Radio(
-                 ["pop", "electronic", "rock", "cinematic"],
-                 value="pop",
-                 label="Music Genre",
-                 info="Each style creates different band arrangements"
              )

              intensity = gr.Slider(
-                 0.1, 1.0, value=0.8,
-                 label="Human Feel Intensity",
-                 info="How natural and human-like the performance sounds"
              )

-             create_btn = gr.Button(
-                 "🎹 Create Complete Song",
                  variant="primary",
                  size="lg"
              )

          with gr.Column(scale=1):
-             gr.Markdown("### 3. Your Complete Song")
              output_audio = gr.Audio(
-                 label="Your Humanized Masterpiece",
                  type="filepath",
                  interactive=False
              )

              status = gr.Textbox(
-                 label="Creation Status",
                  interactive=False
              )

-     # Song details
-     with gr.Accordion("🎼 What You're Getting", open=True):
          gr.Markdown("""
- **Each song includes:**
-
- **🎵 Complete Song Structure:**
- - Intro, Verses, Choruses, Bridge, Outro
- - Professional arrangement with buildup and climax
- - Dynamic changes between sections
-
- **🎸 Full Band Arrangement:**
- - **Pop**: Drums, Bass, Guitar, Piano, Strings, Synth (6 instruments)
- - **Electronic**: EDM Drums, Synth Bass, Lead, Pad, FX, Electric Piano (6 instruments)
- - **Rock**: Rock Drums, Bass, 2 Guitars, Keys, Strings (6 instruments)
- - **Cinematic**: Orchestral Drums, 2 String sections, Brass, Woodwinds, Harp (6 instruments)
-
- **🎛️ Professional Production:**
- - Realistic human timing variations
- - Dynamic velocity changes
- - Section-specific arrangements
- - Melodic development
- - Harmonic progression
-
- **⏱️ Processing Time:** ~15-30 seconds for a complete song
          """)

-     # Creation process
-     create_btn.click(
-         fn=create_complete_song,
-         inputs=[input_audio, style, intensity],
          outputs=[output_audio, status]
      )

  import gradio as gr
  import numpy as np
  import tempfile
  import librosa
  import soundfile as sf
+ import noisereduce as nr
+ from scipy import signal

+ class AIHumanizer:
      def __init__(self):
+         pass

+     def humanize_audio(self, audio_path, intensity=0.7):
+         """Remove AI artifacts and make audio sound human-made"""
          try:
+             # Load the full song
+             y, sr = librosa.load(audio_path, sr=44100, mono=False)

+             # If stereo, process both channels
+             if len(y.shape) > 1:
+                 processed_channels = []
+                 for channel in y:
+                     processed_channel = self.process_channel(channel, sr, intensity)
+                     processed_channels.append(processed_channel)
+                 y_processed = np.vstack(processed_channels)
+             else:
+                 y_processed = self.process_channel(y, sr, intensity)

+             return y_processed, sr

          except Exception as e:
+             raise Exception(f"Humanization failed: {str(e)}")

+     def process_channel(self, y, sr, intensity):
+         """Process a single audio channel to remove AI artifacts"""
+         # 1. Reduce robotic/metallic frequencies (common in AI audio)
+         y_processed = self.reduce_ai_artifacts(y, sr, intensity)
+
+         # 2. Add natural human timing variations
+         y_processed = self.add_timing_variations(y_processed, sr, intensity)
+
+         # 3. Add subtle pitch variations (like human singers/instruments)
+         y_processed = self.add_pitch_variations(y_processed, sr, intensity)
+
+         # 4. Add natural room ambiance
+         y_processed = self.add_room_ambiance(y_processed, sr, intensity)
+
+         # 5. Add subtle analog warmth
+         y_processed = self.add_analog_warmth(y_processed, sr, intensity)
+
+         # 6. Reduce perfect quantization
+         y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)
+
+         return y_processed

+     def reduce_ai_artifacts(self, y, sr, intensity):
+         """Reduce common AI audio artifacts"""
+         # Reduce metallic/robotic frequencies (common in AI vocals)
+         # These are often in the 2kHz-6kHz range
+         sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
+         y_filtered = signal.sosfilt(sos, y)
+
+         # Blend with original based on intensity
+         y_processed = y * (1 - intensity*0.3) + y_filtered * (intensity*0.3)
+
+         return y_processed

+     def add_timing_variations(self, y, sr, intensity):
+         """Add subtle timing variations like human musicians"""
+         # Create a time-stretching effect with small random variations
+         original_length = len(y)
+
+         # Small random speed variations (like human timing)
+         speed_variation = 1.0 + np.random.normal(0, 0.005 * intensity)
+
+         # Resample to create timing variations
+         new_length = int(original_length / speed_variation)
+         indices = np.linspace(0, original_length-1, new_length)
+         y_varied = np.interp(indices, np.arange(original_length), y)
+
+         # Resample back to original length
+         if len(y_varied) != original_length:
+             indices_back = np.linspace(0, len(y_varied)-1, original_length)
+             y_varied = np.interp(indices_back, np.arange(len(y_varied)), y_varied)
+
+         # Blend with original
+         blend_factor = 0.1 * intensity
+         return y * (1 - blend_factor) + y_varied * blend_factor

+     def add_pitch_variations(self, y, sr, intensity):
+         """Add subtle pitch variations like human performance"""
+         # Small pitch variations (vibrato effect)
+         t = np.linspace(0, len(y)/sr, len(y))
+
+         # Create subtle vibrato
+         vibrato_rate = 5.0 # Hz
+         vibrato_depth = 0.3 * intensity # Semitones
+
+         pitch_variation = np.sin(2 * np.pi * vibrato_rate * t) * vibrato_depth
+
+         # Apply pitch shifting using phase vocoder
+         y_pitched = self.pitch_shift_pv(y, sr, pitch_variation)
+
+         # Blend with original
+         blend_factor = 0.15 * intensity
+         return y * (1 - blend_factor) + y_pitched * blend_factor

+     def pitch_shift_pv(self, y, sr, pitch_variation):
+         """Simple phase vocoder pitch shifting"""
+         # Simple implementation - in practice you'd use librosa's phase_vocoder
+         # For now, we'll use a simplified version
+         try:
+             import librosa
+             # Use small constant pitch shift for simplicity
+             n_steps = 0.1 * np.mean(np.abs(pitch_variation))
+             y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
+             return y_shifted
+         except:
+             return y

+     def add_room_ambiance(self, y, sr, intensity):
+         """Add natural room reverb to remove sterile AI sound"""
+         # Create simple impulse response for natural room
+         impulse_length = int(0.3 * sr) # 300ms reverb
+         impulse = np.zeros(impulse_length)
+
+         # Early reflections
+         early_reflections = int(0.02 * sr) # 20ms
+         impulse[early_reflections] = 0.7
+
+         # Late reverb tail
+         decay = np.exp(-np.linspace(0, 10, impulse_length - early_reflections))
+         impulse[early_reflections:] = decay * 0.4
+
+         # Apply convolution reverb
+         y_reverb = signal.convolve(y, impulse, mode='same') / np.max(np.abs(impulse))
+
+         # Blend with original
+         blend_factor = 0.08 * intensity
+         return y * (1 - blend_factor) + y_reverb * blend_factor

+     def add_analog_warmth(self, y, sr, intensity):
+         """Add analog-style warmth and saturation"""
+         # Soft clipping saturation
+         saturation_amount = 1.0 + 0.5 * intensity
+         y_saturated = np.tanh(y * saturation_amount) / saturation_amount
+
+         # Add subtle tape warmth (low-end boost)
+         sos_warmth = signal.butter(2, 150, 'highpass', fs=sr, output='sos')
+         y_warmth = signal.sosfilt(sos_warmth, y_saturated)
+
+         # Blend
+         blend_factor = 0.2 * intensity
+         return y * (1 - blend_factor) + y_warmth * blend_factor

+     def reduce_perfect_quantization(self, y, sr, intensity):
+         """Reduce perfectly quantized timing"""
+         # Add subtle random amplitude variations
+         t = np.linspace(0, len(y)/sr, len(y))
+
+         # Low-frequency amplitude modulation (like human dynamics)
+         lfo_rate = 0.5 # Hz
+         lfo_depth = 0.05 * intensity
+         amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
+
+         # Random micro-variations
+         random_variation = 1.0 + np.random.normal(0, 0.02 * intensity, len(y))
+
+         # Combine variations
+         total_variation = amplitude_variation * random_variation
+
+         return y * total_variation

+ def humanize_song(input_mp3, intensity):
+     """Main humanization function"""
      if input_mp3 is None:
          return None, "Please upload an MP3 file"

+     humanizer = AIHumanizer()

      try:
+         # Process the entire song to remove AI artifacts
+         audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)

          # Save as MP3
+         output_path = tempfile.mktemp(suffix='_humanized.mp3')
          sf.write(output_path, audio_data, sr)

+         return output_path, "✅ Song humanized! AI artifacts removed and human feel added."

      except Exception as e:
          return None, f"❌ Error: {str(e)}"

+ # Simple interface focused on humanization
+ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
      gr.Markdown("""
+ # 🎵 AI Song Humanizer
+ **Remove AI Detection from Your Music - Make it Sound Human-Made**

+ *Upload AI-generated songs → Remove robotic artifacts → Download human-sounding music*
      """)

      with gr.Row():
          with gr.Column(scale=1):
+             gr.Markdown("### 1. Upload AI-Generated Song")
              input_audio = gr.Audio(
                  sources=["upload"],
                  type="filepath",
+                 label="Upload your complete AI song (MP3, WAV, etc.)"
              )

+             gr.Markdown("### 2. Adjust Humanization")
              intensity = gr.Slider(
+                 0.1, 1.0, value=0.7,
+                 label="Humanization Strength",
+                 info="How much to remove AI artifacts and add human feel"
              )

+             process_btn = gr.Button(
+                 "✨ Remove AI Artifacts",
                  variant="primary",
                  size="lg"
              )

          with gr.Column(scale=1):
+             gr.Markdown("### 3. Download Human Version")
              output_audio = gr.Audio(
+                 label="Human-Sounding Song",
                  type="filepath",
                  interactive=False
              )

              status = gr.Textbox(
+                 label="Processing Status",
                  interactive=False
              )

+     with gr.Accordion("🔍 What This Actually Does", open=True):
          gr.Markdown("""
+ **This tool processes your EXISTING complete song to:**
+
+ 🎵 **Remove AI Audio Artifacts:**
+ - Reduces metallic/robotic frequencies
+ - Removes perfect quantization
+ - Eliminates sterile digital sound
+
+ 🎤 **Add Human Performance Elements:**
+ - Subtle timing variations (like human musicians)
+ - Natural pitch fluctuations (vibrato, human imperfection)
+ - Dynamic amplitude changes
+
+ 🎛️ **Add Analog Character:**
+ - Natural room ambiance and reverb
+ - Analog-style warmth and saturation
+ - Tape-like characteristics
+
+ **You keep:**
+ - Your original melody
+ - Your original arrangement
+ - Your original vocals/instruments
+ - Your complete song structure
+
+ **What changes:**
+ - Sounds like humans performed it
+ - No more "AI sound"
+ - Natural imperfections added
+ - Warmer, more organic sound
+
+ **Perfect for:** Making AI-generated songs undetectable as AI!
          """)

+     # Processing
+     process_btn.click(
+         fn=humanize_song,
+         inputs=[input_audio, intensity],
          outputs=[output_audio, status]
      )

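A minimal sketch (not part of commit 14849b4) of how the revised AIHumanizer pipeline could be exercised outside the Gradio UI. The module name app, the input/output file names, and the transpose before writing are assumptions: librosa.load(..., mono=False) plus the np.vstack in humanize_audio yield audio shaped (channels, samples) for stereo input, while soundfile.write expects (frames, channels).

# Hypothetical smoke test for the new humanization chain; file names are placeholders.
import soundfile as sf

from app import AIHumanizer  # assumes the revised app.py above is importable as `app`

humanizer = AIHumanizer()
audio, sr = humanizer.humanize_audio("ai_song.mp3", intensity=0.7)

# For stereo input, humanize_audio returns (channels, samples); soundfile
# expects (frames, channels), so transpose before writing.
if audio.ndim > 1:
    audio = audio.T

sf.write("ai_song_humanized.wav", audio, sr)
print(f"Wrote {audio.shape[0] / sr:.1f}s of processed audio at {sr} Hz")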