FILMITO committed
Commit 20ef8ef · verified · 1 Parent(s): 49a971e

Update app.py

Files changed (1)
  1. app.py +182 -139
app.py CHANGED
@@ -3,8 +3,8 @@ import numpy as np
 import tempfile
 import librosa
 import soundfile as sf
-import noisereduce as nr
 from scipy import signal
+import os
 
 class AIHumanizer:
     def __init__(self):
@@ -13,39 +13,51 @@ class AIHumanizer:
     def humanize_audio(self, audio_path, intensity=0.7):
         """Remove AI artifacts and make audio sound human-made"""
         try:
-            # Load the full song
-            y, sr = librosa.load(audio_path, sr=44100, mono=False)
+            print(f"Loading audio from: {audio_path}")
+
+            # Load the full song - handle both mono and stereo
+            y, sr = librosa.load(audio_path, sr=None, mono=False)
+
+            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={len(y)/sr:.2f}s")
 
             # If stereo, process both channels
             if len(y.shape) > 1:
+                print("Processing stereo audio...")
                 processed_channels = []
-                for channel in y:
+                for i, channel in enumerate(y):
+                    print(f"Processing channel {i+1}...")
                     processed_channel = self.process_channel(channel, sr, intensity)
                     processed_channels.append(processed_channel)
-                y_processed = np.vstack(processed_channels)
+                y_processed = np.array(processed_channels)
             else:
+                print("Processing mono audio...")
                 y_processed = self.process_channel(y, sr, intensity)
+                y_processed = np.array([y_processed])  # Make it 2D for consistency
 
+            print("Audio processing completed successfully")
             return y_processed, sr
 
         except Exception as e:
+            print(f"Error in humanize_audio: {str(e)}")
             raise Exception(f"Humanization failed: {str(e)}")
 
     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
-        # 1. Reduce robotic/metallic frequencies (common in AI audio)
+        print(f"Processing channel: {len(y)} samples, intensity={intensity}")
+
+        # 1. Reduce robotic frequencies
         y_processed = self.reduce_ai_artifacts(y, sr, intensity)
 
-        # 2. Add natural human timing variations
+        # 2. Add timing variations
         y_processed = self.add_timing_variations(y_processed, sr, intensity)
 
-        # 3. Add subtle pitch variations (like human singers/instruments)
+        # 3. Add pitch variations
         y_processed = self.add_pitch_variations(y_processed, sr, intensity)
 
-        # 4. Add natural room ambiance
+        # 4. Add room ambiance
         y_processed = self.add_room_ambiance(y_processed, sr, intensity)
 
-        # 5. Add subtle analog warmth
+        # 5. Add analog warmth
         y_processed = self.add_analog_warmth(y_processed, sr, intensity)
 
         # 6. Reduce perfect quantization
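A note on the array shapes this hunk relies on: librosa.load(..., mono=False) returns stereo audio channel-first with shape (2, n_samples) and a 1-D array for mono, while soundfile.write expects frame-first (n_samples, n_channels), which is why the mono branch wraps its result to 2-D here and a later hunk transposes before saving. A minimal sketch of that convention (the file names are placeholders, not from the commit):

import numpy as np
import librosa
import soundfile as sf

y, sr = librosa.load("example.wav", sr=None, mono=False)  # hypothetical input
if y.ndim == 1:               # mono comes back as a 1-D array
    y = y[np.newaxis, :]      # normalize to (1, n_samples), as the commit does
print(y.shape)                # (n_channels, n_samples), channel-first
sf.write("example_out.wav", y.T, sr)  # soundfile wants frame-first data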
@@ -55,116 +67,142 @@ class AIHumanizer:
 
     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
-        # Reduce metallic/robotic frequencies (common in AI vocals)
-        # These are often in the 2kHz-6kHz range
-        sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
-        y_filtered = signal.sosfilt(sos, y)
-
-        # Blend with original based on intensity
-        y_processed = y * (1 - intensity*0.3) + y_filtered * (intensity*0.3)
-
-        return y_processed
+        # Reduce harsh frequencies in the 2kHz-6kHz range (common AI artifacts)
+        if sr > 12200:  # the 6100 Hz stopband edge must stay below Nyquist
+            sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
+            y_filtered = signal.sosfilt(sos, y)
+
+            # Blend with original based on intensity
+            y_processed = y * (1 - intensity*0.3) + y_filtered * (intensity*0.3)
+            return y_processed
+        return y
 
     def add_timing_variations(self, y, sr, intensity):
-        """Add subtle timing variations like human musicians"""
-        # Create a time-stretching effect with small random variations
-        original_length = len(y)
-
-        # Small random speed variations (like human timing)
-        speed_variation = 1.0 + np.random.normal(0, 0.005 * intensity)
-
-        # Resample to create timing variations
-        new_length = int(original_length / speed_variation)
-        indices = np.linspace(0, original_length-1, new_length)
-        y_varied = np.interp(indices, np.arange(original_length), y)
-
-        # Resample back to original length
-        if len(y_varied) != original_length:
-            indices_back = np.linspace(0, len(y_varied)-1, original_length)
-            y_varied = np.interp(indices_back, np.arange(len(y_varied)), y_varied)
+        """Add subtle timing variations"""
+        if intensity < 0.1:
+            return y
+
+        # Create small random speed variations
+        segment_size = int(sr * 2.0)  # 2-second segments
+        segments = []
+
+        for i in range(0, len(y), segment_size):
+            segment = y[i:i+segment_size]
+            if len(segment) > 100:  # Only process if segment is long enough
+                # Small speed variation
+                speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
+                new_length = int(len(segment) / speed_var)
+
+                if new_length > 0 and len(segment) > 0:
+                    # Simple resampling for timing variation
+                    original_indices = np.arange(len(segment))
+                    new_indices = np.linspace(0, len(segment)-1, new_length)
+                    segment_varied = np.interp(new_indices, original_indices, segment)
+
+                    # Resample back to original length if needed
+                    if len(segment_varied) != len(segment):
+                        if len(segment_varied) > len(segment):
+                            segment_varied = segment_varied[:len(segment)]
+                        else:
+                            segment_varied = np.pad(segment_varied, (0, len(segment) - len(segment_varied)))
+
+                    segments.append(segment_varied)
+                else:
+                    segments.append(segment)
+            else:
+                segments.append(segment)
 
-        # Blend with original
-        blend_factor = 0.1 * intensity
-        return y * (1 - blend_factor) + y_varied * blend_factor
+        if segments:
+            return np.concatenate(segments)
+        return y
 
     def add_pitch_variations(self, y, sr, intensity):
-        """Add subtle pitch variations like human performance"""
-        # Small pitch variations (vibrato effect)
-        t = np.linspace(0, len(y)/sr, len(y))
-
-        # Create subtle vibrato
-        vibrato_rate = 5.0  # Hz
-        vibrato_depth = 0.3 * intensity  # Semitones
-
-        pitch_variation = np.sin(2 * np.pi * vibrato_rate * t) * vibrato_depth
-
-        # Apply pitch shifting using phase vocoder
-        y_pitched = self.pitch_shift_pv(y, sr, pitch_variation)
-
-        # Blend with original
-        blend_factor = 0.15 * intensity
-        return y * (1 - blend_factor) + y_pitched * blend_factor
-
-    def pitch_shift_pv(self, y, sr, pitch_variation):
-        """Simple phase vocoder pitch shifting"""
-        # Simple implementation - in practice you'd use librosa's phase_vocoder
-        # For now, we'll use a simplified version
+        """Add subtle pitch variations"""
+        if intensity < 0.2:
+            return y
+
         try:
-            import librosa
-            # Use small constant pitch shift for simplicity
-            n_steps = 0.1 * np.mean(np.abs(pitch_variation))
-            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
-            return y_shifted
+            # Use librosa for pitch shifting (more reliable)
+            n_steps = np.random.normal(0, 0.1 * intensity)
+            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps, bins_per_octave=24)
+
+            # Blend with original
+            blend_factor = 0.15 * intensity
+            return y * (1 - blend_factor) + y_shifted * blend_factor
         except:
             return y
 
     def add_room_ambiance(self, y, sr, intensity):
-        """Add natural room reverb to remove sterile AI sound"""
-        # Create simple impulse response for natural room
-        impulse_length = int(0.3 * sr)  # 300ms reverb
+        """Add natural room reverb"""
+        if intensity < 0.1:
+            return y
+
+        # Simple impulse response for natural room
+        impulse_length = int(0.2 * sr)  # 200ms reverb
+        if impulse_length < 10:
+            return y
+
         impulse = np.zeros(impulse_length)
 
         # Early reflections
-        early_reflections = int(0.02 * sr)  # 20ms
-        impulse[early_reflections] = 0.7
+        early_reflections = int(0.01 * sr)  # 10ms
+        if early_reflections < len(impulse):
+            impulse[early_reflections] = 0.6
 
         # Late reverb tail
-        decay = np.exp(-np.linspace(0, 10, impulse_length - early_reflections))
-        impulse[early_reflections:] = decay * 0.4
+        reverb_start = min(early_reflections + 1, len(impulse))
+        if reverb_start < len(impulse):
+            tail_length = len(impulse) - reverb_start
+            decay = np.exp(-np.linspace(0, 8, tail_length))
+            impulse[reverb_start:] = decay * 0.3
 
-        # Apply convolution reverb
-        y_reverb = signal.convolve(y, impulse, mode='same') / np.max(np.abs(impulse))
+        # Normalize impulse
+        if np.max(np.abs(impulse)) > 0:
+            impulse = impulse / np.max(np.abs(impulse))
 
-        # Blend with original
-        blend_factor = 0.08 * intensity
-        return y * (1 - blend_factor) + y_reverb * blend_factor
+        # Apply convolution
+        try:
+            y_reverb = signal.convolve(y, impulse, mode='same')
+            # Normalize to prevent clipping
+            if np.max(np.abs(y_reverb)) > 0:
+                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))
+
+            # Blend with original
+            blend_factor = 0.08 * intensity
+            return y * (1 - blend_factor) + y_reverb * blend_factor
+        except:
+            return y
 
     def add_analog_warmth(self, y, sr, intensity):
-        """Add analog-style warmth and saturation"""
+        """Add analog-style warmth"""
         # Soft clipping saturation
-        saturation_amount = 1.0 + 0.5 * intensity
+        saturation_amount = 1.0 + 0.3 * intensity
         y_saturated = np.tanh(y * saturation_amount) / saturation_amount
 
-        # Add subtle tape warmth (low-end boost)
-        sos_warmth = signal.butter(2, 150, 'highpass', fs=sr, output='sos')
-        y_warmth = signal.sosfilt(sos_warmth, y_saturated)
-
-        # Blend
-        blend_factor = 0.2 * intensity
-        return y * (1 - blend_factor) + y_warmth * blend_factor
+        # Add subtle warmth with EQ
+        try:
+            # Gentle highpass to tighten the low end
+            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
+            y_warm = signal.sosfilt(sos, y_saturated)
+
+            # Blend
+            blend_factor = 0.1 * intensity
+            return y * (1 - blend_factor) + y_warm * blend_factor
+        except:
+            return y_saturated
 
     def reduce_perfect_quantization(self, y, sr, intensity):
-        """Reduce perfectly quantized timing"""
+        """Reduce perfectly quantized timing with amplitude variations"""
         # Add subtle random amplitude variations
         t = np.linspace(0, len(y)/sr, len(y))
 
-        # Low-frequency amplitude modulation (like human dynamics)
-        lfo_rate = 0.5  # Hz
-        lfo_depth = 0.05 * intensity
+        # Low-frequency amplitude modulation
+        lfo_rate = 0.3 + 0.4 * intensity  # Hz
+        lfo_depth = 0.03 * intensity
         amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
 
         # Random micro-variations
-        random_variation = 1.0 + np.random.normal(0, 0.02 * intensity, len(y))
+        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, len(y))
 
         # Combine variations
         total_variation = amplitude_variation * random_variation
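The notch-and-blend in reduce_ai_artifacts above is the core of the artifact reduction; a standalone sketch of the same idea, with the sample-rate guard stated in terms of Nyquist (the function and parameter names here are illustrative, not from the commit):

import numpy as np
from scipy import signal

def notch_and_blend(y, sr, intensity, band=(1900.0, 6100.0), mix=0.3):
    """Attenuate a frequency band, then crossfade with the dry signal."""
    if sr <= 2 * band[1]:  # the upper stopband edge must sit below Nyquist
        return y
    sos = signal.butter(4, band, 'bandstop', fs=sr, output='sos')
    wet = intensity * mix  # e.g. 0.21 at intensity=0.7
    return y * (1.0 - wet) + signal.sosfilt(sos, y) * wet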
@@ -174,102 +212,107 @@ class AIHumanizer:
 def humanize_song(input_mp3, intensity):
     """Main humanization function"""
     if input_mp3 is None:
-        return None, "Please upload an MP3 file"
+        return None, "Please upload an audio file"
 
     humanizer = AIHumanizer()
 
     try:
+        print("Starting humanization process...")
+
         # Process the entire song to remove AI artifacts
         audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)
 
-        # Save as MP3
-        output_path = tempfile.mktemp(suffix='_humanized.mp3')
+        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")
+
+        # Save as WAV (more reliable than MP3)
+        output_path = tempfile.mktemp(suffix='_humanized.wav')
+
+        # Ensure data is in correct format
+        if len(audio_data.shape) > 1:
+            audio_data = audio_data.T  # Transpose for soundfile
+
         sf.write(output_path, audio_data, sr)
+        print(f"Audio saved successfully to: {output_path}")
 
         return output_path, "✅ Song humanized! AI artifacts removed and human feel added."
 
     except Exception as e:
-        return None, f"❌ Error: {str(e)}"
+        error_msg = f"❌ Error: {str(e)}"
+        print(error_msg)
+        return None, error_msg
 
-# Simple interface focused on humanization
+# Simple and reliable interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
-    **Remove AI Detection from Your Music - Make it Sound Human-Made**
+    **Remove AI Detection - Make Your Songs Sound Human-Made**
 
-    *Upload AI-generated songs → Remove robotic artifacts → Download human-sounding music*
+    *Upload your AI-generated song → Remove robotic artifacts → Download natural-sounding version*
     """)
 
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("### 1. Upload AI-Generated Song")
+            gr.Markdown("### 1. Upload AI Song")
             input_audio = gr.Audio(
-                sources=["upload"],
+                sources=["upload", "microphone"],
                 type="filepath",
-                label="Upload your complete AI song (MP3, WAV, etc.)"
+                label="Upload your complete AI-generated song",
+                editable=True
             )
 
-            gr.Markdown("### 2. Adjust Humanization")
+            gr.Markdown("### 2. Humanization Strength")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
-                label="Humanization Strength",
-                info="How much to remove AI artifacts and add human feel"
+                label="How much human feel to add",
+                info="Lower = subtle, Higher = more natural/organic"
             )
 
             process_btn = gr.Button(
-                "✨ Remove AI Artifacts",
+                "🎹 Humanize This Song",
                 variant="primary",
                 size="lg"
            )
 
         with gr.Column(scale=1):
-            gr.Markdown("### 3. Download Human Version")
+            gr.Markdown("### 3. Download Result")
             output_audio = gr.Audio(
-                label="Human-Sounding Song",
+                label="Your Human-Sounding Song",
                 type="filepath",
                 interactive=False
            )
 
            status = gr.Textbox(
-                label="Processing Status",
-                interactive=False
+                label="Status",
+                interactive=False,
+                max_lines=3
            )
 
-    with gr.Accordion("🔍 What This Actually Does", open=True):
+    with gr.Accordion("💡 How It Works", open=True):
         gr.Markdown("""
-        **This tool processes your EXISTING complete song to:**
+        **This tool processes your EXISTING song to remove AI characteristics:**
 
-        🎵 **Remove AI Audio Artifacts:**
-        - Reduces metallic/robotic frequencies
-        - Removes perfect quantization
-        - Eliminates sterile digital sound
-
-        🎤 **Add Human Performance Elements:**
-        - Subtle timing variations (like human musicians)
-        - Natural pitch fluctuations (vibrato, human imperfection)
-        - Dynamic amplitude changes
-
-        🎛️ **Add Analog Character:**
-        - Natural room ambiance and reverb
-        - Analog-style warmth and saturation
-        - Tape-like characteristics
-
-        **You keep:**
-        - Your original melody
-        - Your original arrangement
-        - Your original vocals/instruments
+        ✅ **Keeps Everything Original:**
         - Your complete song structure
-
-        **What changes:**
-        - Sounds like humans performed it
-        - No more "AI sound"
-        - Natural imperfections added
-        - Warmer, more organic sound
-
-        **Perfect for:** Making AI-generated songs undetectable as AI!
+        - All vocals and instruments
+        - Melody and arrangement
+        - Everything you created
+
+        🎛️ **Removes AI Artifacts:**
+        - Robotic/metallic frequencies
+        - Perfect digital quantization
+        - Sterile, artificial sound
+        - AI-generated frequency patterns
+
+        🎵 **Adds Human Elements:**
+        - Natural timing variations
+        - Subtle pitch fluctuations
+        - Room ambiance and warmth
+        - Analog-style character
+
+        **Result:** Your same song, but it sounds like humans performed it!
         """)
 
-    # Processing
+    # Processing function
     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
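One caveat in the hunk above: tempfile.mktemp is deprecated because the returned name can race between generation and use. A sketch of the usual replacement, which reserves the file atomically (delete=False keeps it on disk so Gradio can serve it afterwards):

import tempfile

with tempfile.NamedTemporaryFile(suffix='_humanized.wav', delete=False) as f:
    output_path = f.name
# sf.write(output_path, audio_data, sr) then writes into the reserved path.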
@@ -277,4 +320,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(debug=True)
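For reference, the per-segment timing wobble introduced in add_timing_variations can be exercised on synthetic audio. A self-contained sketch (the test tone and seed are arbitrary) showing that each ~2 s block is resampled by a factor near 1.0 and then trimmed or zero-padded so the total length never drifts:

import numpy as np

rng = np.random.default_rng(0)
sr = 44100
y = np.sin(2 * np.pi * 440 * np.arange(6 * sr) / sr)  # 6 s test tone

seg = 2 * sr
out = []
for i in range(0, len(y), seg):
    s = y[i:i + seg]
    speed = 1.0 + rng.normal(0, 0.004)  # roughly 0.4% tempo wobble per block
    warped = np.interp(np.linspace(0, len(s) - 1, int(len(s) / speed)),
                       np.arange(len(s)), s)
    if len(warped) >= len(s):
        warped = warped[:len(s)]          # trim if the block got longer
    else:
        warped = np.pad(warped, (0, len(s) - len(warped)))  # pad if shorter
    out.append(warped)

y_varied = np.concatenate(out)
assert len(y_varied) == len(y)  # total duration is preserved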
+ audio_data = audio_data.T # Transpose for soundfile
233
+
234
  sf.write(output_path, audio_data, sr)
235
+ print(f"Audio saved successfully to: {output_path}")
236
 
237
  return output_path, "βœ… Song humanized! AI artifacts removed and human feel added."
238
 
239
  except Exception as e:
240
+ error_msg = f"❌ Error: {str(e)}"
241
+ print(error_msg)
242
+ return None, error_msg
243
 
244
+ # Simple and reliable interface
245
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
246
  gr.Markdown("""
247
  # 🎡 AI Song Humanizer
248
+ **Remove AI Detection - Make Your Songs Sound Human-Made**
249
 
250
+ *Upload your AI-generated song β†’ Remove robotic artifacts β†’ Download natural-sounding version*
251
  """)
252
 
253
  with gr.Row():
254
  with gr.Column(scale=1):
255
+ gr.Markdown("### 1. Upload AI Song")
256
  input_audio = gr.Audio(
257
+ sources=["upload", "microphone"],
258
  type="filepath",
259
+ label="Upload your complete AI-generated song",
260
+ editable=True
261
  )
262
 
263
+ gr.Markdown("### 2. Humanization Strength")
264
  intensity = gr.Slider(
265
  0.1, 1.0, value=0.7,
266
+ label="How much human feel to add",
267
+ info="Lower = subtle, Higher = more natural/organic"
268
  )
269
 
270
  process_btn = gr.Button(
271
+ "🎹 Humanize This Song",
272
  variant="primary",
273
  size="lg"
274
  )
275
 
276
  with gr.Column(scale=1):
277
+ gr.Markdown("### 3. Download Result")
278
  output_audio = gr.Audio(
279
+ label="Your Human-Sounding Song",
280
  type="filepath",
281
  interactive=False
282
  )
283
 
284
  status = gr.Textbox(
285
+ label="Status",
286
+ interactive=False,
287
+ max_lines=3
288
  )
289
 
290
+ with gr.Accordion("πŸ’‘ How It Works", open=True):
291
  gr.Markdown("""
292
+ **This tool processes your EXISTING song to remove AI characteristics:**
293
 
294
+ βœ… **Keeps Everything Original:**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  - Your complete song structure
296
+ - All vocals and instruments
297
+ - Melody and arrangement
298
+ - Everything you created
299
+
300
+ πŸŽ›οΈ **Removes AI Artifacts:**
301
+ - Robotic/metallic frequencies
302
+ - Perfect digital quantization
303
+ - Sterile, artificial sound
304
+ - AI-generated frequency patterns
305
+
306
+ 🎡 **Adds Human Elements:**
307
+ - Natural timing variations
308
+ - Subtle pitch fluctuations
309
+ - Room ambiance and warmth
310
+ - Analog-style character
311
+
312
+ **Result:** Your same song, but it sounds like humans performed it!
313
  """)
314
 
315
+ # Processing function
316
  process_btn.click(
317
  fn=humanize_song,
318
  inputs=[input_audio, intensity],
 
320
  )
321
 
322
  if __name__ == "__main__":
323
+ demo.launch(debug=True)
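Finally, a minimal sketch of driving this commit's class without the Gradio UI, assuming app.py is importable as a module (note that importing it will also build, but not launch, the Blocks interface; the paths are placeholders):

import soundfile as sf
from app import AIHumanizer  # the module this commit updates

humanizer = AIHumanizer()
audio, sr = humanizer.humanize_audio("my_ai_song.mp3", intensity=0.7)

# humanize_audio returns channel-first data; soundfile expects frame-first.
sf.write("my_ai_song_humanized.wav", audio.T, sr)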