FILMITO committed
Commit 058c00b · verified · 1 Parent(s): 20ef8ef

Update app.py

Files changed (1)
  app.py +148 -161
app.py CHANGED
@@ -15,24 +15,24 @@ class AIHumanizer:
         try:
             print(f"Loading audio from: {audio_path}")

-            # Load the full song - handle both mono and stereo
             y, sr = librosa.load(audio_path, sr=None, mono=False)

-            print(f"Audio loaded: shape={y.shape}, sr={sr}, duration={len(y)/sr:.2f}s")

             # If stereo, process both channels
             if len(y.shape) > 1:
                 print("Processing stereo audio...")
                 processed_channels = []
-                for i, channel in enumerate(y):
                     print(f"Processing channel {i+1}...")
-                    processed_channel = self.process_channel(channel, sr, intensity)
                     processed_channels.append(processed_channel)
                 y_processed = np.array(processed_channels)
             else:
                 print("Processing mono audio...")
                 y_processed = self.process_channel(y, sr, intensity)
-                y_processed = np.array([y_processed])  # Make it 2D for consistency

             print("Audio processing completed successfully")
             return y_processed, sr
@@ -43,232 +43,235 @@ class AIHumanizer:

     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
-        print(f"Processing channel: {len(y)} samples, intensity={intensity}")

         # 1. Reduce robotic frequencies
-        y_processed = self.reduce_ai_artifacts(y, sr, intensity)

         # 2. Add timing variations
-        y_processed = self.add_timing_variations(y_processed, sr, intensity)

         # 3. Add pitch variations
-        y_processed = self.add_pitch_variations(y_processed, sr, intensity)

         # 4. Add room ambiance
-        y_processed = self.add_room_ambiance(y_processed, sr, intensity)

         # 5. Add analog warmth
-        y_processed = self.add_analog_warmth(y_processed, sr, intensity)

         # 6. Reduce perfect quantization
-        y_processed = self.reduce_perfect_quantization(y_processed, sr, intensity)

-        return y_processed

     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
-        # Reduce harsh frequencies in the 2kHz-6kHz range (common AI artifacts)
-        if sr > 4000:  # Only if sample rate is high enough
-            sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
-            y_filtered = signal.sosfilt(sos, y)
-
-            # Blend with original based on intensity
-            y_processed = y * (1 - intensity*0.3) + y_filtered * (intensity*0.3)
-            return y_processed
         return y

     def add_timing_variations(self, y, sr, intensity):
         """Add subtle timing variations"""
-        if intensity < 0.1:
             return y

-        # Create small random speed variations
-        segment_size = int(sr * 2.0)  # 2-second segments
-        segments = []
-
-        for i in range(0, len(y), segment_size):
-            segment = y[i:i+segment_size]
-            if len(segment) > 100:  # Only process if segment is long enough
-                # Small speed variation
-                speed_var = 1.0 + np.random.normal(0, 0.004 * intensity)
-                new_length = int(len(segment) / speed_var)

-                if new_length > 0 and len(segment) > 0:
-                    # Simple resampling for timing variation
-                    original_indices = np.arange(len(segment))
-                    new_indices = np.linspace(0, len(segment)-1, new_length)
-                    segment_varied = np.interp(new_indices, original_indices, segment)

-                    # Resample back to original length if needed
-                    if len(segment_varied) != len(segment):
-                        if len(segment_varied) > len(segment):
-                            segment_varied = segment_varied[:len(segment)]
-                        else:
-                            segment_varied = np.pad(segment_varied, (0, len(segment) - len(segment_varied)))

-                    segments.append(segment_varied)
                 else:
                     segments.append(segment)
-            else:
-                segments.append(segment)
-
-        if segments:
             return np.concatenate(segments)
-        return y

     def add_pitch_variations(self, y, sr, intensity):
         """Add subtle pitch variations"""
-        if intensity < 0.2:
             return y

         try:
-            # Use librosa for pitch shifting (more reliable)
-            n_steps = np.random.normal(0, 0.1 * intensity)
-            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps, bins_per_octave=24)

-            # Blend with original
-            blend_factor = 0.15 * intensity
             return y * (1 - blend_factor) + y_shifted * blend_factor
         except:
             return y

     def add_room_ambiance(self, y, sr, intensity):
         """Add natural room reverb"""
-        if intensity < 0.1:
-            return y
-
-        # Simple impulse response for natural room
-        impulse_length = int(0.2 * sr)  # 200ms reverb
-        if impulse_length < 10:
             return y

-        impulse = np.zeros(impulse_length)
-
-        # Early reflections
-        early_reflections = int(0.01 * sr)  # 10ms
-        if early_reflections < len(impulse):
-            impulse[early_reflections] = 0.6
-
-        # Late reverb tail
-        reverb_start = min(early_reflections + 1, len(impulse))
-        if reverb_start < len(impulse):
-            tail_length = len(impulse) - reverb_start
-            decay = np.exp(-np.linspace(0, 8, tail_length))
-            impulse[reverb_start:] = decay * 0.3
-
-        # Normalize impulse
-        if np.max(np.abs(impulse)) > 0:
-            impulse = impulse / np.max(np.abs(impulse))
-
-        # Apply convolution
         try:
             y_reverb = signal.convolve(y, impulse, mode='same')
-            # Normalize to prevent clipping
             if np.max(np.abs(y_reverb)) > 0:
                 y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))

-            # Blend with original
-            blend_factor = 0.08 * intensity
             return y * (1 - blend_factor) + y_reverb * blend_factor
         except:
             return y

     def add_analog_warmth(self, y, sr, intensity):
         """Add analog-style warmth"""
-        # Soft clipping saturation
-        saturation_amount = 1.0 + 0.3 * intensity
-        y_saturated = np.tanh(y * saturation_amount) / saturation_amount
-
-        # Add subtle warmth with EQ
         try:
-            # Gentle low-end boost
-            sos = signal.butter(2, 80, 'highpass', fs=sr, output='sos')
-            y_warm = signal.sosfilt(sos, y_saturated)

-            # Blend
-            blend_factor = 0.1 * intensity
             return y * (1 - blend_factor) + y_warm * blend_factor
         except:
-            return y_saturated

     def reduce_perfect_quantization(self, y, sr, intensity):
-        """Reduce perfectly quantized timing with amplitude variations"""
-        # Add subtle random amplitude variations
-        t = np.linspace(0, len(y)/sr, len(y))
-
-        # Low-frequency amplitude modulation
-        lfo_rate = 0.3 + 0.4 * intensity  # Hz
-        lfo_depth = 0.03 * intensity
-        amplitude_variation = 1.0 + np.sin(2 * np.pi * lfo_rate * t) * lfo_depth
-
-        # Random micro-variations
-        random_variation = 1.0 + np.random.normal(0, 0.01 * intensity, len(y))
-
-        # Combine variations
-        total_variation = amplitude_variation * random_variation

-        return y * total_variation

-def humanize_song(input_mp3, intensity):
     """Main humanization function"""
-    if input_mp3 is None:
         return None, "Please upload an audio file"

     humanizer = AIHumanizer()

     try:
-        print("Starting humanization process...")

-        # Process the entire song to remove AI artifacts
-        audio_data, sr = humanizer.humanize_audio(input_mp3, intensity)

-        print(f"Humanization complete. Saving audio: shape={audio_data.shape}, sr={sr}")

-        # Save as WAV (more reliable than MP3)
-        output_path = tempfile.mktemp(suffix='_humanized.wav')

-        # Ensure data is in correct format
-        if len(audio_data.shape) > 1:
-            audio_data = audio_data.T  # Transpose for soundfile

-        sf.write(output_path, audio_data, sr)
-        print(f"Audio saved successfully to: {output_path}")

-        return output_path, "✅ Song humanized! AI artifacts removed and human feel added."

     except Exception as e:
         error_msg = f"❌ Error: {str(e)}"
-        print(error_msg)
         return None, error_msg

-# Simple and reliable interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
-    **Remove AI Detection - Make Your Songs Sound Human-Made**

-    *Upload your AI-generated song → Remove robotic artifacts → Download natural-sounding version*
     """)

     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 1. Upload AI Song")
             input_audio = gr.Audio(
-                sources=["upload", "microphone"],
                 type="filepath",
-                label="Upload your complete AI-generated song",
-                editable=True
             )

             gr.Markdown("### 2. Humanization Strength")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
-                label="How much human feel to add",
-                info="Lower = subtle, Higher = more natural/organic"
             )

             process_btn = gr.Button(
-                "🎹 Humanize This Song",
                 variant="primary",
                 size="lg"
             )
@@ -276,43 +279,27 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:

         with gr.Column(scale=1):
             gr.Markdown("### 3. Download Result")
             output_audio = gr.Audio(
-                label="Your Human-Sounding Song",
-                type="filepath",
-                interactive=False
             )

             status = gr.Textbox(
                 label="Status",
-                interactive=False,
-                max_lines=3
             )

-    with gr.Accordion("💡 How It Works", open=True):
         gr.Markdown("""
-        **This tool processes your EXISTING song to remove AI characteristics:**
-
-        ✅ **Keeps Everything Original:**
-        - Your complete song structure
-        - All vocals and instruments
-        - Melody and arrangement
-        - Everything you created
-
-        🎛️ **Removes AI Artifacts:**
-        - Robotic/metallic frequencies
-        - Perfect digital quantization
-        - Sterile, artificial sound
-        - AI-generated frequency patterns
-
-        🎵 **Adds Human Elements:**
-        - Natural timing variations
-        - Subtle pitch fluctuations
-        - Room ambiance and warmth
-        - Analog-style character

-        **Result:** Your same song, but it sounds like humans performed it!
         """)

-    # Processing function
     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
@@ -320,4 +307,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     )

 if __name__ == "__main__":
-    demo.launch(debug=True)

         try:
             print(f"Loading audio from: {audio_path}")

+            # Load the full song
             y, sr = librosa.load(audio_path, sr=None, mono=False)

+            print(f"Audio loaded: shape={y.shape if hasattr(y, 'shape') else 'mono'}, sr={sr}")

             # If stereo, process both channels
             if len(y.shape) > 1:
                 print("Processing stereo audio...")
                 processed_channels = []
+                for i in range(y.shape[0]):
                     print(f"Processing channel {i+1}...")
+                    processed_channel = self.process_channel(y[i], sr, intensity)
                     processed_channels.append(processed_channel)
                 y_processed = np.array(processed_channels)
             else:
                 print("Processing mono audio...")
                 y_processed = self.process_channel(y, sr, intensity)
+                y_processed = np.array([y_processed])

             print("Audio processing completed successfully")
             return y_processed, sr
 
     def process_channel(self, y, sr, intensity):
         """Process a single audio channel to remove AI artifacts"""
+        print(f"Processing channel: {len(y)} samples")
+
+        # Store original for blending
+        y_original = y.copy()

         # 1. Reduce robotic frequencies
+        y = self.reduce_ai_artifacts(y, sr, intensity)

         # 2. Add timing variations
+        y = self.add_timing_variations(y, sr, intensity)

         # 3. Add pitch variations
+        y = self.add_pitch_variations(y, sr, intensity)

         # 4. Add room ambiance
+        y = self.add_room_ambiance(y, sr, intensity)

         # 5. Add analog warmth
+        y = self.add_analog_warmth(y, sr, intensity)

         # 6. Reduce perfect quantization
+        y = self.reduce_perfect_quantization(y, sr, intensity)

+        return y
 
     def reduce_ai_artifacts(self, y, sr, intensity):
         """Reduce common AI audio artifacts"""
+        if sr > 4000 and intensity > 0.1:
+            try:
+                # Reduce harsh frequencies in the 2kHz-6kHz range
+                sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
+                y_filtered = signal.sosfilt(sos, y)
+
+                # Blend with original
+                blend_factor = 0.3 * intensity
+                return y * (1 - blend_factor) + y_filtered * blend_factor
+            except:
+                return y
         return y
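The rewritten reduce_ai_artifacts keeps a 4th-order Butterworth band-stop over 1.9-6.1 kHz and mixes at most 30% of the filtered signal back in (blend_factor = 0.3 * intensity), so in-band content is attenuated rather than removed. A minimal standalone check of that filter, not part of the commit, using scipy's sosfreqz:

# Standalone check of the band-stop used above (assumes scipy is installed).
import numpy as np
from scipy import signal

sr = 44100
sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')

# Probe the frequency response at a few points.
freqs = [440, 1000, 1900, 4000, 6100, 10000]
w, h = signal.sosfreqz(sos, worN=freqs, fs=sr)
for f, gain in zip(w, np.abs(h)):
    print(f"{f:7.0f} Hz: gain = {gain:.3f}")
# Expect ~1.0 well outside the band, ~0.71 at the -3 dB edges, near 0 mid-band.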
 
     def add_timing_variations(self, y, sr, intensity):
         """Add subtle timing variations"""
+        if intensity < 0.2:
             return y

+        try:
+            # Simple approach: small random stretches
+            segment_size = int(sr * 1.0)  # 1-second segments
+            if len(y) < segment_size * 2:
+                return y

+            segments = []
+            for i in range(0, len(y), segment_size):
+                segment = y[i:i+segment_size]
+                if len(segment) == segment_size:
+                    # Small random stretch
+                    stretch = 1.0 + np.random.uniform(-0.01, 0.01) * intensity
+                    new_len = int(segment_size * stretch)
+
+                    # Resample
+                    x_old = np.linspace(0, 1, segment_size)
+                    x_new = np.linspace(0, 1, new_len)
+                    segment_stretched = np.interp(x_new, x_old, segment)

+                    # Trim or pad to original length
+                    if len(segment_stretched) > segment_size:
+                        segment_stretched = segment_stretched[:segment_size]
+                    else:
+                        segment_stretched = np.pad(segment_stretched, (0, segment_size - len(segment_stretched)))

+                    segments.append(segment_stretched)
                 else:
                     segments.append(segment)
+
             return np.concatenate(segments)
+        except:
+            return y
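The new timing pass stretches each 1-second segment by up to +/-1% with np.interp, then trims or zero-pads back so segment boundaries stay aligned. A self-contained sketch of that step on a synthetic tone (all names hypothetical):

# Per-segment stretch, isolated from the class for illustration.
import numpy as np

sr = 22050
segment = np.sin(2 * np.pi * 220 * np.arange(sr) / sr)  # one 1-second segment

stretch = 1.0 + np.random.uniform(-0.01, 0.01)          # up to +/-1% speed change
new_len = int(len(segment) * stretch)

x_old = np.linspace(0, 1, len(segment))
x_new = np.linspace(0, 1, new_len)
stretched = np.interp(x_new, x_old, segment)

# Trim or zero-pad back so the segment keeps its original length.
if len(stretched) > len(segment):
    stretched = stretched[:len(segment)]
else:
    stretched = np.pad(stretched, (0, len(segment) - len(stretched)))

print(len(segment), "->", len(stretched))  # always 22050 after trim/pad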
 
     def add_pitch_variations(self, y, sr, intensity):
         """Add subtle pitch variations"""
+        if intensity < 0.3:
             return y

         try:
+            # Small random pitch shifts
+            n_steps = np.random.uniform(-0.2, 0.2) * intensity
+            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

+            # Blend
+            blend_factor = 0.2 * intensity
             return y * (1 - blend_factor) + y_shifted * blend_factor
         except:
             return y
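pitch_shift here is librosa.effects.pitch_shift with its standard (y, sr=..., n_steps=...) signature; blending the shifted copy at 0.2 * intensity gives a subtle detune rather than a full transposition. A small sketch on a test tone, assuming librosa is installed:

# Blended micro pitch shift on a synthetic tone.
import numpy as np
import librosa

sr = 22050
y = librosa.tone(440, sr=sr, duration=1.0)   # 1 s test tone

n_steps = np.random.uniform(-0.2, 0.2)       # fraction of a semitone
y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

blend = 0.2                                  # as above with intensity = 1.0
y_out = y * (1 - blend) + y_shifted * blend
print(f"shifted by {n_steps:+.3f} semitones, output RMS {np.sqrt(np.mean(y_out**2)):.3f}")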
 
     def add_room_ambiance(self, y, sr, intensity):
         """Add natural room reverb"""
+        if intensity < 0.2:
             return y

         try:
+            # Simple reverb impulse
+            impulse_len = int(0.15 * sr)
+            if impulse_len < 10:
+                return y
+
+            impulse = np.zeros(impulse_len)
+            # Early reflection
+            early = int(0.01 * sr)
+            if early < impulse_len:
+                impulse[early] = 0.8
+            # Reverb tail
+            tail_start = min(early + 1, impulse_len)
+            if tail_start < impulse_len:
+                tail_len = impulse_len - tail_start
+                decay = np.exp(-np.linspace(0, 6, tail_len))
+                impulse[tail_start:] = decay * 0.4
+
+            # Apply convolution
             y_reverb = signal.convolve(y, impulse, mode='same')
+            # Normalize
             if np.max(np.abs(y_reverb)) > 0:
                 y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))

+            # Blend
+            blend_factor = 0.1 * intensity
             return y * (1 - blend_factor) + y_reverb * blend_factor
         except:
             return y
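The reverb is a plain FIR convolution with a 150 ms impulse (one early reflection plus an exponential tail); mode='same' keeps the output the same length as the input. For minutes-long songs, scipy.signal.fftconvolve (which convolve's default method='auto' also selects when faster) does this in FFT time. A standalone sketch with stand-in audio:

# Convolution reverb step in isolation (names hypothetical).
import numpy as np
from scipy import signal

sr = 22050
y = np.random.randn(sr * 5) * 0.1         # 5 s of stand-in audio

impulse = np.zeros(int(0.15 * sr))
early = int(0.01 * sr)
impulse[early] = 0.8                      # single early reflection at 10 ms
tail = np.exp(-np.linspace(0, 6, len(impulse) - early - 1))
impulse[early + 1:] = tail * 0.4

# mode='same' keeps the output aligned with (and as long as) the input.
y_reverb = signal.fftconvolve(y, impulse, mode='same')
print(y.shape, y_reverb.shape)            # identical lengths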
 
     def add_analog_warmth(self, y, sr, intensity):
         """Add analog-style warmth"""
+        if intensity < 0.1:
+            return y
+
         try:
+            # Soft clipping
+            saturation = 1.0 + 0.4 * intensity
+            y_warm = np.tanh(y * saturation) / saturation

+            # Gentle low boost
+            if sr > 1000:
+                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
+                y_warm = signal.sosfilt(sos, y_warm)
+
+            blend_factor = 0.15 * intensity
             return y * (1 - blend_factor) + y_warm * blend_factor
         except:
+            return y
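tanh(x * s) / s is nearly the identity for quiet samples and compresses only the peaks, which is what makes this a soft clipper rather than a hard one. A quick numeric check at intensity = 1.0:

# Shape of the tanh soft-clip curve used above.
import numpy as np

saturation = 1.4                          # 1.0 + 0.4 * intensity at intensity = 1.0
for x in [0.05, 0.2, 0.5, 0.9, 1.0]:
    y = np.tanh(x * saturation) / saturation
    print(f"in {x:.2f} -> out {y:.3f}")
# Small samples pass almost unchanged; peaks are pulled down smoothly,
# so the stage adds harmonics without hard digital clipping.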
 
     def reduce_perfect_quantization(self, y, sr, intensity):
+        """Reduce perfectly quantized timing"""
+        if intensity < 0.1:
+            return y
+
+        # Add subtle amplitude variations
+        t = np.arange(len(y)) / sr
+        # Slow LFO for natural dynamics
+        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
+        # Faster LFO for micro-variations
+        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
+        # Random noise
+        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))

+        combined = lfo1 * lfo2 * noise
+        return y * combined
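At intensity = 1.0 the two LFOs contribute about +/-3% deterministic gain variation and the noise roughly 0.5% sigma, so the modulation stays close to unity. A bound check:

# Depth of the combined amplitude modulation (intensity = 1.0).
import numpy as np

sr = 22050
t = np.arange(sr * 10) / sr               # 10 s
lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02
lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01
noise = 1.0 + np.random.normal(0, 0.005, len(t))

combined = lfo1 * lfo2 * noise
print(f"min {combined.min():.3f}, max {combined.max():.3f}")
# The gain riding stays within a few percent of 1.0, so it reads as
# subtle "breathing" rather than audible tremolo.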
 
+def humanize_song(input_audio, intensity):
     """Main humanization function"""
+    if input_audio is None:
         return None, "Please upload an audio file"

     humanizer = AIHumanizer()

     try:
+        print("Starting humanization...")

+        # Get the file path from the audio input
+        audio_path = input_audio

+        # Process the audio
+        audio_data, sr = humanizer.humanize_audio(audio_path, intensity)

+        print(f"Processing complete. Saving audio...")

+        # Save as WAV
+        output_path = tempfile.mktemp(suffix='_humanized.wav')

+        # Handle stereo/mono properly
+        if audio_data.shape[0] == 1:
+            # Mono
+            sf.write(output_path, audio_data[0], sr)
+        else:
+            # Stereo - transpose for soundfile
+            sf.write(output_path, audio_data.T, sr)

+        print(f"Saved to: {output_path}")
+        return output_path, "✅ Success! Your song now sounds human-made. Download below."

     except Exception as e:
         error_msg = f"❌ Error: {str(e)}"
+        print(f"Error details: {error_msg}")
         return None, error_msg
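soundfile writes 2-D arrays as (frames, channels), while this pipeline carries (channels, frames), hence the transpose for stereo. Worth noting too that tempfile.mktemp is deprecated; tempfile.NamedTemporaryFile(delete=False).name is the usual substitute. A sketch of the layout convention (output path hypothetical):

# Why the transpose: soundfile expects (frames, channels).
import numpy as np
import soundfile as sf

sr = 22050
stereo = np.random.randn(2, sr) * 0.1     # pipeline layout: (channels, frames)

sf.write('/tmp/example_humanized.wav', stereo.T, sr)   # -> (frames, channels)

data, sr_read = sf.read('/tmp/example_humanized.wav')
print(data.shape, sr_read)                # (22050, 2) 22050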
 
+# Simple and compatible interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
     gr.Markdown("""
     # 🎵 AI Song Humanizer
+    **Remove AI Detection - Make Songs Sound Human-Made**

+    *Upload AI song → Remove robotic sound → Download human version*
     """)

     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 1. Upload AI Song")
             input_audio = gr.Audio(
+                sources=["upload"],
                 type="filepath",
+                label="Upload your AI-generated song"
             )

             gr.Markdown("### 2. Humanization Strength")
             intensity = gr.Slider(
                 0.1, 1.0, value=0.7,
+                label="How much human feel to add"
             )

             process_btn = gr.Button(
+                "✨ Humanize Song",
                 variant="primary",
                 size="lg"
             )

         with gr.Column(scale=1):
             gr.Markdown("### 3. Download Result")
             output_audio = gr.Audio(
+                label="Human-Sounding Version",
+                type="filepath"
             )

             status = gr.Textbox(
                 label="Status",
+                interactive=False
             )

+    with gr.Accordion("📖 How It Works", open=False):
         gr.Markdown("""
+        **This tool:**
+        - Takes your complete AI-generated song
+        - Removes robotic/AI artifacts
+        - Adds natural human performance variations
+        - Keeps your original music intact
+        - Makes it sound like humans performed it

+        **Perfect for making AI music undetectable!**
         """)

     process_btn.click(
         fn=humanize_song,
         inputs=[input_audio, intensity],
     )

 if __name__ == "__main__":
+    demo.launch()
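One way to smoke-test the committed app locally: launch it with python app.py, or call humanize_song directly, since the launch is guarded by __main__. The input path below is hypothetical:

# Possible local smoke test for this commit (input path hypothetical).
from app import humanize_song

output_path, status = humanize_song('my_ai_track.mp3', intensity=0.7)
print(status)
if output_path:
    print(f"Humanized file written to: {output_path}")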