# HumanizeBot/app.py
import gradio as gr
import numpy as np
import tempfile
import librosa
import soundfile as sf
from scipy import signal
import os
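# Assumed dependencies (not pinned anywhere in this file): gradio, numpy,
# scipy, soundfile, and librosa (0.10+ keyword-argument style is used for
# pitch_shift below).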
class AIHumanizer:
    def __init__(self):
        pass

    def humanize_audio(self, audio_path, intensity=0.7):
        """Remove AI artifacts and make audio sound human-made."""
        try:
            print(f"Loading audio from: {audio_path}")
            # Load the full song at its native sample rate, keeping all channels
            y, sr = librosa.load(audio_path, sr=None, mono=False)
            print(f"Audio loaded: shape={y.shape}, sr={sr}")
            # If stereo, process each channel independently
            if y.ndim > 1:
                print("Processing stereo audio...")
                processed_channels = []
                for i in range(y.shape[0]):
                    print(f"Processing channel {i + 1}...")
                    processed_channels.append(self.process_channel(y[i], sr, intensity))
                y_processed = np.array(processed_channels)
            else:
                print("Processing mono audio...")
                y_processed = self.process_channel(y, sr, intensity)
                y_processed = np.array([y_processed])
            print("Audio processing completed successfully")
            return y_processed, sr
        except Exception as e:
            print(f"Error in humanize_audio: {e}")
            raise RuntimeError(f"Humanization failed: {e}") from e
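    # Sketch of standalone use outside the Gradio UI; "song.wav" is a
    # placeholder path, not a file shipped with this Space:
    #   humanizer = AIHumanizer()
    #   y, sr = humanizer.humanize_audio("song.wav", intensity=0.5)
    #   sf.write("song_humanized.wav", y.T, sr)  # (channels, samples) -> (samples, channels)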
    def process_channel(self, y, sr, intensity):
        """Process a single audio channel to remove AI artifacts."""
        print(f"Processing channel: {len(y)} samples")
        # Stages are applied in sequence; most blend their effect with the
        # dry signal internally, so no top-level copy is needed.
        # 1. Reduce robotic frequencies
        y = self.reduce_ai_artifacts(y, sr, intensity)
        # 2. Add timing variations
        y = self.add_timing_variations(y, sr, intensity)
        # 3. Add pitch variations
        y = self.add_pitch_variations(y, sr, intensity)
        # 4. Add room ambiance
        y = self.add_room_ambiance(y, sr, intensity)
        # 5. Add analog warmth
        y = self.add_analog_warmth(y, sr, intensity)
        # 6. Reduce perfect quantization
        y = self.reduce_perfect_quantization(y, sr, intensity)
        return y
    def reduce_ai_artifacts(self, y, sr, intensity):
        """Notch out harsh upper-mid frequencies common in AI renders."""
        # The band-stop's upper edge (6100 Hz) must sit below Nyquist,
        # so require sr > 12200 rather than a looser threshold.
        if sr > 12200 and intensity > 0.1:
            try:
                # Reduce harsh frequencies in the roughly 2-6 kHz range
                sos = signal.butter(4, [1900, 6100], 'bandstop', fs=sr, output='sos')
                y_filtered = signal.sosfilt(sos, y)
                # Blend with the original
                blend_factor = 0.3 * intensity
                return y * (1 - blend_factor) + y_filtered * blend_factor
            except Exception:
                return y
        return y
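    # The blend is a linear crossfade: out = (1 - b) * dry + b * filtered with
    # b = 0.3 * intensity, so even at intensity 1.0 at most 30% of the notched
    # signal is mixed in (the default intensity 0.7 gives b = 0.21).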
    def add_timing_variations(self, y, sr, intensity):
        """Add subtle timing variations."""
        if intensity < 0.2:
            return y
        try:
            # Simple approach: small random stretches per segment
            segment_size = int(sr * 1.0)  # 1-second segments
            if len(y) < segment_size * 2:
                return y
            segments = []
            for i in range(0, len(y), segment_size):
                segment = y[i:i + segment_size]
                if len(segment) == segment_size:
                    # Small random stretch of up to +/-1%
                    stretch = 1.0 + np.random.uniform(-0.01, 0.01) * intensity
                    new_len = int(segment_size * stretch)
                    # Linear resampling onto the stretched time axis
                    x_old = np.linspace(0, 1, segment_size)
                    x_new = np.linspace(0, 1, new_len)
                    segment_stretched = np.interp(x_new, x_old, segment)
                    # Trim or pad back to the original length
                    if len(segment_stretched) > segment_size:
                        segment_stretched = segment_stretched[:segment_size]
                    else:
                        segment_stretched = np.pad(segment_stretched, (0, segment_size - len(segment_stretched)))
                    segments.append(segment_stretched)
                else:
                    # Final partial segment passes through unchanged
                    segments.append(segment)
            return np.concatenate(segments)
        except Exception:
            return y
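    # Worked example: at sr = 44100, a stretch factor of 1.01 resamples a
    # 44100-sample segment to int(44100 * 1.01) = 44541 samples; trimming back
    # to 44100 keeps the total length fixed while that second of audio plays
    # back very slightly slower.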
    def add_pitch_variations(self, y, sr, intensity):
        """Add subtle pitch variations."""
        if intensity < 0.3:
            return y
        try:
            # Small random pitch shift, measured in semitones
            n_steps = np.random.uniform(-0.2, 0.2) * intensity
            y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
            # Blend with the original
            blend_factor = 0.2 * intensity
            return y * (1 - blend_factor) + y_shifted * blend_factor
        except Exception:
            return y
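    # n_steps is in semitones, so +/-0.2 corresponds to a frequency ratio of
    # 2 ** (0.2 / 12), roughly 1.0116, i.e. about a 1.2% (20 cent) shift,
    # audible mostly as slight detuning against the blended dry signal.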
    def add_room_ambiance(self, y, sr, intensity):
        """Add natural room reverb via a synthetic impulse response."""
        if intensity < 0.2:
            return y
        try:
            # Simple 150 ms reverb impulse
            impulse_len = int(0.15 * sr)
            if impulse_len < 10:
                return y
            impulse = np.zeros(impulse_len)
            # Early reflection at 10 ms
            early = int(0.01 * sr)
            if early < impulse_len:
                impulse[early] = 0.8
            # Exponentially decaying reverb tail
            tail_start = min(early + 1, impulse_len)
            if tail_start < impulse_len:
                tail_len = impulse_len - tail_start
                decay = np.exp(-np.linspace(0, 6, tail_len))
                impulse[tail_start:] = decay * 0.4
            # Apply convolution
            y_reverb = signal.convolve(y, impulse, mode='same')
            # Normalize the wet signal back to the dry peak level
            if np.max(np.abs(y_reverb)) > 0:
                y_reverb = y_reverb / np.max(np.abs(y_reverb)) * np.max(np.abs(y))
            # Blend with the original
            blend_factor = 0.1 * intensity
            return y * (1 - blend_factor) + y_reverb * blend_factor
        except Exception:
            return y
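    # The impulse models one early reflection at 10 ms followed by a 150 ms
    # tail that decays by a factor of e**-6, about 0.0025 or -52 dB: a very
    # short, dry-sounding "room". The 0.1 * intensity blend keeps it subtle.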
    def add_analog_warmth(self, y, sr, intensity):
        """Add analog-style warmth via soft clipping."""
        if intensity < 0.1:
            return y
        try:
            # Soft clipping with tanh
            saturation = 1.0 + 0.4 * intensity
            y_warm = np.tanh(y * saturation) / saturation
            # High-pass at 100 Hz to clear sub-bass rumble from the
            # saturated signal (this is a high-pass, not a low boost)
            if sr > 1000:
                sos = signal.butter(2, 100, 'high', fs=sr, output='sos')
                y_warm = signal.sosfilt(sos, y_warm)
            blend_factor = 0.15 * intensity
            return y * (1 - blend_factor) + y_warm * blend_factor
        except Exception:
            return y
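    # For small inputs, tanh(k * x) / k is approximately x - (k**2 * x**3) / 3,
    # so quiet material passes through almost untouched while peaks are
    # compressed, adding the low-order odd harmonics associated with analog
    # saturation.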
    def reduce_perfect_quantization(self, y, sr, intensity):
        """Break up perfectly static dynamics with slow gain modulation."""
        if intensity < 0.1:
            return y
        # Add subtle amplitude variations
        t = np.arange(len(y)) / sr
        # Slow LFO (0.3 Hz) for natural dynamics
        lfo1 = 1.0 + np.sin(2 * np.pi * 0.3 * t) * 0.02 * intensity
        # Faster LFO (2 Hz) for micro-variations
        lfo2 = 1.0 + np.sin(2 * np.pi * 2.0 * t) * 0.01 * intensity
        # Low-level random gain noise
        noise = 1.0 + np.random.normal(0, 0.005 * intensity, len(y))
        combined = lfo1 * lfo2 * noise
        return y * combined
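    # With intensity = 1.0 the combined gain stays within roughly +/-3%
    # (1.02 * 1.01 at the LFO peaks, plus noise with 0.5% standard deviation),
    # enough to break a perfectly flat envelope without audible tremolo.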
def humanize_song(input_audio, intensity):
    """Main humanization function wired to the Gradio button."""
    if input_audio is None:
        return None, "Please upload an audio file"
    humanizer = AIHumanizer()
    try:
        print("Starting humanization...")
        # Gradio passes the upload as a path because type="filepath" below
        audio_path = input_audio
        # Process the audio
        audio_data, sr = humanizer.humanize_audio(audio_path, intensity)
        print("Processing complete. Saving audio...")
        # Save as WAV (mkstemp instead of the deprecated, race-prone mktemp)
        fd, output_path = tempfile.mkstemp(suffix='_humanized.wav')
        os.close(fd)
        # Handle stereo/mono properly
        if audio_data.shape[0] == 1:
            # Mono
            sf.write(output_path, audio_data[0], sr)
        else:
            # Stereo: soundfile expects (samples, channels), so transpose
            sf.write(output_path, audio_data.T, sr)
        print(f"Saved to: {output_path}")
        return output_path, "✅ Success! Your song now sounds human-made. Download below."
    except Exception as e:
        error_msg = f"❌ Error: {e}"
        print(f"Error details: {error_msg}")
        return None, error_msg
# Simple and compatible interface
with gr.Blocks(theme=gr.themes.Soft(), title="AI Song Humanizer") as demo:
    gr.Markdown("""
# 🎵 AI Song Humanizer
**Remove AI Detection - Make Songs Sound Human-Made**
*Upload AI song → Remove robotic sound → Download human version*
""")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 1. Upload AI Song")
            input_audio = gr.Audio(
                sources=["upload"],
                type="filepath",
                label="Upload your AI-generated song"
            )
            gr.Markdown("### 2. Humanization Strength")
            intensity = gr.Slider(
                0.1, 1.0, value=0.7,
                label="How much human feel to add"
            )
            process_btn = gr.Button(
                "✨ Humanize Song",
                variant="primary",
                size="lg"
            )
        with gr.Column(scale=1):
            gr.Markdown("### 3. Download Result")
            output_audio = gr.Audio(
                label="Human-Sounding Version",
                type="filepath"
            )
            status = gr.Textbox(
                label="Status",
                interactive=False
            )
    with gr.Accordion("📖 How It Works", open=False):
        gr.Markdown("""
**This tool:**
- Takes your complete AI-generated song
- Removes robotic/AI artifacts
- Adds natural human performance variations
- Keeps your original music intact
- Makes it sound like humans performed it

**Perfect for making AI music undetectable!**
""")
    process_btn.click(
        fn=humanize_song,
        inputs=[input_audio, intensity],
        outputs=[output_audio, status]
    )
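    # Note (assumption, not in the original): multi-minute songs can exceed
    # default request timeouts; calling demo.queue() before demo.launch()
    # is the usual Gradio remedy.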
if __name__ == "__main__":
    demo.launch()