Spaces:

mippia
/

AI-Music-Detection-FST

Running on Zero

Seonghyeon Go

change import index

acb0b29 4 months ago

7.75 kB

	import spaces
	import gradio as gr
	import torch
	import librosa
	import numpy as np
	from inference import inference
	from huggingface_hub import hf_hub_download
	import os
	from pathlib import Path


	def download_models_from_hub() -> dict:
	    """
	    Make sure the model checkpoints exist in the local ``checkpoints`` folder.

	    Every checkpoint that is missing locally is requested from the
	    ``mippia/FST-checkpoints`` repository through ``hf_hub_download``;
	    checkpoints that are already on disk are reused and no download is
	    attempted for them.

	    Returns:
	        A dict mapping each model name ("main", "backup") to the local
	        checkpoint path as a string (``checkpoints/<filename>``).

	    Raises:
	        Any exception raised by ``hf_hub_download`` propagates unchanged;
	        nothing is caught here.
	    """
	    model_dir = Path("checkpoints")
	    model_dir.mkdir(exist_ok=True)

	    # Original checkpoint filenames on HF Hub
	    models = {
	        "main": "EmbeddingModel_MERT_768-epoch=0073-val_loss=0.1058-val_acc=0.9585-val_f1=0.9366-val_precision=0.9936-val_recall=0.8857.ckpt",
	        "backup": "step=007000-val_loss=0.1831-val_acc=0.9278.ckpt",
	    }

	    downloaded_models = {}

	    for model_name, filename in models.items():
	        local_path = model_dir / filename

	        if local_path.exists():
	            print(f"✅ {model_name} model already exists locally")
	        else:
	            print(f"📥 Downloading {model_name} model from Hugging Face Hub...")
	            # The return value is intentionally not kept: the path recorded
	            # below is always derived from local_path.
	            hf_hub_download(
	                repo_id="mippia/FST-checkpoints",
	                filename=filename,
	                local_dir=str(model_dir),
	                local_dir_use_symlinks=False,
	            )
	            print(f"✅ {model_name} model downloaded successfully!")

	        # Same bookkeeping for both branches, so it lives after the if/else.
	        downloaded_models[model_name] = str(local_path)

	    return downloaded_models

	@spaces.GPU
	def detect_ai_audio(audio_file):
	    """
	    Detect whether the uploaded audio file was generated by AI.

	    Args:
	        audio_file: Value delivered by the audio input component, or None
	            when nothing was uploaded. It is handed to ``inference`` as-is.

	    Returns:
	        An HTML snippet (str): a prompt to upload a file, a result card
	        with the verdict and the raw model output, or an error card when
	        ``inference`` raises.

	    NOTE(review): the format of what ``inference`` returns is not visible
	    here; the verdict is derived from keywords in its text form - confirm
	    that AI results always contain one of them.
	    """
	    # Function-scope import so this block does not depend on the file header.
	    import html

	    if audio_file is None:
	        return """
	<div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
	<div style="font-size: 18px; color: #ff6b6b;">⚠️ Please upload an audio file</div>
	</div>
	"""

	    try:
	        result = inference(audio_file)
	        raw_text = str(result)

	        # Format result with better styling
	        if _is_ai_verdict(raw_text):
	            status = "AI Generated"
	            color = "#ff6b6b"
	            confidence = "High confidence this audio was generated by AI"
	        else:
	            status = "Human Generated"
	            color = "#51cf66"
	            confidence = "High confidence this audio was created by humans"

	        # The raw output is escaped before it is embedded in the markup so
	        # characters such as "<" or "&" cannot break or inject HTML.
	        formatted_result = f"""
	<div style="text-align: center; padding: 25px; border-radius: 15px; background: linear-gradient(135deg, {color}22, {color}11); border: 2px solid {color}33;">
	<div style="font-size: 28px; font-weight: bold; color: {color}; margin-bottom: 10px;">{status}</div>
	<div style="font-size: 16px; color: #666; margin-bottom: 8px;">{confidence}</div>
	<div style="font-size: 14px; color: #888;">Raw output: {html.escape(raw_text)}</div>
	</div>
	"""

	        return formatted_result

	    except Exception as e:
	        # UI boundary: any failure is turned into an error card instead of
	        # propagating. The message is escaped for the same reason as above.
	        error_result = f"""
	<div style="text-align: center; padding: 20px; border-radius: 10px; background: linear-gradient(135deg, #ff6b6b22, #ff6b6b11);">
	<div style="font-size: 20px; font-weight: bold; color: #ff6b6b; margin-bottom: 8px;">Error</div>
	<div style="font-size: 14px; color: #666;">Failed to process audio: {html.escape(str(e))}</div>
	</div>
	"""
	        return error_result


	def _is_ai_verdict(text):
	    """Return True when *text* contains a keyword that marks AI-generated audio."""
	    import re

	    lowered = text.lower()
	    if "artificial" in lowered or "fake" in lowered:
	        return True
	    # "ai" must stand alone (not be surrounded by letters); a plain substring
	    # test would also fire on words such as "failed" or "contains".
	    return re.search(r"(?<![a-z])ai(?![a-z])", lowered) is not None

	# Custom CSS for modern design.
	# This stylesheet is handed to gr.Interface through its `css` argument.
	# The .upload-container, .output-container and .main-container selectors
	# correspond to the `elem_classes` values given to the audio input, the
	# HTML output and the Interface itself.
	custom_css = """
	/* Global background gradient */
	.gradio-container {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	min-height: 100vh;
	}

	/* Main container styling */
	.main-container {
	background: rgba(255, 255, 255, 0.95) !important;
	backdrop-filter: blur(10px) !important;
	border-radius: 20px !important;
	box-shadow: 0 20px 40px rgba(0,0,0,0.1) !important;
	margin: 20px !important;
	padding: 30px !important;
	}

	/* Title styling */
	h1 {
	background: linear-gradient(135deg, #667eea, #764ba2) !important;
	-webkit-background-clip: text !important;
	-webkit-text-fill-color: transparent !important;
	text-align: center !important;
	font-size: 3em !important;
	font-weight: 800 !important;
	margin-bottom: 10px !important;
	}

	/* Description text */
	.gradio-markdown p {
	text-align: center !important;
	font-size: 1.2em !important;
	color: #555 !important;
	margin-bottom: 30px !important;
	}

	/* Audio upload component */
	.upload-container {
	background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
	border-radius: 15px !important;
	padding: 20px !important;
	border: none !important;
	box-shadow: 0 10px 30px rgba(240, 147, 251, 0.3) !important;
	transition: all 0.3s ease !important;
	}

	.upload-container:hover {
	transform: translateY(-5px) !important;
	box-shadow: 0 15px 40px rgba(240, 147, 251, 0.4) !important;
	}

	/* Output container */
	.output-container {
	background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%) !important;
	border-radius: 15px !important;
	padding: 20px !important;
	border: none !important;
	box-shadow: 0 10px 30px rgba(168, 237, 234, 0.3) !important;
	min-height: 150px !important;
	}

	/* Button styling */
	.gr-button {
	background: linear-gradient(135deg, #667eea, #764ba2) !important;
	border: none !important;
	border-radius: 25px !important;
	padding: 12px 30px !important;
	font-weight: 600 !important;
	color: white !important;
	box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4) !important;
	transition: all 0.3s ease !important;
	}

	.gr-button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
	}

	/* Animation */
	@keyframes fadeInUp {
	from {
	opacity: 0;
	transform: translateY(30px);
	}
	to {
	opacity: 1;
	transform: translateY(0);
	}
	}

	.gradio-container > div {
	animation: fadeInUp 0.8s ease-out !important;
	}

	/* Responsive design */
	@media (max-width: 768px) {
	h1 {
	font-size: 2em !important;
	}

	.main-container {
	margin: 10px !important;
	padding: 20px !important;
	}
	}
	"""

	# Startup banner, printed once when the module is loaded
	for _banner in (
	    "🚀 Starting FST AI Audio Detection App...",
	    "📦 Initializing models...",
	):
	    print(_banner)

	# Fetch every checkpoint that is not already present on disk
	models = download_models_from_hub()

	# Report whether a path for the main checkpoint was recorded
	print(
	    "✅ Main model ready for inference"
	    if models.get("main")
	    else "⚠️ Warning: Main model not available, app may not work properly"
	)

	# Assemble the Gradio interface from separately built pieces.
	# Input: an audio upload that passes a file path to the callback.
	_audio_input = gr.Audio(
	    type="filepath",
	    label="Upload Audio File",
	    elem_classes=["upload-container"],
	)

	# Output: an HTML panel that shows the card returned by detect_ai_audio.
	_result_panel = gr.HTML(
	    label="Detection Result",
	    elem_classes=["output-container"],
	)

	# Markup shown under the title.
	_description_html = """
	<div style="text-align: center; font-size: 1.2em; color: #555; margin: 20px 0;">
	<p><strong>Advanced AI technology</strong> to accurately detect whether uploaded audio was generated by AI!</p>
	<p>Supported formats: MP3, WAV, M4A, FLAC and various audio formats</p>
	<p>Powered by Fusion Segment Transformer (FST) - ICASSP 2026</p>
	</div>
	"""

	# Soft theme with the Inter web font and generic fallbacks.
	_soft_theme = gr.themes.Soft(
	    primary_hue="blue",
	    secondary_hue="purple",
	    neutral_hue="gray",
	    font=[gr.themes.GoogleFont("Inter"), "Arial", "sans-serif"],
	)

	demo = gr.Interface(
	    fn=detect_ai_audio,
	    inputs=_audio_input,
	    outputs=_result_panel,
	    title="AI Audio Detector",
	    description=_description_html,
	    examples=[],
	    css=custom_css,
	    theme=_soft_theme,
	    elem_classes=["main-container"],
	)

	if __name__ == "__main__":
	    # Launch settings gathered in one place: listen on all interfaces at
	    # port 7860, request a share link, hide the API page, show errors.
	    _launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	        "show_api": False,
	        "show_error": True,
	    }
	    demo.launch(**_launch_options)