Spaces:

MCP-1st-Birthday
/

voicekit

Running

App Files Files Community

voicekit / app.py

jjin6573

Upload folder using huggingface_hub

7ae2c28 verified 13 days ago

raw

history blame contribute delete

109 kB

	"""
	VoiceKit - MCP Server for Voice Analysis

	6 MCP tools for voice processing (all accept base64 audio):
	- Embedding extraction, voice comparison, acoustic analysis
	- Speech-to-text, voice isolation, similarity analysis

	MCP Endpoint: https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse
	"""

	import gradio as gr
	import base64
	import os
	import json
	import tempfile
	import math
	import re

	# Keep Gradio's scratch files in a "gradio_temp" folder under the current
	# working directory, and point the tempfile module at that same folder.
	GRADIO_TEMP_DIR = os.path.join(os.getcwd(), "gradio_temp")
	os.makedirs(GRADIO_TEMP_DIR, exist_ok=True)
	tempfile.tempdir = GRADIO_TEMP_DIR
	os.environ.update(GRADIO_TEMP_DIR=GRADIO_TEMP_DIR)

	# Modal connection (requires MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets).
	# Any failure while importing modal or looking up the remote class leaves the
	# app running with `analyzer = None` and `modal_available = False`.
	try:
	    import modal
	    AudioAnalyzer = modal.Cls.from_name("voice-semantle", "AudioAnalyzer")
	    analyzer = AudioAnalyzer()
	except Exception as exc:
	    analyzer, modal_available = None, False
	    print(f"Modal not available: {exc}")
	else:
	    modal_available = True
	    print("Modal connected!")


	# Load README.md and convert to HTML
	def load_readme_as_html():
	    """Load README.md from the working directory and convert it to an HTML fragment.

	    This is a small regex-based converter, not a full markdown parser. It handles
	    YAML front matter (dropped), fenced code blocks, ATX headers (#, ##, ###),
	    images (both <img> tags and ![alt](src) syntax), inline code, **bold**,
	    [text](url) links, pipe tables and "- " bullet lists. Any remaining line that
	    does not already start with "<" is wrapped in <p>.

	    Returns:
	        The HTML string with every "{" and "}" doubled (the original intent is
	        f-string/format compatibility), or a "<p>Error loading README: ...</p>"
	        fragment if reading or converting fails.
	    """
	    hf_base_url = "https://huggingface.co/spaces/MCP-1st-Birthday/voicekit/resolve/main"

	    def to_img_tag(src):
	        # Relative paths (anything not starting with http) are served from the Space repo.
	        if not src.startswith('http'):
	            src = f"{hf_base_url}/{src}"
	        return f'<img src="{src}" style="max-width:100%; height:auto; border-radius:8px; margin:12px 0;">'

	    try:
	        with open("README.md", "r", encoding="utf-8") as f:
	            content = f.read()

	        # Remove YAML front matter
	        content = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL)

	        html = content

	        # Code blocks - pull them out first and leave a one-line HTML-comment
	        # placeholder, so the header/bold/list/table/paragraph passes below
	        # cannot rewrite anything inside the fence (e.g. "# comment" lines).
	        code_blocks = []

	        def stash_code_block(match):
	            code_blocks.append(f'<pre><code>{match.group(2).strip()}</code></pre>')
	            return f'<!-- CODEBLOCK {len(code_blocks) - 1} -->'

	        html = re.sub(r'```(\w*)\n(.*?)```', stash_code_block, html, flags=re.DOTALL)

	        # Headers
	        html = re.sub(r'^### (.+)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
	        html = re.sub(r'^## (.+)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
	        html = re.sub(r'^# (.+)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)

	        # Images - existing <img> tags and markdown image syntax both end up as
	        # the same styled tag. Must run before the link pass, which would
	        # otherwise turn ![alt](src) into "!" followed by an anchor.
	        html = re.sub(r'<img src="([^"]+)"[^>]*>', lambda m: to_img_tag(m.group(1)), html)
	        html = re.sub(r'!\[[^\]]*\]\(([^)\s]+)[^)]*\)', lambda m: to_img_tag(m.group(1)), html)

	        # Inline code (fenced blocks are already stashed away)
	        html = re.sub(r'`([^`]+)`', r'<code>\1</code>', html)

	        # Bold
	        html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', html)

	        # Links
	        html = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2" target="_blank">\1</a>', html)

	        # Tables - consecutive lines starting with "|" form one table; the first
	        # row becomes the header and the |---|---| separator row is dropped.
	        in_table = False
	        table_html = []
	        new_lines = []

	        for line in html.split('\n'):
	            stripped = line.strip()
	            if stripped.startswith('|'):
	                if not in_table:
	                    in_table = True
	                    table_html = ['<table>']

	                if re.match(r'^\|[\s\-:\|]+\|$', stripped):
	                    continue

	                cells = [c.strip() for c in stripped.split('|')[1:-1]]
	                if len(table_html) == 1:
	                    table_html.append('<thead><tr>')
	                    table_html.extend(f'<th>{cell}</th>' for cell in cells)
	                    table_html.append('</tr></thead><tbody>')
	                else:
	                    table_html.append('<tr>')
	                    table_html.extend(f'<td>{cell}</td>' for cell in cells)
	                    table_html.append('</tr>')
	            else:
	                if in_table:
	                    table_html.append('</tbody></table>')
	                    new_lines.append(''.join(table_html))
	                    table_html = []
	                    in_table = False
	                new_lines.append(line)

	        if in_table:
	            table_html.append('</tbody></table>')
	            new_lines.append(''.join(table_html))

	        html = '\n'.join(new_lines)

	        # Lists
	        html = re.sub(r'^- (.+)$', r'<li>\1</li>', html, flags=re.MULTILINE)
	        html = re.sub(r'(<li>.*</li>\n?)+', r'<ul>\g<0></ul>', html)

	        # Paragraphs - only plain-text lines are wrapped; anything that already
	        # starts with a tag (or a placeholder comment) is left untouched.
	        result = []
	        for line in html.split('\n'):
	            stripped = line.strip()
	            if stripped and not stripped.startswith('<') and not stripped.startswith('```'):
	                result.append(f'<p>{stripped}</p>')
	            else:
	                result.append(line)

	        final_html = '\n'.join(result)

	        # Put the stashed code blocks back, newlines intact.
	        for index, block in enumerate(code_blocks):
	            final_html = final_html.replace(f'<!-- CODEBLOCK {index} -->', block)

	        # Escape curly braces for f-string compatibility
	        final_html = final_html.replace('{', '{{').replace('}', '}}')
	        return final_html
	    except Exception as e:
	        # Best effort: the caller gets an inline error fragment instead of a crash.
	        return f"<p>Error loading README: {e}</p>"

	# Rendered once at import time; load_readme_as_html() returns the fragment
	# with '{' and '}' already doubled (or an error <p> if loading failed).
	readme_html = load_readme_as_html()


	def file_to_base64(file_path: str) -> str:
	    """Read the file at *file_path* and return its bytes as a base64 text string.

	    A falsy path (None or "") yields an empty string without touching the disk.
	    """
	    if file_path:
	        with open(file_path, "rb") as handle:
	            raw = handle.read()
	        return base64.b64encode(raw).decode()
	    return ""


	# ============================================================================
	# MCP Tools (all accept base64 directly)
	# ============================================================================

	def extract_embedding(audio_base64: str) -> str:
	    """
	    Extract voice embedding using Wav2Vec2.

	    Useful for voice comparison, speaker identification, etc.
	    The full vector is not returned: it is replaced by a short preview
	    and its length to keep the response small.

	    Args:
	        audio_base64: Audio file as base64 encoded string

	    Returns:
	        JSON string with the backend's fields, where "embedding" is replaced by
	        "embedding_preview" (first 5 values plus "...") and "embedding_length";
	        or {"error": message} on failure
	    """
	    if not modal_available:
	        failure = {"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."}
	    elif not audio_base64:
	        failure = {"error": "No audio provided"}
	    else:
	        try:
	            payload = analyzer.extract_embedding.remote(audio_base64)
	            if "embedding" in payload:
	                vector = payload["embedding"]
	                payload["embedding_preview"] = vector[:5] + ["..."]
	                payload["embedding_length"] = len(vector)
	                del payload["embedding"]
	            return json.dumps(payload, ensure_ascii=False, indent=2)
	        except Exception as exc:
	            failure = {"error": str(exc)}
	    return json.dumps(failure)


	def match_voice(audio1_base64: str, audio2_base64: str) -> str:
	    """
	    Compare similarity between two voices.

	    Extracts Wav2Vec2 embeddings and calculates cosine similarity.
	    Useful for checking if the same person spoke with similar tone.

	    Args:
	        audio1_base64: First audio as base64 encoded string
	        audio2_base64: Second audio as base64 encoded string

	    Returns:
	        JSON string with similarity (0-1) and tone_score (0-100) as reported
	        by the backend, or {"error": message} on failure
	    """
	    if not modal_available:
	        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
	    if not (audio1_base64 and audio2_base64):
	        return json.dumps({"error": "Both audio files required"})

	    try:
	        comparison = analyzer.compare_voices.remote(audio1_base64, audio2_base64)
	        return json.dumps(comparison, ensure_ascii=False, indent=2)
	    except Exception as exc:
	        return json.dumps({"error": str(exc)})


	def analyze_acoustics(audio_base64: str) -> str:
	    """
	    Analyze acoustic features of audio.

	    Extracts pitch, energy, rhythm, tempo, and spectral characteristics.
	    Useful for understanding voice expressiveness and characteristics.

	    Args:
	        audio_base64: Audio file as base64 encoded string

	    Returns:
	        JSON string with pitch, energy, rhythm, tempo, spectral information,
	        or {"error": message} on failure
	    """
	    if not modal_available:
	        failure = {"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."}
	    elif not audio_base64:
	        failure = {"error": "No audio provided"}
	    else:
	        try:
	            features = analyzer.analyze_acoustic_features.remote(audio_base64)
	            return json.dumps(features, ensure_ascii=False, indent=2)
	        except Exception as exc:
	            failure = {"error": str(exc)}
	    return json.dumps(failure)


	def transcribe_audio(audio_base64: str, language: str = "en") -> str:
	    """
	    Convert audio to text (Speech-to-Text).

	    Uses ElevenLabs Scribe v1 model for high-quality speech recognition.
	    Supports various languages.

	    Args:
	        audio_base64: Audio file as base64 encoded string
	        language: Language code (e.g., "en", "ko", "ja"). Default is "en"

	    Returns:
	        JSON string with text, language, model as reported by the backend,
	        or {"error": message} on failure
	    """
	    if not modal_available:
	        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
	    if not audio_base64:
	        return json.dumps({"error": "No audio provided"})

	    try:
	        transcript = analyzer.transcribe_audio.remote(audio_base64, language)
	        return json.dumps(transcript, ensure_ascii=False, indent=2)
	    except Exception as exc:
	        return json.dumps({"error": str(exc)})


	def isolate_voice(audio_base64: str) -> str:
	    """
	    Remove background music (BGM) and extract voice only.

	    Uses ElevenLabs Voice Isolator to remove music, noise, etc.
	    Useful for memes, songs, and other audio with background sounds.

	    Args:
	        audio_base64: Audio file as base64 encoded string

	    Returns:
	        JSON string with isolated_audio_base64 and metadata (bgm_detected,
	        sizes, duration) as reported by the backend, or {"error": message}
	        on failure
	    """
	    if not modal_available:
	        failure = {"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."}
	    elif not audio_base64:
	        failure = {"error": "No audio provided"}
	    else:
	        try:
	            isolated = analyzer.isolate_voice.remote(audio_base64)
	            return json.dumps(isolated, ensure_ascii=False, indent=2)
	        except Exception as exc:
	            failure = {"error": str(exc)}
	    return json.dumps(failure)


	def grade_voice(
	    user_audio_base64: str,
	    reference_audio_base64: str,
	    reference_text: str = "",
	    category: str = "meme"
	) -> str:
	    """
	    Comprehensively compare and analyze user voice with reference voice.

	    Evaluates with 5 metrics:
	    - pronunciation: Pronunciation accuracy (STT-based)
	    - tone: Voice timbre similarity (Wav2Vec2 embedding)
	    - pitch: Pitch matching
	    - rhythm: Rhythm sense
	    - energy: Energy expressiveness

	    Args:
	        user_audio_base64: User audio as base64 encoded string
	        reference_audio_base64: Reference audio as base64 encoded string
	        reference_text: Reference text (optional, enables pronunciation scoring)
	        category: Category (meme, song, movie) - determines weights

	    Returns:
	        JSON string with pitch, rhythm, energy, pronunciation, transcript,
	        overall, user_text; or {"error": message} on failure
	    """
	    if not modal_available:
	        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
	    if not (user_audio_base64 and reference_audio_base64):
	        return json.dumps({"error": "Both user and reference audio required"})

	    try:
	        report = analyzer.analyze_audio.remote(
	            user_audio_base64=user_audio_base64,
	            reference_audio_base64=reference_audio_base64,
	            # An empty reference text is forwarded as None.
	            reference_text=reference_text or None,
	            challenge_id="mcp_analysis",
	            category=category,
	        )
	        # Flatten the backend report into the handful of fields API callers use;
	        # metrics missing from the report default to 0.
	        scores = report.get("metrics", {})
	        summary = {
	            key: scores.get(key, 0)
	            for key in ("pitch", "rhythm", "energy", "pronunciation", "transcript")
	        }
	        summary["overall"] = report.get("overall_score", 0)
	        summary["user_text"] = report.get("user_text", "")
	        return json.dumps(summary, ensure_ascii=False, indent=2)
	    except Exception as exc:
	        return json.dumps({"error": str(exc)})


	# ============================================================================
	# Demo Functions for UI
	# ============================================================================

	def demo_acoustic_analysis(audio_file):
	    """Acoustic Analysis - Analyze pitch, energy, rhythm, tempo.

	    Encodes the uploaded file, runs analyze_acoustics() and returns HTML:
	    the empty-state card when no file is given, the radar chart on success,
	    or a red error box when the tool reports an error or its JSON cannot be
	    rendered. Error text is HTML-escaped before being placed in the box.
	    """
	    # Local import: the error strings come from the backend and must not be
	    # interpreted as markup.
	    from html import escape

	    if not audio_file:
	        return create_acoustic_empty()

	    audio_b64 = file_to_base64(audio_file)
	    result_json = analyze_acoustics(audio_b64)

	    try:
	        result = json.loads(result_json)
	        if "error" in result:
	            message = escape(str(result.get("error", "Unknown error")), quote=False)
	            return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
	<strong>Error in result:</strong><br>{message}
	</div>'''
	        return create_acoustic_visualization(result)
	    except Exception as e:
	        # Covers both malformed JSON and failures while building the chart.
	        reason = escape(str(e), quote=False)
	        raw_preview = escape(result_json[:500], quote=False)
	        return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
	<strong>Parsing Error:</strong> {reason}<br><br>
	<strong>Raw Result (first 500 chars):</strong><br>
	<code style="display: block; padding: 10px; background: white; border-radius: 4px; overflow-x: auto; font-size: 12px;">{raw_preview}</code>
	</div>'''


	def demo_transcribe_audio(audio_file, language):
	    """Audio Transcription.

	    Encodes the uploaded file, runs transcribe_audio() with *language* and
	    returns HTML showing the "text" field. The empty-state card is returned
	    when no file is given, when the tool reports an error, or when the
	    result cannot be parsed.
	    """
	    if not audio_file:
	        return create_transcription_empty()

	    audio_b64 = file_to_base64(audio_file)
	    result_json = transcribe_audio(audio_b64, language)

	    try:
	        result = json.loads(result_json)
	        if "error" in result:
	            return create_transcription_empty()
	        return create_transcription_visualization(result.get("text", ""))
	    except Exception:
	        # Best effort for the UI; a bare `except:` here would also swallow
	        # KeyboardInterrupt/SystemExit.
	        return create_transcription_empty()


	def demo_clean_extraction(audio_file):
	    """Clean Audio Extraction - returns audio file only.

	    Encodes the uploaded file, runs isolate_voice() and writes the returned
	    "isolated_audio_base64" payload to a new .wav temp file.

	    Returns:
	        Path of the written temp file, or None when no file is given, the
	        tool reports an error, or the result cannot be decoded.
	    """
	    if not audio_file:
	        return None

	    audio_b64 = file_to_base64(audio_file)
	    result_json = isolate_voice(audio_b64)

	    try:
	        result = json.loads(result_json)
	        if "error" in result:
	            return None

	        # Convert isolated audio base64 back to file. delete=False so the
	        # file still exists after the handle is closed and the path is returned.
	        isolated_audio_bytes = base64.b64decode(result["isolated_audio_base64"])
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	            tmp.write(isolated_audio_bytes)
	        return tmp.name
	    except Exception:
	        # Best effort for the UI; a bare `except:` here would also swallow
	        # KeyboardInterrupt/SystemExit.
	        return None


	def demo_extract_embedding(audio_file):
	    """Extract Embedding - extract voice fingerprint.

	    Encodes the uploaded file, runs extract_embedding() and returns HTML:
	    the empty-state card when no file is given, the embedding summary on
	    success, or a red error box when the tool reports an error or its JSON
	    cannot be rendered. Error text is HTML-escaped before being placed in
	    the box.
	    """
	    # Local import: the error strings come from the backend and must not be
	    # interpreted as markup.
	    from html import escape

	    if not audio_file:
	        return create_embedding_empty()

	    audio_b64 = file_to_base64(audio_file)
	    result_json = extract_embedding(audio_b64)

	    try:
	        result = json.loads(result_json)
	        if "error" in result:
	            message = escape(str(result.get("error", "Unknown error")), quote=False)
	            return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
	<strong>Error in result:</strong><br>{message}
	</div>'''
	        return create_embedding_visualization(result)
	    except Exception as e:
	        # Covers both malformed JSON and failures while building the summary.
	        reason = escape(str(e), quote=False)
	        raw_preview = escape(result_json[:500], quote=False)
	        return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
	<strong>Parsing Error:</strong> {reason}<br><br>
	<strong>Raw Result (first 500 chars):</strong><br>
	<code style="display: block; padding: 10px; background: white; border-radius: 4px; overflow-x: auto; font-size: 12px;">{raw_preview}</code>
	</div>'''


	def demo_match_voice(audio1, audio2):
	    """Compare Voices - compare two voice similarities.

	    Encodes both uploaded files, runs match_voice() and returns the
	    similarity donut HTML. The empty-state card is returned when either file
	    is missing, when the tool reports an error, or when the result cannot be
	    rendered.
	    """
	    if not audio1 or not audio2:
	        return create_compare_empty()

	    audio1_b64 = file_to_base64(audio1)
	    audio2_b64 = file_to_base64(audio2)
	    result_json = match_voice(audio1_b64, audio2_b64)

	    try:
	        result = json.loads(result_json)
	        if "error" in result:
	            return create_compare_empty()
	        return create_compare_visualization(result)
	    except Exception:
	        # Best effort for the UI; a bare `except:` here would also swallow
	        # KeyboardInterrupt/SystemExit.
	        return create_compare_empty()


	def demo_voice_similarity(user_audio, ref_audio):
	    """Voice Similarity - comprehensive 5-metric analysis.

	    Encodes both uploaded files, runs grade_voice() with no reference text
	    and the "meme" category, and returns the similarity chart HTML. The
	    empty-state card is returned when either file is missing, when the tool
	    reports an error, or when the result cannot be rendered.
	    """
	    if not user_audio or not ref_audio:
	        return create_similarity_empty()

	    user_b64 = file_to_base64(user_audio)
	    ref_b64 = file_to_base64(ref_audio)
	    result_json = grade_voice(user_b64, ref_b64, "", "meme")

	    try:
	        result = json.loads(result_json)
	        if "error" in result:
	            return create_similarity_empty()
	        return create_similarity_visualization(result)
	    except Exception:
	        # Best effort for the UI; a bare `except:` here would also swallow
	        # KeyboardInterrupt/SystemExit.
	        return create_similarity_empty()


	# ============================================================================
	# Visualization Functions
	# ============================================================================

	def create_acoustic_empty():
	    """Return the static placeholder card shown before any audio is analyzed."""
	    placeholder = """
	<div style="
	background: rgba(10, 10, 26, 0.4);
	border: 1px solid rgba(124, 58, 237, 0.2);
	border-radius: 16px;
	padding: 30px 20px;
	text-align: center;
	height: 100%;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	">
	<div style="margin-bottom: 12px; opacity: 0.5;">
	<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
	<path d="M22 10C22 10 20 4 17 4C14 4 12 16 9 16C6 16 4 10 2 10" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<g transform="translate(13, 11)">
	<circle cx="5" cy="5" r="4" stroke="#7c3aed" stroke-width="1.5"/>
	<path d="M8 8L11 11" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round"/>
	</g>
	</svg>
	</div>
	<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
	Upload audio to analyze acoustic features
	</div>
	</div>
	"""
	    return placeholder


	def create_acoustic_visualization(result):
	    """Render the acoustic analysis result as a five-axis SVG radar chart.

	    Reads the "score" entry of result["pitch"], ["energy"], ["rhythm"] and
	    ["spectral"] (each defaulting to 0) and plots them as-is on a 0-100 axis.
	    result["tempo"] is mapped from the 60-180 BPM range onto 0-100 and
	    clamped; a tempo of 0 or less plots as 0.

	    Returns:
	        An HTML string holding a 300x300 SVG.
	    """
	    pitch_norm = result.get("pitch", {}).get("score", 0)
	    energy_norm = result.get("energy", {}).get("score", 0)
	    rhythm_norm = result.get("rhythm", {}).get("score", 0)
	    spectral_norm = result.get("spectral", {}).get("score", 0)

	    # Tempo: normalize BPM to 0-100 (60-180 BPM range)
	    tempo_bpm = result.get("tempo", 0)
	    if tempo_bpm > 0:
	        tempo_norm = min(100, max(0, (tempo_bpm - 60) / 120 * 100))
	    else:
	        tempo_norm = 0

	    center_x, center_y = 150, 150
	    radius = 110

	    # (label, value, axis angle in degrees); -90 points straight up and the
	    # remaining axes follow clockwise in 72-degree steps.
	    metrics = [
	        ("Pitch", pitch_norm, -90),
	        ("Energy", energy_norm, -18),
	        ("Rhythm", rhythm_norm, 54),
	        ("Tempo", tempo_norm, 126),
	        ("Spectral", spectral_norm, 198),
	    ]

	    def vertex(distance, angle_deg):
	        """Return the (x, y) point *distance* away from the center along an axis."""
	        theta = math.radians(angle_deg)
	        return center_x + distance * math.cos(theta), center_y + distance * math.sin(theta)

	    def as_point(xy):
	        """Format an (x, y) pair the way SVG `points` lists expect."""
	        return f"{xy[0]:.2f},{xy[1]:.2f}"

	    # Data polygon: each score scales its axis length.
	    data_points = [
	        as_point(vertex((value / 100) * radius, angle_deg))
	        for _, value, angle_deg in metrics
	    ]
	    polygon_points = ' '.join(data_points)
	    data_circles = ''.join(
	        f'<circle cx="{pt.split(",")[0]}" cy="{pt.split(",")[1]}" r="4" fill="#a855f7"/>'
	        for pt in data_points
	    )

	    # Background concentric pentagons (20, 40, 60, 80, 100)
	    background_pentagons = "".join(
	        '<polygon points="{}" fill="none" stroke="rgba(124, 58, 237, 0.15)" stroke-width="1"/>'.format(
	            " ".join(as_point(vertex(radius * scale, angle_deg)) for _, _, angle_deg in metrics)
	        )
	        for scale in [0.2, 0.4, 0.6, 0.8, 1.0]
	    )

	    # Axis lines from center to vertices
	    axis_parts = []
	    for _, _, angle_deg in metrics:
	        x, y = vertex(radius, angle_deg)
	        axis_parts.append(
	            f'<line x1="{center_x}" y1="{center_y}" x2="{x:.2f}" y2="{y:.2f}" stroke="rgba(124, 58, 237, 0.3)" stroke-width="1"/>'
	        )
	    axis_lines = "".join(axis_parts)

	    # Labels sit 25px outside the outer pentagon, score printed underneath.
	    label_parts = []
	    for label, value, angle_deg in metrics:
	        x, y = vertex(radius + 25, angle_deg)
	        label_parts.append(f'''<text x="{x:.2f}" y="{y:.2f}" text-anchor="middle" dominant-baseline="middle" fill="#a5b4fc" font-size="11" font-weight="600">
	{label}
	<tspan x="{x:.2f}" dy="12" fill="#a855f7" font-size="13" font-weight="700">{int(value)}</tspan>
	</text>''')
	    labels = "".join(label_parts)

	    return f"""
	<div style="
	background: rgba(10, 10, 26, 0.6);
	border: 1px solid rgba(124, 58, 237, 0.3);
	border-radius: 16px;
	padding: 20px;
	display: flex;
	align-items: center;
	justify-content: center;
	">
	<svg width="300" height="300" viewBox="0 0 300 300">
	<!-- Background pentagons -->
	{background_pentagons}

	<!-- Axis lines -->
	{axis_lines}

	<!-- Data polygon -->
	<polygon points="{polygon_points}"
	fill="rgba(124, 58, 237, 0.3)"
	stroke="#a855f7"
	stroke-width="2"/>

	<!-- Data points -->
	{data_circles}

	<!-- Labels -->
	{labels}
	</svg>
	</div>
	"""


	def create_mimicry_empty():
	    """Return the static placeholder card for the voice mimicry panel."""
	    placeholder = """
	<div style="
	background: rgba(10, 10, 26, 0.4);
	border: 1px solid rgba(124, 58, 237, 0.2);
	border-radius: 16px;
	padding: 30px 20px;
	text-align: center;
	height: 100%;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	">
	<div style="margin-bottom: 12px; opacity: 0.5;">
	<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
	<defs>
	<linearGradient id="micGradEmpty" x1="0%" y1="0%" x2="100%" y2="100%">
	<stop offset="0%" style="stop-color:#8b5cf6"/>
	<stop offset="100%" style="stop-color:#6366f1"/>
	</linearGradient>
	</defs>
	<path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z" fill="url(#micGradEmpty)"/>
	<path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" fill="url(#micGradEmpty)"/>
	</svg>
	</div>
	<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
	Upload reference and your voice to see similarity scores
	</div>
	</div>
	"""
	    return placeholder


	def create_mimicry_visualization(result):
	    """Render five mimicry scores as labelled progress bars under a header card.

	    Reads "pronunciation", "transcript", "pitch", "rhythm" and "energy" from
	    *result* (each defaulting to 0). Each value is used directly as the CSS
	    width percentage and printed next to its bar.
	    """
	    scores = {
	        "Pronunciation": result.get("pronunciation", 0),
	        # NOTE(review): the "Tone" bar is fed from the "transcript" key -
	        # confirm this matches the backend's metric naming.
	        "Tone": result.get("transcript", 0),
	        "Pitch": result.get("pitch", 0),
	        "Rhythm": result.get("rhythm", 0),
	        "Energy": result.get("energy", 0),
	    }

	    def bar(label):
	        """Return the HTML for one labelled bar plus its numeric readout."""
	        value = scores[label]
	        return f"""
	<div style="display: flex; align-items: center; gap: 12px; margin-bottom: 10px;">
	<div style="flex: 1;">
	<div style="font-size: 11px; color: #cbd5e1; margin-bottom: 4px;">{label}</div>
	<div style="
	height: 6px;
	background: rgba(124, 58, 237, 0.2);
	border-radius: 3px;
	overflow: hidden;
	">
	<div style="
	height: 100%;
	width: {value}%;
	background: linear-gradient(90deg, #6366f1, #22d3ee);
	border-radius: 3px;
	"></div>
	</div>
	</div>
	<div style="
	font-size: 14px;
	font-weight: 700;
	color: #22d3ee;
	min-width: 32px;
	text-align: right;
	">{value}</div>
	</div>
	"""

	    return f"""
	<div style="
	background: rgba(10, 10, 26, 0.6);
	border: 1px solid rgba(124, 58, 237, 0.3);
	border-radius: 16px;
	padding: 20px;
	height: 100%;
	display: flex;
	flex-direction: column;
	">
	<div style="
	display: flex;
	align-items: center;
	gap: 10px;
	margin-bottom: 16px;
	padding-bottom: 14px;
	border-bottom: 1px solid rgba(124, 58, 237, 0.2);
	">
	<div style="
	width: 40px;
	height: 40px;
	border-radius: 10px;
	background: linear-gradient(135deg, #7c3aed, #6366f1);
	display: flex;
	align-items: center;
	justify-content: center;
	flex-shrink: 0;
	">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none">
	<circle cx="12" cy="12" r="10" fill="rgba(255, 255, 255, 0.2)" stroke="white" stroke-width="1.5"/>
	<text x="12" y="16" text-anchor="middle" font-size="10" fill="white" font-weight="bold">AI</text>
	</svg>
	</div>
	<div style="flex: 1; min-width: 0;">
	<div style="font-size: 10px; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">CLAUDE</div>
	<div style="font-size: 11px; color: #cbd5e1; line-height: 1.4;">
	Wow, that voice input, takes analytical skills of course but I'll handle it
	</div>
	</div>
	</div>

	<div style="flex: 1;">
	{bar("Pronunciation")}
	{bar("Tone")}
	{bar("Pitch")}
	{bar("Rhythm")}
	{bar("Energy")}
	</div>
	</div>
	"""


	def create_transcription_empty():
	    """Return the static placeholder card shown before any audio is transcribed."""
	    placeholder = """
	<div style="
	background: rgba(10, 10, 26, 0.4);
	border: 1px solid rgba(124, 58, 237, 0.2);
	border-radius: 12px;
	padding: 20px;
	text-align: center;
	color: #a5b4fc;
	font-size: 13px;
	">
	Upload audio to transcribe
	</div>
	"""
	    return placeholder


	def create_transcription_visualization(text):
	    """Render a transcription result as a styled text card.

	    Args:
	        text: The transcribed text. A falsy value (None or "") shows the
	            fallback message "Transcription completed" instead.

	    Returns:
	        An HTML string. The text is HTML-escaped first, so "<", ">" and "&"
	        in speech-derived text show up literally instead of being parsed as
	        markup.
	    """
	    # Local import keeps this block self-contained.
	    from html import escape

	    body = escape(str(text), quote=False) if text else "Transcription completed"
	    return f"""
	<div style="
	background: rgba(10, 10, 26, 0.6);
	border: 1px solid rgba(124, 58, 237, 0.3);
	border-radius: 12px;
	padding: 20px;
	color: #e2e8f0;
	font-size: 20px;
	line-height: 1.6;
	white-space: pre-wrap;
	word-wrap: break-word;
	">
	{body}
	</div>
	"""


	def create_embedding_empty():
	    """Return the static placeholder card shown before any embedding is extracted."""
	    placeholder = """
	<div style="
	background: rgba(10, 10, 26, 0.4);
	border: 1px solid rgba(124, 58, 237, 0.2);
	border-radius: 16px;
	padding: 30px 20px;
	text-align: center;
	height: 100%;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	">
	<div style="margin-bottom: 12px; opacity: 0.5;">
	<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
	<path d="M21 16V8L12 4L3 8V16L12 20L21 16Z" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<path d="M12 4V12M12 12V20M12 12L21 8M12 12L3 8" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<circle cx="12" cy="12" r="2" fill="#A855F7"/>
	</svg>
	</div>
	<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
	Upload audio to extract voice embedding
	</div>
	</div>
	"""
	    return placeholder


	def create_embedding_visualization(result):
	    """Render the embedding summary (model, dimensions, value preview) as HTML.

	    Reads "model" (default "Wav2Vec2"), "embedding_length" (falling back to
	    "dim", then 768) and "embedding_preview" from *result*. Only int/float
	    preview entries are shown, formatted to 4 decimals; if none remain the
	    preview reads "...".
	    """
	    model = result.get("model", "Wav2Vec2")
	    dim = result.get("embedding_length", result.get("dim", 768))

	    # Non-numeric entries (such as a trailing "..." marker) are skipped so the
	    # float format spec cannot fail on them.
	    formatted = [
	        f"{v:.4f}"
	        for v in (result.get("embedding_preview", []) or ())
	        if isinstance(v, (int, float))
	    ]
	    preview_str = ", ".join(formatted) if formatted else "..."

	    return f"""
	<div style="
	background: rgba(10, 10, 26, 0.6);
	border: 1px solid rgba(124, 58, 237, 0.3);
	border-radius: 16px;
	padding: 20px;
	height: 100%;
	display: flex;
	flex-direction: column;
	">
	<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; padding: 10px; background: rgba(124, 58, 237, 0.1); border-radius: 8px;">
	<div style="font-size: 16px; color: #cbd5e1; font-weight: 600;">Model</div>
	<div style="font-size: 18px; font-weight: 700; color: #22d3ee;">{model}</div>
	</div>
	<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; padding: 10px; background: rgba(124, 58, 237, 0.1); border-radius: 8px;">
	<div style="font-size: 16px; color: #cbd5e1; font-weight: 600;">Dimensions</div>
	<div style="font-size: 18px; font-weight: 700; color: #22d3ee;">{dim}</div>
	</div>
	<div style="padding: 10px; background: rgba(124, 58, 237, 0.1); border-radius: 8px;">
	<div style="font-size: 16px; color: #cbd5e1; font-weight: 600; margin-bottom: 8px;">Preview</div>
	<div style="font-size: 14px; font-family: monospace; color: #22d3ee; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">
	[{preview_str}]
	</div>
	</div>
	</div>
	"""


	def create_compare_empty():
	    """Return the static placeholder card shown before two voices are compared."""
	    placeholder = """
	<div style="
	background: rgba(10, 10, 26, 0.4);
	border: 1px solid rgba(124, 58, 237, 0.2);
	border-radius: 16px;
	padding: 30px 20px;
	text-align: center;
	height: 100%;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	">
	<div style="margin-bottom: 12px; opacity: 0.5;">
	<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
	<path d="M2 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M5 8V16" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M8 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M22 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M19 7V17" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M16 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M10 12H14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	</svg>
	</div>
	<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
	Upload two audio files to compare voices
	</div>
	</div>
	"""
	    return placeholder


	def create_compare_visualization(result):
	    """Render a voice comparison result as a donut gauge with a percentage.

	    Args:
	        result: Mapping whose "similarity" entry is a fraction (0 when
	            absent). It is scaled to a percentage, rounded to the nearest
	            integer and clamped to 0-100 so the gauge angles stay valid.

	    Returns:
	        An HTML string. The gauge color is #c084fc at 80 and above, #a855f7
	        from 60 to 79 and #7c3aed below 60.
	    """
	    similarity = result.get("similarity", 0)

	    # round() rather than int(): products such as 0.29 * 100 land just below
	    # the integer in floating point and would be truncated one point low.
	    similarity_pct = max(0, min(100, round(similarity * 100)))

	    # Color based on similarity - Purple theme matching VOICE SIMILARITY
	    if similarity_pct >= 80:
	        color = "#c084fc"  # Light purple (high score)
	    elif similarity_pct >= 60:
	        color = "#a855f7"  # Medium purple (medium score)
	    else:
	        color = "#7c3aed"  # Dark purple (low score)

	    # 1% of the gauge is 3.6 degrees; one decimal keeps float noise out of the CSS.
	    sweep_deg = f"{similarity_pct * 3.6:.1f}"

	    return f"""
	<div style="
	background: rgba(10, 10, 26, 0.6);
	border: 1px solid rgba(124, 58, 237, 0.3);
	border-radius: 16px;
	padding: 20px;
	height: 100%;
	display: flex;
	align-items: flex-end;
	justify-content: center;
	padding-bottom: 40px;
	">
	<div style="
	width: 160px;
	height: 160px;
	border-radius: 50%;
	background: conic-gradient({color} 0deg {sweep_deg}deg, rgba(124, 58, 237, 0.2) {sweep_deg}deg 360deg);
	display: flex;
	align-items: center;
	justify-content: center;
	">
	<div style="
	width: 130px;
	height: 130px;
	border-radius: 50%;
	background: rgba(10, 10, 26, 0.9);
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	">
	<span style="font-size: 40px; font-weight: 700; color: {color};">{similarity_pct}</span>
	<span style="font-size: 11px; color: #a5b4fc; letter-spacing: 0.5px;">SIMILARITY</span>
	</div>
	</div>
	</div>
	"""


	def create_similarity_empty():
	    """Return the static placeholder card shown before a similarity analysis runs."""
	    placeholder = """
	<div style="
	background: rgba(10, 10, 26, 0.4);
	border: 1px solid rgba(124, 58, 237, 0.2);
	border-radius: 16px;
	padding: 30px 20px;
	text-align: center;
	height: 100%;
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	">
	<div style="margin-bottom: 12px; opacity: 0.5;">
	<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
	<circle cx="12" cy="12" r="9" stroke="#A855F7" stroke-width="1" opacity="0.3"/>
	<path d="M12 5L18 9L16.5 18H7.5L6 9L12 5Z" fill="#A855F7" fill-opacity="0.4" stroke="#A855F7" stroke-width="2" stroke-linejoin="round"/>
	<circle cx="12" cy="5" r="1.5" fill="#A855F7"/>
	</svg>
	</div>
	<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
	Upload audio files for comprehensive similarity analysis
	</div>
	</div>
	"""
	    return placeholder


	def _clamp_score(value):
	    """Coerce a raw score to a number within the chart's 0-100 range.

	    In-range ints and floats are returned unchanged so they render exactly
	    as supplied. Other values are converted with ``float()``; anything that
	    cannot be converted (e.g. ``None``) and NaN fall back to 0.
	    """
	    if isinstance(value, bool) or not isinstance(value, (int, float)):
	        try:
	            value = float(value)
	        except (TypeError, ValueError):
	            return 0
	    if value != value:  # NaN is the only number that is unequal to itself
	        return 0
	    # Passing ``value`` first makes min/max hand it back untouched on ties.
	    return max(min(value, 100), 0)


	def create_similarity_visualization(result):
	    """Render similarity scores as an HTML card with a donut and a radar chart.

	    Args:
	        result: Mapping read with ``.get`` for the keys ``overall``,
	            ``pronunciation``, ``transcript``, ``pitch``, ``rhythm`` and
	            ``energy``. Missing keys count as 0. Every score goes through
	            ``_clamp_score`` so ``None``, non-numeric or out-of-range values
	            can neither raise nor draw outside the chart.

	    Returns:
	        An HTML string: a conic-gradient donut showing the overall score
	        next to a 300x300 SVG radar chart of the five component scores.
	    """
	    overall = _clamp_score(result.get("overall", 0))

	    # Color based on overall score - Purple theme
	    if overall >= 80:
	        color = "#c084fc"  # Light purple (high score)
	    elif overall >= 60:
	        color = "#a855f7"  # Medium purple (medium score)
	    else:
	        color = "#7c3aed"  # Dark purple (low score)

	    # Radar chart geometry (SVG user units inside the 300x300 viewBox)
	    center_x, center_y = 150, 150
	    radius = 110

	    # Five axes spaced 72 degrees apart. Angles are shifted by -90 degrees so
	    # the first axis points up (SVG's y axis grows downwards).
	    # Order: Pronunciation(top), Transcript(top-right), Pitch(bottom-right),
	    # Energy(bottom-left), Rhythm(top-left)
	    metrics = [
	        ("Pronunciation", _clamp_score(result.get("pronunciation", 0)), -90),
	        ("Transcript", _clamp_score(result.get("transcript", 0)), -18),
	        ("Pitch", _clamp_score(result.get("pitch", 0)), 54),
	        ("Energy", _clamp_score(result.get("energy", 0)), 126),
	        ("Rhythm", _clamp_score(result.get("rhythm", 0)), 198),
	    ]

	    def vertex(distance, angle_deg):
	        """Return the formatted (x, y) pair `distance` away from the center."""
	        angle_rad = math.radians(angle_deg)
	        x = center_x + distance * math.cos(angle_rad)
	        y = center_y + distance * math.sin(angle_rad)
	        return f"{x:.2f}", f"{y:.2f}"

	    # Data polygon: each score is a fraction of the full radius on its axis
	    data_points = [
	        vertex((value / 100) * radius, angle_deg)
	        for _, value, angle_deg in metrics
	    ]
	    data_polygon = " ".join(f"{x},{y}" for x, y in data_points)
	    data_markers = "".join(
	        f'<circle cx="{x}" cy="{y}" r="4" fill="#a855f7"/>'
	        for x, y in data_points
	    )

	    # Background concentric pentagons (20, 40, 60, 80, 100)
	    def create_pentagon_points(scale):
	        return " ".join(
	            ",".join(vertex(radius * scale, angle_deg))
	            for _, _, angle_deg in metrics
	        )

	    background_pentagons = "".join(
	        f'<polygon points="{create_pentagon_points(scale)}" fill="none" stroke="rgba(124, 58, 237, 0.15)" stroke-width="1"/>'
	        for scale in [0.2, 0.4, 0.6, 0.8, 1.0]
	    )

	    # Axis lines from center to vertices
	    axis_parts = []
	    for _, _, angle_deg in metrics:
	        x, y = vertex(radius, angle_deg)
	        axis_parts.append(
	            f'<line x1="{center_x}" y1="{center_y}" x2="{x}" y2="{y}" stroke="rgba(124, 58, 237, 0.3)" stroke-width="1"/>'
	        )
	    axis_lines = "".join(axis_parts)

	    # Labels sit just outside the outermost pentagon, score printed below
	    label_radius = radius + 25
	    label_parts = []
	    for label, value, angle_deg in metrics:
	        x, y = vertex(label_radius, angle_deg)
	        label_parts.append(f'''<text x="{x}" y="{y}" text-anchor="middle" dominant-baseline="middle" fill="#a5b4fc" font-size="11" font-weight="600">
	{label}
	<tspan x="{x}" dy="12" fill="#a855f7" font-size="13" font-weight="700">{value}</tspan>
	</text>''')
	    labels = "".join(label_parts)

	    # Donut sweep: 0-100 maps onto 0-360 degrees
	    sweep_deg = overall * 3.6

	    return f"""
	<div style="
	background: rgba(10, 10, 26, 0.6);
	border: 1px solid rgba(124, 58, 237, 0.3);
	border-radius: 16px;
	padding: 20px;
	display: flex;
	align-items: center;
	gap: 30px;
	">
	<!-- Left: Overall Score Donut -->
	<div style="flex: 1; display: flex; align-items: center; justify-content: center;">
	<div style="
	width: 160px;
	height: 160px;
	border-radius: 50%;
	background: conic-gradient({color} 0deg {sweep_deg}deg, rgba(124, 58, 237, 0.2) {sweep_deg}deg 360deg);
	display: flex;
	align-items: center;
	justify-content: center;
	">
	<div style="
	width: 130px;
	height: 130px;
	border-radius: 50%;
	background: rgba(10, 10, 26, 0.9);
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	">
	<span style="font-size: 40px; font-weight: 700; color: {color};">{overall}</span>
	<span style="font-size: 11px; color: #a5b4fc; letter-spacing: 0.5px;">OVERALL</span>
	</div>
	</div>
	</div>

	<!-- Right: Radar Chart -->
	<div style="flex: 1; display: flex; align-items: center; justify-content: center;">
	<svg width="300" height="300" viewBox="0 0 300 300">
	<!-- Background pentagons -->
	{background_pentagons}

	<!-- Axis lines -->
	{axis_lines}

	<!-- Data polygon -->
	<polygon points="{data_polygon}"
	fill="rgba(124, 58, 237, 0.3)"
	stroke="#a855f7"
	stroke-width="2"/>

	<!-- Data points -->
	{data_markers}

	<!-- Labels -->
	{labels}
	</svg>
	</div>
	</div>
	"""


	# Clean audio functions removed - using gr.Audio component directly




	# ============================================================================
	# Gradio Interface with Dark Theme
	# ============================================================================

	# Stylesheet for the whole Gradio page, kept as a plain (non-f) string so the
	# CSS braces need no escaping. It is injected through the inline
	# ``<style>{custom_css}</style>`` tag in the gr.HTML call at the top of the
	# ``with gr.Blocks() as demo:`` block below.
	# Sections, in order: dark-theme variable overrides, global/container layout,
	# header, docs button and modal, cards, terminal-style code block, tools table,
	# composite section, buttons and form controls, demo-row layout (including the
	# rules that hide the audio upload text), custom action buttons, decorations.
	# NOTE(review): the in-CSS header comment says all colors are solid, but many
	# rules below use rgba()/transparent values - confirm which is intended.
	custom_css = """
	/* ===== DARK THEME STYLING (CSS-ONLY) ===== */
	/* This CSS forces dark mode appearance regardless of system/Gradio theme */
	/* All colors are SOLID (not rgba/transparent) to ensure consistent appearance */

	:root {
	color-scheme: dark !important;
	--body-background-fill: #0a0a1a !important;
	--background-fill-primary: #0d0d1a !important;
	--background-fill-secondary: #12122a !important;
	--block-background-fill: #0d0d1a !important;
	--input-background-fill: #1a1a35 !important;
	--body-text-color: #e0e7ff !important;
	--block-title-text-color: #a5b4fc !important;
	--block-label-text-color: #a5b4fc !important;
	--input-text-color: #e0e7ff !important;
	--neutral-50: #0a0a1a !important;
	--neutral-100: #0d0d1a !important;
	--neutral-200: #12122a !important;
	--neutral-300: #1a1a35 !important;
	--neutral-400: #2d2d4a !important;
	--neutral-500: #4a4a6a !important;
	--neutral-600: #7c7c9a !important;
	--neutral-700: #a5b4fc !important;
	--neutral-800: #c7d2fe !important;
	--neutral-900: #e0e7ff !important;
	--neutral-950: #ffffff !important;
	}

	/* Force dark mode on html and body */
	html, body {
	background: #0a0a1a !important;
	background-color: #0a0a1a !important;
	color: #e0e7ff !important;
	}

	/* ===== GLOBAL STYLES ===== */
	body {
	background: linear-gradient(180deg, #0a0a1a 0%, #0f0f23 100%) !important;
	background-color: #0a0a1a !important;
	color: #ffffff !important;
	font-family: system-ui, -apple-system, sans-serif;
	}

	/* Override Gradio's light mode backgrounds AND text colors */
	.dark, .light, [data-theme="light"], [data-theme="dark"],
	html[data-theme="light"], html[data-theme="dark"],
	body.light, body.dark {
	--body-background-fill: #0a0a1a !important;
	--background-fill-primary: #0d0d1a !important;
	--background-fill-secondary: #12122a !important;
	--block-background-fill: #0d0d1a !important;
	--input-background-fill: #1a1a35 !important;
	--body-text-color: #e0e7ff !important;
	--block-title-text-color: #a5b4fc !important;
	--block-label-text-color: #a5b4fc !important;
	--input-text-color: #e0e7ff !important;
	--neutral-50: #0a0a1a !important;
	--neutral-100: #0d0d1a !important;
	--neutral-200: #12122a !important;
	--neutral-300: #1a1a35 !important;
	--neutral-400: #2d2d4a !important;
	--neutral-500: #4a4a6a !important;
	--neutral-600: #7c7c9a !important;
	--neutral-700: #a5b4fc !important;
	--neutral-800: #c7d2fe !important;
	--neutral-900: #e0e7ff !important;
	--neutral-950: #ffffff !important;
	color: #e0e7ff !important;
	background: #0a0a1a !important;
	background-color: #0a0a1a !important;
	}

	.gradio-container {
	max-width: 100% !important;
	width: 100% !important;
	padding: 0px 16px 20px 16px !important;
	background: #0a0a1a !important;
	background-color: #0a0a1a !important;
	margin: 0 !important;
	}

	.gradio-container > .main,
	.gradio-container .main,
	.main {
	max-width: 100% !important;
	width: 100% !important;
	padding-left: 0 !important;
	padding-right: 0 !important;
	margin: 0 auto !important;
	}

	.contain {
	max-width: 100% !important;
	padding: 0 !important;
	}

	/* Force full width on all Gradio internal containers */
	.gradio-container > div,
	.gradio-container > div > div,
	#component-0,
	.wrap,
	.app,
	.contain,
	footer,
	.gradio-row,
	.gradio-column,
	.svelte-1gfkn6j,
	[class*="svelte-"] {
	max-width: 100% !important;
	}

	.gradio-row {
	max-width: 100% !important;
	width: 100% !important;
	margin: 0 !important;
	padding: 0 !important;
	}

	/* ===== HEADER (FLOATING, NO CARD) ===== */
	.header-main {
	display: flex;
	justify-content: space-between;
	align-items: center;
	margin-bottom: 0;
	padding: 0;
	}

	.header-left {
	display: flex;
	align-items: center;
	gap: 16px;
	}

	.header-icon {
	font-size: 48px;
	filter: drop-shadow(0 4px 12px rgba(99, 102, 241, 0.6));
	}

	.header-title {
	font-size: 42px;
	font-weight: 900;
	color: #e0e7ff;
	margin: 0;
	letter-spacing: -0.5px;
	}

	.header-subtitle {
	color: #c7d2fe;
	font-size: 20px;
	font-weight: 700;
	margin-left: 6px;
	}

	/* ===== DOCS BUTTON ===== */
	.docs-button {
	display: flex;
	align-items: center;
	gap: 8px;
	padding: 10px 20px;
	background: linear-gradient(135deg, rgba(124, 58, 237, 0.3), rgba(99, 102, 241, 0.3));
	border: 1px solid rgba(124, 58, 237, 0.5);
	border-radius: 12px;
	color: #e0e7ff;
	font-size: 14px;
	font-weight: 600;
	cursor: pointer;
	transition: all 0.3s ease;
	text-transform: uppercase;
	letter-spacing: 0.5px;
	}

	.docs-button:hover {
	background: linear-gradient(135deg, rgba(124, 58, 237, 0.5), rgba(99, 102, 241, 0.5));
	border-color: rgba(124, 58, 237, 0.8);
	transform: translateY(-2px);
	box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4);
	}

	.docs-button svg {
	width: 18px;
	height: 18px;
	}

	/* ===== DOCS MODAL ===== */
	.docs-modal-overlay {
	display: none;
	position: fixed !important;
	inset: 0 !important;
	width: 100vw !important;
	height: 100vh !important;
	background: rgba(0, 0, 0, 0.85) !important;
	backdrop-filter: blur(10px) !important;
	z-index: 99999 !important;
	justify-content: center !important;
	align-items: flex-start !important;
	padding-top: 60px !important;
	box-sizing: border-box !important;
	/* Modal positioned near top of viewport */
	overflow: hidden !important;
	}

	.docs-modal-overlay.active {
	display: flex !important;
	}

	.docs-modal {
	position: relative !important;
	background: #0d0d1a !important;
	border: 2px solid #7c3aed !important;
	border-radius: 20px !important;
	width: calc(100vw - 80px) !important;
	max-width: 1200px !important;
	max-height: 55vh !important;
	overflow: hidden !important;
	box-shadow: 0 25px 80px rgba(0, 0, 0, 0.9) !important;
	/* Remove margin that could affect centering */
	margin: 0 !important;
	/* Prevent any transform inheritance issues */
	transform: none !important;
	}

	.docs-modal-header {
	display: flex !important;
	justify-content: space-between !important;
	align-items: center !important;
	padding: 20px 24px !important;
	border-bottom: 2px solid #7c3aed !important;
	background: #1a1a2e !important;
	}

	.docs-modal-title {
	font-size: 20px;
	font-weight: 700;
	color: #e0e7ff;
	display: flex;
	align-items: center;
	gap: 10px;
	}

	.docs-modal-close {
	background: rgba(124, 58, 237, 0.3);
	border: 2px solid rgba(124, 58, 237, 0.5);
	border-radius: 12px;
	color: #e0e7ff;
	font-size: 28px;
	font-weight: 300;
	cursor: pointer;
	padding: 4px 14px;
	line-height: 1;
	transition: all 0.2s;
	}

	.docs-modal-close:hover {
	background: rgba(124, 58, 237, 0.4);
	border-color: rgba(124, 58, 237, 0.6);
	}

	.docs-modal-content {
	padding: 24px !important;
	overflow-y: auto !important;
	max-height: calc(55vh - 80px) !important;
	color: #c7d2fe !important;
	font-size: 15px !important;
	line-height: 1.7 !important;
	background: #0d0d1a !important;
	}

	.docs-modal-content h1 { font-size: 28px; color: #e0e7ff; margin: 0 0 16px 0; padding-bottom: 12px; border-bottom: 2px solid rgba(124, 58, 237, 0.3); }
	.docs-modal-content h2 { font-size: 22px; color: #e0e7ff; margin: 24px 0 12px 0; }
	.docs-modal-content h3 { font-size: 18px; color: #a5b4fc; margin: 20px 0 10px 0; }
	.docs-modal-content p { margin: 12px 0; }
	.docs-modal-content ul, .docs-modal-content ol { margin: 12px 0; padding-left: 24px; }
	.docs-modal-content li { margin: 6px 0; }
	.docs-modal-content code { background: rgba(124, 58, 237, 0.2); padding: 2px 6px; border-radius: 4px; font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; font-size: 13px; color: #c4b5fd; }
	.docs-modal-content pre { background: rgba(0, 0, 0, 0.4); border: 1px solid rgba(124, 58, 237, 0.2); border-radius: 12px; padding: 16px; overflow-x: auto; margin: 16px 0; white-space: pre; }
	.docs-modal-content pre code { background: transparent; padding: 0; color: #a5b4fc; white-space: pre; display: block; }
	.docs-modal-content table { width: 100%; border-collapse: collapse; margin: 16px 0; }
	.docs-modal-content th, .docs-modal-content td { padding: 10px 12px; text-align: left; border: 1px solid rgba(124, 58, 237, 0.2); }
	.docs-modal-content th { background: rgba(124, 58, 237, 0.15); color: #e0e7ff; font-weight: 600; }
	.docs-modal-content td { color: #c7d2fe; }
	.docs-modal-content a { color: #a78bfa; text-decoration: none; }
	.docs-modal-content a:hover { text-decoration: underline; }
	.docs-modal-content strong { color: #e0e7ff; }
	.docs-modal-content img { max-width: 100%; max-height: 400px; height: auto; border-radius: 8px; margin: 12px 0; object-fit: contain; }

	/* ===== CARD STYLES ===== */
	.card {
	background: #0f0f23 !important;
	background-color: #0f0f23 !important;
	border: 1px solid #3d2a6b !important;
	border-radius: 20px;
	padding: 30px;
	box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4);
	transition: all 0.3s ease;
	height: 100%;
	display: flex;
	flex-direction: column;
	}

	.card:hover {
	border-color: #5b3d99 !important;
	box-shadow: 0 12px 48px rgba(124, 58, 237, 0.3);
	}

	/* Ensure columns in top row have equal height */
	.gradio-row:first-of-type .gradio-column {
	display: flex !important;
	flex-direction: column !important;
	}

	.gradio-row:first-of-type .gradio-column > div {
	flex: 1 !important;
	display: flex !important;
	flex-direction: column !important;
	}

	/* Set minimum height for top row cards */
	.gradio-row:first-of-type .card {
	min-height: 550px;
	}

	.card-title {
	font-size: 16px;
	font-weight: 700;
	color: #a5b4fc;
	text-transform: uppercase;
	letter-spacing: 1px;
	margin-bottom: 20px;
	display: flex;
	align-items: center;
	}

	/* ===== ROW SPACING ===== */
	.gradio-row {
	gap: 24px !important;
	}

	/* ===== QUICK START - CODE BLOCK (TERMINAL/IDE STYLE) ===== */
	.terminal-window {
	background: #1a1b26;
	border: 1px solid rgba(124, 58, 237, 0.3);
	border-radius: 12px;
	overflow: hidden;
	margin-bottom: 16px;
	box-shadow: 0 8px 32px rgba(0, 0, 0, 0.6);
	}

	.terminal-header {
	background: #16161e;
	padding: 12px 16px;
	display: flex;
	align-items: center;
	justify-content: space-between;
	border-bottom: 1px solid rgba(124, 58, 237, 0.2);
	}

	.terminal-dots {
	display: flex;
	gap: 8px;
	}

	.terminal-dot {
	width: 12px;
	height: 12px;
	border-radius: 50%;
	}

	.terminal-dot.red {
	background: #ff5f56 !important;
	box-shadow: 0 0 8px rgba(255, 95, 86, 0.8) !important;
	}

	.terminal-dot.yellow {
	background: #ffbd2e !important;
	box-shadow: 0 0 8px rgba(255, 189, 46, 0.8) !important;
	}

	.terminal-dot.green {
	background: #27c93f !important;
	box-shadow: 0 0 8px rgba(39, 201, 63, 0.8) !important;
	}

	.terminal-title {
	font-size: 12px;
	color: #6b7280;
	font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
	font-weight: 500;
	}

	.terminal-body {
	background: #1a1b26;
	padding: 0;
	display: flex;
	}

	.line-numbers {
	background: #16161e;
	padding: 16px 12px;
	border-right: 1px solid rgba(124, 58, 237, 0.15);
	user-select: none;
	text-align: right;
	min-width: 48px;
	}

	.line-num {
	display: block;
	color: #4a5568;
	font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
	font-size: 14px;
	line-height: 1.8;
	}

	.code-content {
	flex: 1;
	padding: 16px 20px;
	overflow-x: auto;
	}

	.code-line {
	display: block;
	white-space: pre;
	font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
	font-size: 14px;
	line-height: 1.8;
	color: #a9b1d6;
	}

	.json-key {
	color: #7dcfff;
	font-weight: 500;
	}

	.json-string {
	color: #9ece6a;
	}

	.json-bracket {
	color: #bb9af7;
	font-weight: 600;
	}

	.json-colon {
	color: #c0caf5;
	}

	.json-comma {
	color: #c0caf5;
	}

	.copy-button {
	width: 100%;
	background: linear-gradient(135deg, #7c3aed, #6366f1) !important;
	border: none !important;
	border-radius: 12px !important;
	padding: 14px 24px !important;
	font-weight: 700 !important;
	font-size: 13px !important;
	color: white !important;
	text-transform: uppercase;
	letter-spacing: 1px;
	cursor: pointer;
	box-shadow: 0 4px 16px rgba(124, 58, 237, 0.4) !important;
	transition: all 0.3s ease !important;
	display: flex;
	align-items: center;
	justify-content: center;
	gap: 8px;
	}

	.copy-button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 6px 24px rgba(124, 58, 237, 0.6) !important;
	}

	/* ===== TOOLS TABLE ===== */
	.tools-table,
	table.tools-table,
	.light .tools-table,
	.dark .tools-table,
	[data-theme="light"] .tools-table,
	[data-theme="dark"] .tools-table {
	width: 100%;
	border-collapse: separate;
	border-spacing: 0;
	background: #0d0d1f !important;
	background-color: #0d0d1f !important;
	border-radius: 12px;
	overflow: hidden;
	border: 1px solid #3d2a6b !important;
	margin-bottom: 0;
	flex: 1;
	color: #cbd5e1 !important;
	}

	.tools-table th,
	table.tools-table th,
	.light .tools-table th,
	.dark .tools-table th,
	[data-theme="light"] .tools-table th,
	[data-theme="dark"] .tools-table th {
	background: #1f1545 !important;
	background-color: #1f1545 !important;
	color: #a5b4fc !important;
	font-weight: 700;
	font-size: 16px;
	text-transform: uppercase;
	letter-spacing: 1.5px;
	padding: 20px 14px;
	text-align: left;
	border-bottom: 1px solid #3d2a6b !important;
	}

	.tools-table td,
	table.tools-table td,
	.light .tools-table td,
	.dark .tools-table td,
	[data-theme="light"] .tools-table td,
	[data-theme="dark"] .tools-table td {
	padding: 20px 14px;
	color: #cbd5e1 !important;
	background: #0d0d1f !important;
	background-color: #0d0d1f !important;
	font-size: 16px;
	border-bottom: 1px solid #1a1535 !important;
	}

	.tools-table tr:last-child td {
	border-bottom: none;
	}

	.tools-table tr:hover,
	.tools-table tr:hover td {
	background: #1a1540 !important;
	background-color: #1a1540 !important;
	}

	.tool-name,
	.light .tool-name,
	.dark .tool-name,
	[data-theme="light"] .tool-name,
	[data-theme="dark"] .tool-name {
	color: #22d3ee !important;
	font-family: 'SF Mono', 'Monaco', 'Consolas', monospace;
	font-weight: 600;
	font-size: 13px;
	vertical-align: middle;
	}

	/* ===== COMPOSITE SECTION ===== */
	.composite-section,
	.light .composite-section,
	.dark .composite-section,
	[data-theme="light"] .composite-section,
	[data-theme="dark"] .composite-section {
	background: #0d0d1f !important;
	background-color: #0d0d1f !important;
	border: 1px solid #3d2a6b !important;
	border-radius: 12px;
	padding: 20px;
	color: #cbd5e1 !important;
	}

	.composite-header,
	.light .composite-header,
	.dark .composite-header,
	[data-theme="light"] .composite-header,
	[data-theme="dark"] .composite-header {
	font-size: 11px;
	font-weight: 700;
	color: #a5b4fc !important;
	text-transform: uppercase;
	letter-spacing: 1.5px;
	margin-bottom: 12px;
	}

	.composite-content,
	.light .composite-content,
	.dark .composite-content,
	[data-theme="light"] .composite-content,
	[data-theme="dark"] .composite-content {
	color: #cbd5e1 !important;
	font-size: 12px;
	line-height: 1.6;
	margin-bottom: 16px;
	}

	.try-demo-button {
	width: 100%;
	background: transparent !important;
	border: 2px solid #7c3aed !important;
	border-radius: 12px !important;
	padding: 12px 24px !important;
	font-weight: 700 !important;
	font-size: 12px !important;
	color: #7c3aed !important;
	text-transform: uppercase;
	letter-spacing: 1px;
	cursor: pointer;
	transition: all 0.3s ease !important;
	}

	.try-demo-button:hover {
	background: rgba(124, 58, 237, 0.1) !important;
	border-color: #7c3aed !important;
	color: #8b5cf6 !important;
	}

	/* ===== BUTTONS ===== */
	button[variant="primary"] {
	background: linear-gradient(135deg, #7c3aed, #6366f1) !important;
	border: none !important;
	border-radius: 12px !important;
	padding: 14px 32px !important;
	font-weight: 700 !important;
	font-size: 14px !important;
	color: white !important;
	box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4) !important;
	transition: all 0.3s ease !important;
	}

	button[variant="primary"]:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 32px rgba(124, 58, 237, 0.6) !important;
	}

	/* ===== AUDIO COMPONENT ===== */
	.gradio-audio {
	background: rgba(30, 27, 75, 0.6) !important;
	border: 1px solid rgba(124, 58, 237, 0.3) !important;
	border-radius: 12px !important;
	}

	/* ===== TEXTBOX ===== */
	textarea {
	background: rgba(30, 27, 75, 0.6) !important;
	border: 1px solid rgba(124, 58, 237, 0.3) !important;
	border-radius: 12px !important;
	color: #e0e7ff !important;
	font-size: 13px !important;
	}

	/* ===== DROPDOWN ===== */
	select {
	background: rgba(30, 27, 75, 0.6) !important;
	border: 1px solid rgba(124, 58, 237, 0.3) !important;
	border-radius: 12px !important;
	color: #e0e7ff !important;
	}

	/* ===== LABELS ===== */
	label {
	color: #a5b4fc !important;
	font-weight: 600 !important;
	font-size: 12px !important;
	text-transform: uppercase;
	letter-spacing: 0.5px;
	}

	/* ===== HTML OUTPUT ===== */
	.gradio-html {
	background: transparent !important;
	border: none !important;
	}

	/* ===== DEMO ROW LAYOUT ===== */
	.demo-row {
	display: flex !important;
	gap: 24px !important;
	align-items: stretch !important;
	}

	/* Only apply card style to the outer column (demo-card-column) */
	.demo-card-column {
	display: flex !important;
	flex-direction: column !important;
	height: 700px !important;
	min-height: 700px !important;
	max-height: 700px !important;
	background: rgba(15, 15, 35, 0.8) !important;
	backdrop-filter: blur(20px) !important;
	border: 1px solid rgba(124, 58, 237, 0.3) !important;
	border-radius: 20px !important;
	padding: 4px 4px 2px 4px !important;
	box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4) !important;
	transition: all 0.3s ease !important;
	gap: 2px !important;
	overflow-y: auto !important;
	}

	.demo-card-column:hover {
	border-color: rgba(124, 58, 237, 0.5) !important;
	box-shadow: 0 12px 48px rgba(124, 58, 237, 0.3) !important;
	}

	/* Remove any border/background from inner elements */
	.demo-card-column > div,
	.demo-card-column > div > div,
	.demo-row > div > div {
	background: transparent !important;
	border: none !important;
	box-shadow: none !important;
	padding: 0 !important;
	border-radius: 0 !important;
	}

	/* Remove card background from inner HTML - we use column background instead */
	.demo-row .card {
	background: transparent !important;
	backdrop-filter: none !important;
	border: none !important;
	border-radius: 0 !important;
	padding: 0 !important;
	box-shadow: none !important;
	margin-bottom: 12px !important;
	}

	.demo-row .card:hover {
	border: none !important;
	box-shadow: none !important;
	}

	/* Ensure all inner components have transparent background */
	.demo-row .gradio-audio,
	.demo-row .gradio-dropdown,
	.demo-row .gradio-textbox,
	.demo-row .gradio-button {
	background: transparent !important;
	}

	/* Create a wrapper for input elements (flex container) */
	.demo-card-column > div:not(:last-child) {
	flex: 0 0 auto !important;
	}

	/* Adjust spacing for input elements in demo cards */
	.demo-row .gradio-audio {
	margin-top: 6px !important;
	margin-bottom: 0px !important;
	max-height: 50px !important;
	min-height: 40px !important;
	height: 45px !important;
	}

	/* Target all child elements of audio component */
	.demo-row .gradio-audio > div,
	.demo-row .gradio-audio .wrap,
	.demo-row .gradio-audio .upload-container,
	.demo-row .gradio-audio .record-container,
	.demo-row .gradio-audio * {
	max-height: 50px !important;
	}

	/* Audio player specific height reduction */
	.demo-row .gradio-audio audio {
	height: 26px !important;
	max-height: 26px !important;
	min-height: 26px !important;
	}

	/* Upload/record button container height */
	.demo-row .gradio-audio .upload-container,
	.demo-row .gradio-audio .record-container {
	min-height: 38px !important;
	max-height: 38px !important;
	padding: 4px !important;
	}

	/* Audio component buttons */
	.demo-row .gradio-audio button {
	height: 28px !important;
	min-height: 28px !important;
	max-height: 28px !important;
	padding: 4px 10px !important;
	font-size: 10px !important;
	}

	/* Hide text nodes in audio upload area - keep icons */
	.demo-row .gradio-audio .upload-text {
	display: none !important;
	}

	.demo-row .gradio-audio .placeholder {
	display: none !important;
	}

	.demo-row .gradio-audio span:not(:has(svg)) {
	font-size: 0 !important;
	}

	.demo-row .gradio-audio p {
	display: none !important;
	}

	/* Hide "Drop Audio Here", "- or -", "Click to Upload" text */
	.demo-row .gradio-audio .upload-container span,
	.demo-row .gradio-audio .upload-container p {
	font-size: 0 !important;
	line-height: 0 !important;
	}

	/* Keep SVG icons visible */
	.demo-row .gradio-audio svg {
	font-size: initial !important;
	}

	/* ADDITIONAL METHODS: Hide all text in audio upload area */
	.demo-row .gradio-audio label {
	font-size: 0 !important;
	}

	.demo-row .gradio-audio label span:not(:has(svg)) {
	display: none !important;
	}

	.demo-row .gradio-audio .file-preview {
	font-size: 0 !important;
	}

	.demo-row .gradio-audio .file-preview span {
	font-size: 0 !important;
	display: none !important;
	}

	.demo-row .gradio-audio [data-testid="upload-text"],
	.demo-row .gradio-audio [data-testid="file-preview-text"],
	.demo-row .gradio-audio .upload-text,
	.demo-row .gradio-audio .file-preview-text {
	display: none !important;
	visibility: hidden !important;
	font-size: 0 !important;
	}

	/* Target all text nodes (more aggressive) */
	.demo-row .gradio-audio *:not(svg):not(path):not(circle):not(rect):not(line) {
	color: transparent !important;
	}

	.demo-row .gradio-audio button {
	color: white !important;
	}

	/* Ensure icons remain visible */
	.demo-row .gradio-audio svg,
	.demo-row .gradio-audio svg * {
	color: initial !important;
	fill: currentColor !important;
	stroke: currentColor !important;
	}

	/* NUCLEAR OPTION: Hide everything in label, then show only necessary elements */
	.demo-row .gradio-audio label > div > div {
	display: none !important;
	}

	.demo-row .gradio-audio label::before {
	content: '' !important;
	}

	.demo-row .gradio-audio label * {
	visibility: hidden !important;
	}

	.demo-row .gradio-audio label svg {
	visibility: visible !important;
	}

	.demo-row .gradio-audio label button {
	visibility: visible !important;
	}

	.demo-row .gradio-audio label audio {
	visibility: visible !important;
	}

	/* Force hide any text content */
	.demo-row .gradio-audio label > div::after,
	.demo-row .gradio-audio label > div::before {
	content: '' !important;
	display: none !important;
	}

	/* Additional override for upload text elements */
	.demo-row .gradio-audio [class*="upload"],
	.demo-row .gradio-audio [class*="placeholder"],
	.demo-row .gradio-audio [class*="text"] {
	font-size: 0 !important;
	line-height: 0 !important;
	width: 0 !important;
	height: 0 !important;
	opacity: 0 !important;
	visibility: hidden !important;
	position: absolute !important;
	left: -9999px !important;
	}

	/* NUCLEAR OPTION 2: Complete removal of label content */
	.demo-row .gradio-audio label.block {
	display: none !important;
	}

	.demo-row .gradio-audio .file-upload {
	display: none !important;
	}

	/* Hide all direct text children */
	.demo-row .gradio-audio label > span:not(:has(button)):not(:has(audio)):not(:has(svg)) {
	display: none !important;
	}

	/* Gradio 6.0 specific selectors - upload area */
	.demo-row .gradio-audio [data-testid="upload-button"],
	.demo-row .gradio-audio [data-testid="file-upload"],
	.demo-row .gradio-audio .upload-area {
	display: none !important;
	}

	/* Hide all paragraph elements in audio component */
	.demo-row .gradio-audio label p,
	.demo-row .gradio-audio label span.text,
	.demo-row .gradio-audio label div.text {
	display: none !important;
	}

	/* More aggressive text hiding - target by content */
	.demo-row .gradio-audio *::before,
	.demo-row .gradio-audio *::after {
	content: '' !important;
	display: none !important;
	}

	/* Make sure only buttons and audio players are visible */
	.demo-row .gradio-audio > label > div > div:not(:has(button)):not(:has(audio)) {
	display: none !important;
	}

	/* Gradio Blocks specific - Hide wrapper divs that contain text */
	.demo-row .gradio-audio .wrap > div:not(:has(button)):not(:has(audio)):not(:has(svg)) {
	display: none !important;
	}

	/* Override for Gradio 6.x structure */
	.demo-row .gradio-audio [class*="svelte-"] span:not(:has(svg)):not(:has(button)) {
	display: none !important;
	}

	.demo-row .gradio-dropdown,
	.demo-row .gradio-textbox {
	margin-bottom: 2px !important;
	}

	.demo-row .gradio-row {
	margin-bottom: 2px !important;
	}

	/* IMPORTANT: Button alignment - push buttons to bottom with margin-top: auto */
	.demo-row .gradio-button {
	margin-top: auto !important;
	margin-bottom: 0px !important;
	flex-shrink: 0 !important;
	}

	/* Output area should not push button down - set flex: 1 */
	.demo-row .gradio-html {
	flex: 1 !important;
	margin-bottom: 0 !important;
	display: flex !important;
	flex-direction: column !important;
	max-height: 300px !important;
	overflow-y: auto !important;
	}

	/* Output audio component (clean_audio_output) height limit */
	.demo-row .gradio-audio[data-testid="audio-output"],
	.demo-row > div:last-child .gradio-audio {
	max-height: 120px !important;
	min-height: 60px !important;
	height: auto !important;
	margin-bottom: 0px !important;
	}


	/* ===== CUSTOM ACTION BUTTONS (DEMO CARDS) ===== */
	.custom-action-btn,
	.custom-action-btn button,
	.custom-action-btn button[data-testid="button"],
	button.custom-action-btn,
	.demo-row .custom-action-btn,
	.demo-row .custom-action-btn button {
	width: 100% !important;
	min-width: 100% !important;
	max-width: 100% !important;
	background: linear-gradient(135deg, #6366f1, #7c3aed) !important;
	border: none !important;
	border-radius: 12px !important;
	padding: 8px 16px !important;
	height: 38px !important;
	min-height: 38px !important;
	max-height: 38px !important;
	font-weight: 700 !important;
	font-size: 16px !important;
	letter-spacing: 1.5px !important;
	text-transform: uppercase !important;
	color: white !important;
	box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4) !important;
	transition: all 0.3s ease !important;
	}

	.custom-action-btn:hover,
	.custom-action-btn button:hover,
	.custom-action-btn button[data-testid="button"]:hover,
	button.custom-action-btn:hover,
	.demo-row .custom-action-btn:hover,
	.demo-row .custom-action-btn button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 32px rgba(124, 58, 237, 0.6) !important;
	background: linear-gradient(135deg, #6366f1, #7c3aed) !important;
	}

	/* ===== DECORATIVE ELEMENTS ===== */
	.diamond-decoration {
	position: fixed;
	bottom: 40px;
	right: 40px;
	width: 80px;
	height: 80px;
	border: 2px solid rgba(124, 58, 237, 0.2);
	transform: rotate(45deg);
	pointer-events: none;
	z-index: 1;
	}

	.star-decoration {
	display: none;
	}
	"""

	with gr.Blocks() as demo:
	# Inject custom CSS and decorative elements (positioned fixed, no DOM space)
	gr.HTML(f"""
	<style>{custom_css}</style>
	<div class="diamond-decoration"></div>
	<div class="star-decoration">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none">
	<path d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z" fill="#a5b4fc" opacity="0.4"/>
	</svg>
	</div>
	<script>
	// Force dark mode styling (no redirect, just add classes)
	(function() {{
	// Add dark mode classes immediately
	document.documentElement.classList.add('dark');
	document.documentElement.setAttribute('data-theme', 'dark');
	document.body.classList.add('dark');
	document.body.setAttribute('data-theme', 'dark');

	// Also observe for Gradio container and add dark class there
	const observer = new MutationObserver(function(mutations) {{
	const container = document.querySelector('.gradio-container');
	if (container && !container.classList.contains('dark')) {{
	container.classList.add('dark');
	container.setAttribute('data-theme', 'dark');
	}}
	}});
	observer.observe(document.body, {{ childList: true, subtree: true }});
	}})();

	// JavaScript to completely remove upload text from Audio components in demo-row
	function removeAudioUploadText() {{
	// Find all audio components in demo-row
	const demoRows = document.querySelectorAll('.demo-row');
	demoRows.forEach(row => {{
	const audioComponents = row.querySelectorAll('.gradio-audio');
	audioComponents.forEach(audio => {{
	// METHOD 1: Remove ALL text nodes (most aggressive)
	const walker = document.createTreeWalker(
	audio,
	NodeFilter.SHOW_TEXT,
	null,
	false
	);

	const textNodesToRemove = [];
	while(walker.nextNode()) {{
	const node = walker.currentNode;
	// Only keep text that's inside button or audio elements
	const parentTag = node.parentElement?.tagName?.toLowerCase();
	if (parentTag !== 'button' && parentTag !== 'audio') {{
	textNodesToRemove.push(node);
	}}
	}}

	textNodesToRemove.forEach(node => {{
	if (node.parentNode) {{
	node.parentNode.removeChild(node);
	}}
	}});

	// METHOD 2: Hide elements by class/data attributes
	const elementsToHide = audio.querySelectorAll(
	'[class="upload"], [class="placeholder"], [class*="text"], ' +
	'[data-testid="upload"], [data-testid="file"], ' +
	'label.block, .file-upload, p, span:not(:has(button)):not(:has(svg))'
	);
	elementsToHide.forEach(el => {{
	el.style.display = 'none';
	el.style.visibility = 'hidden';
	el.style.fontSize = '0';
	el.style.lineHeight = '0';
	el.style.width = '0';
	el.style.height = '0';
	el.style.opacity = '0';
	el.style.position = 'absolute';
	el.style.left = '-9999px';
	}});

	// METHOD 3: Remove label.block entirely if it exists
	const labelBlocks = audio.querySelectorAll('label.block');
	labelBlocks.forEach(label => {{
	// Only remove if it doesn't contain button or audio
	if (!label.querySelector('button') && !label.querySelector('audio')) {{
	label.remove();
	}}
	}});

	// METHOD 4: Clear innerHTML of divs that don't contain buttons/audio
	const allDivs = audio.querySelectorAll('div');
	allDivs.forEach(div => {{
	if (!div.querySelector('button') && !div.querySelector('audio') && !div.querySelector('svg')) {{
	// Check if div only contains text
	const hasOnlyText = Array.from(div.childNodes).every(node =>
	node.nodeType === Node.TEXT_NODE ||
	(node.nodeType === Node.ELEMENT_NODE && !node.querySelector('button, audio, svg'))
	);
	if (hasOnlyText) {{
	div.innerHTML = '';
	}}
	}}
	}});
	}});
	}});
	}}

	// Run immediately
	removeAudioUploadText();

	// Run after DOM changes (MutationObserver)
	const observer = new MutationObserver(() => {{
	removeAudioUploadText();
	}});

	// Start observing after a short delay to ensure Gradio has loaded
	setTimeout(() => {{
	observer.observe(document.body, {{
	childList: true,
	subtree: true
	}});
	}}, 500);

	// Also run on window load
	window.addEventListener('load', removeAudioUploadText);

	// Run periodically for the first 5 seconds (catch late renders)
	let attempts = 0;
	const interval = setInterval(() => {{
	removeAudioUploadText();
	attempts++;
	if (attempts > 10) {{
	clearInterval(interval);
	}}
	}}, 500);
	</script>
	""")

	# ==================== HEADER (FLOATING) ====================
	gr.HTML(f"""
	<div class="header-main">
	<div class="header-left">
	<span class="header-icon">
	<svg width="72" height="72" viewBox="0 0 52 52" fill="none">
	<defs>
	<linearGradient id="logoGradHeader" x1="0%" y1="0%" x2="100%" y2="100%">
	<stop offset="0%" style="stop-color:#7c3aed"/>
	<stop offset="100%" style="stop-color:#6366f1"/>
	</linearGradient>
	</defs>
	<!-- Left: Microphone (rounded capsule + stand) -->
	<!-- Microphone capsule (rounded rect) -->
	<rect x="8" y="12" width="9" height="14" rx="4.5" fill="url(#logoGradHeader)"/>
	<!-- Microphone grill lines (horizontal detail) -->
	<line x1="9" y1="16" x2="14" y2="16" stroke="#000000" stroke-width="0.8" stroke-linecap="round"/>
	<line x1="9" y1="19.5" x2="14" y2="19.5" stroke="#000000" stroke-width="0.8" stroke-linecap="round"/>
	<line x1="9" y1="23" x2="14" y2="23" stroke="#000000" stroke-width="0.8" stroke-linecap="round"/>
	<!-- Arc stand -->
	<path d="M6.5 26c0 2.5 2.2 5 6 5s6-2.5 6-5" stroke="url(#logoGradHeader)" stroke-width="2" fill="none" stroke-linecap="round"/>
	<!-- Pole -->
	<rect x="11.5" y="31" width="2" height="5" fill="url(#logoGradHeader)"/>
	<!-- Base -->
	<rect x="7.5" y="36" width="9" height="2" rx="1" fill="url(#logoGradHeader)"/>

	<!-- Right: Audio Wave Bars (4 vertical bars with different heights) -->
	<rect x="28" y="18" width="3" height="16" rx="1.5" fill="url(#logoGradHeader)" opacity="0.9"/>
	<rect x="34" y="14" width="3" height="24" rx="1.5" fill="url(#logoGradHeader)" opacity="0.95"/>
	<rect x="40" y="20" width="3" height="12" rx="1.5" fill="url(#logoGradHeader)" opacity="0.85"/>
	<rect x="46" y="22" width="3" height="8" rx="1.5" fill="url(#logoGradHeader)" opacity="0.8"/>
	</svg>
	</span>
	<div>
	<span class="header-title">VoiceKit</span>
	<span class="header-subtitle">MCP Server</span>
	</div>
	</div>
	<button class="docs-button" onclick="document.getElementById('docsModal').classList.add('active')">
	<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
	<path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
	<polyline points="14 2 14 8 20 8"/>
	<line x1="16" y1="13" x2="8" y2="13"/>
	<line x1="16" y1="17" x2="8" y2="17"/>
	<polyline points="10 9 9 9 8 9"/>
	</svg>
	DOCS
	</button>
	</div>

	<!-- DOCS Modal -->
	<div id="docsModal" class="docs-modal-overlay" onclick="if(event.target === this) this.classList.remove('active')">
	<div class="docs-modal">
	<div class="docs-modal-header">
	<div class="docs-modal-title">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="#a78bfa" stroke-width="2">
	<path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
	<polyline points="14 2 14 8 20 8"/>
	</svg>
	Documentation
	</div>
	<button class="docs-modal-close" onclick="document.getElementById('docsModal').classList.remove('active')">×</button>
	</div>
	<div class="docs-modal-content">
	{readme_html}
	</div>
	</div>
	</div>
	""")

	# ==================== TOP ROW: QUICK START + AVAILABLE TOOLS ====================
	with gr.Row(equal_height=True):
	# QUICK START CARD
	with gr.Column(scale=1):
	gr.HTML("""
	<div class="card" style="min-height: 550px;">
	<div class="card-title">
	<svg width="18" height="18" viewBox="0 0 24 24" fill="#7c3aed" style="margin-right: 8px;">
	<path d="M19.14 12.94c.04-.31.06-.63.06-.94 0-.31-.02-.63-.06-.94l2.03-1.58c.18-.14.23-.41.12-.61l-1.92-3.32c-.12-.22-.37-.29-.59-.22l-2.39.96c-.5-.38-1.03-.7-1.62-.94l-.36-2.54c-.04-.24-.24-.41-.48-.41h-3.84c-.24 0-.43.17-.47.41l-.36 2.54c-.59.24-1.13.57-1.62.94l-2.39-.96c-.22-.08-.47 0-.59.22L2.74 8.87c-.12.21-.08.47.12.61l2.03 1.58c-.04.31-.06.63-.06.94s.02.63.06.94l-2.03 1.58c-.18.14-.23.41-.12.61l1.92 3.32c.12.22.37.29.59.22l2.39-.96c.5.38 1.03.7 1.62.94l.36 2.54c.05.24.24.41.48.41h3.84c.24 0 .44-.17.47-.41l.36-2.54c.59-.24 1.13-.56 1.62-.94l2.39.96c.22.08.47 0 .59-.22l1.92-3.32c.12-.22.07-.47-.12-.61l-2.01-1.58zM12 15.6c-1.98 0-3.6-1.62-3.6-3.6s1.62-3.6 3.6-3.6 3.6 1.62 3.6 3.6-1.62 3.6-3.6 3.6z"/>
	</svg>
	QUICK START
	</div>

	<div class="terminal-window">
	<!-- Terminal Header with Dots and Filename -->
	<div class="terminal-header">
	<div class="terminal-dots">
	<div class="terminal-dot red"></div>
	<div class="terminal-dot yellow"></div>
	<div class="terminal-dot green"></div>
	</div>
	<div class="terminal-title">claude_desktop_config.json</div>
	<div style="width: 60px;"></div> <!-- Spacer for center alignment -->
	</div>

	<!-- Terminal Body with Line Numbers and Code -->
	<div class="terminal-body">
	<div class="line-numbers">
	<div class="line-num">1</div>
	<div class="line-num">2</div>
	<div class="line-num">3</div>
	<div class="line-num">4</div>
	<div class="line-num">5</div>
	<div class="line-num">6</div>
	<div class="line-num">7</div>
	<div class="line-num">8</div>
	<div class="line-num">9</div>
	<div class="line-num">10</div>
	<div class="line-num">11</div>
	<div class="line-num">12</div>
	</div>
	<div class="code-content">
	<div class="code-line"><span class="json-bracket">{</span></div>
	<div class="code-line"> <span class="json-key">"mcpServers"</span><span class="json-colon">:</span> <span class="json-bracket">{</span></div>
	<div class="code-line"> <span class="json-key">"voicekit"</span><span class="json-colon">:</span> <span class="json-bracket">{</span></div>
	<div class="code-line"> <span class="json-key">"command"</span><span class="json-colon">:</span> <span class="json-string">"npx"</span><span class="json-comma">,</span></div>
	<div class="code-line"> <span class="json-key">"args"</span><span class="json-colon">:</span> <span class="json-bracket">[</span></div>
	<div class="code-line"> <span class="json-string">"-y"</span><span class="json-comma">,</span></div>
	<div class="code-line"> <span class="json-string">"mcp-remote"</span><span class="json-comma">,</span></div>
	<div class="code-line"> <span class="json-string">"https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse"</span></div>
	<div class="code-line"> <span class="json-bracket">]</span></div>
	<div class="code-line"> <span class="json-bracket">}</span></div>
	<div class="code-line"> <span class="json-bracket">}</span></div>
	<div class="code-line"><span class="json-bracket">}</span></div>
	</div>
	</div>
	</div>

	<button class="copy-button" onclick="navigator.clipboard.writeText(JSON.stringify({mcpServers:{voicekit:{command:'npx',args:['-y','mcp-remote','https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse']}}},null,2))">
	<svg width="16" height="16" viewBox="0 0 24 24" fill="white" style="display: inline-block; vertical-align: middle;">
	<rect x="9" y="9" width="13" height="13" rx="2" ry="2" fill="white"/>
	<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" fill="none" stroke="white" stroke-width="2"/>
	</svg>
	COPY CONFIG
	</button>
	</div>
	""")

	# AVAILABLE TOOLS CARD
	with gr.Column(scale=1):
	gr.HTML("""
	<div class="card" style="min-height: 550px;">
	<div class="card-title">
	<svg width="18" height="18" viewBox="0 0 24 24" fill="#7c3aed" style="margin-right: 8px;">
	<path d="M22.7 19l-9.1-9.1c.9-2.3.4-5-1.5-6.9-2-2-5-2.4-7.4-1.3L9 6 6 9 1.6 4.7C.4 7.1.9 10.1 2.9 12.1c1.9 1.9 4.6 2.4 6.9 1.5l9.1 9.1c.4.4 1 .4 1.4 0l2.3-2.3c.5-.4.5-1.1.1-1.4z"/>
	</svg>
	AVAILABLE TOOLS
	</div>
	<table class="tools-table">
	<thead>
	<tr>
	<th>TOOL</th>
	<th>PURPOSE</th>
	<th>INPUT</th>
	<th>OUTPUT</th>
	</tr>
	</thead>
	<tbody>
	<tr>
	<td>
	<div style="display: flex; align-items: center; gap: 12px;">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M21 16V8L12 4L3 8V16L12 20L21 16Z" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<path d="M12 4V12M12 12V20M12 12L21 8M12 12L3 8" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<circle cx="12" cy="12" r="2" fill="#A855F7"/>
	<circle cx="16.5" cy="10" r="1.5" fill="#A855F7"/>
	<circle cx="7.5" cy="14" r="1.5" fill="#A855F7"/>
	<path d="M12 12L16.5 10M12 12L7.5 14" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round"/>
	</svg>
	<span class="tool-name">extract_embedding</span>
	</div>
	</td>
	<td>Extract 768-dim voice fingerprint</td>
	<td>audio_base64</td>
	<td>embedding, model, dim</td>
	</tr>
	<tr>
	<td>
	<div style="display: flex; align-items: center; gap: 12px;">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M2 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M5 8V16" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M8 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M22 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M19 7V17" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M16 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M10 12H14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M10 12L11.5 10.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M10 12L11.5 13.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M14 12L12.5 10.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M14 12L12.5 13.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	</svg>
	<span class="tool-name">match_voice</span>
	</div>
	</td>
	<td>Compare two voice similarities</td>
	<td>audio1_base64, audio2_base64</td>
	<td>similarity, tone_score</td>
	</tr>
	<tr>
	<td>
	<div style="display: flex; align-items: center; gap: 12px;">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M22 10C22 10 20 4 17 4C14 4 12 16 9 16C6 16 4 10 2 10" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<g transform="translate(13, 11)">
	<circle cx="5" cy="5" r="4" stroke="#A855F7" stroke-width="1.5"/>
	<path d="M8 8L11 11" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round"/>
	</g>
	</svg>
	<span class="tool-name">analyze_acoustics</span>
	</div>
	</td>
	<td>Analyze pitch, energy, rhythm, tempo</td>
	<td>audio_base64</td>
	<td>pitch, energy, rhythm, tempo</td>
	</tr>
	<tr>
	<td>
	<div style="display: flex; align-items: center; gap: 12px;">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M2 12C2 12 4 5 7 5C10 5 11 19 14 19C15.5 19 16.5 15 16.5 15" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<path d="M19 7H22" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M19 12H22" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	<path d="M19 17H22" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
	</svg>
	<span class="tool-name">transcribe_audio</span>
	</div>
	</td>
	<td>Convert speech to text</td>
	<td>audio_base64, language</td>
	<td>text, language, model</td>
	</tr>
	<tr>
	<td>
	<div style="display: flex; align-items: center; gap: 12px;">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M12 5V19" stroke="#A855F7" stroke-width="2.5" stroke-linecap="round"/>
	<path d="M9 8V16" stroke="#A855F7" stroke-width="2.5" stroke-linecap="round"/>
	<path d="M15 8V16" stroke="#A855F7" stroke-width="2.5" stroke-linecap="round"/>
	<path d="M5 4H3V20H5" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
	<path d="M19 4H21V20H19" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
	</svg>
	<span class="tool-name">isolate_voice</span>
	</div>
	</td>
	<td>Remove background music/noise</td>
	<td>audio_base64</td>
	<td>isolated_audio_base64, metadata</td>
	</tr>
	<tr>
	<td>
	<div style="display: flex; align-items: center; gap: 12px;">
	<svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<circle cx="12" cy="12" r="9" stroke="#A855F7" stroke-width="1" opacity="0.3"/>
	<path d="M12 3V21" stroke="#A855F7" stroke-width="1" opacity="0.3"/>
	<path d="M4.2 7.5L19.8 16.5" stroke="#A855F7" stroke-width="1" opacity="0.3"/>
	<path d="M19.8 7.5L4.2 16.5" stroke="#A855F7" stroke-width="1" opacity="0.3"/>
	<path d="M12 5L18 9L16.5 18H7.5L6 9L12 5Z" fill="#A855F7" fill-opacity="0.4" stroke="#A855F7" stroke-width="2" stroke-linejoin="round"/>
	<circle cx="12" cy="5" r="1.5" fill="#A855F7"/>
	<circle cx="18" cy="9" r="1.5" fill="#A855F7"/>
	<circle cx="16.5" cy="18" r="1.5" fill="#A855F7"/>
	<circle cx="7.5" cy="18" r="1.5" fill="#A855F7"/>
	<circle cx="6" cy="9" r="1.5" fill="#A855F7"/>
	</svg>
	<span class="tool-name">grade_voice</span>
	</div>
	</td>
	<td>5-metric comprehensive analysis</td>
	<td>user_audio, reference_audio, text, category</td>
	<td>overall, metrics, feedback</td>
	</tr>
	</tbody>
	</table>
	</div>
	""")

	# ==================== FIRST ROW: 3 DEMO CARDS ====================
	with gr.Row(equal_height=True, elem_classes="demo-row"):
	# EXTRACT EMBEDDING
	with gr.Column(scale=1, elem_classes="demo-card-column"):
	gr.HTML("""
	<div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;">
	<svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M21 16V8L12 4L3 8V16L12 20L21 16Z" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<path d="M12 4V12M12 12V20M12 12L21 8M12 12L3 8" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<circle cx="12" cy="12" r="2" fill="#7c3aed"/>
	<circle cx="16.5" cy="10" r="1.5" fill="#7c3aed"/>
	<circle cx="7.5" cy="14" r="1.5" fill="#7c3aed"/>
	<path d="M12 12L16.5 10M12 12L7.5 14" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round"/>
	</svg>
	<div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">
	EXTRACT EMBEDDING
	</div>
	</div>
	""")
	# EXTRACT EMBEDDING controls, created in display order: audio picker,
	# action button, then an HTML panel seeded with the empty-state markup
	# returned by create_embedding_empty().
	embedding_audio = gr.Audio(label="Audio Input", show_label=False, type="filepath", format="wav")
	embedding_btn = gr.Button(
	    value="EXTRACT",
	    variant="primary",
	    size="lg",
	    elem_classes="custom-action-btn",
	)
	embedding_output = gr.HTML(value=create_embedding_empty())

	# The handler result replaces the HTML panel. The listener is registered
	# with api_visibility="private" (demo UI handler, as opposed to the MCP
	# tool bindings declared near the end of the file).
	embedding_btn.click(
	    fn=demo_extract_embedding,
	    inputs=[embedding_audio],
	    outputs=[embedding_output],
	    api_visibility="private",
	)

	# MATCH VOICE (compare two voices)
	with gr.Column(scale=1, elem_classes="demo-card-column"):
	gr.HTML("""
	<div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;">
	<svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M2 10V14" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M5 8V16" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M8 11V13" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M22 10V14" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M19 7V17" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M16 11V13" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M10 12H14" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M10 12L11.5 10.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M10 12L11.5 13.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M14 12L12.5 10.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M14 12L12.5 13.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	</svg>
	<div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">
	MATCH VOICE
	</div>
	</div>
	""")
	# MATCH VOICE controls: two audio inputs, a COMPARE button and an HTML
	# result panel.
	# The Row opened here is immediately followed by the two audio inputs
	# (presumably so the clips render side by side — confirm against the layout).
	with gr.Row():
	# First clip; type="filepath" is requested, so the handler is given a path.
	compare_audio1 = gr.Audio(
	type="filepath",
	label="Audio 1",
	show_label=False,
	format="wav"
	)
	# Second clip, configured identically apart from its label.
	compare_audio2 = gr.Audio(
	type="filepath",
	label="Audio 2",
	show_label=False,
	format="wav"
	)
	compare_btn = gr.Button("COMPARE", variant="primary", size="lg", elem_classes="custom-action-btn")
	# Result panel starts with the markup returned by create_compare_empty().
	compare_output = gr.HTML(value=create_compare_empty())

	# Both clips are passed to demo_match_voice in order (audio1, audio2) and
	# its return value replaces the result panel. Registered with
	# api_visibility="private", unlike the MCP tool bindings at the end of the file.
	compare_btn.click(
	demo_match_voice,
	inputs=[compare_audio1, compare_audio2],
	outputs=[compare_output],
	api_visibility="private"
	)

	# ACOUSTIC ANALYSIS
	with gr.Column(scale=1, elem_classes="demo-card-column"):
	gr.HTML("""
	<div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;">
	<svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M22 10C22 10 20 4 17 4C14 4 12 16 9 16C6 16 4 10 2 10" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<g transform="translate(13, 11)">
	<circle cx="5" cy="5" r="4" stroke="#7c3aed" stroke-width="1.5"/>
	<path d="M8 8L11 11" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round"/>
	</g>
	</svg>
	<div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">
	ANALYZE ACOUSTICS
	</div>
	</div>
	""")
	# ANALYZE ACOUSTICS controls, created in display order: audio picker,
	# action button, then an HTML panel seeded with the empty-state markup
	# returned by create_acoustic_empty().
	acoustic_audio = gr.Audio(label="Audio Input", show_label=False, type="filepath", format="wav")
	acoustic_btn = gr.Button(
	    value="ANALYZE",
	    variant="primary",
	    size="lg",
	    elem_classes="custom-action-btn",
	)
	acoustic_output = gr.HTML(value=create_acoustic_empty())

	# The handler result replaces the HTML panel. The listener is registered
	# with api_visibility="private" (demo UI handler, as opposed to the MCP
	# tool bindings declared near the end of the file).
	acoustic_btn.click(
	    fn=demo_acoustic_analysis,
	    inputs=[acoustic_audio],
	    outputs=[acoustic_output],
	    api_visibility="private",
	)

	# ==================== SECOND ROW: 3 MORE DEMO CARDS ====================
	with gr.Row(equal_height=True, elem_classes="demo-row"):
	# AUDIO TRANSCRIPTION
	with gr.Column(scale=1, elem_classes="demo-card-column"):
	gr.HTML("""
	<div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;">
	<svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M2 12C2 12 4 5 7 5C10 5 11 19 14 19C15.5 19 16.5 15 16.5 15" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
	<path d="M19 7H22" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M19 12H22" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	<path d="M19 17H22" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/>
	</svg>
	<div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">
	TRANSCRIBE AUDIO
	</div>
	</div>
	""")
	# TRANSCRIBE AUDIO controls, created in display order: audio picker,
	# action button, then an HTML panel seeded with the empty-state markup
	# returned by create_transcription_empty().
	transcribe_audio_input = gr.Audio(label="Audio Input", show_label=False, type="filepath", format="wav")
	transcribe_btn = gr.Button(
	    value="TRANSCRIBE",
	    variant="primary",
	    size="lg",
	    elem_classes="custom-action-btn",
	)
	transcribe_output = gr.HTML(value=create_transcription_empty())

	# The demo exposes no language selector: the lambda forwards the audio
	# value and always passes "en" as the second argument. Registered with
	# api_visibility="private" (demo UI handler only).
	transcribe_btn.click(
	    fn=lambda audio: demo_transcribe_audio(audio, "en"),
	    inputs=[transcribe_audio_input],
	    outputs=[transcribe_output],
	    api_visibility="private",
	)

	# ISOLATE VOICE (clean audio extraction)
	with gr.Column(scale=1, elem_classes="demo-card-column"):
	gr.HTML("""
	<div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;">
	<svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<path d="M12 5V19" stroke="#7c3aed" stroke-width="2.5" stroke-linecap="round"/>
	<path d="M9 8V16" stroke="#7c3aed" stroke-width="2.5" stroke-linecap="round"/>
	<path d="M15 8V16" stroke="#7c3aed" stroke-width="2.5" stroke-linecap="round"/>
	<path d="M5 4H3V20H5" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
	<path d="M19 4H21V20H19" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
	</svg>
	<div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">
	ISOLATE VOICE
	</div>
	</div>
	""")
	# ISOLATE VOICE controls, created in display order: source audio picker,
	# action button, then a second Audio component that receives the result.
	clean_audio_input = gr.Audio(
	    label="Audio with Background", show_label=False, type="filepath", format="wav"
	)
	clean_btn = gr.Button(
	    value="EXTRACT VOICE",
	    variant="primary",
	    size="lg",
	    elem_classes="custom-action-btn",
	)
	# Unlike the other cards, the result here is audio rather than an HTML panel.
	clean_audio_output = gr.Audio(type="filepath", label="Clean Audio", visible=True)

	# demo_clean_extraction's return value is written to the output Audio.
	# Registered with api_visibility="private" (demo UI handler only).
	clean_btn.click(
	    fn=demo_clean_extraction,
	    inputs=[clean_audio_input],
	    outputs=[clean_audio_output],
	    api_visibility="private",
	)

	# GRADE VOICE (voice similarity analysis)
	with gr.Column(scale=1, elem_classes="demo-card-column"):
	gr.HTML("""
	<div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;">
	<svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;">
	<circle cx="12" cy="12" r="9" stroke="#7c3aed" stroke-width="1" opacity="0.3"/>
	<path d="M12 3V21" stroke="#7c3aed" stroke-width="1" opacity="0.3"/>
	<path d="M4.2 7.5L19.8 16.5" stroke="#7c3aed" stroke-width="1" opacity="0.3"/>
	<path d="M19.8 7.5L4.2 16.5" stroke="#7c3aed" stroke-width="1" opacity="0.3"/>
	<path d="M12 5L18 9L16.5 18H7.5L6 9L12 5Z" fill="#7c3aed" fill-opacity="0.4" stroke="#7c3aed" stroke-width="2" stroke-linejoin="round"/>
	<circle cx="12" cy="5" r="1.5" fill="#7c3aed"/>
	<circle cx="18" cy="9" r="1.5" fill="#7c3aed"/>
	<circle cx="16.5" cy="18" r="1.5" fill="#7c3aed"/>
	<circle cx="7.5" cy="18" r="1.5" fill="#7c3aed"/>
	<circle cx="6" cy="9" r="1.5" fill="#7c3aed"/>
	</svg>
	<div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">
	GRADE VOICE
	</div>
	</div>
	""")
	# GRADE VOICE controls: user and reference audio inputs, an ANALYZE button
	# and an HTML result panel.
	# The Row opened here is immediately followed by the two audio inputs
	# (presumably so the clips render side by side — confirm against the layout).
	with gr.Row():
	# The user's recording; type="filepath" is requested, so the handler is given a path.
	similarity_user_audio = gr.Audio(
	type="filepath",
	label="User Audio",
	show_label=False,
	format="wav"
	)
	# The reference recording, configured identically apart from its label.
	similarity_ref_audio = gr.Audio(
	type="filepath",
	label="Reference Audio",
	show_label=False,
	format="wav"
	)
	similarity_btn = gr.Button("ANALYZE", variant="primary", size="lg", elem_classes="custom-action-btn")
	# Result panel starts with the markup returned by create_similarity_empty().
	similarity_output = gr.HTML(value=create_similarity_empty())

	# Only the two clips are wired in (user first, reference second); no text
	# or category input is supplied from this demo card. Registered with
	# api_visibility="private", unlike the MCP tool bindings at the end of the file.
	similarity_btn.click(
	demo_voice_similarity,
	inputs=[similarity_user_audio, similarity_ref_audio],
	outputs=[similarity_output],
	api_visibility="private"
	)


	# ==================== MCP TOOL INTERFACES (HIDDEN, API ONLY) ====================
	# Every tool below follows the same pattern: Textbox input(s), a Textbox
	# output, and a Button whose click listener is bound to the tool function.
	# The Row is created with visible=False, so these components are not meant
	# to be seen in the page. No api_visibility override is passed to these
	# listeners (unlike the demo handlers above), so they keep Gradio's default
	# API exposure — presumably this is what surfaces them as MCP tools when the
	# app is launched with mcp_server=True; confirm against the Gradio MCP guide.
	with gr.Row(visible=False):
	# extract_embedding
	# One text input, one text output.
	mcp_emb_input = gr.Textbox()
	mcp_emb_output = gr.Textbox()
	mcp_emb_btn = gr.Button()
	mcp_emb_btn.click(extract_embedding, inputs=[mcp_emb_input], outputs=[mcp_emb_output])

	# match_voice
	# Two text inputs (passed in order), one text output.
	mcp_cmp_input1 = gr.Textbox()
	mcp_cmp_input2 = gr.Textbox()
	mcp_cmp_output = gr.Textbox()
	mcp_cmp_btn = gr.Button()
	mcp_cmp_btn.click(match_voice, inputs=[mcp_cmp_input1, mcp_cmp_input2], outputs=[mcp_cmp_output])

	# analyze_acoustics
	# One text input, one text output.
	mcp_ac_input = gr.Textbox()
	mcp_ac_output = gr.Textbox()
	mcp_ac_btn = gr.Button()
	mcp_ac_btn.click(analyze_acoustics, inputs=[mcp_ac_input], outputs=[mcp_ac_output])

	# transcribe_audio
	# Audio text input plus a language Textbox whose initial value is "en".
	mcp_tr_input = gr.Textbox()
	mcp_tr_lang = gr.Textbox(value="en")
	mcp_tr_output = gr.Textbox()
	mcp_tr_btn = gr.Button()
	mcp_tr_btn.click(transcribe_audio, inputs=[mcp_tr_input, mcp_tr_lang], outputs=[mcp_tr_output])

	# isolate_voice
	# One text input, one text output.
	mcp_iso_input = gr.Textbox()
	mcp_iso_output = gr.Textbox()
	mcp_iso_btn = gr.Button()
	mcp_iso_btn.click(isolate_voice, inputs=[mcp_iso_input], outputs=[mcp_iso_output])

	# grade_voice
	# Four inputs in order: user audio, reference audio, text, category.
	# The category Textbox has the initial value "meme".
	mcp_sim_user = gr.Textbox()
	mcp_sim_ref = gr.Textbox()
	mcp_sim_text = gr.Textbox()
	mcp_sim_cat = gr.Textbox(value="meme")
	mcp_sim_output = gr.Textbox()
	mcp_sim_btn = gr.Button()
	mcp_sim_btn.click(grade_voice, inputs=[mcp_sim_user, mcp_sim_ref, mcp_sim_text, mcp_sim_cat], outputs=[mcp_sim_output])


	if __name__ == "__main__":
	    # Script entry point: serve the Blocks app on every network interface
	    # at port 7860, with Gradio's MCP server enabled alongside the web UI.
	    demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)