Spaces:
Running
Running
| """ | |
| VoiceKit - MCP Server for Voice Analysis | |
| 6 MCP tools for voice processing (all accept base64 audio): | |
| - Embedding extraction, voice comparison, acoustic analysis | |
| - Speech-to-text, voice isolation, similarity analysis | |
| MCP Endpoint: https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse | |
| """ | |
| import gradio as gr | |
| import base64 | |
| import os | |
| import json | |
| import tempfile | |
| import math | |
| import re | |
# Keep temporary files inside a "gradio_temp" folder under the current
# working directory.
GRADIO_TEMP_DIR = os.path.join(os.getcwd(), "gradio_temp")
os.makedirs(GRADIO_TEMP_DIR, exist_ok=True)
# Publish the folder through the GRADIO_TEMP_DIR environment variable and make
# it the default directory of the stdlib ``tempfile`` module as well.
os.environ['GRADIO_TEMP_DIR'] = tempfile.tempdir = GRADIO_TEMP_DIR
# Modal connection (requires MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets)
# Start from the "not connected" state; it is only upgraded once the import,
# the class lookup and the instantiation below have all succeeded.
analyzer = None
modal_available = False
try:
    import modal

    AudioAnalyzer = modal.Cls.from_name("voice-semantle", "AudioAnalyzer")
    analyzer = AudioAnalyzer()
    modal_available = True
    print("Modal connected!")
except Exception as exc:
    # Any failure above leaves the app running without a backend; the tool
    # functions check ``modal_available`` before touching ``analyzer``.
    analyzer = None
    modal_available = False
    print(f"Modal not available: {exc}")
| # Load README.md and convert to HTML | |
def load_readme_as_html():
    """Render ``README.md`` from the working directory as an HTML fragment.

    This is a small regex-based Markdown converter, not a full parser. It
    handles, in this order: YAML front matter (dropped), fenced code blocks,
    ``#``/``##``/``###`` headings, ``<img>`` tags and ``![alt](src)`` images
    (sources not starting with ``http`` are rewritten to the Space's
    ``resolve/main`` URL), inline code, bold, links, pipe tables, ``- ``
    bullet lists and plain paragraphs.

    Fenced code blocks are extracted first, HTML-escaped, and parked behind
    placeholders until the very end, so none of the later passes can rewrite
    their content (a ``# comment`` inside a code block is not a heading).

    Returns:
        The HTML fragment with every ``{`` and ``}`` doubled, or a
        ``<p>Error loading README: ...</p>`` fragment when anything fails
        (for example when the file does not exist).
    """
    # Imported locally and aliased: this module does not import ``html`` at
    # the top, and the name ``html`` is used below for the working text.
    from html import escape as escape_html

    hf_base_url = "https://huggingface.co/spaces/MCP-1st-Birthday/voicekit/resolve/main"
    img_style = "max-width:100%; height:auto; border-radius:8px; margin:12px 0;"

    def absolute_src(src):
        """Prefix sources that do not start with 'http' with the Space file URL."""
        return src if src.startswith('http') else f"{hf_base_url}/{src}"

    try:
        with open("README.md", "r", encoding="utf-8") as f:
            content = f.read()

        # Remove YAML front matter (only matches at the very start of the file)
        html = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL)

        # Code blocks - swap each fenced block for a one-line placeholder.
        # The placeholder starts with "<", so the paragraph pass leaves it alone.
        code_blocks = []

        def stash_code_block(match):
            code = escape_html(match.group(2).strip(), quote=False)
            code_blocks.append(f'<pre><code>{code}</code></pre>')
            return f'<!-- CODEBLOCK {len(code_blocks) - 1} -->'

        html = re.sub(r'```(\w*)\n(.*?)```', stash_code_block, html, flags=re.DOTALL)

        # Headers (longest marker first so "###" is not consumed by "#")
        html = re.sub(r'^### (.+)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)
        html = re.sub(r'^## (.+)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
        html = re.sub(r'^# (.+)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)

        # Images - existing <img> tags are re-emitted with a fixed style
        # (other attributes are dropped); markdown images become <img> tags.
        def render_img_tag(match):
            return f'<img src="{absolute_src(match.group(1))}" style="{img_style}">'

        def render_md_image(match):
            alt = escape_html(match.group(1), quote=True)
            return f'<img src="{absolute_src(match.group(2))}" alt="{alt}" style="{img_style}">'

        html = re.sub(r'<img src="([^"]+)"[^>]*>', render_img_tag, html)
        # Must run before the link pass, which would otherwise turn
        # "![alt](src)" into "!" followed by a hyperlink.
        html = re.sub(r'!\[([^\]]*)\]\(([^)\s]+)[^)]*\)', render_md_image, html)

        # Inline code
        html = re.sub(r'`([^`]+)`', r'<code>\1</code>', html)
        # Bold
        html = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', html)
        # Links
        html = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<a href="\2" target="_blank">\1</a>', html)

        # Tables - consecutive lines starting with "|" form one table; the
        # first non-separator row becomes the header.
        new_lines = []
        table_html = []
        in_table = False
        for line in html.split('\n'):
            stripped = line.strip()
            if stripped.startswith('|'):
                if not in_table:
                    in_table = True
                    table_html = ['<table>']
                if re.match(r'^\|[\s\-:|]+\|$', stripped):
                    continue  # the |---|---| separator row carries no content
                cells = [c.strip() for c in stripped.split('|')[1:-1]]
                if len(table_html) == 1:
                    table_html.append('<thead><tr>')
                    table_html.extend(f'<th>{cell}</th>' for cell in cells)
                    table_html.append('</tr></thead><tbody>')
                else:
                    table_html.append('<tr>')
                    table_html.extend(f'<td>{cell}</td>' for cell in cells)
                    table_html.append('</tr>')
            else:
                if in_table:
                    table_html.append('</tbody></table>')
                    new_lines.append(''.join(table_html))
                    table_html = []
                    in_table = False
                new_lines.append(line)
        if in_table:  # table ran to the end of the document
            table_html.append('</tbody></table>')
            new_lines.append(''.join(table_html))
        html = '\n'.join(new_lines)

        # Lists
        html = re.sub(r'^- (.+)$', r'<li>\1</li>', html, flags=re.MULTILINE)
        html = re.sub(r'(<li>.*</li>\n?)+', r'<ul>\g<0></ul>', html)

        # Paragraphs - wrap every non-empty line that is not already markup
        result = []
        for line in html.split('\n'):
            stripped = line.strip()
            if stripped and not stripped.startswith('<') and not stripped.startswith('```'):
                result.append(f'<p>{stripped}</p>')
            else:
                result.append(line)
        final_html = '\n'.join(result)

        # Put the untouched code blocks back in place.
        for index, block in enumerate(code_blocks):
            final_html = final_html.replace(f'<!-- CODEBLOCK {index} -->', block)

        # Double curly braces. NOTE(review): presumably the fragment is later
        # substituted into a str.format-style template - confirm at the call
        # site, which is not visible here.
        final_html = final_html.replace('{', '{{').replace('}', '}}')
        return final_html
    except Exception as e:
        return f"<p>Error loading README: {e}</p>"


readme_html = load_readme_as_html()
def file_to_base64(file_path: str) -> str:
    """Return the contents of ``file_path`` encoded as a base64 text string.

    A falsy path (``""`` or ``None``) yields ``""`` without touching the
    filesystem.
    """
    if file_path:
        with open(file_path, "rb") as handle:
            raw = handle.read()
        return base64.b64encode(raw).decode()
    return ""
| # ============================================================================ | |
| # MCP Tools (all accept base64 directly) | |
| # ============================================================================ | |
def extract_embedding(audio_base64: str) -> str:
    """
    Extract voice embedding using Wav2Vec2.

    Computes a 768-dimensional vector representing voice characteristics.
    Useful for voice comparison, speaker identification, etc.
    The full vector is not returned: it is replaced by a short preview
    and its length to keep the response small.

    Args:
        audio_base64: Audio file as base64 encoded string

    Returns:
        JSON string with embedding_preview (first 5 values followed by "..."),
        embedding_length, plus the remaining backend fields (e.g. model, dim);
        or {"error": ...} on failure
    """
    # NOTE(review): the module header lists this function as an MCP tool, so
    # the docstring is presumably what clients see as the tool description -
    # confirm. The Returns section now matches what the code sends back.
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not audio_base64:
        return json.dumps({"error": "No audio provided"})
    try:
        result = analyzer.extract_embedding.remote(audio_base64)
        if "embedding" in result:
            vector = result["embedding"]
            result["embedding_preview"] = vector[:5] + ["..."]
            result["embedding_length"] = len(vector)
            # Drop the raw vector only after the summary fields were built.
            del result["embedding"]
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        # Remote failures and unexpected result shapes are reported as JSON
        # instead of propagating to the caller.
        return json.dumps({"error": str(e)})
def match_voice(audio1_base64: str, audio2_base64: str) -> str:
    """
    Compare similarity between two voices.

    Extracts Wav2Vec2 embeddings and calculates cosine similarity.
    Useful for checking if the same person spoke with similar tone.

    Args:
        audio1_base64: First audio as base64 encoded string
        audio2_base64: Second audio as base64 encoded string

    Returns:
        similarity (0-1), tone_score (0-100)
    """
    # The result is always a JSON string; every failure is reported as
    # {"error": "<message>"} rather than raised.
    if not modal_available:
        failure = "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."
    elif not (audio1_base64 and audio2_base64):
        failure = "Both audio files required"
    else:
        failure = None
    if failure is not None:
        return json.dumps({"error": failure})

    try:
        comparison = analyzer.compare_voices.remote(audio1_base64, audio2_base64)
        return json.dumps(comparison, ensure_ascii=False, indent=2)
    except Exception as exc:
        return json.dumps({"error": str(exc)})
def analyze_acoustics(audio_base64: str) -> str:
    """
    Analyze acoustic features of audio.

    Extracts pitch, energy, rhythm, tempo, and spectral characteristics.
    Useful for understanding voice expressiveness and characteristics.

    Args:
        audio_base64: Audio file as base64 encoded string

    Returns:
        pitch, energy, rhythm, tempo, spectral information
    """
    # Guard clauses first: both preconditions answer with an error payload.
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not audio_base64:
        return json.dumps({"error": "No audio provided"})

    try:
        features = analyzer.analyze_acoustic_features.remote(audio_base64)
        serialized = json.dumps(features, ensure_ascii=False, indent=2)
    except Exception as exc:
        # Covers both the remote call and a result that cannot be serialized.
        serialized = json.dumps({"error": str(exc)})
    return serialized
def transcribe_audio(audio_base64: str, language: str = "en") -> str:
    """
    Convert audio to text (Speech-to-Text).

    Uses ElevenLabs Scribe v1 model for high-quality speech recognition.
    Supports various languages.

    Args:
        audio_base64: Audio file as base64 encoded string
        language: Language code (e.g., "en", "ko", "ja"). Default is "en"

    Returns:
        text, language, model
    """
    # Always answers with a JSON string; problems become {"error": ...}.
    if not modal_available:
        problem = {"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."}
        return json.dumps(problem)
    if not audio_base64:
        problem = {"error": "No audio provided"}
        return json.dumps(problem)

    try:
        transcript = analyzer.transcribe_audio.remote(audio_base64, language)
        return json.dumps(transcript, ensure_ascii=False, indent=2)
    except Exception as exc:
        return json.dumps({"error": str(exc)})
def isolate_voice(audio_base64: str) -> str:
    """
    Remove background music (BGM) and extract voice only.

    Uses ElevenLabs Voice Isolator to remove music, noise, etc.
    Useful for memes, songs, and other audio with background sounds.

    Args:
        audio_base64: Audio file as base64 encoded string

    Returns:
        isolated_audio_base64, metadata (bgm_detected, sizes, duration)
    """
    # Precondition failures and runtime failures share one response shape:
    # a JSON object with a single "error" key.
    def error_json(message):
        return json.dumps({"error": message})

    if not modal_available:
        return error_json("Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets.")
    if not audio_base64:
        return error_json("No audio provided")

    try:
        isolated = analyzer.isolate_voice.remote(audio_base64)
        return json.dumps(isolated, ensure_ascii=False, indent=2)
    except Exception as exc:
        return error_json(str(exc))
def grade_voice(
    user_audio_base64: str,
    reference_audio_base64: str,
    reference_text: str = "",
    category: str = "meme"
) -> str:
    """
    Comprehensively compare and analyze user voice with reference voice.

    Evaluates with 5 metrics:
    - pronunciation: Pronunciation accuracy (STT-based)
    - tone: Voice timbre similarity (Wav2Vec2 embedding)
    - pitch: Pitch matching
    - rhythm: Rhythm sense
    - energy: Energy expressiveness

    Args:
        user_audio_base64: User audio as base64 encoded string
        reference_audio_base64: Reference audio as base64 encoded string
        reference_text: Reference text (optional, enables pronunciation scoring)
        category: Category (meme, song, movie) - determines weights

    Returns:
        JSON string with pitch, rhythm, energy, pronunciation, transcript,
        overall and user_text; or {"error": ...} on failure
    """
    # The Returns section above lists the keys of the simplified payload built
    # below; the richer backend result is not forwarded.
    if not modal_available:
        return json.dumps({"error": "Modal not available. Please set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in HF Secrets."})
    if not user_audio_base64 or not reference_audio_base64:
        return json.dumps({"error": "Both user and reference audio required"})
    try:
        result = analyzer.analyze_audio.remote(
            user_audio_base64=user_audio_base64,
            reference_audio_base64=reference_audio_base64,
            # An empty reference text is sent as None.
            reference_text=reference_text or None,
            challenge_id="mcp_analysis",
            category=category,
        )
        # Simplify output for backend/API use
        metrics = result.get("metrics", {})
        simple_result = {
            "pitch": metrics.get("pitch", 0),
            "rhythm": metrics.get("rhythm", 0),
            "energy": metrics.get("energy", 0),
            "pronunciation": metrics.get("pronunciation", 0),
            # NOTE(review): no "tone" key is emitted although tone is listed
            # as a metric; "transcript" defaults to 0 like the other scores -
            # verify against the backend schema.
            "transcript": metrics.get("transcript", 0),
            "overall": result.get("overall_score", 0),
            "user_text": result.get("user_text", "")
        }
        return json.dumps(simple_result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)})
| # ============================================================================ | |
| # Demo Functions for UI | |
| # ============================================================================ | |
def demo_acoustic_analysis(audio_file):
    """Acoustic Analysis - Analyze pitch, energy, rhythm, tempo

    Args:
        audio_file: Path of the uploaded audio file; falsy when nothing was
            uploaded.

    Returns:
        HTML string: the empty-state card when no file is given, the radar
        chart on success, or a red error box when the backend reports an
        error or the result cannot be parsed/rendered.
    """
    # Local import: the module does not import ``html`` at the top.
    from html import escape

    if not audio_file:
        return create_acoustic_empty()
    audio_b64 = file_to_base64(audio_file)
    result_json = analyze_acoustics(audio_b64)
    try:
        result = json.loads(result_json)
        if "error" in result:
            # Error text comes from the backend; escape it so it is shown as
            # text instead of being interpreted as markup.
            message = escape(str(result.get("error", "Unknown error")))
            return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
<strong>Error in result:</strong><br>{message}
</div>'''
        return create_acoustic_visualization(result)
    except Exception as e:
        detail = escape(str(e))
        raw_excerpt = escape(result_json[:500])
        return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
<strong>Parsing Error:</strong> {detail}<br><br>
<strong>Raw Result (first 500 chars):</strong><br>
<code style="display: block; padding: 10px; background: white; border-radius: 4px; overflow-x: auto; font-size: 12px;">{raw_excerpt}</code>
</div>'''
def demo_transcribe_audio(audio_file, language):
    """Audio Transcription

    Args:
        audio_file: Path of the uploaded audio file; falsy when nothing was
            uploaded.
        language: Language code forwarded to ``transcribe_audio``.

    Returns:
        HTML string: the transcription card on success, otherwise the
        empty-state card (no file, backend error, or unparsable result).
    """
    if not audio_file:
        return create_transcription_empty()
    audio_b64 = file_to_base64(audio_file)
    result_json = transcribe_audio(audio_b64, language)
    try:
        result = json.loads(result_json)
        if "error" in result:
            return create_transcription_empty()
        text = result.get("text", "")
        return create_transcription_visualization(text)
    except Exception:
        # Best-effort UI fallback. ``Exception`` (not a bare ``except``) so
        # KeyboardInterrupt and SystemExit still propagate.
        return create_transcription_empty()
def demo_clean_extraction(audio_file):
    """Clean Audio Extraction - returns audio file only

    Args:
        audio_file: Path of the uploaded audio file; falsy when nothing was
            uploaded.

    Returns:
        Path of a newly written ``.wav`` temp file holding the isolated
        voice, or ``None`` when there is no input, the backend reports an
        error, or decoding/writing fails.
    """
    if not audio_file:
        return None
    audio_b64 = file_to_base64(audio_file)
    result_json = isolate_voice(audio_b64)
    try:
        result = json.loads(result_json)
        if "error" in result:
            return None
        # Convert isolated audio base64 back to file
        isolated_audio_bytes = base64.b64decode(result["isolated_audio_base64"])
        # delete=False: the file has to outlive this function because only
        # its path is returned. NOTE(review): nothing here removes it later.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(isolated_audio_bytes)
        return tmp.name
    except Exception:
        # Best-effort UI fallback. ``Exception`` (not a bare ``except``) so
        # KeyboardInterrupt and SystemExit still propagate.
        return None
def demo_extract_embedding(audio_file):
    """Extract Embedding - extract voice fingerprint

    Args:
        audio_file: Path of the uploaded audio file; falsy when nothing was
            uploaded.

    Returns:
        HTML string: the empty-state card when no file is given, the
        embedding summary card on success, or a red error box when the
        backend reports an error or the result cannot be parsed/rendered.
    """
    # Local import: the module does not import ``html`` at the top.
    from html import escape

    if not audio_file:
        return create_embedding_empty()
    audio_b64 = file_to_base64(audio_file)
    result_json = extract_embedding(audio_b64)
    try:
        result = json.loads(result_json)
        if "error" in result:
            # Error text comes from the backend; escape it so it is shown as
            # text instead of being interpreted as markup.
            message = escape(str(result.get("error", "Unknown error")))
            return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
<strong>Error in result:</strong><br>{message}
</div>'''
        return create_embedding_visualization(result)
    except Exception as e:
        detail = escape(str(e))
        raw_excerpt = escape(result_json[:500])
        return f'''<div style="color: #ef4444; padding: 20px; background: #fee; border-radius: 12px; border: 1px solid #fca5a5;">
<strong>Parsing Error:</strong> {detail}<br><br>
<strong>Raw Result (first 500 chars):</strong><br>
<code style="display: block; padding: 10px; background: white; border-radius: 4px; overflow-x: auto; font-size: 12px;">{raw_excerpt}</code>
</div>'''
def demo_match_voice(audio1, audio2):
    """Compare Voices - compare two voice similarities

    Args:
        audio1: Path of the first uploaded audio file.
        audio2: Path of the second uploaded audio file.

    Returns:
        HTML string: the similarity gauge on success, otherwise the
        empty-state card (missing input, backend error, or unparsable
        result).
    """
    if not audio1 or not audio2:
        return create_compare_empty()
    audio1_b64 = file_to_base64(audio1)
    audio2_b64 = file_to_base64(audio2)
    result_json = match_voice(audio1_b64, audio2_b64)
    try:
        result = json.loads(result_json)
        if "error" in result:
            return create_compare_empty()
        return create_compare_visualization(result)
    except Exception:
        # Best-effort UI fallback. ``Exception`` (not a bare ``except``) so
        # KeyboardInterrupt and SystemExit still propagate.
        return create_compare_empty()
def demo_voice_similarity(user_audio, ref_audio):
    """Voice Similarity - comprehensive 5-metric analysis

    Args:
        user_audio: Path of the user's uploaded audio file.
        ref_audio: Path of the reference audio file.

    Returns:
        HTML string: the similarity visualization on success, otherwise the
        empty-state card (missing input, backend error, or unparsable
        result).
    """
    if not user_audio or not ref_audio:
        return create_similarity_empty()
    user_b64 = file_to_base64(user_audio)
    ref_b64 = file_to_base64(ref_audio)
    # The demo always grades without a reference text, in the "meme" category.
    result_json = grade_voice(user_b64, ref_b64, "", "meme")
    try:
        result = json.loads(result_json)
        if "error" in result:
            return create_similarity_empty()
        return create_similarity_visualization(result)
    except Exception:
        # Best-effort UI fallback. ``Exception`` (not a bare ``except``) so
        # KeyboardInterrupt and SystemExit still propagate.
        return create_similarity_empty()
| # ============================================================================ | |
| # Visualization Functions | |
| # ============================================================================ | |
def create_acoustic_empty():
    """Return the placeholder card shown before any acoustic analysis has run.

    The card is static HTML: a dimmed waveform-and-magnifier icon above a
    one-line hint.
    """
    placeholder_html = """
<div style="
background: rgba(10, 10, 26, 0.4);
border: 1px solid rgba(124, 58, 237, 0.2);
border-radius: 16px;
padding: 30px 20px;
text-align: center;
height: 100%;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
">
<div style="margin-bottom: 12px; opacity: 0.5;">
<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
<path d="M22 10C22 10 20 4 17 4C14 4 12 16 9 16C6 16 4 10 2 10" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
<g transform="translate(13, 11)">
<circle cx="5" cy="5" r="4" stroke="#7c3aed" stroke-width="1.5"/>
<path d="M8 8L11 11" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round"/>
</g>
</svg>
</div>
<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
Upload audio to analyze acoustic features
</div>
</div>
"""
    return placeholder_html
def create_acoustic_visualization(result):
    """Acoustic analysis visualization with radar chart

    Draws a five-axis SVG radar chart (Pitch, Energy, Rhythm, Tempo,
    Spectral) on a 300x300 canvas.

    Args:
        result: Mapping with the optional keys "pitch", "energy", "rhythm"
            and "spectral" (each a mapping whose "score" entry is read) and
            "tempo" (a number, treated as BPM).

    Returns:
        HTML string containing the chart.

    Every plotted value is clamped to 0-100 so the data polygon can never
    leave the chart, and missing, ``None`` or non-numeric values count as 0
    instead of raising.
    """
    def to_number(value):
        """Best-effort float conversion; anything unusable (incl. NaN) is 0."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        return 0.0 if math.isnan(number) else number

    def clamp(value):
        """Limit ``value`` to the 0-100 range used by the chart axes."""
        return min(100.0, max(0.0, value))

    def section_score(name):
        """Read ``result[name]["score"]`` defensively as a 0-100 value."""
        section = result.get(name)
        if not isinstance(section, dict):
            return 0.0
        return clamp(to_number(section.get("score", 0)))

    # Use pre-calculated scores from Modal backend (expected to be 0-100)
    pitch_norm = section_score("pitch")
    energy_norm = section_score("energy")
    rhythm_norm = section_score("rhythm")
    spectral_norm = section_score("spectral")
    # Tempo: normalize BPM to 0-100 (60-180 BPM range)
    tempo_bpm = to_number(result.get("tempo", 0))
    tempo_norm = clamp((tempo_bpm - 60) / 120 * 100) if tempo_bpm > 0 else 0.0

    # Radar chart geometry
    center_x, center_y = 150, 150
    radius = 110
    # Axes are 72 degrees apart, starting at the top (-90 degrees in SVG
    # coordinates, where y grows downwards).
    metrics = [
        ("Pitch", pitch_norm, -90),       # top
        ("Energy", energy_norm, -18),     # top-right
        ("Rhythm", rhythm_norm, 54),      # bottom-right
        ("Tempo", tempo_norm, 126),       # bottom-left
        ("Spectral", spectral_norm, 198)  # top-left
    ]

    def vertex(angle_deg, distance):
        """Point at ``distance`` from the chart centre along ``angle_deg``."""
        angle_rad = math.radians(angle_deg)
        return (
            center_x + distance * math.cos(angle_rad),
            center_y + distance * math.sin(angle_rad),
        )

    def points_attr(vertices):
        """Format vertices for an SVG ``points`` attribute."""
        return " ".join(f"{x:.2f},{y:.2f}" for x, y in vertices)

    # Data polygon vertices
    data_vertices = [vertex(angle, (value / 100) * radius) for _, value, angle in metrics]

    # Background concentric pentagons (20, 40, 60, 80, 100)
    background_pentagons = "".join(
        f'<polygon points="{points_attr([vertex(angle, radius * scale) for _, _, angle in metrics])}" fill="none" stroke="rgba(124, 58, 237, 0.15)" stroke-width="1"/>'
        for scale in (0.2, 0.4, 0.6, 0.8, 1.0)
    )

    # Axis lines from center to vertices
    axis_parts = []
    for _, _, angle in metrics:
        x, y = vertex(angle, radius)
        axis_parts.append(
            f'<line x1="{center_x}" y1="{center_y}" x2="{x:.2f}" y2="{y:.2f}" stroke="rgba(124, 58, 237, 0.3)" stroke-width="1"/>'
        )
    axis_lines = "".join(axis_parts)

    # Labels sit 25px outside the outer pentagon, with the value underneath
    label_parts = []
    for label, value, angle in metrics:
        x, y = vertex(angle, radius + 25)
        label_parts.append(f'''<text x="{x:.2f}" y="{y:.2f}" text-anchor="middle" dominant-baseline="middle" fill="#a5b4fc" font-size="11" font-weight="600">
{label}
<tspan x="{x:.2f}" dy="12" fill="#a855f7" font-size="13" font-weight="700">{int(value)}</tspan>
</text>''')
    labels = "".join(label_parts)

    data_circles = "".join(
        f'<circle cx="{x:.2f}" cy="{y:.2f}" r="4" fill="#a855f7"/>' for x, y in data_vertices
    )

    return f"""
<div style="
background: rgba(10, 10, 26, 0.6);
border: 1px solid rgba(124, 58, 237, 0.3);
border-radius: 16px;
padding: 20px;
display: flex;
align-items: center;
justify-content: center;
">
<svg width="300" height="300" viewBox="0 0 300 300">
<!-- Background pentagons -->
{background_pentagons}
<!-- Axis lines -->
{axis_lines}
<!-- Data polygon -->
<polygon points="{points_attr(data_vertices)}"
fill="rgba(124, 58, 237, 0.3)"
stroke="#a855f7"
stroke-width="2"/>
<!-- Data points -->
{data_circles}
<!-- Labels -->
{labels}
</svg>
</div>
"""
def create_mimicry_empty():
    """Return the placeholder card for the voice mimicry game.

    Static HTML: a dimmed gradient microphone icon above a one-line hint.
    """
    placeholder_html = """
<div style="
background: rgba(10, 10, 26, 0.4);
border: 1px solid rgba(124, 58, 237, 0.2);
border-radius: 16px;
padding: 30px 20px;
text-align: center;
height: 100%;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
">
<div style="margin-bottom: 12px; opacity: 0.5;">
<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
<defs>
<linearGradient id="micGradEmpty" x1="0%" y1="0%" x2="100%" y2="100%">
<stop offset="0%" style="stop-color:#8b5cf6"/>
<stop offset="100%" style="stop-color:#6366f1"/>
</linearGradient>
</defs>
<path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z" fill="url(#micGradEmpty)"/>
<path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" fill="url(#micGradEmpty)"/>
</svg>
</div>
<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
Upload reference and your voice to see similarity scores
</div>
</div>
"""
    return placeholder_html
def create_mimicry_visualization(result):
    """Render the five mimicry scores as labelled progress bars.

    Args:
        result: Mapping read for "pronunciation", "transcript", "pitch",
            "rhythm" and "energy"; each missing key counts as 0.

    Returns:
        HTML string: a header strip followed by one bar per score, in the
        order Pronunciation, Tone, Pitch, Rhythm, Energy.
    """
    def render_bar(label, value):
        """One row: label, a bar filled to ``value`` percent, and the number."""
        return f"""
<div style="display: flex; align-items: center; gap: 12px; margin-bottom: 10px;">
<div style="flex: 1;">
<div style="font-size: 11px; color: #cbd5e1; margin-bottom: 4px;">{label}</div>
<div style="
height: 6px;
background: rgba(124, 58, 237, 0.2);
border-radius: 3px;
overflow: hidden;
">
<div style="
height: 100%;
width: {value}%;
background: linear-gradient(90deg, #6366f1, #22d3ee);
border-radius: 3px;
"></div>
</div>
</div>
<div style="
font-size: 14px;
font-weight: 700;
color: #22d3ee;
min-width: 32px;
text-align: right;
">{value}</div>
</div>
"""

    # (bar label, key read from ``result``), in display order.
    # NOTE(review): the "Tone" bar is fed from the "transcript" key - this
    # mirrors the existing behaviour; confirm that is the intended field.
    bar_specs = (
        ("Pronunciation", "pronunciation"),
        ("Tone", "transcript"),
        ("Pitch", "pitch"),
        ("Rhythm", "rhythm"),
        ("Energy", "energy"),
    )
    bars = "\n".join(render_bar(label, result.get(key, 0)) for label, key in bar_specs)

    return f"""
<div style="
background: rgba(10, 10, 26, 0.6);
border: 1px solid rgba(124, 58, 237, 0.3);
border-radius: 16px;
padding: 20px;
height: 100%;
display: flex;
flex-direction: column;
">
<div style="
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 16px;
padding-bottom: 14px;
border-bottom: 1px solid rgba(124, 58, 237, 0.2);
">
<div style="
width: 40px;
height: 40px;
border-radius: 10px;
background: linear-gradient(135deg, #7c3aed, #6366f1);
display: flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
">
<svg width="24" height="24" viewBox="0 0 24 24" fill="none">
<circle cx="12" cy="12" r="10" fill="rgba(255, 255, 255, 0.2)" stroke="white" stroke-width="1.5"/>
<text x="12" y="16" text-anchor="middle" font-size="10" fill="white" font-weight="bold">AI</text>
</svg>
</div>
<div style="flex: 1; min-width: 0;">
<div style="font-size: 10px; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;">CLAUDE</div>
<div style="font-size: 11px; color: #cbd5e1; line-height: 1.4;">
Wow, that voice input, takes analytical skills of course but I'll handle it
</div>
</div>
</div>
<div style="flex: 1;">
{bars}
</div>
</div>
"""
def create_transcription_empty():
    """Return the placeholder box shown before anything has been transcribed.

    Static HTML: a single bordered box with a one-line hint.
    """
    placeholder_html = """
<div style="
background: rgba(10, 10, 26, 0.4);
border: 1px solid rgba(124, 58, 237, 0.2);
border-radius: 12px;
padding: 20px;
text-align: center;
color: #a5b4fc;
font-size: 13px;
">
Upload audio to transcribe
</div>
"""
    return placeholder_html
def create_transcription_visualization(text):
    """Simple text display for transcription result

    Args:
        text: The transcribed text. A falsy value (``""`` or ``None``) shows
            the fixed message "Transcription completed" instead.

    Returns:
        HTML string: a box that shows the text with its line breaks
        preserved (``white-space: pre-wrap``).
    """
    # Local import: the module does not import ``html`` at the top.
    from html import escape

    # The transcript is arbitrary text, so characters such as "<" and "&"
    # must be escaped to be displayed literally.
    shown = escape(str(text)) if text else "Transcription completed"
    # The text sits directly between the tags: inside a pre-wrap box any
    # newline around it would be rendered as visible blank space.
    return f"""
<div style="
background: rgba(10, 10, 26, 0.6);
border: 1px solid rgba(124, 58, 237, 0.3);
border-radius: 12px;
padding: 20px;
color: #e2e8f0;
font-size: 20px;
line-height: 1.6;
white-space: pre-wrap;
word-wrap: break-word;
">{shown}</div>
"""
def create_embedding_empty():
    """Return the placeholder card shown before an embedding was extracted.

    Static HTML: a dimmed cube icon above a one-line hint.
    """
    placeholder_html = """
<div style="
background: rgba(10, 10, 26, 0.4);
border: 1px solid rgba(124, 58, 237, 0.2);
border-radius: 16px;
padding: 30px 20px;
text-align: center;
height: 100%;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
">
<div style="margin-bottom: 12px; opacity: 0.5;">
<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
<path d="M21 16V8L12 4L3 8V16L12 20L21 16Z" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M12 4V12M12 12V20M12 12L21 8M12 12L3 8" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
<circle cx="12" cy="12" r="2" fill="#A855F7"/>
</svg>
</div>
<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
Upload audio to extract voice embedding
</div>
</div>
"""
    return placeholder_html
def create_embedding_visualization(result):
    """Render the embedding summary card (model, dimensions, value preview).

    Args:
        result: Mapping read for "model" (default "Wav2Vec2"),
            "embedding_length" (falling back to "dim", then 768) and
            "embedding_preview" (a list; only its numeric items are shown).

    Returns:
        HTML string with three rows: Model, Dimensions and Preview.
    """
    model = result.get("model", "Wav2Vec2")
    dim = result.get("embedding_length", result.get("dim", 768))

    # Keep only real numbers: the preview may end with a "..." marker string,
    # which cannot be formatted with ".4f".
    raw_preview = result.get("embedding_preview", [])
    numbers = [item for item in raw_preview if isinstance(item, (int, float))] if raw_preview else []
    preview_str = ", ".join(f"{number:.4f}" for number in numbers) if numbers else "..."

    return f"""
<div style="
background: rgba(10, 10, 26, 0.6);
border: 1px solid rgba(124, 58, 237, 0.3);
border-radius: 16px;
padding: 20px;
height: 100%;
display: flex;
flex-direction: column;
">
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; padding: 10px; background: rgba(124, 58, 237, 0.1); border-radius: 8px;">
<div style="font-size: 16px; color: #cbd5e1; font-weight: 600;">Model</div>
<div style="font-size: 18px; font-weight: 700; color: #22d3ee;">{model}</div>
</div>
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; padding: 10px; background: rgba(124, 58, 237, 0.1); border-radius: 8px;">
<div style="font-size: 16px; color: #cbd5e1; font-weight: 600;">Dimensions</div>
<div style="font-size: 18px; font-weight: 700; color: #22d3ee;">{dim}</div>
</div>
<div style="padding: 10px; background: rgba(124, 58, 237, 0.1); border-radius: 8px;">
<div style="font-size: 16px; color: #cbd5e1; font-weight: 600; margin-bottom: 8px;">Preview</div>
<div style="font-size: 14px; font-family: monospace; color: #22d3ee; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">
[{preview_str}]
</div>
</div>
</div>
"""
def create_compare_empty():
    """Return the placeholder card shown before two voices were compared.

    Static HTML: a dimmed icon made of vertical bars on both sides of a
    short horizontal line, above a one-line hint.
    """
    placeholder_html = """
<div style="
background: rgba(10, 10, 26, 0.4);
border: 1px solid rgba(124, 58, 237, 0.2);
border-radius: 16px;
padding: 30px 20px;
text-align: center;
height: 100%;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
">
<div style="margin-bottom: 12px; opacity: 0.5;">
<svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
<path d="M2 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
<path d="M5 8V16" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
<path d="M8 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
<path d="M22 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
<path d="M19 7V17" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
<path d="M16 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
<path d="M10 12H14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/>
</svg>
</div>
<div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
Upload two audio files to compare voices
</div>
</div>
"""
    return placeholder_html
def create_compare_visualization(result):
    """Voice comparison visualization with similarity score

    Args:
        result: Mapping whose "similarity" entry is read as a 0-1 fraction.
            Missing, ``None`` or non-numeric values count as 0; values
            outside 0-1 are clamped.

    Returns:
        HTML string: a ring gauge (CSS conic gradient) filled in proportion
        to the similarity, with the percentage in the centre.
    """
    try:
        similarity = float(result.get("similarity", 0))
    except (TypeError, ValueError):
        similarity = 0.0
    if math.isnan(similarity):
        similarity = 0.0
    # Clamp before converting: a value outside 0-1 would otherwise produce a
    # gradient angle outside 0-360 degrees.
    similarity = min(1.0, max(0.0, similarity))

    # Convert similarity to percentage. round() instead of int(): 0.57 * 100
    # is 56.99999999999999 in floating point and must display as 57, not 56.
    similarity_pct = round(similarity * 100)
    # Gauge sweep in degrees (1% == 3.6 degrees), fixed to one decimal so no
    # floating-point noise such as 205.20000000000002 reaches the CSS.
    sweep = f"{similarity_pct * 3.6:.1f}"

    # Color based on similarity - Purple theme matching VOICE SIMILARITY
    if similarity_pct >= 80:
        color = "#c084fc"  # Light purple (high score)
    elif similarity_pct >= 60:
        color = "#a855f7"  # Medium purple (medium score)
    else:
        color = "#7c3aed"  # Dark purple (low score)

    return f"""
<div style="
background: rgba(10, 10, 26, 0.6);
border: 1px solid rgba(124, 58, 237, 0.3);
border-radius: 16px;
padding: 20px;
height: 100%;
display: flex;
align-items: flex-end;
justify-content: center;
padding-bottom: 40px;
">
<div style="
width: 160px;
height: 160px;
border-radius: 50%;
background: conic-gradient({color} 0deg {sweep}deg, rgba(124, 58, 237, 0.2) {sweep}deg 360deg);
display: flex;
align-items: center;
justify-content: center;
">
<div style="
width: 130px;
height: 130px;
border-radius: 50%;
background: rgba(10, 10, 26, 0.9);
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
">
<span style="font-size: 40px; font-weight: 700; color: {color};">{similarity_pct}</span>
<span style="font-size: 11px; color: #a5b4fc; letter-spacing: 0.5px;">SIMILARITY</span>
</div>
</div>
</div>
"""
def create_similarity_empty():
    """Return the placeholder card displayed before a similarity analysis has run.

    The markup is constant: a dimmed radar-style SVG icon centred above a
    one-line hint asking the user to upload audio files.
    """
    # No interpolation happens here, so a plain (non-f) string is used.
    placeholder_html = """
    <div style="
        background: rgba(10, 10, 26, 0.4);
        border: 1px solid rgba(124, 58, 237, 0.2);
        border-radius: 16px;
        padding: 30px 20px;
        text-align: center;
        height: 100%;
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
    ">
        <div style="margin-bottom: 12px; opacity: 0.5;">
            <svg width="48" height="48" viewBox="0 0 24 24" fill="none" style="margin: 0 auto; display: block;">
                <circle cx="12" cy="12" r="9" stroke="#A855F7" stroke-width="1" opacity="0.3"/>
                <path d="M12 5L18 9L16.5 18H7.5L6 9L12 5Z" fill="#A855F7" fill-opacity="0.4" stroke="#A855F7" stroke-width="2" stroke-linejoin="round"/>
                <circle cx="12" cy="5" r="1.5" fill="#A855F7"/>
            </svg>
        </div>
        <div style="color: #a5b4fc; font-size: 12px; line-height: 1.5;">
            Upload audio files for comprehensive similarity analysis
        </div>
    </div>
    """
    return placeholder_html
def _radar_score(value):
    """Coerce a raw score to a number clamped to the 0-100 chart range.

    ``None``, non-numeric and NaN inputs become 0. Plain ints are returned
    unchanged (when in range) so that 85 is still rendered as "85", not "85.0".
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return 0
    if math.isnan(number) or number <= 0:
        return 0
    if number >= 100:
        return 100
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    return number


def _radar_point(center_x, center_y, distance, angle_deg):
    """Return the (x, y) point `distance` away from the centre at `angle_deg`."""
    angle_rad = math.radians(angle_deg)
    return (
        center_x + distance * math.cos(angle_rad),
        center_y + distance * math.sin(angle_rad),
    )


def create_similarity_visualization(result):
    """Render a similarity analysis as an overall donut gauge plus a radar chart.

    Args:
        result: Mapping read for the keys "overall", "pronunciation",
            "transcript", "pitch", "rhythm" and "energy". Each value is
            treated as a score on a 0-100 scale; missing, ``None`` or
            non-numeric values are drawn as 0 and out-of-range values are
            clamped, so every vertex stays inside the chart and the gauge
            arc stays within 0-360deg.

    Returns:
        An HTML string with the overall-score donut on the left and a
        five-axis SVG radar chart on the right. The numbers printed in the
        markup are the clamped scores.
    """
    overall = _radar_score(result.get("overall", 0))
    pronunciation = _radar_score(result.get("pronunciation", 0))
    transcript = _radar_score(result.get("transcript", 0))
    pitch = _radar_score(result.get("pitch", 0))
    rhythm = _radar_score(result.get("rhythm", 0))
    energy = _radar_score(result.get("energy", 0))

    # Color based on overall score - Purple theme
    if overall >= 80:
        color = "#c084fc"  # Light purple (high score)
    elif overall >= 60:
        color = "#a855f7"  # Medium purple (medium score)
    else:
        color = "#7c3aed"  # Dark purple (low score)

    # Radar chart geometry, in the 300x300 SVG user space.
    center_x, center_y = 150, 150
    radius = 110

    # Five axes spaced 72 degrees apart. Angles are shifted by -90 degrees so
    # the first axis points straight up (SVG's y axis grows downwards).
    metrics = [
        ("Pronunciation", pronunciation, -90),  # top
        ("Transcript", transcript, -18),        # top-right
        ("Pitch", pitch, 54),                   # bottom-right
        ("Energy", energy, 126),                # bottom-left
        ("Rhythm", rhythm, 198),                # top-left
    ]

    def ring_points(distance):
        """Space-separated "x,y" vertices of the pentagon at `distance`."""
        return " ".join(
            "{:.2f},{:.2f}".format(
                *_radar_point(center_x, center_y, distance, angle_deg)
            )
            for _, _, angle_deg in metrics
        )

    # Background concentric pentagons (20, 40, 60, 80, 100)
    background_pentagons = "".join(
        f'<polygon points="{ring_points(radius * scale)}" fill="none" '
        f'stroke="rgba(124, 58, 237, 0.15)" stroke-width="1"/>'
        for scale in [0.2, 0.4, 0.6, 0.8, 1.0]
    )

    # Axis lines from center to vertices
    axis_parts = []
    for _, _, angle_deg in metrics:
        x, y = _radar_point(center_x, center_y, radius, angle_deg)
        axis_parts.append(
            f'<line x1="{center_x}" y1="{center_y}" x2="{x:.2f}" y2="{y:.2f}" '
            f'stroke="rgba(124, 58, 237, 0.3)" stroke-width="1"/>'
        )
    axis_lines = "".join(axis_parts)

    # Data polygon vertices: each score scales its axis length linearly.
    data_coords = [
        _radar_point(center_x, center_y, (value / 100) * radius, angle_deg)
        for _, value, angle_deg in metrics
    ]
    data_points = " ".join(f"{x:.2f},{y:.2f}" for x, y in data_coords)
    data_markers = "".join(
        f'<circle cx="{x:.2f}" cy="{y:.2f}" r="4" fill="#a855f7"/>'
        for x, y in data_coords
    )

    # Labels sit just outside the outermost pentagon.
    label_radius = radius + 25
    label_parts = []
    for label, value, angle_deg in metrics:
        x, y = _radar_point(center_x, center_y, label_radius, angle_deg)
        label_parts.append(
            f'''<text x="{x:.2f}" y="{y:.2f}" text-anchor="middle" dominant-baseline="middle" fill="#a5b4fc" font-size="11" font-weight="600">
{label}
<tspan x="{x:.2f}" dy="12" fill="#a855f7" font-size="13" font-weight="700">{value}</tspan>
</text>'''
        )
    labels = "".join(label_parts)

    # One point of score is 3.6 degrees of the donut.
    sweep_deg = overall * 3.6

    return f"""
    <div style="
        background: rgba(10, 10, 26, 0.6);
        border: 1px solid rgba(124, 58, 237, 0.3);
        border-radius: 16px;
        padding: 20px;
        display: flex;
        align-items: center;
        gap: 30px;
    ">
        <!-- Left: Overall Score Donut -->
        <div style="flex: 1; display: flex; align-items: center; justify-content: center;">
            <div style="
                width: 160px;
                height: 160px;
                border-radius: 50%;
                background: conic-gradient({color} 0deg {sweep_deg:.1f}deg, rgba(124, 58, 237, 0.2) {sweep_deg:.1f}deg 360deg);
                display: flex;
                align-items: center;
                justify-content: center;
            ">
                <div style="
                    width: 130px;
                    height: 130px;
                    border-radius: 50%;
                    background: rgba(10, 10, 26, 0.9);
                    display: flex;
                    flex-direction: column;
                    align-items: center;
                    justify-content: center;
                ">
                    <span style="font-size: 40px; font-weight: 700; color: {color};">{overall}</span>
                    <span style="font-size: 11px; color: #a5b4fc; letter-spacing: 0.5px;">OVERALL</span>
                </div>
            </div>
        </div>
        <!-- Right: Radar Chart -->
        <div style="flex: 1; display: flex; align-items: center; justify-content: center;">
            <svg width="300" height="300" viewBox="0 0 300 300">
                <!-- Background pentagons -->
                {background_pentagons}
                <!-- Axis lines -->
                {axis_lines}
                <!-- Data polygon -->
                <polygon points="{data_points}"
                    fill="rgba(124, 58, 237, 0.3)"
                    stroke="#a855f7"
                    stroke-width="2"/>
                <!-- Data points -->
                {data_markers}
                <!-- Labels -->
                {labels}
            </svg>
        </div>
    </div>
    """
| # Clean audio functions removed - using gr.Audio component directly | |
| # ============================================================================ | |
| # Gradio Interface with Dark Theme | |
| # ============================================================================ | |
| custom_css = """ | |
| /* ===== DARK THEME STYLING (CSS-ONLY) ===== */ | |
| /* This CSS forces dark mode appearance regardless of system/Gradio theme */ | |
| /* All colors are SOLID (not rgba/transparent) to ensure consistent appearance */ | |
| :root { | |
| color-scheme: dark !important; | |
| --body-background-fill: #0a0a1a !important; | |
| --background-fill-primary: #0d0d1a !important; | |
| --background-fill-secondary: #12122a !important; | |
| --block-background-fill: #0d0d1a !important; | |
| --input-background-fill: #1a1a35 !important; | |
| --body-text-color: #e0e7ff !important; | |
| --block-title-text-color: #a5b4fc !important; | |
| --block-label-text-color: #a5b4fc !important; | |
| --input-text-color: #e0e7ff !important; | |
| --neutral-50: #0a0a1a !important; | |
| --neutral-100: #0d0d1a !important; | |
| --neutral-200: #12122a !important; | |
| --neutral-300: #1a1a35 !important; | |
| --neutral-400: #2d2d4a !important; | |
| --neutral-500: #4a4a6a !important; | |
| --neutral-600: #7c7c9a !important; | |
| --neutral-700: #a5b4fc !important; | |
| --neutral-800: #c7d2fe !important; | |
| --neutral-900: #e0e7ff !important; | |
| --neutral-950: #ffffff !important; | |
| } | |
| /* Force dark mode on html and body */ | |
| html, body { | |
| background: #0a0a1a !important; | |
| background-color: #0a0a1a !important; | |
| color: #e0e7ff !important; | |
| } | |
| /* ===== GLOBAL STYLES ===== */ | |
| body { | |
| background: linear-gradient(180deg, #0a0a1a 0%, #0f0f23 100%) !important; | |
| background-color: #0a0a1a !important; | |
| color: #ffffff !important; | |
| font-family: system-ui, -apple-system, sans-serif; | |
| } | |
| /* Override Gradio's light mode backgrounds AND text colors */ | |
| .dark, .light, [data-theme="light"], [data-theme="dark"], | |
| html[data-theme="light"], html[data-theme="dark"], | |
| body.light, body.dark { | |
| --body-background-fill: #0a0a1a !important; | |
| --background-fill-primary: #0d0d1a !important; | |
| --background-fill-secondary: #12122a !important; | |
| --block-background-fill: #0d0d1a !important; | |
| --input-background-fill: #1a1a35 !important; | |
| --body-text-color: #e0e7ff !important; | |
| --block-title-text-color: #a5b4fc !important; | |
| --block-label-text-color: #a5b4fc !important; | |
| --input-text-color: #e0e7ff !important; | |
| --neutral-50: #0a0a1a !important; | |
| --neutral-100: #0d0d1a !important; | |
| --neutral-200: #12122a !important; | |
| --neutral-300: #1a1a35 !important; | |
| --neutral-400: #2d2d4a !important; | |
| --neutral-500: #4a4a6a !important; | |
| --neutral-600: #7c7c9a !important; | |
| --neutral-700: #a5b4fc !important; | |
| --neutral-800: #c7d2fe !important; | |
| --neutral-900: #e0e7ff !important; | |
| --neutral-950: #ffffff !important; | |
| color: #e0e7ff !important; | |
| background: #0a0a1a !important; | |
| background-color: #0a0a1a !important; | |
| } | |
| .gradio-container { | |
| max-width: 100% !important; | |
| width: 100% !important; | |
| padding: 0px 16px 20px 16px !important; | |
| background: #0a0a1a !important; | |
| background-color: #0a0a1a !important; | |
| margin: 0 !important; | |
| } | |
| .gradio-container > .main, | |
| .gradio-container .main, | |
| .main { | |
| max-width: 100% !important; | |
| width: 100% !important; | |
| padding-left: 0 !important; | |
| padding-right: 0 !important; | |
| margin: 0 auto !important; | |
| } | |
| .contain { | |
| max-width: 100% !important; | |
| padding: 0 !important; | |
| } | |
| /* Force full width on all Gradio internal containers */ | |
| .gradio-container > div, | |
| .gradio-container > div > div, | |
| #component-0, | |
| .wrap, | |
| .app, | |
| .contain, | |
| footer, | |
| .gradio-row, | |
| .gradio-column, | |
| .svelte-1gfkn6j, | |
| [class*="svelte-"] { | |
| max-width: 100% !important; | |
| } | |
| .gradio-row { | |
| max-width: 100% !important; | |
| width: 100% !important; | |
| margin: 0 !important; | |
| padding: 0 !important; | |
| } | |
| /* ===== HEADER (FLOATING, NO CARD) ===== */ | |
| .header-main { | |
| display: flex; | |
| justify-content: space-between; | |
| align-items: center; | |
| margin-bottom: 0; | |
| padding: 0; | |
| } | |
| .header-left { | |
| display: flex; | |
| align-items: center; | |
| gap: 16px; | |
| } | |
| .header-icon { | |
| font-size: 48px; | |
| filter: drop-shadow(0 4px 12px rgba(99, 102, 241, 0.6)); | |
| } | |
| .header-title { | |
| font-size: 42px; | |
| font-weight: 900; | |
| color: #e0e7ff; | |
| margin: 0; | |
| letter-spacing: -0.5px; | |
| } | |
| .header-subtitle { | |
| color: #c7d2fe; | |
| font-size: 20px; | |
| font-weight: 700; | |
| margin-left: 6px; | |
| } | |
| /* ===== DOCS BUTTON ===== */ | |
| .docs-button { | |
| display: flex; | |
| align-items: center; | |
| gap: 8px; | |
| padding: 10px 20px; | |
| background: linear-gradient(135deg, rgba(124, 58, 237, 0.3), rgba(99, 102, 241, 0.3)); | |
| border: 1px solid rgba(124, 58, 237, 0.5); | |
| border-radius: 12px; | |
| color: #e0e7ff; | |
| font-size: 14px; | |
| font-weight: 600; | |
| cursor: pointer; | |
| transition: all 0.3s ease; | |
| text-transform: uppercase; | |
| letter-spacing: 0.5px; | |
| } | |
| .docs-button:hover { | |
| background: linear-gradient(135deg, rgba(124, 58, 237, 0.5), rgba(99, 102, 241, 0.5)); | |
| border-color: rgba(124, 58, 237, 0.8); | |
| transform: translateY(-2px); | |
| box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4); | |
| } | |
| .docs-button svg { | |
| width: 18px; | |
| height: 18px; | |
| } | |
| /* ===== DOCS MODAL ===== */ | |
| .docs-modal-overlay { | |
| display: none; | |
| position: fixed !important; | |
| inset: 0 !important; | |
| width: 100vw !important; | |
| height: 100vh !important; | |
| background: rgba(0, 0, 0, 0.85) !important; | |
| backdrop-filter: blur(10px) !important; | |
| z-index: 99999 !important; | |
| justify-content: center !important; | |
| align-items: flex-start !important; | |
| padding-top: 60px !important; | |
| box-sizing: border-box !important; | |
| /* Modal positioned near top of viewport */ | |
| overflow: hidden !important; | |
| } | |
| .docs-modal-overlay.active { | |
| display: flex !important; | |
| } | |
| .docs-modal { | |
| position: relative !important; | |
| background: #0d0d1a !important; | |
| border: 2px solid #7c3aed !important; | |
| border-radius: 20px !important; | |
| width: calc(100vw - 80px) !important; | |
| max-width: 1200px !important; | |
| max-height: 55vh !important; | |
| overflow: hidden !important; | |
| box-shadow: 0 25px 80px rgba(0, 0, 0, 0.9) !important; | |
| /* Remove margin that could affect centering */ | |
| margin: 0 !important; | |
| /* Prevent any transform inheritance issues */ | |
| transform: none !important; | |
| } | |
| .docs-modal-header { | |
| display: flex !important; | |
| justify-content: space-between !important; | |
| align-items: center !important; | |
| padding: 20px 24px !important; | |
| border-bottom: 2px solid #7c3aed !important; | |
| background: #1a1a2e !important; | |
| } | |
| .docs-modal-title { | |
| font-size: 20px; | |
| font-weight: 700; | |
| color: #e0e7ff; | |
| display: flex; | |
| align-items: center; | |
| gap: 10px; | |
| } | |
| .docs-modal-close { | |
| background: rgba(124, 58, 237, 0.3); | |
| border: 2px solid rgba(124, 58, 237, 0.5); | |
| border-radius: 12px; | |
| color: #e0e7ff; | |
| font-size: 28px; | |
| font-weight: 300; | |
| cursor: pointer; | |
| padding: 4px 14px; | |
| line-height: 1; | |
| transition: all 0.2s; | |
| } | |
| .docs-modal-close:hover { | |
| background: rgba(124, 58, 237, 0.4); | |
| border-color: rgba(124, 58, 237, 0.6); | |
| } | |
| .docs-modal-content { | |
| padding: 24px !important; | |
| overflow-y: auto !important; | |
| max-height: calc(55vh - 80px) !important; | |
| color: #c7d2fe !important; | |
| font-size: 15px !important; | |
| line-height: 1.7 !important; | |
| background: #0d0d1a !important; | |
| } | |
| .docs-modal-content h1 { font-size: 28px; color: #e0e7ff; margin: 0 0 16px 0; padding-bottom: 12px; border-bottom: 2px solid rgba(124, 58, 237, 0.3); } | |
| .docs-modal-content h2 { font-size: 22px; color: #e0e7ff; margin: 24px 0 12px 0; } | |
| .docs-modal-content h3 { font-size: 18px; color: #a5b4fc; margin: 20px 0 10px 0; } | |
| .docs-modal-content p { margin: 12px 0; } | |
| .docs-modal-content ul, .docs-modal-content ol { margin: 12px 0; padding-left: 24px; } | |
| .docs-modal-content li { margin: 6px 0; } | |
| .docs-modal-content code { background: rgba(124, 58, 237, 0.2); padding: 2px 6px; border-radius: 4px; font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; font-size: 13px; color: #c4b5fd; } | |
| .docs-modal-content pre { background: rgba(0, 0, 0, 0.4); border: 1px solid rgba(124, 58, 237, 0.2); border-radius: 12px; padding: 16px; overflow-x: auto; margin: 16px 0; white-space: pre; } | |
| .docs-modal-content pre code { background: transparent; padding: 0; color: #a5b4fc; white-space: pre; display: block; } | |
| .docs-modal-content table { width: 100%; border-collapse: collapse; margin: 16px 0; } | |
| .docs-modal-content th, .docs-modal-content td { padding: 10px 12px; text-align: left; border: 1px solid rgba(124, 58, 237, 0.2); } | |
| .docs-modal-content th { background: rgba(124, 58, 237, 0.15); color: #e0e7ff; font-weight: 600; } | |
| .docs-modal-content td { color: #c7d2fe; } | |
| .docs-modal-content a { color: #a78bfa; text-decoration: none; } | |
| .docs-modal-content a:hover { text-decoration: underline; } | |
| .docs-modal-content strong { color: #e0e7ff; } | |
| .docs-modal-content img { max-width: 100%; max-height: 400px; height: auto; border-radius: 8px; margin: 12px 0; object-fit: contain; } | |
| /* ===== CARD STYLES ===== */ | |
| .card { | |
| background: #0f0f23 !important; | |
| background-color: #0f0f23 !important; | |
| border: 1px solid #3d2a6b !important; | |
| border-radius: 20px; | |
| padding: 30px; | |
| box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4); | |
| transition: all 0.3s ease; | |
| height: 100%; | |
| display: flex; | |
| flex-direction: column; | |
| } | |
| .card:hover { | |
| border-color: #5b3d99 !important; | |
| box-shadow: 0 12px 48px rgba(124, 58, 237, 0.3); | |
| } | |
| /* Ensure columns in top row have equal height */ | |
| .gradio-row:first-of-type .gradio-column { | |
| display: flex !important; | |
| flex-direction: column !important; | |
| } | |
| .gradio-row:first-of-type .gradio-column > div { | |
| flex: 1 !important; | |
| display: flex !important; | |
| flex-direction: column !important; | |
| } | |
| /* Set minimum height for top row cards */ | |
| .gradio-row:first-of-type .card { | |
| min-height: 550px; | |
| } | |
| .card-title { | |
| font-size: 16px; | |
| font-weight: 700; | |
| color: #a5b4fc; | |
| text-transform: uppercase; | |
| letter-spacing: 1px; | |
| margin-bottom: 20px; | |
| display: flex; | |
| align-items: center; | |
| } | |
| /* ===== ROW SPACING ===== */ | |
| .gradio-row { | |
| gap: 24px !important; | |
| } | |
| /* ===== QUICK START - CODE BLOCK (TERMINAL/IDE STYLE) ===== */ | |
| .terminal-window { | |
| background: #1a1b26; | |
| border: 1px solid rgba(124, 58, 237, 0.3); | |
| border-radius: 12px; | |
| overflow: hidden; | |
| margin-bottom: 16px; | |
| box-shadow: 0 8px 32px rgba(0, 0, 0, 0.6); | |
| } | |
| .terminal-header { | |
| background: #16161e; | |
| padding: 12px 16px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| border-bottom: 1px solid rgba(124, 58, 237, 0.2); | |
| } | |
| .terminal-dots { | |
| display: flex; | |
| gap: 8px; | |
| } | |
| .terminal-dot { | |
| width: 12px; | |
| height: 12px; | |
| border-radius: 50%; | |
| } | |
| .terminal-dot.red { | |
| background: #ff5f56 !important; | |
| box-shadow: 0 0 8px rgba(255, 95, 86, 0.8) !important; | |
| } | |
| .terminal-dot.yellow { | |
| background: #ffbd2e !important; | |
| box-shadow: 0 0 8px rgba(255, 189, 46, 0.8) !important; | |
| } | |
| .terminal-dot.green { | |
| background: #27c93f !important; | |
| box-shadow: 0 0 8px rgba(39, 201, 63, 0.8) !important; | |
| } | |
| .terminal-title { | |
| font-size: 12px; | |
| color: #6b7280; | |
| font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; | |
| font-weight: 500; | |
| } | |
| .terminal-body { | |
| background: #1a1b26; | |
| padding: 0; | |
| display: flex; | |
| } | |
| .line-numbers { | |
| background: #16161e; | |
| padding: 16px 12px; | |
| border-right: 1px solid rgba(124, 58, 237, 0.15); | |
| user-select: none; | |
| text-align: right; | |
| min-width: 48px; | |
| } | |
| .line-num { | |
| display: block; | |
| color: #4a5568; | |
| font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; | |
| font-size: 14px; | |
| line-height: 1.8; | |
| } | |
| .code-content { | |
| flex: 1; | |
| padding: 16px 20px; | |
| overflow-x: auto; | |
| } | |
| .code-line { | |
| display: block; | |
| white-space: pre; | |
| font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; | |
| font-size: 14px; | |
| line-height: 1.8; | |
| color: #a9b1d6; | |
| } | |
| .json-key { | |
| color: #7dcfff; | |
| font-weight: 500; | |
| } | |
| .json-string { | |
| color: #9ece6a; | |
| } | |
| .json-bracket { | |
| color: #bb9af7; | |
| font-weight: 600; | |
| } | |
| .json-colon { | |
| color: #c0caf5; | |
| } | |
| .json-comma { | |
| color: #c0caf5; | |
| } | |
| .copy-button { | |
| width: 100%; | |
| background: linear-gradient(135deg, #7c3aed, #6366f1) !important; | |
| border: none !important; | |
| border-radius: 12px !important; | |
| padding: 14px 24px !important; | |
| font-weight: 700 !important; | |
| font-size: 13px !important; | |
| color: white !important; | |
| text-transform: uppercase; | |
| letter-spacing: 1px; | |
| cursor: pointer; | |
| box-shadow: 0 4px 16px rgba(124, 58, 237, 0.4) !important; | |
| transition: all 0.3s ease !important; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 8px; | |
| } | |
| .copy-button:hover { | |
| transform: translateY(-2px) !important; | |
| box-shadow: 0 6px 24px rgba(124, 58, 237, 0.6) !important; | |
| } | |
| /* ===== TOOLS TABLE ===== */ | |
| .tools-table, | |
| table.tools-table, | |
| .light .tools-table, | |
| .dark .tools-table, | |
| [data-theme="light"] .tools-table, | |
| [data-theme="dark"] .tools-table { | |
| width: 100%; | |
| border-collapse: separate; | |
| border-spacing: 0; | |
| background: #0d0d1f !important; | |
| background-color: #0d0d1f !important; | |
| border-radius: 12px; | |
| overflow: hidden; | |
| border: 1px solid #3d2a6b !important; | |
| margin-bottom: 0; | |
| flex: 1; | |
| color: #cbd5e1 !important; | |
| } | |
| .tools-table th, | |
| table.tools-table th, | |
| .light .tools-table th, | |
| .dark .tools-table th, | |
| [data-theme="light"] .tools-table th, | |
| [data-theme="dark"] .tools-table th { | |
| background: #1f1545 !important; | |
| background-color: #1f1545 !important; | |
| color: #a5b4fc !important; | |
| font-weight: 700; | |
| font-size: 16px; | |
| text-transform: uppercase; | |
| letter-spacing: 1.5px; | |
| padding: 20px 14px; | |
| text-align: left; | |
| border-bottom: 1px solid #3d2a6b !important; | |
| } | |
| .tools-table td, | |
| table.tools-table td, | |
| .light .tools-table td, | |
| .dark .tools-table td, | |
| [data-theme="light"] .tools-table td, | |
| [data-theme="dark"] .tools-table td { | |
| padding: 20px 14px; | |
| color: #cbd5e1 !important; | |
| background: #0d0d1f !important; | |
| background-color: #0d0d1f !important; | |
| font-size: 16px; | |
| border-bottom: 1px solid #1a1535 !important; | |
| } | |
| .tools-table tr:last-child td { | |
| border-bottom: none; | |
| } | |
| .tools-table tr:hover, | |
| .tools-table tr:hover td { | |
| background: #1a1540 !important; | |
| background-color: #1a1540 !important; | |
| } | |
| .tool-name, | |
| .light .tool-name, | |
| .dark .tool-name, | |
| [data-theme="light"] .tool-name, | |
| [data-theme="dark"] .tool-name { | |
| color: #22d3ee !important; | |
| font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; | |
| font-weight: 600; | |
| font-size: 13px; | |
| vertical-align: middle; | |
| } | |
| /* ===== COMPOSITE SECTION ===== */ | |
| .composite-section, | |
| .light .composite-section, | |
| .dark .composite-section, | |
| [data-theme="light"] .composite-section, | |
| [data-theme="dark"] .composite-section { | |
| background: #0d0d1f !important; | |
| background-color: #0d0d1f !important; | |
| border: 1px solid #3d2a6b !important; | |
| border-radius: 12px; | |
| padding: 20px; | |
| color: #cbd5e1 !important; | |
| } | |
| .composite-header, | |
| .light .composite-header, | |
| .dark .composite-header, | |
| [data-theme="light"] .composite-header, | |
| [data-theme="dark"] .composite-header { | |
| font-size: 11px; | |
| font-weight: 700; | |
| color: #a5b4fc !important; | |
| text-transform: uppercase; | |
| letter-spacing: 1.5px; | |
| margin-bottom: 12px; | |
| } | |
| .composite-content, | |
| .light .composite-content, | |
| .dark .composite-content, | |
| [data-theme="light"] .composite-content, | |
| [data-theme="dark"] .composite-content { | |
| color: #cbd5e1 !important; | |
| font-size: 12px; | |
| line-height: 1.6; | |
| margin-bottom: 16px; | |
| } | |
| .try-demo-button { | |
| width: 100%; | |
| background: transparent !important; | |
| border: 2px solid #7c3aed !important; | |
| border-radius: 12px !important; | |
| padding: 12px 24px !important; | |
| font-weight: 700 !important; | |
| font-size: 12px !important; | |
| color: #7c3aed !important; | |
| text-transform: uppercase; | |
| letter-spacing: 1px; | |
| cursor: pointer; | |
| transition: all 0.3s ease !important; | |
| } | |
| .try-demo-button:hover { | |
| background: rgba(124, 58, 237, 0.1) !important; | |
| border-color: #7c3aed !important; | |
| color: #8b5cf6 !important; | |
| } | |
| /* ===== BUTTONS ===== */ | |
| button[variant="primary"] { | |
| background: linear-gradient(135deg, #7c3aed, #6366f1) !important; | |
| border: none !important; | |
| border-radius: 12px !important; | |
| padding: 14px 32px !important; | |
| font-weight: 700 !important; | |
| font-size: 14px !important; | |
| color: white !important; | |
| box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4) !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| button[variant="primary"]:hover { | |
| transform: translateY(-2px) !important; | |
| box-shadow: 0 8px 32px rgba(124, 58, 237, 0.6) !important; | |
| } | |
| /* ===== AUDIO COMPONENT ===== */ | |
| .gradio-audio { | |
| background: rgba(30, 27, 75, 0.6) !important; | |
| border: 1px solid rgba(124, 58, 237, 0.3) !important; | |
| border-radius: 12px !important; | |
| } | |
| /* ===== TEXTBOX ===== */ | |
| textarea { | |
| background: rgba(30, 27, 75, 0.6) !important; | |
| border: 1px solid rgba(124, 58, 237, 0.3) !important; | |
| border-radius: 12px !important; | |
| color: #e0e7ff !important; | |
| font-size: 13px !important; | |
| } | |
| /* ===== DROPDOWN ===== */ | |
| select { | |
| background: rgba(30, 27, 75, 0.6) !important; | |
| border: 1px solid rgba(124, 58, 237, 0.3) !important; | |
| border-radius: 12px !important; | |
| color: #e0e7ff !important; | |
| } | |
| /* ===== LABELS ===== */ | |
| label { | |
| color: #a5b4fc !important; | |
| font-weight: 600 !important; | |
| font-size: 12px !important; | |
| text-transform: uppercase; | |
| letter-spacing: 0.5px; | |
| } | |
| /* ===== HTML OUTPUT ===== */ | |
| .gradio-html { | |
| background: transparent !important; | |
| border: none !important; | |
| } | |
| /* ===== DEMO ROW LAYOUT ===== */ | |
| .demo-row { | |
| display: flex !important; | |
| gap: 24px !important; | |
| align-items: stretch !important; | |
| } | |
| /* Only apply card style to the outer column (demo-card-column) */ | |
| .demo-card-column { | |
| display: flex !important; | |
| flex-direction: column !important; | |
| height: 700px !important; | |
| min-height: 700px !important; | |
| max-height: 700px !important; | |
| background: rgba(15, 15, 35, 0.8) !important; | |
| backdrop-filter: blur(20px) !important; | |
| border: 1px solid rgba(124, 58, 237, 0.3) !important; | |
| border-radius: 20px !important; | |
| padding: 4px 4px 2px 4px !important; | |
| box-shadow: 0 8px 32px rgba(0, 0, 0, 0.4) !important; | |
| transition: all 0.3s ease !important; | |
| gap: 2px !important; | |
| overflow-y: auto !important; | |
| } | |
| .demo-card-column:hover { | |
| border-color: rgba(124, 58, 237, 0.5) !important; | |
| box-shadow: 0 12px 48px rgba(124, 58, 237, 0.3) !important; | |
| } | |
| /* Remove any border/background from inner elements */ | |
| .demo-card-column > div, | |
| .demo-card-column > div > div, | |
| .demo-row > div > div { | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| padding: 0 !important; | |
| border-radius: 0 !important; | |
| } | |
| /* Remove card background from inner HTML - we use column background instead */ | |
| .demo-row .card { | |
| background: transparent !important; | |
| backdrop-filter: none !important; | |
| border: none !important; | |
| border-radius: 0 !important; | |
| padding: 0 !important; | |
| box-shadow: none !important; | |
| margin-bottom: 12px !important; | |
| } | |
| .demo-row .card:hover { | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| /* Ensure all inner components have transparent background */ | |
| .demo-row .gradio-audio, | |
| .demo-row .gradio-dropdown, | |
| .demo-row .gradio-textbox, | |
| .demo-row .gradio-button { | |
| background: transparent !important; | |
| } | |
| /* Create a wrapper for input elements (flex container) */ | |
| .demo-card-column > div:not(:last-child) { | |
| flex: 0 0 auto !important; | |
| } | |
| /* Adjust spacing for input elements in demo cards */ | |
| .demo-row .gradio-audio { | |
| margin-top: 6px !important; | |
| margin-bottom: 0px !important; | |
| max-height: 50px !important; | |
| min-height: 40px !important; | |
| height: 45px !important; | |
| } | |
| /* Target all child elements of audio component */ | |
| .demo-row .gradio-audio > div, | |
| .demo-row .gradio-audio .wrap, | |
| .demo-row .gradio-audio .upload-container, | |
| .demo-row .gradio-audio .record-container, | |
| .demo-row .gradio-audio * { | |
| max-height: 50px !important; | |
| } | |
| /* Audio player specific height reduction */ | |
| .demo-row .gradio-audio audio { | |
| height: 26px !important; | |
| max-height: 26px !important; | |
| min-height: 26px !important; | |
| } | |
| /* Upload/record button container height */ | |
| .demo-row .gradio-audio .upload-container, | |
| .demo-row .gradio-audio .record-container { | |
| min-height: 38px !important; | |
| max-height: 38px !important; | |
| padding: 4px !important; | |
| } | |
| /* Audio component buttons */ | |
| .demo-row .gradio-audio button { | |
| height: 28px !important; | |
| min-height: 28px !important; | |
| max-height: 28px !important; | |
| padding: 4px 10px !important; | |
| font-size: 10px !important; | |
| } | |
| /* Hide text nodes in audio upload area - keep icons */ | |
| .demo-row .gradio-audio .upload-text { | |
| display: none !important; | |
| } | |
| .demo-row .gradio-audio .placeholder { | |
| display: none !important; | |
| } | |
| .demo-row .gradio-audio span:not(:has(svg)) { | |
| font-size: 0 !important; | |
| } | |
| .demo-row .gradio-audio p { | |
| display: none !important; | |
| } | |
| /* Hide "Drop Audio Here", "- or -", "Click to Upload" text */ | |
| .demo-row .gradio-audio .upload-container span, | |
| .demo-row .gradio-audio .upload-container p { | |
| font-size: 0 !important; | |
| line-height: 0 !important; | |
| } | |
| /* Keep SVG icons visible */ | |
| .demo-row .gradio-audio svg { | |
| font-size: initial !important; | |
| } | |
| /* ADDITIONAL METHODS: Hide all text in audio upload area */ | |
| .demo-row .gradio-audio label { | |
| font-size: 0 !important; | |
| } | |
| .demo-row .gradio-audio label span:not(:has(svg)) { | |
| display: none !important; | |
| } | |
| .demo-row .gradio-audio .file-preview { | |
| font-size: 0 !important; | |
| } | |
| .demo-row .gradio-audio .file-preview span { | |
| font-size: 0 !important; | |
| display: none !important; | |
| } | |
| .demo-row .gradio-audio [data-testid="upload-text"], | |
| .demo-row .gradio-audio [data-testid="file-preview-text"], | |
| .demo-row .gradio-audio .upload-text, | |
| .demo-row .gradio-audio .file-preview-text { | |
| display: none !important; | |
| visibility: hidden !important; | |
| font-size: 0 !important; | |
| } | |
| /* Target all text nodes (more aggressive) */ | |
| .demo-row .gradio-audio *:not(svg):not(path):not(circle):not(rect):not(line) { | |
| color: transparent !important; | |
| } | |
| .demo-row .gradio-audio button { | |
| color: white !important; | |
| } | |
| /* Ensure icons remain visible */ | |
| .demo-row .gradio-audio svg, | |
| .demo-row .gradio-audio svg * { | |
| color: initial !important; | |
| fill: currentColor !important; | |
| stroke: currentColor !important; | |
| } | |
| /* NUCLEAR OPTION: Hide everything in label, then show only necessary elements */ | |
| .demo-row .gradio-audio label > div > div { | |
| display: none !important; | |
| } | |
| .demo-row .gradio-audio label::before { | |
| content: '' !important; | |
| } | |
| .demo-row .gradio-audio label * { | |
| visibility: hidden !important; | |
| } | |
| .demo-row .gradio-audio label svg { | |
| visibility: visible !important; | |
| } | |
| .demo-row .gradio-audio label button { | |
| visibility: visible !important; | |
| } | |
| .demo-row .gradio-audio label audio { | |
| visibility: visible !important; | |
| } | |
| /* Force hide any text content */ | |
| .demo-row .gradio-audio label > div::after, | |
| .demo-row .gradio-audio label > div::before { | |
| content: '' !important; | |
| display: none !important; | |
| } | |
| /* Additional override for upload text elements */ | |
| .demo-row .gradio-audio [class*="upload"], | |
| .demo-row .gradio-audio [class*="placeholder"], | |
| .demo-row .gradio-audio [class*="text"] { | |
| font-size: 0 !important; | |
| line-height: 0 !important; | |
| width: 0 !important; | |
| height: 0 !important; | |
| opacity: 0 !important; | |
| visibility: hidden !important; | |
| position: absolute !important; | |
| left: -9999px !important; | |
| } | |
| /* NUCLEAR OPTION 2: Complete removal of label content */ | |
| .demo-row .gradio-audio label.block { | |
| display: none !important; | |
| } | |
| .demo-row .gradio-audio .file-upload { | |
| display: none !important; | |
| } | |
| /* Hide all direct text children */ | |
| .demo-row .gradio-audio label > span:not(:has(button)):not(:has(audio)):not(:has(svg)) { | |
| display: none !important; | |
| } | |
| /* Gradio 6.0 specific selectors - upload area */ | |
| .demo-row .gradio-audio [data-testid="upload-button"], | |
| .demo-row .gradio-audio [data-testid="file-upload"], | |
| .demo-row .gradio-audio .upload-area { | |
| display: none !important; | |
| } | |
| /* Hide all paragraph elements in audio component */ | |
| .demo-row .gradio-audio label p, | |
| .demo-row .gradio-audio label span.text, | |
| .demo-row .gradio-audio label div.text { | |
| display: none !important; | |
| } | |
| /* More aggressive text hiding - target by content */ | |
| .demo-row .gradio-audio *::before, | |
| .demo-row .gradio-audio *::after { | |
| content: '' !important; | |
| display: none !important; | |
| } | |
| /* Make sure only buttons and audio players are visible */ | |
| .demo-row .gradio-audio > label > div > div:not(:has(button)):not(:has(audio)) { | |
| display: none !important; | |
| } | |
| /* Gradio Blocks specific - Hide wrapper divs that contain text */ | |
| .demo-row .gradio-audio .wrap > div:not(:has(button)):not(:has(audio)):not(:has(svg)) { | |
| display: none !important; | |
| } | |
| /* Override for Gradio 6.x structure */ | |
| .demo-row .gradio-audio [class*="svelte-"] span:not(:has(svg)):not(:has(button)) { | |
| display: none !important; | |
| } | |
| .demo-row .gradio-dropdown, | |
| .demo-row .gradio-textbox { | |
| margin-bottom: 2px !important; | |
| } | |
| .demo-row .gradio-row { | |
| margin-bottom: 2px !important; | |
| } | |
| /* IMPORTANT: Button alignment - push buttons to bottom with margin-top: auto */ | |
| .demo-row .gradio-button { | |
| margin-top: auto !important; | |
| margin-bottom: 0px !important; | |
| flex-shrink: 0 !important; | |
| } | |
| /* Output area should not push button down - set flex: 1 */ | |
| .demo-row .gradio-html { | |
| flex: 1 !important; | |
| margin-bottom: 0 !important; | |
| display: flex !important; | |
| flex-direction: column !important; | |
| max-height: 300px !important; | |
| overflow-y: auto !important; | |
| } | |
| /* Output audio component (clean_audio_output) height limit */ | |
| .demo-row .gradio-audio[data-testid="audio-output"], | |
| .demo-row > div:last-child .gradio-audio { | |
| max-height: 120px !important; | |
| min-height: 60px !important; | |
| height: auto !important; | |
| margin-bottom: 0px !important; | |
| } | |
| /* ===== CUSTOM ACTION BUTTONS (DEMO CARDS) ===== */ | |
| .custom-action-btn, | |
| .custom-action-btn button, | |
| .custom-action-btn button[data-testid="button"], | |
| button.custom-action-btn, | |
| .demo-row .custom-action-btn, | |
| .demo-row .custom-action-btn button { | |
| width: 100% !important; | |
| min-width: 100% !important; | |
| max-width: 100% !important; | |
| background: linear-gradient(135deg, #6366f1, #7c3aed) !important; | |
| border: none !important; | |
| border-radius: 12px !important; | |
| padding: 8px 16px !important; | |
| height: 38px !important; | |
| min-height: 38px !important; | |
| max-height: 38px !important; | |
| font-weight: 700 !important; | |
| font-size: 16px !important; | |
| letter-spacing: 1.5px !important; | |
| text-transform: uppercase !important; | |
| color: white !important; | |
| box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4) !important; | |
| transition: all 0.3s ease !important; | |
| } | |
| .custom-action-btn:hover, | |
| .custom-action-btn button:hover, | |
| .custom-action-btn button[data-testid="button"]:hover, | |
| button.custom-action-btn:hover, | |
| .demo-row .custom-action-btn:hover, | |
| .demo-row .custom-action-btn button:hover { | |
| transform: translateY(-2px) !important; | |
| box-shadow: 0 8px 32px rgba(124, 58, 237, 0.6) !important; | |
| background: linear-gradient(135deg, #6366f1, #7c3aed) !important; | |
| } | |
| /* ===== DECORATIVE ELEMENTS ===== */ | |
| .diamond-decoration { | |
| position: fixed; | |
| bottom: 40px; | |
| right: 40px; | |
| width: 80px; | |
| height: 80px; | |
| border: 2px solid rgba(124, 58, 237, 0.2); | |
| transform: rotate(45deg); | |
| pointer-events: none; | |
| z-index: 1; | |
| } | |
| .star-decoration { | |
| display: none; | |
| } | |
| """ | |
| with gr.Blocks() as demo: | |
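| # Inject custom CSS and decorative elements (positioned fixed, no DOM space). | |
| # NOTE(review): browsers do not execute <script> elements that are inserted via | |
| # innerHTML, which is presumably how gr.HTML renders its value - confirm the | |
| # script below actually runs; if not, load it through Gradio's js/head option. | |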
| gr.HTML(f""" | |
| <style>{custom_css}</style> | |
| <div class="diamond-decoration"></div> | |
| <div class="star-decoration"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none"> | |
| <path d="M12 2l3.09 6.26L22 9.27l-5 4.87 1.18 6.88L12 17.77l-6.18 3.25L7 14.14 2 9.27l6.91-1.01L12 2z" fill="#a5b4fc" opacity="0.4"/> | |
| </svg> | |
| </div> | |
| <script> | |
// Force dark-mode styling in place (class/attribute tagging only, no redirect).
(function () {{
    // Tag <html> first, then <body>, as soon as the script runs.
    [document.documentElement, document.body].forEach(function (root) {{
        root.classList.add('dark');
        root.setAttribute('data-theme', 'dark');
    }});

    // Tag the Gradio container too. It is looked up on every mutation batch
    // and tagged whenever it exists without the 'dark' class.
    function tagGradioContainer() {{
        const gradioRoot = document.querySelector('.gradio-container');
        if (gradioRoot === null || gradioRoot.classList.contains('dark')) {{
            return;
        }}
        gradioRoot.classList.add('dark');
        gradioRoot.setAttribute('data-theme', 'dark');
    }}

    // Options are assigned property by property so that no literal braces
    // have to be escaped for the enclosing Python f-string.
    const watchOptions = new Object();
    watchOptions.childList = true;
    watchOptions.subtree = true;
    new MutationObserver(tagGradioContainer).observe(document.body, watchOptions);
}})();
// Strip the upload prompt text from every Audio component inside a .demo-row.
// Four passes run per component, in this order:
//   1. delete text nodes whose direct parent is neither <button> nor <audio>
//   2. hide text-like elements with inline style overrides
//   3. remove label.block elements that hold no button or audio element
//   4. empty divs that hold no button, audio or svg
function removeAudioUploadText() {{
    // Inline style overrides applied to every element matched in pass 2.
    const hiddenStyle = [
        ['display', 'none'],
        ['visibility', 'hidden'],
        ['fontSize', '0'],
        ['lineHeight', '0'],
        ['width', '0'],
        ['height', '0'],
        ['opacity', '0'],
        ['position', 'absolute'],
        ['left', '-9999px']
    ];
    const textLikeSelector =
        '[class*="upload"], [class*="placeholder"], [class*="text"], ' +
        '[data-testid*="upload"], [data-testid*="file"], ' +
        'label.block, .file-upload, p, span:not(:has(button)):not(:has(svg))';

    for (const row of document.querySelectorAll('.demo-row')) {{
        for (const audio of row.querySelectorAll('.gradio-audio')) {{
            // Pass 1: collect first, then delete, so the walker never
            // traverses a tree that is being modified under it.
            // Only the DIRECT parent is tested: text nested deeper inside a
            // button (for example in a child div or span) is still removed.
            const walker = document.createTreeWalker(audio, NodeFilter.SHOW_TEXT);
            const strayText = [];
            for (let textNode = walker.nextNode(); textNode !== null; textNode = walker.nextNode()) {{
                const ownerTag = textNode.parentElement?.tagName?.toLowerCase();
                if (ownerTag === 'button' || ownerTag === 'audio') {{
                    continue;
                }}
                strayText.push(textNode);
            }}
            for (const textNode of strayText) {{
                textNode.remove();
            }}

            // Pass 2: hide anything that looks like upload/placeholder text.
            for (const el of audio.querySelectorAll(textLikeSelector)) {{
                for (const [prop, value] of hiddenStyle) {{
                    el.style[prop] = value;
                }}
            }}

            // Pass 3: a label.block is dropped only when it wraps no controls.
            for (const label of audio.querySelectorAll('label.block')) {{
                if (label.querySelector('button, audio') === null) {{
                    label.remove();
                }}
            }}

            // Pass 4: wipe divs without controls or icons whose children are
            // all text nodes or elements. Any other child node type (such as
            // a comment node) leaves the div untouched.
            for (const div of audio.querySelectorAll('div')) {{
                if (div.querySelector('button, audio, svg') !== null) {{
                    continue;
                }}
                const holdsOtherContent = Array.from(div.childNodes).some(child =>
                    child.nodeType !== Node.TEXT_NODE &&
                    (child.nodeType !== Node.ELEMENT_NODE ||
                        child.querySelector('button, audio, svg') !== null)
                );
                if (!holdsOtherContent) {{
                    div.innerHTML = '';
                }}
            }}
        }}
    }}
}}
// Initial pass, executed as soon as the script runs.
removeAudioUploadText();

// Repeat the cleanup on every batch of DOM mutations.
const observer = new MutationObserver(function () {{
    removeAudioUploadText();
}});

// Observation of <body> (child list, whole subtree) begins after a 500 ms
// delay, intended to give Gradio time to load.
setTimeout(function () {{
    // Assigned property by property so that no literal braces have to be
    // escaped for the enclosing Python f-string.
    const observeOptions = new Object();
    observeOptions.childList = true;
    observeOptions.subtree = true;
    observer.observe(document.body, observeOptions);
}}, 500);

// One more pass once the window load event fires.
window.addEventListener('load', removeAudioUploadText);

// Poll every 500 ms to catch late renders. The timer clears itself once the
// counter exceeds 10, i.e. after the 11th run (about 5.5 seconds).
let attempts = 0;
const interval = setInterval(function () {{
    removeAudioUploadText();
    attempts += 1;
    if (attempts <= 10) {{
        return;
    }}
    clearInterval(interval);
}}, 500);
| </script> | |
| """) | |
| # ==================== HEADER (FLOATING) ==================== | |
| gr.HTML(f""" | |
| <div class="header-main"> | |
| <div class="header-left"> | |
| <span class="header-icon"> | |
| <svg width="72" height="72" viewBox="0 0 52 52" fill="none"> | |
| <defs> | |
| <linearGradient id="logoGradHeader" x1="0%" y1="0%" x2="100%" y2="100%"> | |
| <stop offset="0%" style="stop-color:#7c3aed"/> | |
| <stop offset="100%" style="stop-color:#6366f1"/> | |
| </linearGradient> | |
| </defs> | |
| <!-- Left: Microphone (rounded capsule + stand) --> | |
| <!-- Microphone capsule (rounded rect) --> | |
| <rect x="8" y="12" width="9" height="14" rx="4.5" fill="url(#logoGradHeader)"/> | |
| <!-- Microphone grill lines (horizontal detail) --> | |
| <line x1="9" y1="16" x2="14" y2="16" stroke="#000000" stroke-width="0.8" stroke-linecap="round"/> | |
| <line x1="9" y1="19.5" x2="14" y2="19.5" stroke="#000000" stroke-width="0.8" stroke-linecap="round"/> | |
| <line x1="9" y1="23" x2="14" y2="23" stroke="#000000" stroke-width="0.8" stroke-linecap="round"/> | |
| <!-- Arc stand --> | |
| <path d="M6.5 26c0 2.5 2.2 5 6 5s6-2.5 6-5" stroke="url(#logoGradHeader)" stroke-width="2" fill="none" stroke-linecap="round"/> | |
| <!-- Pole --> | |
| <rect x="11.5" y="31" width="2" height="5" fill="url(#logoGradHeader)"/> | |
| <!-- Base --> | |
| <rect x="7.5" y="36" width="9" height="2" rx="1" fill="url(#logoGradHeader)"/> | |
| <!-- Right: Audio Wave Bars (4 vertical bars with different heights) --> | |
| <rect x="28" y="18" width="3" height="16" rx="1.5" fill="url(#logoGradHeader)" opacity="0.9"/> | |
| <rect x="34" y="14" width="3" height="24" rx="1.5" fill="url(#logoGradHeader)" opacity="0.95"/> | |
| <rect x="40" y="20" width="3" height="12" rx="1.5" fill="url(#logoGradHeader)" opacity="0.85"/> | |
| <rect x="46" y="22" width="3" height="8" rx="1.5" fill="url(#logoGradHeader)" opacity="0.8"/> | |
| </svg> | |
| </span> | |
| <div> | |
| <span class="header-title">VoiceKit</span> | |
| <span class="header-subtitle">MCP Server</span> | |
| </div> | |
| </div> | |
| <button class="docs-button" onclick="document.getElementById('docsModal').classList.add('active')"> | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"> | |
| <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/> | |
| <polyline points="14 2 14 8 20 8"/> | |
| <line x1="16" y1="13" x2="8" y2="13"/> | |
| <line x1="16" y1="17" x2="8" y2="17"/> | |
| <polyline points="10 9 9 9 8 9"/> | |
| </svg> | |
| DOCS | |
| </button> | |
| </div> | |
| <!-- DOCS Modal --> | |
| <div id="docsModal" class="docs-modal-overlay" onclick="if(event.target === this) this.classList.remove('active')"> | |
| <div class="docs-modal"> | |
| <div class="docs-modal-header"> | |
| <div class="docs-modal-title"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="#a78bfa" stroke-width="2"> | |
| <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/> | |
| <polyline points="14 2 14 8 20 8"/> | |
| </svg> | |
| Documentation | |
| </div> | |
| <button class="docs-modal-close" onclick="document.getElementById('docsModal').classList.remove('active')">×</button> | |
| </div> | |
| <div class="docs-modal-content"> | |
| {readme_html} | |
| </div> | |
| </div> | |
| </div> | |
| """) | |
| # ==================== TOP ROW: QUICK START + AVAILABLE TOOLS ==================== | |
| with gr.Row(equal_height=True): | |
| # QUICK START CARD | |
| with gr.Column(scale=1): | |
| gr.HTML(""" | |
| <div class="card" style="min-height: 550px;"> | |
| <div class="card-title"> | |
| <svg width="18" height="18" viewBox="0 0 24 24" fill="#7c3aed" style="margin-right: 8px;"> | |
| <path d="M19.14 12.94c.04-.31.06-.63.06-.94 0-.31-.02-.63-.06-.94l2.03-1.58c.18-.14.23-.41.12-.61l-1.92-3.32c-.12-.22-.37-.29-.59-.22l-2.39.96c-.5-.38-1.03-.7-1.62-.94l-.36-2.54c-.04-.24-.24-.41-.48-.41h-3.84c-.24 0-.43.17-.47.41l-.36 2.54c-.59.24-1.13.57-1.62.94l-2.39-.96c-.22-.08-.47 0-.59.22L2.74 8.87c-.12.21-.08.47.12.61l2.03 1.58c-.04.31-.06.63-.06.94s.02.63.06.94l-2.03 1.58c-.18.14-.23.41-.12.61l1.92 3.32c.12.22.37.29.59.22l2.39-.96c.5.38 1.03.7 1.62.94l.36 2.54c.05.24.24.41.48.41h3.84c.24 0 .44-.17.47-.41l.36-2.54c.59-.24 1.13-.56 1.62-.94l2.39.96c.22.08.47 0 .59-.22l1.92-3.32c.12-.22.07-.47-.12-.61l-2.01-1.58zM12 15.6c-1.98 0-3.6-1.62-3.6-3.6s1.62-3.6 3.6-3.6 3.6 1.62 3.6 3.6-1.62 3.6-3.6 3.6z"/> | |
| </svg> | |
| QUICK START | |
| </div> | |
| <div class="terminal-window"> | |
| <!-- Terminal Header with Dots and Filename --> | |
| <div class="terminal-header"> | |
| <div class="terminal-dots"> | |
| <div class="terminal-dot red"></div> | |
| <div class="terminal-dot yellow"></div> | |
| <div class="terminal-dot green"></div> | |
| </div> | |
| <div class="terminal-title">claude_desktop_config.json</div> | |
| <div style="width: 60px;"></div> <!-- Spacer for center alignment --> | |
| </div> | |
| <!-- Terminal Body with Line Numbers and Code --> | |
| <div class="terminal-body"> | |
| <div class="line-numbers"> | |
| <div class="line-num">1</div> | |
| <div class="line-num">2</div> | |
| <div class="line-num">3</div> | |
| <div class="line-num">4</div> | |
| <div class="line-num">5</div> | |
| <div class="line-num">6</div> | |
| <div class="line-num">7</div> | |
| <div class="line-num">8</div> | |
| <div class="line-num">9</div> | |
| <div class="line-num">10</div> | |
| <div class="line-num">11</div> | |
| <div class="line-num">12</div> | |
| </div> | |
| <div class="code-content"> | |
| <div class="code-line"><span class="json-bracket">{</span></div> | |
| <div class="code-line"> <span class="json-key">"mcpServers"</span><span class="json-colon">:</span> <span class="json-bracket">{</span></div> | |
| <div class="code-line"> <span class="json-key">"voicekit"</span><span class="json-colon">:</span> <span class="json-bracket">{</span></div> | |
| <div class="code-line"> <span class="json-key">"command"</span><span class="json-colon">:</span> <span class="json-string">"npx"</span><span class="json-comma">,</span></div> | |
| <div class="code-line"> <span class="json-key">"args"</span><span class="json-colon">:</span> <span class="json-bracket">[</span></div> | |
| <div class="code-line"> <span class="json-string">"-y"</span><span class="json-comma">,</span></div> | |
| <div class="code-line"> <span class="json-string">"mcp-remote"</span><span class="json-comma">,</span></div> | |
| <div class="code-line"> <span class="json-string">"https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse"</span></div> | |
| <div class="code-line"> <span class="json-bracket">]</span></div> | |
| <div class="code-line"> <span class="json-bracket">}</span></div> | |
| <div class="code-line"> <span class="json-bracket">}</span></div> | |
| <div class="code-line"><span class="json-bracket">}</span></div> | |
| </div> | |
| </div> | |
| </div> | |
| <button class="copy-button" onclick="navigator.clipboard.writeText(JSON.stringify({mcpServers:{voicekit:{command:'npx',args:['-y','mcp-remote','https://mcp-1st-birthday-voicekit.hf.space/gradio_api/mcp/sse']}}},null,2))"> | |
| <svg width="16" height="16" viewBox="0 0 24 24" fill="white" style="display: inline-block; vertical-align: middle;"> | |
| <rect x="9" y="9" width="13" height="13" rx="2" ry="2" fill="white"/> | |
| <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" fill="none" stroke="white" stroke-width="2"/> | |
| </svg> | |
| COPY CONFIG | |
| </button> | |
| </div> | |
| """) | |
| # AVAILABLE TOOLS CARD | |
| with gr.Column(scale=1): | |
| gr.HTML(""" | |
| <div class="card" style="min-height: 550px;"> | |
| <div class="card-title"> | |
| <svg width="18" height="18" viewBox="0 0 24 24" fill="#7c3aed" style="margin-right: 8px;"> | |
| <path d="M22.7 19l-9.1-9.1c.9-2.3.4-5-1.5-6.9-2-2-5-2.4-7.4-1.3L9 6 6 9 1.6 4.7C.4 7.1.9 10.1 2.9 12.1c1.9 1.9 4.6 2.4 6.9 1.5l9.1 9.1c.4.4 1 .4 1.4 0l2.3-2.3c.5-.4.5-1.1.1-1.4z"/> | |
| </svg> | |
| AVAILABLE TOOLS | |
| </div> | |
| <table class="tools-table"> | |
| <thead> | |
| <tr> | |
| <th>TOOL</th> | |
| <th>PURPOSE</th> | |
| <th>INPUT</th> | |
| <th>OUTPUT</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <tr> | |
| <td> | |
| <div style="display: flex; align-items: center; gap: 12px;"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M21 16V8L12 4L3 8V16L12 20L21 16Z" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M12 4V12M12 12V20M12 12L21 8M12 12L3 8" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <circle cx="12" cy="12" r="2" fill="#A855F7"/> | |
| <circle cx="16.5" cy="10" r="1.5" fill="#A855F7"/> | |
| <circle cx="7.5" cy="14" r="1.5" fill="#A855F7"/> | |
| <path d="M12 12L16.5 10M12 12L7.5 14" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round"/> | |
| </svg> | |
| <span class="tool-name">extract_embedding</span> | |
| </div> | |
| </td> | |
| <td>Extract 768-dim voice fingerprint</td> | |
| <td>audio_base64</td> | |
| <td>embedding, model, dim</td> | |
| </tr> | |
| <tr> | |
| <td> | |
| <div style="display: flex; align-items: center; gap: 12px;"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M2 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M5 8V16" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M8 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M22 10V14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M19 7V17" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M16 11V13" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M10 12H14" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M10 12L11.5 10.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M10 12L11.5 13.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M14 12L12.5 10.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M14 12L12.5 13.5" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| </svg> | |
| <span class="tool-name">match_voice</span> | |
| </div> | |
| </td> | |
| <td>Compare two voice similarities</td> | |
| <td>audio1_base64, audio2_base64</td> | |
| <td>similarity, tone_score</td> | |
| </tr> | |
| <tr> | |
| <td> | |
| <div style="display: flex; align-items: center; gap: 12px;"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M22 10C22 10 20 4 17 4C14 4 12 16 9 16C6 16 4 10 2 10" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <g transform="translate(13, 11)"> | |
| <circle cx="5" cy="5" r="4" stroke="#A855F7" stroke-width="1.5"/> | |
| <path d="M8 8L11 11" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round"/> | |
| </g> | |
| </svg> | |
| <span class="tool-name">analyze_acoustics</span> | |
| </div> | |
| </td> | |
| <td>Analyze pitch, energy, rhythm, tempo</td> | |
| <td>audio_base64</td> | |
| <td>pitch, energy, rhythm, tempo</td> | |
| </tr> | |
| <tr> | |
| <td> | |
| <div style="display: flex; align-items: center; gap: 12px;"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M2 12C2 12 4 5 7 5C10 5 11 19 14 19C15.5 19 16.5 15 16.5 15" stroke="#A855F7" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M19 7H22" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M19 12H22" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M19 17H22" stroke="#A855F7" stroke-width="2" stroke-linecap="round"/> | |
| </svg> | |
| <span class="tool-name">transcribe_audio</span> | |
| </div> | |
| </td> | |
| <td>Convert speech to text</td> | |
| <td>audio_base64, language</td> | |
| <td>text, language, model</td> | |
| </tr> | |
| <tr> | |
| <td> | |
| <div style="display: flex; align-items: center; gap: 12px;"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M12 5V19" stroke="#A855F7" stroke-width="2.5" stroke-linecap="round"/> | |
| <path d="M9 8V16" stroke="#A855F7" stroke-width="2.5" stroke-linecap="round"/> | |
| <path d="M15 8V16" stroke="#A855F7" stroke-width="2.5" stroke-linecap="round"/> | |
| <path d="M5 4H3V20H5" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M19 4H21V20H19" stroke="#A855F7" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/> | |
| </svg> | |
| <span class="tool-name">isolate_voice</span> | |
| </div> | |
| </td> | |
| <td>Remove background music/noise</td> | |
| <td>audio_base64</td> | |
| <td>isolated_audio_base64, metadata</td> | |
| </tr> | |
| <tr> | |
| <td> | |
| <div style="display: flex; align-items: center; gap: 12px;"> | |
| <svg width="24" height="24" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <circle cx="12" cy="12" r="9" stroke="#A855F7" stroke-width="1" opacity="0.3"/> | |
| <path d="M12 3V21" stroke="#A855F7" stroke-width="1" opacity="0.3"/> | |
| <path d="M4.2 7.5L19.8 16.5" stroke="#A855F7" stroke-width="1" opacity="0.3"/> | |
| <path d="M19.8 7.5L4.2 16.5" stroke="#A855F7" stroke-width="1" opacity="0.3"/> | |
| <path d="M12 5L18 9L16.5 18H7.5L6 9L12 5Z" fill="#A855F7" fill-opacity="0.4" stroke="#A855F7" stroke-width="2" stroke-linejoin="round"/> | |
| <circle cx="12" cy="5" r="1.5" fill="#A855F7"/> | |
| <circle cx="18" cy="9" r="1.5" fill="#A855F7"/> | |
| <circle cx="16.5" cy="18" r="1.5" fill="#A855F7"/> | |
| <circle cx="7.5" cy="18" r="1.5" fill="#A855F7"/> | |
| <circle cx="6" cy="9" r="1.5" fill="#A855F7"/> | |
| </svg> | |
| <span class="tool-name">grade_voice</span> | |
| </div> | |
| </td> | |
| <td>5-metric comprehensive analysis</td> | |
| <td>user_audio, reference_audio, text, category</td> | |
| <td>overall, metrics, feedback</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| </div> | |
| """) | |
| # ==================== FIRST ROW: 3 DEMO CARDS ==================== | |
| with gr.Row(equal_height=True, elem_classes="demo-row"): | |
| # EXTRACT EMBEDDING | |
| with gr.Column(scale=1, elem_classes="demo-card-column"): | |
| gr.HTML(""" | |
| <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;"> | |
| <svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M21 16V8L12 4L3 8V16L12 20L21 16Z" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M12 4V12M12 12V20M12 12L21 8M12 12L3 8" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <circle cx="12" cy="12" r="2" fill="#7c3aed"/> | |
| <circle cx="16.5" cy="10" r="1.5" fill="#7c3aed"/> | |
| <circle cx="7.5" cy="14" r="1.5" fill="#7c3aed"/> | |
| <path d="M12 12L16.5 10M12 12L7.5 14" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round"/> | |
| </svg> | |
| <div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;"> | |
| EXTRACT EMBEDDING | |
| </div> | |
| </div> | |
| """) | |
| embedding_audio = gr.Audio( | |
| type="filepath", | |
| label="Audio Input", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| embedding_btn = gr.Button("EXTRACT", variant="primary", size="lg", elem_classes="custom-action-btn") | |
| embedding_output = gr.HTML(value=create_embedding_empty()) | |
| embedding_btn.click( | |
| demo_extract_embedding, | |
| inputs=[embedding_audio], | |
| outputs=[embedding_output], | |
| api_visibility="private" | |
| ) | |
| # COMPARE VOICES | |
| with gr.Column(scale=1, elem_classes="demo-card-column"): | |
| gr.HTML(""" | |
| <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;"> | |
| <svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M2 10V14" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M5 8V16" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M8 11V13" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M22 10V14" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M19 7V17" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M16 11V13" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M10 12H14" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M10 12L11.5 10.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M10 12L11.5 13.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M14 12L12.5 10.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M14 12L12.5 13.5" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| </svg> | |
| <div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;"> | |
| MATCH VOICE | |
| </div> | |
| </div> | |
| """) | |
| with gr.Row(): | |
| compare_audio1 = gr.Audio( | |
| type="filepath", | |
| label="Audio 1", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| compare_audio2 = gr.Audio( | |
| type="filepath", | |
| label="Audio 2", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| compare_btn = gr.Button("COMPARE", variant="primary", size="lg", elem_classes="custom-action-btn") | |
| compare_output = gr.HTML(value=create_compare_empty()) | |
| compare_btn.click( | |
| demo_match_voice, | |
| inputs=[compare_audio1, compare_audio2], | |
| outputs=[compare_output], | |
| api_visibility="private" | |
| ) | |
| # ACOUSTIC ANALYSIS | |
| with gr.Column(scale=1, elem_classes="demo-card-column"): | |
| gr.HTML(""" | |
| <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;"> | |
| <svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M22 10C22 10 20 4 17 4C14 4 12 16 9 16C6 16 4 10 2 10" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <g transform="translate(13, 11)"> | |
| <circle cx="5" cy="5" r="4" stroke="#7c3aed" stroke-width="1.5"/> | |
| <path d="M8 8L11 11" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round"/> | |
| </g> | |
| </svg> | |
| <div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;"> | |
| ANALYZE ACOUSTICS | |
| </div> | |
| </div> | |
| """) | |
| acoustic_audio = gr.Audio( | |
| type="filepath", | |
| label="Audio Input", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| acoustic_btn = gr.Button("ANALYZE", variant="primary", size="lg", elem_classes="custom-action-btn") | |
| acoustic_output = gr.HTML(value=create_acoustic_empty()) | |
| acoustic_btn.click( | |
| demo_acoustic_analysis, | |
| inputs=[acoustic_audio], | |
| outputs=[acoustic_output], | |
| api_visibility="private" | |
| ) | |
# Opens the second row of demo cards and builds the transcription card.
# NOTE(review): indentation is not visible here; the columns that follow are
# presumably all nested inside this Row - confirm against the original file.
| # ==================== SECOND ROW: 3 MORE DEMO CARDS ==================== | |
| with gr.Row(equal_height=True, elem_classes="demo-row"): | |
| # AUDIO TRANSCRIPTION | |
| with gr.Column(scale=1, elem_classes="demo-card-column"): | |
# Card header: inline SVG icon followed by the "TRANSCRIBE AUDIO" title.
| gr.HTML(""" | |
| <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;"> | |
| <svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M2 12C2 12 4 5 7 5C10 5 11 19 14 19C15.5 19 16.5 15 16.5 15" stroke="#7c3aed" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M19 7H22" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M19 12H22" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| <path d="M19 17H22" stroke="#7c3aed" stroke-width="2" stroke-linecap="round"/> | |
| </svg> | |
| <div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;"> | |
| TRANSCRIBE AUDIO | |
| </div> | |
| </div> | |
| """) | |
# Audio to transcribe (type="filepath", format="wav"); its label is hidden.
| transcribe_audio_input = gr.Audio( | |
| type="filepath", | |
| label="Audio Input", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| transcribe_btn = gr.Button("TRANSCRIBE", variant="primary", size="lg", elem_classes="custom-action-btn") | |
# Result panel; its initial content comes from create_transcription_empty().
| transcribe_output = gr.HTML(value=create_transcription_empty()) | |
# This card has no language input, so the lambda fixes the second argument of
# demo_transcribe_audio to "en" and forwards only the audio value.
| transcribe_btn.click( | |
| lambda audio: demo_transcribe_audio(audio, "en"), | |
| inputs=[transcribe_audio_input], | |
| outputs=[transcribe_output], | |
| api_visibility="private" | |
| ) | |
# Voice-isolation demo card (titled "ISOLATE VOICE" in the UI).
| # CLEAN AUDIO EXTRACTION | |
| with gr.Column(scale=1, elem_classes="demo-card-column"): | |
# Card header: inline SVG icon followed by the "ISOLATE VOICE" title.
| gr.HTML(""" | |
| <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;"> | |
| <svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <path d="M12 5V19" stroke="#7c3aed" stroke-width="2.5" stroke-linecap="round"/> | |
| <path d="M9 8V16" stroke="#7c3aed" stroke-width="2.5" stroke-linecap="round"/> | |
| <path d="M15 8V16" stroke="#7c3aed" stroke-width="2.5" stroke-linecap="round"/> | |
| <path d="M5 4H3V20H5" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M19 4H21V20H19" stroke="#7c3aed" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/> | |
| </svg> | |
| <div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;"> | |
| ISOLATE VOICE | |
| </div> | |
| </div> | |
| """) | |
# Input audio (labelled "Audio with Background"; the label itself is hidden).
| clean_audio_input = gr.Audio( | |
| type="filepath", | |
| label="Audio with Background", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| clean_btn = gr.Button("EXTRACT VOICE", variant="primary", size="lg", elem_classes="custom-action-btn") | |
# Unlike the other cards, the result is shown in an Audio component rather
# than an HTML panel.
| clean_audio_output = gr.Audio(label="Clean Audio", type="filepath", visible=True) | |
# EXTRACT VOICE click: demo_clean_extraction(clean_audio_input) -> clean_audio_output.
| clean_btn.click( | |
| demo_clean_extraction, | |
| inputs=[clean_audio_input], | |
| outputs=[clean_audio_output], | |
| api_visibility="private" | |
| ) | |
# Voice-similarity demo card (titled "GRADE VOICE" in the UI).
| # VOICE SIMILARITY | |
| with gr.Column(scale=1, elem_classes="demo-card-column"): | |
# Card header: inline SVG icon followed by the "GRADE VOICE" title.
| gr.HTML(""" | |
| <div style="display: flex; align-items: center; gap: 6px; margin-bottom: 8px; padding-left: 18px; padding-top: 10px;"> | |
| <svg width="20" height="20" viewBox="0 0 24 24" fill="none" style="flex-shrink: 0;"> | |
| <circle cx="12" cy="12" r="9" stroke="#7c3aed" stroke-width="1" opacity="0.3"/> | |
| <path d="M12 3V21" stroke="#7c3aed" stroke-width="1" opacity="0.3"/> | |
| <path d="M4.2 7.5L19.8 16.5" stroke="#7c3aed" stroke-width="1" opacity="0.3"/> | |
| <path d="M19.8 7.5L4.2 16.5" stroke="#7c3aed" stroke-width="1" opacity="0.3"/> | |
| <path d="M12 5L18 9L16.5 18H7.5L6 9L12 5Z" fill="#7c3aed" fill-opacity="0.4" stroke="#7c3aed" stroke-width="2" stroke-linejoin="round"/> | |
| <circle cx="12" cy="5" r="1.5" fill="#7c3aed"/> | |
| <circle cx="18" cy="9" r="1.5" fill="#7c3aed"/> | |
| <circle cx="16.5" cy="18" r="1.5" fill="#7c3aed"/> | |
| <circle cx="7.5" cy="18" r="1.5" fill="#7c3aed"/> | |
| <circle cx="6" cy="9" r="1.5" fill="#7c3aed"/> | |
| </svg> | |
| <div style="font-size: 16px; font-weight: 700; color: #a5b4fc; text-transform: uppercase; letter-spacing: 1px;"> | |
| GRADE VOICE | |
| </div> | |
| </div> | |
| """) | |
# Two audio inputs: the user's recording and the reference to compare against.
# NOTE(review): presumably only these two Audio components sit inside the inner
# Row (side by side) and the button/output follow it - indentation is not
# visible here, confirm against the original file.
| with gr.Row(): | |
| similarity_user_audio = gr.Audio( | |
| type="filepath", | |
| label="User Audio", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| similarity_ref_audio = gr.Audio( | |
| type="filepath", | |
| label="Reference Audio", | |
| show_label=False, | |
| format="wav" | |
| ) | |
| similarity_btn = gr.Button("ANALYZE", variant="primary", size="lg", elem_classes="custom-action-btn") | |
# Result panel; its initial content comes from create_similarity_empty().
| similarity_output = gr.HTML(value=create_similarity_empty()) | |
# ANALYZE click: demo_voice_similarity receives the user audio first and the
# reference audio second; its return value fills similarity_output.
| similarity_btn.click( | |
| demo_voice_similarity, | |
| inputs=[similarity_user_audio, similarity_ref_audio], | |
| outputs=[similarity_output], | |
| api_visibility="private" | |
| ) | |
# Hidden row (visible=False) of bare Textbox/Button components. Each button's
# click() registers one of the six tool functions with text inputs and a text
# output; the module docstring states that the tools accept base64 audio.
# Unlike the demo-card handlers, these click() calls do not pass
# api_visibility="private".
# NOTE(review): presumably that is what exposes them as MCP tools when the app
# is launched with mcp_server=True - confirm against the Gradio MCP docs.
| # ==================== MCP TOOL INTERFACES (HIDDEN, API ONLY) ==================== | |
| with gr.Row(visible=False): | |
| # extract_embedding | |
| mcp_emb_input = gr.Textbox() | |
| mcp_emb_output = gr.Textbox() | |
| mcp_emb_btn = gr.Button() | |
| mcp_emb_btn.click(extract_embedding, inputs=[mcp_emb_input], outputs=[mcp_emb_output]) | |
| # match_voice | |
# Takes two text inputs (one per audio clip being compared).
| mcp_cmp_input1 = gr.Textbox() | |
| mcp_cmp_input2 = gr.Textbox() | |
| mcp_cmp_output = gr.Textbox() | |
| mcp_cmp_btn = gr.Button() | |
| mcp_cmp_btn.click(match_voice, inputs=[mcp_cmp_input1, mcp_cmp_input2], outputs=[mcp_cmp_output]) | |
| # analyze_acoustics | |
| mcp_ac_input = gr.Textbox() | |
| mcp_ac_output = gr.Textbox() | |
| mcp_ac_btn = gr.Button() | |
| mcp_ac_btn.click(analyze_acoustics, inputs=[mcp_ac_input], outputs=[mcp_ac_output]) | |
| # transcribe_audio | |
# The second input (language, judging by the variable name) defaults to "en".
| mcp_tr_input = gr.Textbox() | |
| mcp_tr_lang = gr.Textbox(value="en") | |
| mcp_tr_output = gr.Textbox() | |
| mcp_tr_btn = gr.Button() | |
| mcp_tr_btn.click(transcribe_audio, inputs=[mcp_tr_input, mcp_tr_lang], outputs=[mcp_tr_output]) | |
| # isolate_voice | |
| mcp_iso_input = gr.Textbox() | |
| mcp_iso_output = gr.Textbox() | |
| mcp_iso_btn = gr.Button() | |
| mcp_iso_btn.click(isolate_voice, inputs=[mcp_iso_input], outputs=[mcp_iso_output]) | |
| # grade_voice | |
# Four inputs passed in this order: user, reference, text, category (names
# inferred from the variables); the category textbox defaults to "meme".
| mcp_sim_user = gr.Textbox() | |
| mcp_sim_ref = gr.Textbox() | |
| mcp_sim_text = gr.Textbox() | |
| mcp_sim_cat = gr.Textbox(value="meme") | |
| mcp_sim_output = gr.Textbox() | |
| mcp_sim_btn = gr.Button() | |
| mcp_sim_btn.click(grade_voice, inputs=[mcp_sim_user, mcp_sim_ref, mcp_sim_text, mcp_sim_cat], outputs=[mcp_sim_output]) | |
if __name__ == "__main__":
    # Script entry point: serve the app on 0.0.0.0:7860 and start Gradio's
    # MCP server alongside the web UI (mcp_server=True).
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "mcp_server": True,
    }
    demo.launch(**launch_options)