""" Report Cards Component Generate downloadable summary cards for leaderboard and runs """ import pandas as pd from datetime import datetime from typing import Optional import base64 from pathlib import Path def _get_logo_base64(): """Load and encode TraceMind logo as base64""" try: # Try local file first (for development and GitHub) logo_path = Path(__file__).parent.parent / "Logo.png" if logo_path.exists(): with open(logo_path, "rb") as f: return base64.b64encode(f.read()).decode() # Fallback: fetch from GitHub assets branch (for HuggingFace Spaces) # Logo.png is hosted on assets branch to avoid binary file issues on HF import urllib.request github_logo_url = "https://raw.githubusercontent.com/Mandark-droid/TraceMind-AI/assets/Logo.png" with urllib.request.urlopen(github_logo_url, timeout=5) as response: return base64.b64encode(response.read()).decode() except Exception as e: print(f"Warning: Could not load logo: {e}") return None def generate_leaderboard_summary_card(df: pd.DataFrame, top_n: int = 3) -> str: """ Generate HTML for leaderboard summary card Args: df: Leaderboard DataFrame top_n: Number of top performers to show Returns: HTML string for summary card """ if df.empty: return _create_empty_card_html("No leaderboard data available") # Get top performers by success rate top_models = df.nlargest(top_n, 'success_rate') if 'success_rate' in df.columns else df.head(top_n) # Get logo logo_base64 = _get_logo_base64() # Card header html = f"""
{f'' if logo_base64 else ''}

🧠 TraceMind Agent Evaluation Leaderboard

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}

🏆 Top Performers

""" # Top models medals = ["🥇", "🥈", "🥉", "4️⃣", "5️⃣"] for idx, (_, row) in enumerate(top_models.iterrows()): if idx >= top_n: break model_name = row['model'].split('/')[-1] if '/' in str(row['model']) else str(row['model']) html += f"""
{medals[idx]}

{model_name}

""" # Add metrics if 'success_rate' in row and pd.notna(row['success_rate']): html += f'✓ {row["success_rate"]:.1f}% Success Rate' if 'avg_duration_ms' in row and pd.notna(row['avg_duration_ms']): duration_s = row['avg_duration_ms'] / 1000 html += f'⚡ {duration_s:.1f}s Avg Duration' if 'total_cost_usd' in row and pd.notna(row['total_cost_usd']): html += f'💰 ${row["total_cost_usd"]:.4f} per run' # Add GPU metrics if available if 'co2_emissions_g' in row and pd.notna(row['co2_emissions_g']): html += f'🌱 {row["co2_emissions_g"]:.2f}g CO2' if 'gpu_utilization_avg' in row and pd.notna(row['gpu_utilization_avg']): html += f'🎮 {row["gpu_utilization_avg"]:.1f}% GPU Util' html += """
""" # Aggregate stats total_runs = len(df) unique_models = df['model'].nunique() if 'model' in df.columns else 0 avg_success = df['success_rate'].mean() if 'success_rate' in df.columns else 0 html += f"""

📊 Leaderboard Stats

  • • {total_runs} total evaluation runs
  • • {unique_models} unique models tested
  • • {avg_success:.1f}% average success rate
  • """ # Add cost stats if available if 'total_cost_usd' in df.columns: total_cost = df['total_cost_usd'].sum() html += f'
  • • ${total_cost:.2f} total evaluation cost
  • ' # Add CO2 stats if available if 'co2_emissions_g' in df.columns: total_co2 = df['co2_emissions_g'].sum() html += f'
  • • {total_co2:.2f}g total CO2 emissions
  • ' html += """
""" # Add CSS html += _get_card_css() return html def generate_run_report_card(run_data: dict) -> str: """ Generate HTML for individual run report card Args: run_data: Dictionary with run information Returns: HTML string for run report card """ if not run_data: return _create_empty_card_html("No run data available") model_name = run_data.get('model', 'Unknown Model') model_display = model_name.split('/')[-1] if '/' in model_name else model_name run_id = run_data.get('run_id', 'unknown') timestamp = run_data.get('timestamp', datetime.now().strftime('%Y-%m-%d %H:%M')) # Get logo logo_base64 = _get_logo_base64() html = f"""
{f'' if logo_base64 else ''}

🤖 {model_display} Evaluation Report

Run ID: {run_id}

{timestamp}

""" # Success rate visualization success_rate = run_data.get('success_rate', 0) stars = "⭐" * int(success_rate / 20) # 5 stars max html += f"""
{stars}
{success_rate:.1f}% Success Rate
""" # Performance metrics html += """

📊 Performance Metrics

    """ if 'successful_tests' in run_data and 'total_tests' in run_data: html += f'
  • Tests: {run_data["successful_tests"]}/{run_data["total_tests"]} passed
  • ' if 'avg_steps' in run_data: html += f'
  • Avg Steps: {run_data["avg_steps"]:.1f} per test
  • ' if 'avg_duration_ms' in run_data: duration_s = run_data['avg_duration_ms'] / 1000 html += f'
  • Avg Duration: {duration_s:.1f}s
  • ' if 'total_duration_ms' in run_data: total_duration = run_data['total_duration_ms'] / 1000 mins = int(total_duration // 60) secs = int(total_duration % 60) html += f'
  • Total Duration: {mins}m {secs}s
  • ' html += """
""" # Cost analysis if 'total_tokens' in run_data or 'total_cost_usd' in run_data: html += """

💰 Cost Analysis

    """ if 'total_tokens' in run_data: html += f'
  • Total Tokens: {run_data["total_tokens"]:,}
  • ' if 'total_cost_usd' in run_data: html += f'
  • Total Cost: ${run_data["total_cost_usd"]:.4f}
  • ' if 'avg_cost_per_test_usd' in run_data: html += f'
  • Cost per Test: ${run_data["avg_cost_per_test_usd"]:.6f}
  • ' html += """
""" # Sustainability if 'co2_emissions_g' in run_data or 'provider' in run_data: html += """

🌱 Sustainability

    """ if 'co2_emissions_g' in run_data: html += f'
  • CO2 Emissions: {run_data["co2_emissions_g"]:.2f}g
  • ' if 'provider' in run_data: provider_label = "API" if run_data['provider'] == 'litellm' else "GPU" html += f'
  • Provider: {run_data["provider"]} ({provider_label})
  • ' if 'gpu_utilization_avg' in run_data and pd.notna(run_data['gpu_utilization_avg']): html += f'
  • GPU Utilization: {run_data["gpu_utilization_avg"]:.1f}%
  • ' html += """
""" # Footer html += f"""
""" # Add CSS html += _get_card_css() return html def download_card_as_png_js(element_id: str = "summary-card-html") -> str: """ JavaScript to convert HTML card to PNG using html2canvas Args: element_id: ID of the HTML element to capture Returns: JavaScript code as string """ return f""" () => {{ // Load html2canvas from CDN if not already loaded if (typeof html2canvas === 'undefined') {{ const script = document.createElement('script'); script.src = 'https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js'; script.onload = captureCard; document.head.appendChild(script); }} else {{ captureCard(); }} function captureCard() {{ console.log('Searching for card element...'); // Try multiple strategies to find the card let card = document.getElementById('{element_id}'); if (!card) {{ console.log('ID not found, trying class selectors...'); card = document.querySelector('.tracemind-run-card, .tracemind-comparison-card, .tracemind-summary-card'); }} if (!card) {{ console.log('Class not found, trying summary-card-html...'); card = document.getElementById('summary-card-html'); }} if (!card) {{ console.log('Still not found, searching all elements with tracemind in class...'); const cards = document.querySelectorAll('[class*="tracemind"]'); console.log('Found elements:', cards.length); cards.forEach((el, i) => console.log(`Card ${{i}}:`, el.className, el.id)); if (cards.length > 0) {{ card = cards[0]; }} }} if (!card) {{ console.error('Card element not found anywhere!'); console.log('All IDs on page:', Array.from(document.querySelectorAll('[id]')).map(el => el.id)); alert('Card element not found. Please make sure you selected a run first.'); return; }} console.log('Found card:', card); console.log('Card content length:', card.innerHTML?.length || 0); // Clone the card to avoid modifying the original const cardClone = card.cloneNode(true); cardClone.style.position = 'absolute'; cardClone.style.left = '-9999px'; cardClone.style.top = '0'; document.body.appendChild(cardClone); // Force all text elements to white in the clone const textElements = cardClone.querySelectorAll('h1, h2, h3, p, li, span, a, div'); textElements.forEach(el => {{ // Skip elements with gradient text (background-clip: text) const computedStyle = window.getComputedStyle(el); const hasGradientText = computedStyle.webkitBackgroundClip === 'text' || computedStyle.backgroundClip === 'text' || el.style.webkitBackgroundClip === 'text' || el.style.backgroundClip === 'text'; if (!hasGradientText) {{ el.style.color = '#ffffff'; el.style.setProperty('color', '#ffffff', 'important'); }} }}); // Ensure background is black cardClone.style.backgroundColor = '#000000'; cardClone.style.setProperty('background-color', '#000000', 'important'); html2canvas(cardClone, {{ backgroundColor: '#000000', scale: 2, logging: false, useCORS: true, allowTaint: true }}).then(canvas => {{ // Remove the clone document.body.removeChild(cardClone); console.log('Canvas size:', canvas.width, 'x', canvas.height); const link = document.createElement('a'); const timestamp = new Date().toISOString().slice(0, 10); link.download = `tracemind-report-${{timestamp}}.png`; link.href = canvas.toDataURL('image/png'); link.click(); }}).catch(err => {{ // Remove the clone on error if (document.body.contains(cardClone)) {{ document.body.removeChild(cardClone); }} console.error('Error capturing card:', err); alert('Failed to download card: ' + err.message); }}); }} }} """ def _create_empty_card_html(message: str) -> str: """Create empty card with message""" return f"""

{message}

{_get_card_css()} """ def _get_card_css() -> str: """Get CSS for summary cards""" return """ """ def generate_comparison_report_card(run_a_data: dict, run_b_data: dict) -> str: """ Generate HTML for comparison report card showing two runs side by side Args: run_a_data: Dictionary with Run A information run_b_data: Dictionary with Run B information Returns: HTML string for comparison report card """ if not run_a_data or not run_b_data: return _create_empty_card_html("Missing run data for comparison") model_a = run_a_data.get('model', 'Unknown').split('/')[-1] model_b = run_b_data.get('model', 'Unknown').split('/')[-1] # Get logo logo_base64 = _get_logo_base64() # Determine winners for each metric success_winner = "A" if run_a_data.get('success_rate', 0) > run_b_data.get('success_rate', 0) else "B" cost_winner = "A" if run_a_data.get('total_cost_usd', 999) < run_b_data.get('total_cost_usd', 999) else "B" speed_winner = "A" if run_a_data.get('avg_duration_ms', 999999) < run_b_data.get('avg_duration_ms', 999999) else "B" eco_winner = "A" if run_a_data.get('co2_emissions_g', 999) < run_b_data.get('co2_emissions_g', 999) else "B" # Count overall wins a_wins = sum(1 for w in [success_winner, cost_winner, speed_winner, eco_winner] if w == "A") b_wins = 4 - a_wins overall_winner = "A" if a_wins > b_wins else ("B" if b_wins > a_wins else "Tie") html = f"""
{f'' if logo_base64 else ''}

⚖️ Model Comparison Report

{model_a} vs {model_b}

{datetime.now().strftime('%Y-%m-%d %H:%M')}

{'🏆' * 5}
Overall Winner: Run {overall_winner} ({a_wins if overall_winner == "A" else b_wins}/4 categories)

Run A: {model_a}

{'✅' if success_winner == "A" else '📊'} Success: {run_a_data.get('success_rate', 0):.1f}%
{'✅' if cost_winner == "A" else '💰'} Cost: ${run_a_data.get('total_cost_usd', 0):.4f}
{'✅' if speed_winner == "A" else '⚡'} Speed: {run_a_data.get('avg_duration_ms', 0)/1000:.2f}s
{'✅' if eco_winner == "A" else '🌱'} CO2: {run_a_data.get('co2_emissions_g', 0):.2f}g

Run B: {model_b}

{'✅' if success_winner == "B" else '📊'} Success: {run_b_data.get('success_rate', 0):.1f}%
{'✅' if cost_winner == "B" else '💰'} Cost: ${run_b_data.get('total_cost_usd', 0):.4f}
{'✅' if speed_winner == "B" else '⚡'} Speed: {run_b_data.get('avg_duration_ms', 0)/1000:.2f}s
{'✅' if eco_winner == "B" else '🌱'} CO2: {run_b_data.get('co2_emissions_g', 0):.2f}g

💡 Recommendation

{f"Run {overall_winner} ({model_a if overall_winner == 'A' else model_b}) is recommended for most use cases" if overall_winner != "Tie" else "Both runs are evenly matched - choose based on your specific priorities"}

""" return html