# Source: Hugging Face Spaces - MCP-1st-Birthday / TraceMind (status: Running)
# File size: 30,207 Bytes

"""
Screen 4: Trace Detail View
Shows detailed OpenTelemetry trace visualization
"""

import html
import math
from datetime import datetime
from typing import Optional, Callable, Dict, Any, List

import gradio as gr
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from components.thought_graph import create_thought_graph


def create_trace_detail_screen(
    trace_data: dict,
    on_back: Optional[Callable] = None,
    mcp_qa_enabled: bool = True
) -> gr.Blocks:
    """
    Build the trace detail screen as a self-contained Gradio Blocks layout.

    The screen contains, top to bottom: an optional back button, a title and
    metadata header, three tabs (thought graph, waterfall timeline, raw span
    JSON) and an optional "Ask About This Trace" accordion.

    Args:
        trace_data: Mapping describing one trace. Only the keys ``'trace_id'``
            and ``'spans'`` are read here; ``'spans'`` may be a list, any
            iterable, an object exposing ``tolist()``, or None.
        on_back: Callback wired to the back button's click event with no
            inputs and no outputs. When falsy, no back button is created.
        mcp_qa_enabled: When True, render the Q&A accordion. Its handler is
            currently a placeholder that echoes the question back.

    Returns:
        The ``gr.Blocks`` container holding the whole screen.
    """

    with gr.Blocks() as trace_detail:
        # NOTE(review): the row is created unconditionally, so it is left
        # empty when no on_back callback is supplied.
        with gr.Row():
            if on_back:
                back_btn = gr.Button("⬅️ Back to Run Detail", variant="secondary", size="sm")

        gr.Markdown(f"# 🔍 Trace Detail: {trace_data.get('trace_id', 'Unknown')}")

        # Safely extract spans: normalise array-likes (anything with
        # ``tolist``), other iterables and None into a plain list.
        spans = trace_data.get('spans', [])
        if hasattr(spans, 'tolist'):
            spans = spans.tolist()
        elif not isinstance(spans, list):
            spans = list(spans) if spans is not None else []

        # Trace metadata
        with gr.Row():
            gr.Markdown(f"""
            **Trace ID:** `{trace_data.get('trace_id', 'N/A')}`
            **Total Spans:** {len(spans)}
            """)

        # Tabs for different visualizations. Each figure is computed once,
        # eagerly, while the layout is being built.
        with gr.Tabs() as tabs:
            # Tab 1: Thought Graph (STAR FEATURE!)
            with gr.Tab("🧠 Thought Graph"):
                gr.Markdown("""
                ### Agent Reasoning Flow
                This graph visualizes how your agent thinks - showing the flow of reasoning steps,
                tool calls, and LLM interactions as a network.

                **Node Colors:**
                - 🟣 Purple: LLM reasoning steps
                - 🟠 Orange: Tool calls
                - 🔵 Blue: Chains/Agents
                - 🔴 Red: Errors
                """)

                # Create and display thought graph
                thought_graph_plot = gr.Plot(
                    value=create_thought_graph(spans, trace_data.get('trace_id', 'Unknown')),
                    label=""
                )

            # Tab 2: Execution Timeline (Waterfall)
            with gr.Tab("⏱️ Execution Timeline"):
                gr.Markdown("""
                ### Waterfall Chart
                Timeline view showing when each span executed and for how long.
                """)

                # Span visualization
                span_viz = gr.Plot(
                    value=create_span_visualization(spans, trace_data.get('trace_id', 'Unknown')),
                    label=""
                )

            # Tab 3: Span Details
            with gr.Tab("📋 Span Details"):
                gr.Markdown("""
                ### Detailed Span Information
                Raw span data with attributes, status, and metadata.
                """)

                # Span details table (a gr.JSON component created in place)
                span_table = create_span_table(spans)

        # MCP Q&A Tool (below tabs). The separator is rendered even when the
        # Q&A accordion itself is disabled.
        gr.Markdown("---")
        if mcp_qa_enabled:
            with gr.Accordion("🤖 Ask About This Trace", open=False):
                question_input = gr.Textbox(
                    label="Question",
                    placeholder="e.g., Why was the tool called twice? What tool did the agent use first?",
                    lines=2,
                    info="Ask questions about this trace execution, tool usage, or agent behavior"
                )
                ask_btn = gr.Button("Ask", variant="primary")
                answer_output = gr.Markdown("*Ask a question to get AI-powered insights*")

                # Wire up MCP Q&A (placeholder for now): the handler only
                # echoes the question; no trace data is consulted.
                ask_btn.click(
                    fn=lambda q: f"**Answer:** This is a placeholder. MCP integration coming soon.\n\n**Your question:** {q}",
                    inputs=[question_input],
                    outputs=[answer_output]
                )

        # Wire up events. back_btn only exists when on_back is truthy, so the
        # same condition guards its use.
        if on_back:
            back_btn.click(fn=on_back, inputs=[], outputs=[])

    return trace_detail


def _finite_number(value):
    """Return ``value`` as a finite int/float, or ``None`` when it is unusable."""
    if value is None:
        return None
    try:
        number = value if isinstance(value, (int, float)) else float(value)
        # NaN/inf would later blow up in int() or poison min()/max().
        return number if math.isfinite(number) else None
    except (ValueError, TypeError, OverflowError):
        return None


def _read_span_timestamp(span, field_name):
    """Return the first usable timestamp stored under any spelling of ``field_name``.

    ``field_name`` is the camelCase base name (``'startTime'`` / ``'endTime'``).
    Returns 0 when no spelling is present with a usable value.
    """
    candidates = (
        field_name,  # e.g., 'startTime'
        field_name.lower(),  # e.g., 'starttime'
        field_name.replace('Time', 'TimeUnixNano'),  # e.g., 'startTimeUnixNano'
        field_name[0].lower() + field_name[1:],  # e.g., 'startTime'
        field_name.replace('Time', '_time').lower(),  # e.g., 'start_time'
        field_name.replace('Time', '_time_unix_nano').lower(),  # e.g., 'start_time_unix_nano'
    )
    for key in candidates:
        if key not in span:
            continue
        raw = span[key]
        if isinstance(raw, str):
            try:
                # int() keeps full precision for 19-digit nanosecond strings.
                return int(raw)
            except ValueError:
                # Not a plain integer string (e.g. "1.7e18" or a date string);
                # unparseable text is treated as missing instead of crashing.
                raw = _finite_number(raw)
        if raw is None or (isinstance(raw, float) and not math.isfinite(raw)):
            continue
        return raw
    return 0


def _detect_ms_divisor(min_start):
    """Guess the unit of an epoch timestamp and return ``(divisor_to_ms, unit_name)``.

    Present-day epoch values are ~1.7e9 s, ~1.7e12 ms, ~1.7e15 us and
    ~1.7e18 ns, so the cut-offs sit two orders of magnitude below each band.
    """
    if min_start > 1e17:
        return 1000000, "nanoseconds"
    if min_start > 1e14:
        return 1000, "microseconds"
    if min_start > 1e11:
        return 1, "milliseconds"
    return 0.001, "seconds"


def _extract_usage(attributes):
    """Return ``(token_info, cost_info)`` dicts read from span attributes."""
    token_info = {}
    cost_info = {}
    if not isinstance(attributes, dict):
        return token_info, cost_info

    # gen_ai.* keys win; llm.token_count.* is the fallback.
    prompt_tokens = _finite_number(attributes.get('gen_ai.usage.prompt_tokens'))
    if prompt_tokens is None:
        prompt_tokens = _finite_number(attributes.get('llm.token_count.prompt'))
    completion_tokens = _finite_number(attributes.get('gen_ai.usage.completion_tokens'))
    if completion_tokens is None:
        completion_tokens = _finite_number(attributes.get('llm.token_count.completion'))

    if prompt_tokens is not None:
        token_info['prompt_tokens'] = int(prompt_tokens)
    if completion_tokens is not None:
        token_info['completion_tokens'] = int(completion_tokens)

    if 'prompt_tokens' in token_info and 'completion_tokens' in token_info:
        token_info['total_tokens'] = token_info['prompt_tokens'] + token_info['completion_tokens']
    else:
        total = _finite_number(attributes.get('llm.usage.total_tokens'))
        if total is not None:
            token_info['total_tokens'] = int(total)

    # First key holding a usable number wins.
    for cost_key in ('gen_ai.usage.cost.total', 'llm.usage.cost'):
        cost = _finite_number(attributes.get(cost_key))
        if cost is not None:
            cost_info['total_cost'] = cost
            break

    return token_info, cost_info


def _status_code(span):
    """Return the span's status code, tolerating a missing/None/string status."""
    status = span.get('status')
    if isinstance(status, dict):
        return status.get('code', 'UNKNOWN')
    if isinstance(status, str) and status:
        # A bare string status is used as the code itself.
        return status
    return 'UNKNOWN'


def process_trace_data(spans: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Process trace spans for waterfall visualization.

    Args:
        spans: Span dicts. A list, any iterable, an object exposing
            ``tolist()``, or None are all accepted.

    Returns:
        One dict per input span, in input order, with the keys ``span_id``,
        ``parent_id``, ``name``, ``kind``, ``start_time`` (ms relative to the
        earliest known start), ``duration`` (ms, clamped to at least 0.1 so
        the bar stays visible), ``actual_duration`` (ms), ``end_time``,
        ``attributes``, ``status``, ``tokens`` and ``cost``.
        An empty list when there are no spans.

    Notes:
        * The timestamp unit is guessed from the magnitude of the earliest
          start time, assuming epoch-based timestamps.
        * Spans without a usable start time are placed at offset 0 and do not
          take part in finding the earliest start.
        * ``duration_ms`` on the span, when numeric, takes precedence over
          ``end - start``.
        * Diagnostic ``[DEBUG]`` lines are printed to stdout.
    """
    # Ensure spans is a list
    if hasattr(spans, 'tolist'):
        spans = spans.tolist()
    elif not isinstance(spans, list):
        spans = list(spans) if spans is not None else []

    if not spans:
        return []

    # Only real (positive) start times may define the origin of the time axis;
    # a missing start time reads as 0 and must not drag the origin to epoch 0.
    start_times = [_read_span_timestamp(span, 'startTime') for span in spans]
    known_starts = [ts for ts in start_times if ts > 0]
    min_start = min(known_starts) if known_starts else 0
    has_timing_data = bool(known_starts)

    # Debug: Print first span's raw timestamps
    first_span = spans[0]
    print("[DEBUG] First span raw data sample:")
    print(f"  startTime field: {first_span.get('startTime', 'NOT FOUND')}")
    print(f"  endTime field: {first_span.get('endTime', 'NOT FOUND')}")
    print(f"  startTimeUnixNano field: {first_span.get('startTimeUnixNano', 'NOT FOUND')}")
    print(f"  endTimeUnixNano field: {first_span.get('endTimeUnixNano', 'NOT FOUND')}")
    print(f"  HAS_TIMING_DATA: {has_timing_data}")
    if 'attributes' in first_span:
        attrs = first_span['attributes']
        print(f"  Sample attributes: {list(attrs.keys())[:5] if isinstance(attrs, dict) else 'N/A'}")
        if isinstance(attrs, dict):
            # Check for cost fields
            cost_fields = [k for k in attrs if 'cost' in k.lower() or 'price' in k.lower()]
            if cost_fields:
                print(f"  Cost-related fields found: {cost_fields}")

    # Auto-detect timestamp unit based on magnitude
    time_divisor = 1000000  # Default: assume nanoseconds, convert to milliseconds
    if has_timing_data:
        time_divisor, time_unit = _detect_ms_divisor(min_start)
        print(f"[DEBUG] Auto-detected timestamp unit: {time_unit} (min_start={min_start}, divisor={time_divisor})")

    processed_spans = []
    for idx, (span, start_time) in enumerate(zip(spans, start_times)):
        end_time = _read_span_timestamp(span, 'endTime')

        # Offset from the earliest span; spans without a start sit at 0.
        relative_start = (start_time - min_start) / time_divisor if start_time > 0 else 0

        # Calculate duration - prefer duration_ms if available
        reported_duration = _finite_number(span.get('duration_ms'))
        if reported_duration is not None:
            actual_duration = float(reported_duration)
            duration_source = 'duration_ms'
        elif start_time > 0 and end_time > 0:
            actual_duration = (end_time - start_time) / time_divisor
            duration_source = 'calculated'
        else:
            # A missing endpoint would otherwise yield a huge bogus value.
            actual_duration = 0.0
            duration_source = 'missing timestamps'

        # Debug: Print first few durations
        if idx < 3:
            print(f"[DEBUG] Span {idx}: start={start_time}, end={end_time}, duration={actual_duration:.3f}ms ({duration_source})")

        # Handle span ID variations
        span_id = span.get('spanId') or span.get('span_id') or span.get('spanID') or f'span_{idx}'
        parent_id = span.get('parentSpanId') or span.get('parent_span_id') or span.get('parentSpanID')

        # Get span kind - an OpenInference kind in the attributes
        # (CHAIN, TOOL, LLM, RETRIEVER, EMBEDDING, AGENT, ...) overrides the
        # top-level OpenTelemetry kind.
        span_kind = span.get('kind', 'INTERNAL')
        attributes = span.get('attributes', {})
        if isinstance(attributes, dict):
            openinference_kind = attributes.get('openinference.span.kind')
            if openinference_kind:
                span_kind = str(openinference_kind).upper()

        token_info, cost_info = _extract_usage(attributes)

        # Debug: Print cost info for LLM spans
        if isinstance(attributes, dict) and idx < 2 and span_kind == 'LLM':
            print(f"[DEBUG] LLM Span {idx} cost extraction:")
            print(f"  gen_ai.usage.cost.total: {attributes.get('gen_ai.usage.cost.total', 'NOT FOUND')}")
            print(f"  llm.usage.cost: {attributes.get('llm.usage.cost', 'NOT FOUND')}")
            print(f"  cost_info: {cost_info}")

        # Store actual duration for tooltip, use minimum for visualization
        display_duration = max(actual_duration, 0.1)  # Minimum width for visibility

        processed_spans.append({
            'span_id': span_id,
            'parent_id': parent_id,
            'name': span.get('name', 'Unknown'),
            'kind': span_kind,
            'start_time': relative_start,
            'duration': display_duration,  # For bar width
            'actual_duration': actual_duration,  # For tooltip
            'end_time': relative_start + actual_duration,  # Use actual for end time
            'attributes': attributes,
            'status': _status_code(span),
            'tokens': token_info,
            'cost': cost_info
        })

    print(f"[DEBUG] Total spans in input: {len(spans)}")
    print(f"[DEBUG] Processed spans: {len(processed_spans)}")

    # Debug: Show span kinds and statuses detected
    span_kinds = {}
    span_statuses = {}
    for processed in processed_spans:
        span_kinds[processed['kind']] = span_kinds.get(processed['kind'], 0) + 1
        span_statuses[processed['status']] = span_statuses.get(processed['status'], 0) + 1
    durations = [processed['actual_duration'] for processed in processed_spans]
    spans_with_tokens = sum(1 for processed in processed_spans if processed['tokens'])
    spans_with_cost = sum(1 for processed in processed_spans if processed['cost'])

    print(f"[DEBUG] Span kinds detected: {span_kinds}")
    print(f"[DEBUG] Span statuses detected: {span_statuses}")
    if durations:
        print(f"[DEBUG] Duration range: {min(durations):.3f}ms - {max(durations):.3f}ms")
    print(f"[DEBUG] Spans with token info: {spans_with_tokens}/{len(processed_spans)}")
    print(f"[DEBUG] Spans with cost info: {spans_with_cost}/{len(processed_spans)}")

    return processed_spans


def create_span_visualization(spans: List[Dict[str, Any]], trace_id: str = "Unknown") -> go.Figure:
    """Render spans as an interactive horizontal waterfall chart.

    Args:
        spans: Raw span dicts; they are normalised by ``process_trace_data``.
        trace_id: Identifier shown in the chart title.

    Returns:
        A Plotly figure with one bar per span (offset by its relative start,
        coloured by status/kind), or a figure carrying only a
        "No spans to display" annotation when nothing could be processed.
    """
    timeline = process_trace_data(spans)

    print(f"[DEBUG] create_span_visualization - Received {len(spans)} spans")
    print(f"[DEBUG] create_span_visualization - Processed {len(timeline)} spans")

    if not timeline:
        # Nothing to plot: hand back a figure that only carries a message.
        empty_fig = go.Figure()
        empty_fig.add_annotation(
            text="No spans to display",
            xref="paper", yref="paper",
            x=0.5, y=0.5, xanchor='center', yanchor='middle',
            showarrow=False,
            font=dict(size=20)
        )
        return empty_fig

    # Earliest span first.
    timeline.sort(key=lambda entry: entry['start_time'])

    # Suffix each name with its position so repeated names stay distinct
    # categories on the y axis.
    for position, entry in enumerate(timeline):
        entry['display_name'] = f"{entry['name']} [{position}]"

    # Palette keyed by span kind (OpenTelemetry and OpenInference kinds).
    error_color = '#DC143C'     # Crimson, reserved for status == 'ERROR'
    fallback_color = '#4682B4'  # Steel Blue for INTERNAL/unknown
    kind_palette = {
        'SERVER': '#2E8B57',     # Sea Green
        'CLIENT': '#4169E1',     # Royal Blue
        'LLM': '#9B59B6',        # Purple
        'TOOL': '#E67E22',       # Orange
        'CHAIN': '#3498DB',      # Light Blue
        'AGENT': '#1ABC9C',      # Turquoise
        'RETRIEVER': '#F39C12',  # Yellow-Orange
        'EMBEDDING': '#8E44AD',  # Dark Purple
    }

    bar_colors = []
    kind_to_color = {}  # first colour seen per kind, for the debug line only
    for entry in timeline:
        if entry['status'] == 'ERROR':
            shade = error_color
        else:
            shade = kind_palette.get(entry['kind'], fallback_color)
        bar_colors.append(shade)
        kind_to_color.setdefault(entry['kind'], shade)

    print(f"[DEBUG] Color assignments: {kind_to_color}")

    def token_fragment(tokens):
        """Pre-formatted hover snippet for token counts ('' when there are none)."""
        if not tokens:
            return ""
        has_prompt = 'prompt_tokens' in tokens
        has_completion = 'completion_tokens' in tokens
        if 'total_tokens' in tokens:
            text = f"<br>Tokens: {tokens['total_tokens']}"
            if has_prompt and has_completion:
                text += f" (prompt: {tokens['prompt_tokens']}, completion: {tokens['completion_tokens']})"
            return text
        pieces = []
        if has_prompt:
            pieces.append(f"prompt: {tokens['prompt_tokens']}")
        if has_completion:
            pieces.append(f"completion: {tokens['completion_tokens']}")
        return f"<br>Tokens: {', '.join(pieces)}" if pieces else ""

    def cost_fragment(cost):
        """Pre-formatted hover snippet for the span cost ('' when unknown)."""
        if cost and 'total_cost' in cost:
            return f"<br>Cost: ${cost['total_cost']:.6f}"
        return ""

    # One row per bar; the indices are referenced from the hover template.
    hover_rows = [
        [
            entry['name'],
            entry['kind'],
            entry['span_id'],
            entry['end_time'],
            entry['actual_duration'],  # real duration, not the clamped bar width
            token_fragment(entry['tokens']),
            cost_fragment(entry['cost']),
        ]
        for entry in timeline
    ]

    hover_template = "".join([
        "<b>%{customdata[0]}</b><br>",
        "Type: %{customdata[1]}<br>",
        "Span ID: %{customdata[2]}<br>",
        "Duration: %{customdata[4]:.3f} ms<br>",
        "Start: %{base:.2f} ms<br>",
        "End: %{customdata[3]:.2f} ms",
        "%{customdata[5]}",  # token snippet
        "%{customdata[6]}",  # cost snippet
        "<extra></extra>",
    ])

    waterfall = go.Figure()
    waterfall.add_trace(go.Bar(
        y=[entry['display_name'] for entry in timeline],
        x=[entry['duration'] for entry in timeline],  # clamped width (min 0.1ms)
        base=[entry['start_time'] for entry in timeline],
        orientation='h',
        marker_color=bar_colors,
        hovertemplate=hover_template,
        customdata=hover_rows,
        name="Spans"
    ))

    chart_title = {
        'text': f"OpenTelemetry Trace: {trace_id}",
        'x': 0.5,
        'xanchor': 'center'
    }
    waterfall.update_layout(
        title=chart_title,
        xaxis_title="Time (milliseconds)",
        yaxis_title="Spans",
        showlegend=False,
        height=400 + len(timeline) * 30,  # grows with the number of bars
        bargap=0.2,
        hovermode='closest'
    )

    return waterfall


def create_span_table(spans: List[Dict[str, Any]]) -> gr.JSON:
    """Create the detailed span information display.

    Args:
        spans: Span dicts. A list, any iterable, an object exposing
            ``tolist()``, or None are all accepted.

    Returns:
        A ``gr.JSON`` component labelled "Span Details" whose value is a list
        with one simplified record per span (ID, parent, name, kind, duration
        in ms, attributes, status code).

    Notes:
        Duration uses the span's ``duration_ms`` when it is numeric; otherwise
        it is computed from the start/end timestamps.
        NOTE(review): the timestamp fallback divides by 1e6, i.e. it assumes
        nanosecond timestamps - confirm against the data source.
    """
    # Ensure spans is a list
    if hasattr(spans, 'tolist'):
        spans = spans.tolist()
    elif not isinstance(spans, list):
        spans = list(spans) if spans is not None else []

    def read_timestamp(span, field_name):
        """Return the first usable timestamp under any spelling of field_name, else 0."""
        # Same spellings as the waterfall's timestamp lookup, including the
        # snake_case ones, so both views agree on which spans have timing.
        candidates = (
            field_name,
            field_name.lower(),
            field_name.replace('Time', 'TimeUnixNano'),
            field_name[0].lower() + field_name[1:],
            field_name.replace('Time', '_time').lower(),
            field_name.replace('Time', '_time_unix_nano').lower(),
        )
        for key in candidates:
            if key not in span:
                continue
            raw = span[key]
            if raw is None:
                continue
            if isinstance(raw, str):
                try:
                    return int(raw)
                except ValueError:
                    # Non-integer text is treated as missing, not fatal.
                    continue
            if isinstance(raw, float) and raw != raw:  # NaN
                continue
            return raw
        return 0

    def reported_duration(span):
        """Return the span's own ``duration_ms`` as a float, or None if unusable."""
        raw = span.get('duration_ms')
        if raw is None:
            return None
        try:
            value = float(raw)
        except (TypeError, ValueError):
            return None
        return None if value != value else value  # reject NaN

    # Simplify span data for display
    simplified_spans = []
    for span in spans:
        duration_ms = reported_duration(span)
        if duration_ms is None:
            start_time = read_timestamp(span, 'startTime')
            end_time = read_timestamp(span, 'endTime')
            duration_ms = (end_time - start_time) / 1000000 if (end_time and start_time) else 0

        # Handle span ID variations
        span_id = span.get('spanId') or span.get('span_id') or span.get('spanID') or 'N/A'
        parent_id = span.get('parentSpanId') or span.get('parent_span_id') or span.get('parentSpanID') or 'root'

        # status may be absent, None or a non-dict; fall back to UNKNOWN.
        status = span.get('status')
        status_code = status.get('code', 'UNKNOWN') if isinstance(status, dict) else 'UNKNOWN'

        simplified_spans.append({
            "Span ID": span_id,
            "Parent": parent_id,
            "Name": span.get('name', 'N/A'),
            "Kind": span.get('kind', 'N/A'),
            "Duration (ms)": round(duration_ms, 2),
            "Attributes": span.get('attributes', {}),
            "Status": status_code
        })

    return gr.JSON(value=simplified_spans, label="Span Details")


# GPU Metrics Visualization Functions

def extract_metrics_data(metrics_df):
    """
    Extract and prepare GPU metrics data for visualization

    Args:
        metrics_df: DataFrame with flat metrics structure (from HuggingFace dataset)
                   Expected columns: timestamp, gpu_utilization_percent, gpu_memory_used_mib,
                                   gpu_temperature_celsius, gpu_power_watts, co2_emissions_gco2e

    Returns:
        A new DataFrame (the input is never modified) with a fresh 0..n-1
        index. When a ``timestamp`` column exists it is converted to datetime
        and the rows are sorted by it; without that column the rows keep
        their original order. An empty DataFrame is returned for None/empty
        input.
    """
    if metrics_df is None or metrics_df.empty:
        return pd.DataFrame()

    # Make a copy to avoid modifying original
    df = metrics_df.copy()

    # Sorting is only meaningful (and only possible) when timestamps exist;
    # sorting unconditionally raised KeyError for frames without the column.
    if 'timestamp' in df.columns:
        if not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp')

    return df.reset_index(drop=True)


def create_gpu_summary_cards(df):
    """
    Create summary cards for GPU metrics

    Args:
        df: DataFrame with flat metrics structure (columns: gpu_utilization_percent, etc.)

    Returns:
        HTML string with five summary cards (GPU name, mean utilization,
        peak memory, peak temperature, peak CO2 with mean power), or a single
        "no metrics" notice when ``df`` is None or empty.

    Notes:
        * A metric whose column is missing, non-numeric or entirely NaN is
          shown as 0 rather than "nan".
        * The GPU name is HTML-escaped because it comes from dataset content.
        * Diagnostic ``[DEBUG ...]`` lines are printed to stdout.
    """
    if df is None or df.empty:
        return "<div style='padding: 20px; text-align: center;'>⚠️ No GPU metrics available (expected for API models)</div>"

    # Debug: Print DataFrame info
    print(f"[DEBUG create_gpu_summary_cards] DataFrame shape: {df.shape}")
    print(f"[DEBUG create_gpu_summary_cards] DataFrame columns: {list(df.columns)}")
    print(f"[DEBUG create_gpu_summary_cards] First row sample: {df.iloc[0].to_dict()}")
    print(f"[DEBUG create_gpu_summary_cards] Last row sample: {df.iloc[-1].to_dict()}")

    def aggregate(column, how):
        """Reduce a column with ``mean``/``max``; 0 when absent or without numeric data."""
        if column not in df.columns:
            return 0
        # Coerce so stray strings/None become NaN instead of raising.
        result = getattr(pd.to_numeric(df[column], errors='coerce'), how)()
        return 0 if pd.isna(result) else result

    # Use aggregate statistics (average/max) instead of just last row
    # This is more representative of overall GPU performance
    utilization = aggregate('gpu_utilization_percent', 'mean')
    memory_used = aggregate('gpu_memory_used_mib', 'max')
    temperature = aggregate('gpu_temperature_celsius', 'max')

    # CO2 emissions - use max value (cumulative total)
    co2_emissions = aggregate('co2_emissions_gco2e', 'max')

    power = aggregate('gpu_power_watts', 'mean')

    # Get GPU name from first row (it's constant across all rows)
    gpu_name = df['gpu_name'].iloc[0] if 'gpu_name' in df.columns else 'Unknown GPU'
    if pd.isna(gpu_name):
        gpu_name = 'Unknown GPU'
    # The name is interpolated into markup below, so neutralise any HTML in it.
    gpu_name = html.escape(str(gpu_name))

    print(f"[DEBUG create_gpu_summary_cards] Aggregated values - util: {utilization:.2f}, mem: {memory_used:.2f}, temp: {temperature:.2f}, co2: {co2_emissions:.4f}, gpu_name: {gpu_name}")

    # Get memory total from max value if available
    memory_total = aggregate('gpu_memory_total_mib', 'max')
    memory_percent = (memory_used / memory_total * 100) if memory_total > 0 else 0

    cards_html = f"""
    <div style="display: grid; grid-template-columns: repeat(5, 1fr); gap: 15px; margin: 20px 0;">
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; color: white; text-align: center;">
            <h3 style="margin: 0 0 10px 0; font-size: 1em;">GPU Name</h3>
            <h2 style="margin: 0; font-size: 1.2em;">{gpu_name}</h2>
        </div>
        <div style="background: linear-gradient(135deg, #fa709a 0%, #fee140 100%); padding: 20px; border-radius: 10px; color: white; text-align: center;">
            <h3 style="margin: 0 0 10px 0; font-size: 1em;">GPU Utilization</h3>
            <h2 style="margin: 0; font-size: 2em;">{utilization:.1f}%</h2>
        </div>
        <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); padding: 20px; border-radius: 10px; color: white; text-align: center;">
            <h3 style="margin: 0 0 10px 0; font-size: 1em;">GPU Memory</h3>
            <h2 style="margin: 0; font-size: 2em;">{memory_used:.0f} MiB</h2>
            <p style="margin: 5px 0 0 0; font-size: 0.8em; opacity: 0.9;">{memory_percent:.1f}% of {memory_total:.0f} MiB</p>
        </div>
        <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); padding: 20px; border-radius: 10px; color: white; text-align: center;">
            <h3 style="margin: 0 0 10px 0; font-size: 1em;">GPU Temperature</h3>
            <h2 style="margin: 0; font-size: 2em;">{temperature:.0f}°C</h2>
        </div>
        <div style="background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%); padding: 20px; border-radius: 10px; color: white; text-align: center;">
            <h3 style="margin: 0 0 10px 0; font-size: 1em;">CO2 Emissions</h3>
            <h2 style="margin: 0; font-size: 2em;">{co2_emissions:.4f} g</h2>
            <p style="margin: 5px 0 0 0; font-size: 0.8em; opacity: 0.9;">Power: {power:.1f} W</p>
        </div>
    </div>
    """

    return cards_html


def create_gpu_metrics_dashboard(metrics_df):
    """
    Build a 3x2 grid of GPU metric time series.

    Args:
        metrics_df: DataFrame with flat metrics structure (from HuggingFace dataset)

    Returns:
        Plotly figure with one line chart per available metric column. For
        None/empty input a figure carrying only a "no metrics" annotation is
        returned; None is returned if the prepared frame turns out empty.
    """
    if metrics_df is None or metrics_df.empty:
        # Placeholder figure that only carries an explanatory message.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No GPU metrics available (expected for API models)",
            xref="paper", yref="paper",
            x=0.5, y=0.5, xanchor='center', yanchor='middle',
            showarrow=False,
            font=dict(size=16)
        )
        return placeholder

    frame = extract_metrics_data(metrics_df)
    if frame.empty:
        return None

    # (column, panel title, colour) in row-major grid order:
    # Utilization, Memory / Temperature, Power / CO2, Power Cost.
    panels = [
        ('gpu_utilization_percent', 'GPU Utilization (%)', '#667eea'),
        ('gpu_memory_used_mib', 'GPU Memory (MiB)', '#f093fb'),
        ('gpu_temperature_celsius', 'GPU Temperature (°C)', '#4facfe'),
        ('gpu_power_watts', 'GPU Power (W)', '#FFE66D'),
        ('co2_emissions_gco2e', 'CO2 Emissions (g)', '#43e97b'),
        ('power_cost_usd', 'Power Cost (USD)', '#FF6B6B'),
    ]

    dashboard = make_subplots(
        rows=3, cols=2,
        subplot_titles=[panel_title for _, panel_title, _ in panels],
        vertical_spacing=0.10,
        horizontal_spacing=0.12,
        specs=[[{}, {}] for _ in range(3)]
    )

    for slot, (column, panel_title, shade) in enumerate(panels):
        if column not in frame.columns:
            continue  # leave the panel empty when the metric was not recorded
        zero_based_row, zero_based_col = divmod(slot, 2)
        series = go.Scatter(
            x=frame['timestamp'],
            y=frame[column],
            mode='lines+markers',
            name=panel_title,
            line=dict(color=shade, width=3),
            marker=dict(size=6, color=shade),
            hovertemplate=(
                f"<b>{panel_title}</b><br>"
                "Time: %{x}<br>Value: %{y:.2f}<br><extra></extra>"
            )
        )
        dashboard.add_trace(series, row=zero_based_row + 1, col=zero_based_col + 1)

    # Dashed reference line for total memory on the memory panel (row 1, col 2).
    if 'gpu_memory_total_mib' in frame.columns:
        memory_capacity = frame['gpu_memory_total_mib'].iloc[0]
        dashboard.add_hline(
            y=memory_capacity,
            line_dash="dash",
            line_color="gray",
            annotation_text=f"Total: {memory_capacity:.0f} MiB",
            annotation_position="right",
            row=1, col=2
        )

    dashboard.update_layout(
        title_text="GPU Metrics Over Time",
        height=900,
        template="plotly_white",
        showlegend=False,
        hovermode='x unified'
    )

    # Clock-style tick labels on every x axis.
    dashboard.update_xaxes(tickformat='%H:%M:%S')

    return dashboard