Spaces:
Running
Running
Mandark-droid
committed on
Commit
·
1fc3adb
1
Parent(s):
86ae373
Add enhanced leaderboard screen with data preparation
Browse files
- Create screens/leaderboard.py with prepare_leaderboard_data function
- Add get_run_id_from_selection for navigation preparation
- Cache full leaderboard dataframe for run detail navigation
- Import and use leaderboard utilities in app.py
- Prepare foundation for Screen 1 → Screen 3 navigation
- app.py +9 -11
- screens/__init__.py +1 -0
- screens/leaderboard.py +78 -0
app.py
CHANGED
|
@@ -16,6 +16,7 @@ from utils.auth import is_authenticated, get_user_info, create_login_button, cre
|
|
| 16 |
from utils.navigation import Navigator, Screen
|
| 17 |
from data_loader import create_data_loader_from_env
|
| 18 |
from mcp_client.sync_wrapper import get_sync_mcp_client
|
|
|
|
| 19 |
|
| 20 |
# Initialize
|
| 21 |
data_loader = create_data_loader_from_env()
|
|
@@ -24,10 +25,13 @@ mcp_client = get_sync_mcp_client()
|
|
| 24 |
|
| 25 |
# Global state
|
| 26 |
current_selected_run = None
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def load_leaderboard_view():
|
| 30 |
"""Load and display the leaderboard with MCP-powered insights"""
|
|
|
|
|
|
|
| 31 |
# OAuth disabled for now
|
| 32 |
# if not is_authenticated(token, profile):
|
| 33 |
# return "Please log in to view the leaderboard", ""
|
|
@@ -39,17 +43,11 @@ def load_leaderboard_view():
|
|
| 39 |
if leaderboard_df.empty:
|
| 40 |
return "No evaluation runs found in the leaderboard", ""
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
# Round numeric columns
|
| 49 |
-
display_df['success_rate'] = display_df['success_rate'].round(1)
|
| 50 |
-
display_df['avg_duration_ms'] = display_df['avg_duration_ms'].round(0)
|
| 51 |
-
display_df['total_cost_usd'] = display_df['total_cost_usd'].round(4)
|
| 52 |
-
display_df['co2_emissions_g'] = display_df['co2_emissions_g'].round(2)
|
| 53 |
|
| 54 |
# Get MCP-powered insights
|
| 55 |
try:
|
|
|
|
| 16 |
from utils.navigation import Navigator, Screen
|
| 17 |
from data_loader import create_data_loader_from_env
|
| 18 |
from mcp_client.sync_wrapper import get_sync_mcp_client
|
| 19 |
+
from screens.leaderboard import prepare_leaderboard_data, get_run_id_from_selection
|
| 20 |
|
| 21 |
# Initialize
|
| 22 |
data_loader = create_data_loader_from_env()
|
|
|
|
| 25 |
|
| 26 |
# Global state
|
| 27 |
current_selected_run = None
|
| 28 |
+
leaderboard_df_cache = None # Cache full leaderboard with run_id column
|
| 29 |
|
| 30 |
|
| 31 |
def load_leaderboard_view():
|
| 32 |
"""Load and display the leaderboard with MCP-powered insights"""
|
| 33 |
+
global leaderboard_df_cache
|
| 34 |
+
|
| 35 |
# OAuth disabled for now
|
| 36 |
# if not is_authenticated(token, profile):
|
| 37 |
# return "Please log in to view the leaderboard", ""
|
|
|
|
| 43 |
if leaderboard_df.empty:
|
| 44 |
return "No evaluation runs found in the leaderboard", ""
|
| 45 |
|
| 46 |
+
# Cache the full dataframe (with run_id) for navigation
|
| 47 |
+
leaderboard_df_cache = leaderboard_df.copy()
|
| 48 |
+
|
| 49 |
+
# Prepare dataframe for display (formatted, sorted)
|
| 50 |
+
display_df = prepare_leaderboard_data(leaderboard_df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Get MCP-powered insights
|
| 53 |
try:
|
screens/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Screens module
|
screens/leaderboard.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Leaderboard Screen for TraceMind-AI
|
| 3 |
+
Displays evaluation runs with MCP-powered insights
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import gradio as gr
|
| 8 |
+
from typing import Optional, Tuple
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def prepare_leaderboard_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare leaderboard dataframe for display

    Keeps the display columns that are present in ``df`` (in a fixed order),
    rounds the numeric metric columns, and sorts rows by ``success_rate``
    with the highest value first. The input dataframe is not modified.

    Args:
        df: Raw leaderboard dataframe from HuggingFace

    Returns:
        Formatted dataframe for display. An empty input yields an empty,
        column-less dataframe. Rows keep their original index labels, so
        ``result.index`` maps each displayed row back to its row in ``df``.
    """
    if df.empty:
        return pd.DataFrame()

    # Display columns in presentation order, mapped to the number of decimals
    # to round to (None means the column is shown as-is).
    column_precision = {
        'model': None,
        'agent_type': None,
        'provider': None,
        'success_rate': 1,
        'total_tests': None,
        'avg_duration_ms': 0,
        'total_cost_usd': 4,
        'co2_emissions_g': 2,
        'gpu_utilization_avg': 1,
        'submitted_by': None,
        'timestamp': None,
    }

    # Only include columns that exist
    available_columns = [col for col in column_precision if col in df.columns]
    display_df = df[available_columns].copy()

    # Round numeric columns
    for col in available_columns:
        decimals = column_precision[col]
        if decimals is not None:
            display_df[col] = display_df[col].round(decimals)

    # Sort by success rate descending by default. A stable sort keeps rows
    # with equal success rates in their input order instead of leaving the
    # tie order up to the sorting algorithm.
    if 'success_rate' in display_df.columns:
        display_df = display_df.sort_values(
            'success_rate', ascending=False, kind='mergesort'
        )

    return display_df
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_run_id_from_selection(
    df: pd.DataFrame,
    evt: gr.SelectData
) -> Optional[str]:
    """
    Extract run_id from a selected row in the dataframe

    The lookup is positional (``df.iloc``): the row index carried by the
    event is used as a 0-based position into ``df``.
    NOTE(review): this means ``df`` must be in the same row order as the
    table the user clicked on - confirm against the caller.

    Args:
        df: Full leaderboard dataframe (with run_id column). ``None`` is
            tolerated and treated like an empty dataframe.
        evt: Gradio SelectData event from dataframe click

    Returns:
        run_id string, or None when there is no dataframe/event, the row
        index is missing or out of range, or ``df`` has no ``run_id`` column
    """
    # Check for None before touching df.empty so a not-yet-populated
    # dataframe does not raise AttributeError.
    if df is None or evt is None or df.empty:
        return None

    try:
        selected = evt.index
        # evt.index is (row, col) for a dataframe click; also accept a bare
        # row number.
        row_index = selected[0] if isinstance(selected, (list, tuple)) else selected
        # Reject negative positions explicitly: iloc would otherwise count
        # from the end and return an unrelated row.
        if 0 <= row_index < len(df):
            return df.iloc[row_index]['run_id']
    except (IndexError, KeyError, AttributeError, TypeError):
        # Malformed event payload or missing run_id column: no selection
        return None

    return None
|