Mandark-droid committed on
Commit
1fc3adb
·
1 Parent(s): 86ae373

Add enhanced leaderboard screen with data preparation

Browse files

- Create screens/leaderboard.py with prepare_leaderboard_data function
- Add get_run_id_from_selection for navigation preparation
- Cache full leaderboard dataframe for run detail navigation
- Import and use leaderboard utilities in app.py
- Prepare foundation for Screen 1 → Screen 3 navigation

Files changed (3) hide show
  1. app.py +9 -11
  2. screens/__init__.py +1 -0
  3. screens/leaderboard.py +78 -0
app.py CHANGED
@@ -16,6 +16,7 @@ from utils.auth import is_authenticated, get_user_info, create_login_button, cre
16
  from utils.navigation import Navigator, Screen
17
  from data_loader import create_data_loader_from_env
18
  from mcp_client.sync_wrapper import get_sync_mcp_client
 
19
 
20
  # Initialize
21
  data_loader = create_data_loader_from_env()
@@ -24,10 +25,13 @@ mcp_client = get_sync_mcp_client()
24
 
25
  # Global state
26
  current_selected_run = None
 
27
 
28
 
29
  def load_leaderboard_view():
30
  """Load and display the leaderboard with MCP-powered insights"""
 
 
31
  # OAuth disabled for now
32
  # if not is_authenticated(token, profile):
33
  # return "Please log in to view the leaderboard", ""
@@ -39,17 +43,11 @@ def load_leaderboard_view():
39
  if leaderboard_df.empty:
40
  return "No evaluation runs found in the leaderboard", ""
41
 
42
- # Format dataframe for display
43
- display_df = leaderboard_df[[
44
- 'model', 'agent_type', 'success_rate', 'total_tests',
45
- 'avg_duration_ms', 'total_cost_usd', 'co2_emissions_g'
46
- ]].copy()
47
-
48
- # Round numeric columns
49
- display_df['success_rate'] = display_df['success_rate'].round(1)
50
- display_df['avg_duration_ms'] = display_df['avg_duration_ms'].round(0)
51
- display_df['total_cost_usd'] = display_df['total_cost_usd'].round(4)
52
- display_df['co2_emissions_g'] = display_df['co2_emissions_g'].round(2)
53
 
54
  # Get MCP-powered insights
55
  try:
 
16
  from utils.navigation import Navigator, Screen
17
  from data_loader import create_data_loader_from_env
18
  from mcp_client.sync_wrapper import get_sync_mcp_client
19
+ from screens.leaderboard import prepare_leaderboard_data, get_run_id_from_selection
20
 
21
  # Initialize
22
  data_loader = create_data_loader_from_env()
 
25
 
26
  # Global state
27
  current_selected_run = None
28
+ leaderboard_df_cache = None # Cache full leaderboard with run_id column
29
 
30
 
31
  def load_leaderboard_view():
32
  """Load and display the leaderboard with MCP-powered insights"""
33
+ global leaderboard_df_cache
34
+
35
  # OAuth disabled for now
36
  # if not is_authenticated(token, profile):
37
  # return "Please log in to view the leaderboard", ""
 
43
  if leaderboard_df.empty:
44
  return "No evaluation runs found in the leaderboard", ""
45
 
46
+ # Cache the full dataframe (with run_id) for navigation
47
+ leaderboard_df_cache = leaderboard_df.copy()
48
+
49
+ # Prepare dataframe for display (formatted, sorted)
50
+ display_df = prepare_leaderboard_data(leaderboard_df)
 
 
 
 
 
 
51
 
52
  # Get MCP-powered insights
53
  try:
screens/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Screens module
screens/leaderboard.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Leaderboard Screen for TraceMind-AI
3
+ Displays evaluation runs with MCP-powered insights
4
+ """
5
+
6
+ import pandas as pd
7
+ import gradio as gr
8
+ from typing import Optional, Tuple
9
+
10
+
11
def prepare_leaderboard_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Prepare leaderboard dataframe for display.

    Selects the display columns (silently skipping any that are missing
    from the input), rounds the numeric metric columns, and sorts by
    success rate so the best performers appear first.

    Args:
        df: Raw leaderboard dataframe from HuggingFace

    Returns:
        Formatted dataframe for display; empty DataFrame if input is empty
    """
    if df.empty:
        return pd.DataFrame()

    # Desired column order for display.
    display_columns = [
        'model', 'agent_type', 'provider', 'success_rate',
        'total_tests', 'avg_duration_ms', 'total_cost_usd',
        'co2_emissions_g', 'gpu_utilization_avg', 'submitted_by', 'timestamp'
    ]

    # Decimal places per numeric metric — table-driven instead of one
    # hand-written if/round block per column.
    round_decimals = {
        'success_rate': 1,
        'avg_duration_ms': 0,
        'total_cost_usd': 4,
        'co2_emissions_g': 2,
        'gpu_utilization_avg': 1,
    }

    # Only include columns that actually exist in the input.
    available_columns = [col for col in display_columns if col in df.columns]
    display_df = df[available_columns].copy()

    # DataFrame.round accepts a dict and ignores keys that are not
    # present as columns, so no per-column existence checks are needed.
    display_df = display_df.round(round_decimals)

    # Sort by success rate descending by default.
    if 'success_rate' in display_df.columns:
        display_df = display_df.sort_values('success_rate', ascending=False)

    return display_df
52
+
53
+
54
def get_run_id_from_selection(
    df: pd.DataFrame,
    evt: gr.SelectData
) -> Optional[str]:
    """
    Extract run_id from a selected row in the dataframe.

    Args:
        df: Full leaderboard dataframe (must contain a 'run_id' column)
        evt: Gradio SelectData event from a dataframe click

    Returns:
        run_id string for the clicked row, or None when there is no
        selection, the index is out of range, or 'run_id' is missing.
    """
    if df.empty or evt is None:
        return None

    try:
        row_index = evt.index[0]  # evt.index is (row, col)
        # Reject negative indices explicitly: .iloc would otherwise wrap
        # around (-1 -> last row), returning a row the user never clicked.
        if 0 <= row_index < len(df):
            return df.iloc[row_index]['run_id']
    except (IndexError, KeyError, AttributeError, TypeError):
        # TypeError covers evt.index arriving as a bare int rather than
        # a (row, col) pair — treat as no usable selection.
        return None

    return None