"""Gradio front end for the SAGE-Bench leaderboard.

At import time this module loads the SAGE results (when the ``src.populate``
helpers can be imported), converts them into a pandas DataFrame and builds a
``gr.Blocks`` app named ``demo`` containing the citation box, the leaderboard
table with a refresh button, and a placeholder submission form.
"""
import datetime
import json
import os
import traceback
from typing import Optional

import gradio as gr
import numpy as np
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)

# SAGE specific imports - use populate module to avoid transformers dependency
try:
    from src.populate import process_sage_results_for_leaderboard, get_sage_leaderboard_df

    SAGE_MODULES_AVAILABLE = process_sage_results_for_leaderboard is not None
    if SAGE_MODULES_AVAILABLE:
        print("✅ SAGE modules loaded successfully")
    else:
        print("❌ SAGE modules not available")
except ImportError as e:
    print(f"Warning: SAGE modules not available: {e}")
    SAGE_MODULES_AVAILABLE = False

# Configuration
TOKEN = os.environ.get("HF_TOKEN", None)
OWNER = "opencompass"

# Score columns shown in the table, mapped to the key read from ``result.results``.
_SCORE_COLUMNS = {
    "Overall (%)": "sage_overall",
    "Mathematics (%)": "sage_math",
    "Physics (%)": "sage_physics",
    "Chemistry (%)": "sage_chemistry",
    "Biology (%)": "sage_biology",
    "Earth Science (%)": "sage_earth_science",
    "Astronomy (%)": "sage_astronomy",
}

# Full, ordered column list of the leaderboard table (matches COLUMN_TYPES below).
LEADERBOARD_COLUMNS = ["Model", "Organization", *_SCORE_COLUMNS, "Submission Date"]


# NOTE(review): the HTML markup inside the four formatting helpers below was
# missing from the file (the literals were split across raw newlines, which is
# a syntax error). The tags and inline styles are a reconstruction following
# the common Hugging Face leaderboard template - confirm the exact styling.
def format_error(msg: str) -> str:
    """Wrap ``msg`` in a red, centered HTML paragraph."""
    return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"


def format_warning(msg: str) -> str:
    """Wrap ``msg`` in an orange, centered HTML paragraph."""
    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{msg}</p>"


def format_log(msg: str) -> str:
    """Wrap ``msg`` in a green, centered HTML paragraph."""
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{msg}</p>"


def model_hyperlink(link: Optional[str], model_name: str) -> str:
    """Return ``model_name`` as an HTML link when ``link`` is an http(s) URL.

    Args:
        link: Target URL; may be empty or ``None``.
        model_name: Text to display.

    Returns:
        An ``<a>`` element pointing at ``link`` if it starts with ``"http"``,
        otherwise ``model_name`` unchanged.
    """
    if link and link.startswith("http"):
        return f'<a target="_blank" href="{link}">{model_name}</a>'
    return model_name


def _empty_leaderboard() -> pd.DataFrame:
    """Return a zero-row frame that still carries the leaderboard headers."""
    return pd.DataFrame(columns=LEADERBOARD_COLUMNS)


def _display_name(submission_id: str) -> str:
    """Derive the displayed model name from a submission id.

    Ids starting with ``"initial_"`` keep only the part after the second
    underscore, with remaining underscores shown as spaces; any other id is
    used verbatim.
    """
    if submission_id.startswith("initial_"):
        return submission_id.split("_", 2)[-1].replace("_", " ")
    return submission_id


def get_leaderboard_dataframe() -> pd.DataFrame:
    """Generate leaderboard dataframe from SAGE results.

    Returns:
        A DataFrame with the columns in ``LEADERBOARD_COLUMNS``, sorted by
        "Overall (%)" in descending order. When the SAGE modules are
        unavailable, no results exist, or loading raises, an empty frame with
        the same columns is returned so the table keeps its headers.
    """
    print("🔄 Loading SAGE leaderboard data...")

    if not SAGE_MODULES_AVAILABLE:
        print("❌ SAGE modules not available")
        return _empty_leaderboard()

    try:
        sage_results = process_sage_results_for_leaderboard()
        print(f"📊 Loaded {len(sage_results)} SAGE results")

        if not sage_results:
            print("❌ No SAGE results found")
            return _empty_leaderboard()

        # Convert to leaderboard format
        leaderboard_data = []
        for result in sage_results:
            # Create model hyperlink (for now just display name)
            model_display = f"**{_display_name(result.submission_id)}**"

            row = {"Model": model_display, "Organization": result.organization}
            # Missing scores are shown as 0.
            row.update(
                (column, result.results.get(key, 0))
                for column, key in _SCORE_COLUMNS.items()
            )
            row["Submission Date"] = result.submitted_time
            leaderboard_data.append(row)

        df = pd.DataFrame(leaderboard_data, columns=LEADERBOARD_COLUMNS)
        if not df.empty:
            df = df.sort_values(by=["Overall (%)"], ascending=False)

        print(f"✅ Generated dataframe with {len(df)} rows")
        return df

    except Exception as e:
        # Top-level boundary: a broken results file must not take the app
        # down, so log the full traceback and fall back to an empty table.
        print(f"❌ Error generating leaderboard dataframe: {e}")
        traceback.print_exc()
        return _empty_leaderboard()


def refresh_leaderboard() -> pd.DataFrame:
    """Refresh the leaderboard data"""
    print("🔄 Refreshing leaderboard data...")
    return get_leaderboard_dataframe()


def _format_result_count(df: pd.DataFrame) -> str:
    """Return the Markdown line announcing how many rows ``df`` has."""
    return f"📊 **Showing {len(df)} results**"


def _refresh_leaderboard_and_count():
    """Reload the leaderboard and return ``(dataframe, count_markdown)``.

    Used by the refresh button so the "Showing N results" line is updated
    together with the table instead of staying at its start-up value.
    """
    df = refresh_leaderboard()
    return df, _format_result_count(df)


# Initialize data
print("🚀 Initializing SAGE-Bench leaderboard...")
leaderboard_df = get_leaderboard_dataframe()
print(f"📈 Leaderboard initialized with {len(leaderboard_df)} rows")

# Define column types for the dataframe
COLUMN_TYPES = ["markdown", "str", "number", "number", "number", "number", "number", "number", "number", "str"]

# Create Gradio interface
demo = gr.Blocks(css="""
.markdown-text {
    font-size: 16px !important;
}
#citation-button {
    font-family: monospace;
}
""")

# NOTE(review): the file's indentation was lost, so the nesting of the layout
# below is a reconstruction from the section comments - confirm against the
# intended page structure.
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
                max_lines=10,
                interactive=False
            )

    # Main leaderboard table
    gr.Markdown("## 🏆 SAGE Benchmark Results", elem_classes="markdown-text")

    # Debug information
    results_count = gr.Markdown(_format_result_count(leaderboard_df))

    leaderboard_table = gr.Dataframe(
        value=leaderboard_df,
        datatype=COLUMN_TYPES,
        interactive=False,
        wrap=True,
        column_widths=["25%", "15%", "8%", "8%", "8%", "8%", "8%", "8%", "8%", "12%"]
    )

    # Refresh button
    refresh_button = gr.Button("🔄 Refresh Leaderboard")
    refresh_button.click(
        _refresh_leaderboard_and_count,
        inputs=[],
        outputs=[leaderboard_table, results_count]
    )

    # Submission section
    with gr.Accordion("📊 Submit Your SAGE Results", open=False):
        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

        with gr.Row():
            with gr.Column():
                org_textbox = gr.Textbox(label="Organization Name", placeholder="Your Organization")
                email_textbox = gr.Textbox(label="Contact Email", placeholder="contact@example.com")
            with gr.Column():
                file_upload = gr.File(
                    label="Upload SAGE Results (JSON)",
                    file_types=[".json"],
                    type="filepath"
                )

        submit_button = gr.Button("Submit Results", variant="primary")
        submission_result = gr.HTML()

        # Simplified submission handling
        # NOTE(review): the line break inside this message was a raw newline in
        # the literal; "<br>" is assumed because the output is a gr.HTML.
        submit_button.click(
            lambda: format_warning(
                "📋 Submission feature coming soon! <br>"
                "For now, please contact administrators directly."
            ),
            inputs=[],
            outputs=[submission_result]
        )

# Launch the app
if __name__ == "__main__":
    demo.launch(debug=True)