# SAGE-Bench / app.py
# sudanl
# fix: resolve the leaderboard loading failure caused by the transformers dependency
# ca09cf3
# raw
# history blame
# 6.74 kB
import os
import json
import datetime
import gradio as gr
import pandas as pd
import numpy as np
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
EVAL_COLS,
EVAL_TYPES,
AutoEvalColumn,
ModelType,
fields,
WeightType,
Precision
)
# SAGE-specific imports: go through src.populate so that loading this module
# does not pull in the transformers dependency.
try:
    from src.populate import process_sage_results_for_leaderboard, get_sage_leaderboard_df
except ImportError as e:
    print(f"Warning: SAGE modules not available: {e}")
    # Bind the names anyway so that a later reference sees None instead of
    # raising NameError; availability is signalled by the flag below.
    process_sage_results_for_leaderboard = None
    get_sage_leaderboard_df = None
    SAGE_MODULES_AVAILABLE = False
else:
    # The import can succeed while the loader name is still None; that case
    # is treated as "not available" as well.
    SAGE_MODULES_AVAILABLE = process_sage_results_for_leaderboard is not None
    if SAGE_MODULES_AVAILABLE:
        print("✅ SAGE modules loaded successfully")
    else:
        print("❌ SAGE modules not available")
# Configuration
# Value of the HF_TOKEN environment variable, or None when it is not set.
TOKEN = os.getenv("HF_TOKEN")
# Hard-coded owner identifier (presumably the hosting organization — confirm).
OWNER = "opencompass"
def _format_message(msg, color):
    """Wrap *msg* in a centered, 20px HTML paragraph rendered in *color*.

    The message is interpolated as-is; no HTML escaping is applied.
    """
    return f"<p style='color: {color}; font-size: 20px; text-align: center;'>{msg}</p>"


def format_error(msg):
    """Return *msg* as a red, centered HTML paragraph."""
    return _format_message(msg, "red")


def format_warning(msg):
    """Return *msg* as an orange, centered HTML paragraph."""
    return _format_message(msg, "orange")


def format_log(msg):
    """Return *msg* as a green, centered HTML paragraph."""
    return _format_message(msg, "green")
def model_hyperlink(link, model_name):
    """Return an HTML anchor for *model_name*, or the bare name if no usable link.

    Args:
        link: Target URL. Only values starting with "http" are linked; None,
            an empty string, or any other value falls back to the plain name.
        model_name: Text shown for the model; inserted into the markup
            unchanged.

    Returns:
        Anchor markup that opens in a new tab, or ``model_name`` itself.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if not (link and link.startswith("http")):
        return model_name
    # Escape the URL for attribute context so that quotes or ampersands in it
    # cannot terminate the href attribute or corrupt the surrounding markup.
    href = escape(link, quote=True)
    return f'<a target="_blank" href="{href}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
# Maps each score column of the table to the key it is read from in a
# result's ``results`` mapping.
_SCORE_COLUMNS = {
    "Overall (%)": "sage_overall",
    "Mathematics (%)": "sage_math",
    "Physics (%)": "sage_physics",
    "Chemistry (%)": "sage_chemistry",
    "Biology (%)": "sage_biology",
    "Earth Science (%)": "sage_earth_science",
    "Astronomy (%)": "sage_astronomy",
}

# Column order of the leaderboard table, used for populated and empty frames.
_LEADERBOARD_COLUMNS = ["Model", "Organization", *_SCORE_COLUMNS, "Submission Date"]


def _empty_leaderboard():
    """Return a zero-row frame that still carries the leaderboard columns."""
    return pd.DataFrame(columns=_LEADERBOARD_COLUMNS)


def _display_name(submission_id):
    """Derive the model name shown in the table from a submission id."""
    if submission_id.startswith("initial_"):
        # Keep what follows the second underscore (or the first, if there is
        # only one) and show the remaining underscores as spaces.
        return submission_id.split("_", 2)[-1].replace("_", " ")
    return submission_id


def _result_to_row(result):
    """Convert one SAGE result object into a leaderboard row dict."""
    # The model is rendered as bold markdown; no hyperlink is attached yet.
    row = {
        "Model": f"**{_display_name(result.submission_id)}**",
        "Organization": result.organization,
    }
    # Scores missing from the result are reported as 0.
    for column, key in _SCORE_COLUMNS.items():
        row[column] = result.results.get(key, 0)
    row["Submission Date"] = result.submitted_time
    return row


def get_leaderboard_dataframe():
    """Generate the leaderboard dataframe from SAGE results.

    Each result is expected to expose ``submission_id``, ``organization``,
    ``results`` (a mapping of score keys) and ``submitted_time``.

    Returns:
        A DataFrame with one row per result, sorted by "Overall (%)" in
        descending order. When the SAGE modules are unavailable, no results
        exist, or loading fails, a zero-row DataFrame with the same columns
        is returned instead; this function does not raise on load errors.
    """
    print("🔄 Loading SAGE leaderboard data...")
    if not SAGE_MODULES_AVAILABLE:
        print("❌ SAGE modules not available")
        return _empty_leaderboard()
    try:
        sage_results = process_sage_results_for_leaderboard()
        # Check for "nothing loaded" before calling len(), so a None result
        # is reported as missing data rather than as an unexpected error.
        if not sage_results:
            print("❌ No SAGE results found")
            return _empty_leaderboard()
        print(f"📊 Loaded {len(sage_results)} SAGE results")
        # Convert to leaderboard format
        rows = [_result_to_row(result) for result in sage_results]
        df = pd.DataFrame(rows, columns=_LEADERBOARD_COLUMNS)
        df = df.sort_values(by=["Overall (%)"], ascending=False)
        print(f"✅ Generated dataframe with {len(df)} rows")
        return df
    except Exception as e:
        # Deliberate best-effort boundary: the UI should still come up with an
        # empty table, so the failure is logged with its traceback and swallowed.
        print(f"❌ Error generating leaderboard dataframe: {e}")
        import traceback
        traceback.print_exc()
        return _empty_leaderboard()
def refresh_leaderboard():
    """Refresh the leaderboard data.

    Logs the refresh and returns a freshly built dataframe from
    ``get_leaderboard_dataframe``; wired to the refresh button as its callback.
    """
    print("🔄 Refreshing leaderboard data...")
    return get_leaderboard_dataframe()
# Initialize data
# Build the leaderboard once at import time. This snapshot seeds the table and
# the result count that are rendered when the UI below is constructed.
print("🚀 Initializing SAGE-Bench leaderboard...")
leaderboard_df = get_leaderboard_dataframe()
print(f"📈 Leaderboard initialized with {len(leaderboard_df)} rows")
# Gradio datatype for each table column, in column order: a markdown model
# name, the organization, seven numeric scores, and the submission date.
COLUMN_TYPES = ["markdown", "str"] + ["number"] * 7 + ["str"]
def _results_count_text(df):
    """Return the markdown line reporting how many rows *df* holds."""
    return f"📊 **Showing {len(df)} results**"


def _refresh_view():
    """Reload the leaderboard and return the new table value with its count line."""
    refreshed = refresh_leaderboard()
    return refreshed, _results_count_text(refreshed)


# Create Gradio interface
# NOTE(review): the container nesting below was reconstructed from statement
# order because the indentation had been lost — confirm the intended layout.
demo = gr.Blocks(css="""
.markdown-text {
font-size: 16px !important;
}
#citation-button {
font-family: monospace;
}
""")

with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
                max_lines=10,
                interactive=False
            )

    # Main leaderboard table
    gr.Markdown("## 🏆 SAGE Benchmark Results", elem_classes="markdown-text")

    # Debug information; kept as a component so a refresh can update it.
    results_count = gr.Markdown(_results_count_text(leaderboard_df))

    leaderboard_table = gr.Dataframe(
        value=leaderboard_df,
        datatype=COLUMN_TYPES,
        interactive=False,
        wrap=True,
        column_widths=["25%", "15%", "8%", "8%", "8%", "8%", "8%", "8%", "8%", "12%"]
    )

    # Refresh button: reloads the data and updates both the table and the
    # result count, so the count cannot go stale after a refresh.
    refresh_button = gr.Button("🔄 Refresh Leaderboard")
    refresh_button.click(
        _refresh_view,
        inputs=[],
        outputs=[leaderboard_table, results_count]
    )

    # Submission section
    with gr.Accordion("📊 Submit Your SAGE Results", open=False):
        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

        with gr.Row():
            with gr.Column():
                org_textbox = gr.Textbox(label="Organization Name", placeholder="Your Organization")
                email_textbox = gr.Textbox(label="Contact Email", placeholder="contact@example.com")
            with gr.Column():
                file_upload = gr.File(
                    label="Upload SAGE Results (JSON)",
                    file_types=[".json"],
                    type="filepath"
                )

        submit_button = gr.Button("Submit Results", variant="primary")
        submission_result = gr.HTML()

        # Simplified submission handling: the form inputs are not read yet;
        # clicking only shows a notice.
        submit_button.click(
            lambda: format_warning("📋 Submission feature coming soon! For now, please contact administrators directly."),
            inputs=[],
            outputs=[submission_result]
        )
# Launch the app
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script, so that
    # importing the module does not launch it.
    demo.launch(debug=True)