# SAGE-Bench / src/populate.py
# "pangjh3"
# modified: app.py
# 85b2181
import json
import os
import pandas as pd
from typing import List
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn
# Import SAGE-specific modules - avoid transformers dependency
process_sage_results_for_leaderboard = None
try:
# Import SAGE modules without triggering transformers dependency
import sys
import os
import json
from dataclasses import dataclass
from typing import Dict, List, Any
import numpy as np
# Copy SAGEResult class locally to avoid import issues
@dataclass
class SAGEResult:
    """A single evaluated SAGE submission, convertible to a leaderboard row.

    Callers may attach an extra ``model_name`` attribute to an instance after
    construction; when it is present, ``to_dict`` uses it instead of deriving
    a name from ``submission_id``.
    """

    # For ids starting with "oss_", everything after the second "_" is
    # treated as the model name by ``to_dict``.
    submission_id: str
    organization: str
    email: str
    tokens: str
    accuracy: float
    mg_pass_2: float
    mg_pass_4: float
    # Timestamp string such as "2025-09-09T14:37:23.616340" or
    # "2025-09-09 14:37:23"; only the part before "T" / the first space is shown.
    submitted_time: str
    status: str = "EVALUATED"

    def to_dict(self) -> Dict[str, Any]:
        """Convert the result into a row dict for the leaderboard dataframe.

        Returns:
            A dict with the keys "Model", "Organization", "Accuracy (%)",
            "mG-Pass@2 (%)", "mG-Pass@4 (%)" and "Submission Date". The model
            name is wrapped in ``**`` markers, the three scores are rounded to
            two decimals, and a string timestamp is reduced to its date part.
        """
        # Prefer an explicitly attached model name, then fall back to the id.
        if hasattr(self, "model_name"):
            model_name = self.model_name
        elif self.submission_id.startswith("oss_"):
            # Keep what follows the second "_" and turn underscores into spaces.
            model_name = self.submission_id.split("_", 2)[-1].replace("_", " ")
        else:
            model_name = self.submission_id

        # Keep only the date part of a string timestamp. These string
        # operations cannot raise, so no exception handler is needed;
        # non-string values are passed through untouched.
        submitted = self.submitted_time
        if isinstance(submitted, str):
            separator = "T" if "T" in submitted else " "
            submitted = submitted.split(separator)[0]

        return {
            "Model": f"**{model_name}**",
            "Organization": self.organization,
            "Accuracy (%)": round(self.accuracy, 2),
            "mG-Pass@2 (%)": round(self.mg_pass_2, 2),
            "mG-Pass@4 (%)": round(self.mg_pass_4, 2),
            "Submission Date": submitted,
        }
def load_initial_sage_results_from_oss() -> List[SAGEResult]:
    """Load the initial SAGE leaderboard entries from OSS.

    Every record returned by
    ``OSSLeaderboardManager.load_leaderboard_from_oss`` is converted into a
    ``SAGEResult``. A record that cannot be converted (missing
    ``model_name`` / ``organization`` / ``submitted_time``, or values of an
    unexpected type) is reported and skipped, so one bad record does not
    discard the records that follow it.

    Returns:
        The converted results. The list is empty when OSS holds no data or
        when loading fails; failures are printed, never raised.
    """
    sage_results = []

    try:
        # Import the OSS leaderboard manager here, inside the guarded region,
        # so an import failure is reported like any other load failure.
        from src.oss.oss_leaderboard_manager import OSSLeaderboardManager

        # Load the leaderboard data from OSS.
        leaderboard_manager = OSSLeaderboardManager()
        initial_data = leaderboard_manager.load_leaderboard_from_oss()

        if initial_data:
            print(f"✅ 从OSS加载了 {len(initial_data)} 条排行榜记录")

            for i, entry in enumerate(initial_data):
                try:
                    model_name = entry['model_name']
                    organization = entry['organization']

                    # Build the placeholder address only when the record has
                    # no contact e-mail of its own.
                    if 'contact_email' in entry:
                        email = entry['contact_email']
                    else:
                        email = f"contact@{organization.lower().replace(' ', '')}.com"

                    sage_result = SAGEResult(
                        submission_id=f"oss_{i:02d}_{model_name.replace(' ', '_').replace('-', '_')}",
                        organization=organization,
                        email=email,
                        tokens=entry.get('tokens', 'N/A'),
                        accuracy=entry.get('accuracy', 0.0),
                        mg_pass_2=entry.get('mg_pass_2', 0.0),
                        mg_pass_4=entry.get('mg_pass_4', 0.0),
                        submitted_time=entry["submitted_time"],
                        status="EVALUATED"
                    )
                except (KeyError, TypeError, AttributeError) as e:
                    # Malformed record: report it and carry on with the rest.
                    print(f"⚠️ Skipping malformed leaderboard entry #{i}: {e!r}")
                    continue

                # Keep the original model name as an extra attribute for display.
                sage_result.model_name = model_name
                sage_results.append(sage_result)
        else:
            print("⚠️ OSS中未找到排行榜数据")

    except Exception as e:
        print(f"❌ 从OSS加载排行榜失败: {e}")

    return sage_results
def process_sage_results_for_leaderboard_oss() -> List[SAGEResult]:
    """Return every SAGE result available from OSS.

    Delegates entirely to ``load_initial_sage_results_from_oss``.
    """
    oss_results = load_initial_sage_results_from_oss()
    return oss_results
# Set the function
process_sage_results_for_leaderboard = process_sage_results_for_leaderboard_oss
except ImportError as e:
print(f"Could not set up SAGE results processing: {e}")
process_sage_results_for_leaderboard = None
def get_sage_leaderboard_df() -> pd.DataFrame:
    """Build the leaderboard dataframe from the SAGE evaluation results.

    Rows come from ``to_dict()`` of every result returned by the module-level
    ``process_sage_results_for_leaderboard`` callable. For each
    Model + Organization pair only the row with the greatest
    "Submission Date" is kept; the remaining rows are ordered by
    "Accuracy (%)", highest first, and the score columns are rounded to two
    decimals.

    Returns:
        The leaderboard dataframe, or an empty ``pd.DataFrame()`` when result
        processing is unavailable or yields no rows.
    """
    if process_sage_results_for_leaderboard is None:
        return pd.DataFrame()

    sage_results = process_sage_results_for_leaderboard()
    rows = [result.to_dict() for result in sage_results]
    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame.from_records(rows)

    # De-duplicate: newest submission first, then keep the first row of each
    # Model + Organization pair. The sort is stable so that rows sharing a
    # date keep their input order, which makes the surviving row deterministic.
    if {"Model", "Organization", "Submission Date"}.issubset(df.columns):
        df = df.sort_values(by=["Submission Date"], ascending=False, kind="mergesort")
        df = df.drop_duplicates(subset=["Model", "Organization"], keep="first")

    # Rank by accuracy, best first; a stable sort keeps tied rows in a
    # reproducible order.
    if "Accuracy (%)" in df.columns:
        df = df.sort_values(by=["Accuracy (%)"], ascending=False, kind="mergesort")

    # Round the score columns that are present.
    for col in ("Accuracy (%)", "mG-Pass@2 (%)", "mG-Pass@4 (%)"):
        if col in df.columns:
            df[col] = df[col].round(2)

    return df