Spaces:
Sleeping
Sleeping
File size: 6,173 Bytes
f9e337d dbc2e78 f9e337d b1a0fd2 f9e337d ca09cf3 2086543 ca09cf3 326dd8e ca09cf3 326dd8e ca09cf3 326dd8e ca09cf3 326dd8e ca09cf3 85b2181 ca09cf3 326dd8e 85b2181 ca09cf3 b1a0fd2 7844386 b1a0fd2 7844386 326dd8e 7844386 326dd8e 7844386 326dd8e 7844386 b1a0fd2 7844386 b1a0fd2 ca09cf3 b1a0fd2 ca09cf3 b1a0fd2 ca09cf3 2086543 f9e337d 326dd8e 2086543 85b2181 326dd8e b1a0fd2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import json
import os
import pandas as pd
from typing import List
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn
# Import SAGE-specific modules - avoid transformers dependency
process_sage_results_for_leaderboard = None
try:
# Import SAGE modules without triggering transformers dependency
import sys
import os
import json
from dataclasses import dataclass
from typing import Dict, List, Any
import numpy as np
# Copy SAGEResult class locally to avoid import issues
@dataclass
class SAGEResult:
    """One evaluated SAGE submission, convertible to a leaderboard row.

    Callers may attach an extra ``model_name`` attribute after construction;
    when it is present and non-empty, ``to_dict`` uses it verbatim as the
    displayed model name instead of deriving one from ``submission_id``.
    """

    submission_id: str
    organization: str
    email: str
    tokens: str
    accuracy: float
    mg_pass_2: float
    mg_pass_4: float
    submitted_time: str
    status: str = "EVALUATED"

    def _display_model_name(self) -> str:
        """Return the model name to show, without any markdown decoration."""
        # ``model_name`` is not a declared field, so look it up defensively and
        # ignore empty/None values rather than rendering "**None**".
        explicit_name = getattr(self, "model_name", None)
        if explicit_name:
            return explicit_name
        if self.submission_id.startswith("oss_"):
            # "oss_<index>_<name_with_underscores>" -> "<name with spaces>"
            return self.submission_id.split("_", 2)[-1].replace("_", " ")
        return self.submission_id

    def _submission_date(self):
        """Return ``submitted_time`` reduced to its date part when possible.

        Strings are cut at an ISO-style ``T`` separator (only when it sits
        between two digits) or else at the first space. Objects exposing
        ``strftime`` are formatted as ``YYYY-MM-DD``. Anything else is
        returned unchanged.
        """
        raw = self.submitted_time
        if isinstance(raw, str):
            head, separator, tail = raw.partition("T")
            # Only treat "T" as the date/time separator when it is flanked by
            # digits; a bare "'T' in raw" test also fires on "UTC" or "Tue".
            if separator and head[-1:].isdigit() and tail[:1].isdigit():
                return head
            # Fall back to the whole string if the first token is empty.
            return raw.split(" ")[0] or raw
        formatter = getattr(raw, "strftime", None)
        if callable(formatter):
            return formatter("%Y-%m-%d")
        return raw

    def to_dict(self) -> dict:
        """Converts the SAGE Result to a dict compatible with our dataframe display"""
        return {
            "Model": f"**{self._display_model_name()}**",
            "Organization": self.organization,
            "Accuracy (%)": round(self.accuracy, 2),
            "mG-Pass@2 (%)": round(self.mg_pass_2, 2),
            "mG-Pass@4 (%)": round(self.mg_pass_4, 2),
            "Submission Date": self._submission_date(),
        }
def load_initial_sage_results_from_oss() -> List[SAGEResult]:
    """Load initial SAGE results from OSS.

    Returns:
        One ``SAGEResult`` per usable leaderboard entry, in the order the
        entries are returned by
        ``OSSLeaderboardManager.load_leaderboard_from_oss()``. The list is
        empty when nothing could be loaded.

    Any failure while importing or querying the OSS manager is printed and
    swallowed. A malformed entry is reported and skipped on its own, so it no
    longer discards the entries that come after it.
    """
    sage_results = []

    try:
        # Imported inside the try so an import failure is reported below
        # instead of propagating to the caller.
        from src.oss.oss_leaderboard_manager import OSSLeaderboardManager

        # Load the leaderboard data from OSS.
        initial_data = OSSLeaderboardManager().load_leaderboard_from_oss()

        if not initial_data:
            print("⚠️ OSS中未找到排行榜数据")
            return sage_results

        print(f"✅ 从OSS加载了 {len(initial_data)} 条排行榜记录")

        for i, entry in enumerate(initial_data):
            try:
                model_name = entry['model_name']
                organization = entry['organization']
                sage_result = SAGEResult(
                    submission_id=f"oss_{i:02d}_{model_name.replace(' ', '_').replace('-', '_')}",
                    organization=organization,
                    email=entry.get('contact_email', f"contact@{organization.lower().replace(' ', '')}.com"),
                    tokens=entry.get('tokens', 'N/A'),
                    accuracy=entry.get('accuracy', 0.0),
                    mg_pass_2=entry.get('mg_pass_2', 0.0),
                    mg_pass_4=entry.get('mg_pass_4', 0.0),
                    submitted_time=entry["submitted_time"],
                    status="EVALUATED"
                )
            except (KeyError, TypeError, AttributeError) as e:
                # Missing required key or wrong value type: drop only this
                # entry and keep processing the rest.
                print(f"⚠️ Skipping malformed leaderboard entry {i}: {e!r}")
                continue

            # Add model_name as additional attribute for display
            sage_result.model_name = model_name
            sage_results.append(sage_result)

    except Exception as e:
        # Deliberate best-effort boundary: report and fall through to return
        # whatever was collected so far.
        print(f"❌ 从OSS加载排行榜失败: {e}")

    return sage_results
def process_sage_results_for_leaderboard_oss() -> List[SAGEResult]:
    """Return every SAGE result available from OSS.

    Delegates to ``load_initial_sage_results_from_oss()`` and hands its list
    back unchanged.
    """
    oss_results = load_initial_sage_results_from_oss()
    return oss_results
# Publish the OSS-backed loader under the name that get_sage_leaderboard_df() calls
process_sage_results_for_leaderboard = process_sage_results_for_leaderboard_oss
except ImportError as e:
print(f"Could not set up SAGE results processing: {e}")
process_sage_results_for_leaderboard = None
def get_sage_leaderboard_df() -> pd.DataFrame:
    """Build the leaderboard dataframe from the SAGE evaluation results.

    Returns:
        An empty dataframe when no results processor is configured or when it
        yields no results. Otherwise one row per Model + Organization pair
        (the row with the greatest "Submission Date" wins), ordered by
        "Accuracy (%)" descending, with the score columns rounded to two
        decimals.
    """
    loader = process_sage_results_for_leaderboard
    if loader is None:
        return pd.DataFrame()

    # Convert every result into its display row.
    rows = []
    for result in loader():
        rows.append(result.to_dict())
    if not rows:
        return pd.DataFrame()

    frame = pd.DataFrame.from_records(rows)

    # De-duplicate: newest submission first, then keep the first row seen for
    # each Model + Organization pair.
    identity_cols = ["Model", "Organization"]
    if all(name in frame.columns for name in [*identity_cols, "Submission Date"]):
        newest_first = frame.sort_values(by=["Submission Date"], ascending=False)
        frame = newest_first.drop_duplicates(subset=identity_cols, keep="first")

    # Rank by accuracy, best first.
    if "Accuracy (%)" in frame.columns:
        frame = frame.sort_values(by=["Accuracy (%)"], ascending=False)

    # Round the score columns that are present.
    for score_col in ("Accuracy (%)", "mG-Pass@2 (%)", "mG-Pass@4 (%)"):
        if score_col in frame.columns:
            frame[score_col] = frame[score_col].round(2)

    return frame