Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import datetime | |
| import requests | |
| from email.utils import parseaddr | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from src.about import ( | |
| CITATION_BUTTON_LABEL, | |
| CITATION_BUTTON_TEXT, | |
| EVALUATION_QUEUE_TEXT, | |
| INTRODUCTION_TEXT, | |
| LLM_BENCHMARKS_TEXT, | |
| TITLE, | |
| ) | |
| from src.display.css_html_js import custom_css | |
| from src.display.utils import ( | |
| BENCHMARK_COLS, | |
| COLS, | |
| EVAL_COLS, | |
| EVAL_TYPES, | |
| AutoEvalColumn, | |
| ModelType, | |
| fields, | |
| WeightType, | |
| Precision | |
| ) | |
# SAGE-specific helpers are imported from src.populate so that this module
# does not pull in the transformers dependency.
# SAGE_MODULES_AVAILABLE records whether those helpers can be used.
try:
    from src.populate import process_sage_results_for_leaderboard, get_sage_leaderboard_df
except ImportError as e:
    print(f"Warning: SAGE modules not available: {e}")
    SAGE_MODULES_AVAILABLE = False
else:
    # The import succeeded; the helper may still have been exported as None.
    SAGE_MODULES_AVAILABLE = process_sage_results_for_leaderboard is not None
    print(
        "✅ SAGE modules loaded successfully"
        if SAGE_MODULES_AVAILABLE
        else "❌ SAGE modules not available"
    )
# Configuration
# Hugging Face token read from the environment; None when HF_TOKEN is unset.
TOKEN = os.environ.get("HF_TOKEN")
OWNER = "opencompass"

# OSS submission tracking paths: the history object key is the path prefix
# followed by the file name.
SUBMISSION_TRACKING_PATH = "atlas_eval/submissions/user_tracking/"
SUBMISSION_HISTORY_FILE = "submission_history.json"
def format_error(msg):
    """Return `msg` wrapped in a centered, red, 20px HTML paragraph.

    `msg` is inserted verbatim (no HTML escaping is applied).
    """
    paragraph_style = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{paragraph_style}'>{msg}</p>"
def format_warning(msg):
    """Return `msg` wrapped in a centered, orange, 20px HTML paragraph.

    `msg` is inserted verbatim (no HTML escaping is applied).
    """
    paragraph_style = "color: orange; font-size: 20px; text-align: center;"
    return f"<p style='{paragraph_style}'>{msg}</p>"
def format_log(msg):
    """Return `msg` wrapped in a centered, green, 20px HTML paragraph.

    `msg` is inserted verbatim (no HTML escaping is applied).
    """
    paragraph_style = "color: green; font-size: 20px; text-align: center;"
    return f"<p style='{paragraph_style}'>{msg}</p>"
def model_hyperlink(link, model_name):
    """Render `model_name` as an HTML link to `link` when `link` is usable.

    Returns `model_name` unchanged when `link` is empty/None or does not
    start with "http"; otherwise returns an anchor that opens in a new tab.
    """
    # Guard clause: anything that is not an http(s)-looking URL is shown as plain text.
    if not link or not link.startswith("http"):
        return model_name

    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def load_submission_history():
    """Load the user submission history from OSS.

    Returns:
        The JSON-decoded content of the history object, or an empty dict when
        the download yields nothing or any step (import, download, decoding)
        raises.
    """
    try:
        # Imported lazily so a missing OSS module is handled like any other failure.
        from src.oss.oss_file_manager import OSSFileManager

        manager = OSSFileManager()
        raw_history = manager.download_file_content(
            SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE
        )
        if not raw_history:
            # Nothing stored yet (or empty content): start from a blank history.
            print("📝 Creating new submission history")
            return {}
        return json.loads(raw_history)
    except Exception as err:
        print(f"⚠️ Failed to load submission history: {err}")
        return {}
def save_submission_history(history):
    """Serialize `history` to JSON and upload it to OSS.

    Args:
        history: JSON-serializable submission history.

    Returns:
        Whatever `upload_file_content` returns on completion, or False when
        any step (import, serialization, upload) raises.
    """
    try:
        # Imported lazily so a missing OSS module is reported as a failed save.
        from src.oss.oss_file_manager import OSSFileManager

        manager = OSSFileManager()
        # ensure_ascii=False keeps non-ASCII text readable in the stored file.
        payload = json.dumps(history, indent=2, ensure_ascii=False)
        return manager.upload_file_content(
            content=payload,
            object_key=SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE,
        )
    except Exception as err:
        print(f"❌ Failed to save submission history: {err}")
        return False
def check_user_submission_eligibility(profile: gr.OAuthProfile, org_name: str):
    """Check whether a logged-in user is currently allowed to submit.

    Two rules are enforced, in order:
      1. The Hugging Face account must be at least 60 days old.
      2. The user must have fewer than 2 recorded submissions dated today.

    Args:
        profile: OAuth profile of the submitting user; only `username` is read.
        org_name: Organization name. Currently unused by the checks.

    Returns:
        A `(eligible, message)` tuple. `eligible` is False when a rule fails,
        when the account lookup does not return HTTP 200, or when any error
        occurs (the error text is included in the message).
    """
    try:
        # 1. Check account age limit (60 days).
        # A timeout keeps a stalled request from blocking the submission handler.
        user_data = requests.get(
            f"https://huggingface.co/api/users/{profile.username}/overview",
            timeout=10,
        )
        if user_data.status_code != 200:
            return False, "Unable to verify account information. Please try again later."

        creation_date = json.loads(user_data.content)["createdAt"]

        # Accept the timestamp with or without fractional seconds.
        created_at = None
        for timestamp_format in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
            try:
                created_at = datetime.datetime.strptime(creation_date, timestamp_format)
                break
            except ValueError:
                continue
        if created_at is None:
            raise ValueError(f"Unrecognized createdAt timestamp: {creation_date!r}")

        # The trailing "Z" marks the timestamp as UTC, so compare it against
        # the current UTC time (kept naive to match the parsed value).
        now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        if now_utc - created_at < datetime.timedelta(days=60):
            return False, "This account does not meet the submission requirement. Account age must exceed 60 days."

        # 2. Check daily submission limit.
        # The date is taken in local time, the same clock used when
        # submissions are recorded.
        submission_history = load_submission_history()
        user_submissions = submission_history.get(profile.username, [])
        today = datetime.datetime.today().strftime('%Y-%m-%d')
        submissions_today = sum(
            1 for submission in user_submissions if submission.get("date", "") == today
        )
        if submissions_today >= 2:
            return False, "You have already submitted twice today. Please try again tomorrow."

        return True, "Eligibility check passed"
    except Exception as e:
        # Fail closed: any unexpected error denies the submission.
        print(f"❌ User eligibility check failed: {e}")
        return False, f"System check error, please try again later: {str(e)}"
def record_user_submission(profile: gr.OAuthProfile, model_name: str, org_name: str, email: str):
    """Append one submission record to the user's history and persist it.

    Args:
        profile: OAuth profile of the submitting user; only `username` is read.
        model_name: Model name stored in the record.
        org_name: Organization name stored in the record.
        email: Contact email stored in the record.

    Returns:
        The result of `save_submission_history`, or False when any step raises.
    """
    try:
        submission_history = load_submission_history()
        # NOTE(review): load_submission_history() returns {} when loading fails,
        # so a failed load followed by a successful save would replace the
        # stored history with just this record — confirm this is acceptable.

        # Read the clock once so "date" and "time" always describe the same
        # instant, even when the call straddles midnight.
        submitted_at = datetime.datetime.now()

        submission_record = {
            "date": submitted_at.strftime('%Y-%m-%d'),
            "time": submitted_at.strftime('%H:%M:%S'),
            "model": model_name,
            "organization": org_name,
            "email": email,
            "username": profile.username,
        }
        submission_history.setdefault(profile.username, []).append(submission_record)

        # Save submission history
        return save_submission_history(submission_history)
    except Exception as e:
        print(f"❌ Failed to record submission history: {e}")
        return False
def get_leaderboard_dataframe():
    """Build the leaderboard dataframe from SAGE results.

    Returns:
        The dataframe produced by `get_sage_leaderboard_df()`, or an empty
        `pd.DataFrame` when the SAGE modules are unavailable, no results are
        found, or loading raises.
    """
    print("🔄 Loading SAGE leaderboard data...")

    if not SAGE_MODULES_AVAILABLE:
        print("❌ SAGE modules not available")
        return pd.DataFrame()

    try:
        # Use the updated get_sage_leaderboard_df function
        results = get_sage_leaderboard_df()
        if not results.empty:
            print(f"✅ Generated dataframe with {len(results)} rows")
            return results
        print("❌ No SAGE results found")
        return pd.DataFrame()
    except Exception as err:
        print(f"❌ Error generating leaderboard dataframe: {err}")
        # Dump the full stack trace to the logs for debugging.
        import traceback
        traceback.print_exc()
        return pd.DataFrame()
def refresh_leaderboard():
    """Log a refresh message and return a freshly built leaderboard dataframe."""
    print("🔄 Refreshing leaderboard data...")
    refreshed = get_leaderboard_dataframe()
    return refreshed
# Initialize data: the leaderboard is loaded once at import time and used as
# the initial value of the table below.
print("🚀 Initializing SAGE-Bench leaderboard...")
leaderboard_df = get_leaderboard_dataframe()
initial_row_total = len(leaderboard_df)
print(f"📈 Leaderboard initialized with {initial_row_total} rows")

# Column types for the dataframe, in display order:
# Model, Organization, Accuracy, mG-Pass@2, mG-Pass@4, Submission Date
COLUMN_TYPES = [
    "markdown",  # Model
    "str",       # Organization
    "number",    # Accuracy
    "number",    # mG-Pass@2
    "number",    # mG-Pass@4
    "str",       # Submission Date
]
# Create the Gradio interface.
# The stylesheet is given inline here; it targets the "markdown-text" class
# and the "citation-button" element id used by the components below.
demo = gr.Blocks(
    css="""
.markdown-text {
font-size: 16px !important;
}
#citation-button {
font-family: monospace;
}
"""
)
# Build the page: header, citation box, leaderboard table with a refresh
# button, and a collapsible submission form with its handlers.
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
                max_lines=10,
                interactive=False,
            )

    # Main leaderboard table
    gr.Markdown("## 🏆 SAGE Benchmark Results", elem_classes="markdown-text")

    # Result counter; updated together with the table on refresh
    results_count = gr.Markdown(f"📊 **Showing {len(leaderboard_df)} results**")

    leaderboard_table = gr.Dataframe(
        value=leaderboard_df,
        datatype=COLUMN_TYPES,
        interactive=False,
        wrap=True,
        column_widths=["30%", "20%", "12%", "12%", "12%", "14%"],
    )

    # Refresh button
    refresh_button = gr.Button("🔄 Refresh Leaderboard")

    def refresh_leaderboard_with_count():
        """Reload the leaderboard and return `(dataframe, count_markdown)`."""
        df = refresh_leaderboard()
        count_text = f"📊 **Showing {len(df)} results**"
        return df, count_text

    refresh_button.click(
        refresh_leaderboard_with_count,
        inputs=[],
        outputs=[leaderboard_table, results_count],
    )

    # Submission section
    with gr.Accordion("📊 Submit Your SAGE Results", open=False):
        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

        # Submission instructions. The login / account-age / daily-limit items
        # are hidden inside HTML comments in the text below, although
        # handle_submission still enforces them.
        gr.Markdown("""
### 📋 Submission Requirements
<!--
- Login required: You must log in with a Hugging Face account
- Account age: Account must be older than 60 days
- Submission frequency: Each user can submit up to 2 times per day
-->
- File format: Upload a JSON file in the SAGE format
- Organization: Provide the exact organization name (shown on the leaderboard)
- Contact email: Provide a valid email for notifications
- Auto evaluation: After submission, the system will run LLM-based evaluation and update the leaderboard
<!--
### 🔐 Security Policy
To prevent spam and ensure evaluation quality, we enforce:
- New accounts must wait 60 days before submitting (prevents abuse)
- Daily submission limits to ensure leaderboard quality and system stability
- Duplicate checks to avoid multiple submissions for the same organization
-->
""", elem_classes="markdown-text")

        with gr.Row():
            with gr.Column():
                model_textbox = gr.Textbox(
                    label="Model Name - will be shown on the leaderboard",
                    placeholder="Your Model Name (e.g., GPT-4, Llama-2-70B)",
                )
                org_textbox = gr.Textbox(
                    label="Organization Name - will be shown on the leaderboard",
                    placeholder="Your Organization",
                )
                email_textbox = gr.Textbox(
                    label="Contact Email - used for contact, not publicly visible",
                    placeholder="contact@example.com",
                )
            with gr.Column():
                file_upload = gr.File(
                    label="Upload SAGE Results (JSON)",
                    file_types=[".json"],
                    type="filepath",
                )

        # Login and submit buttons
        with gr.Row():
            login_button = gr.LoginButton("🔐 Login with HuggingFace", size="lg")
            submit_button = gr.Button("Submit Results", variant="primary", size="lg")

        # Login status and user info.
        # NOTE(review): neither component is wired to any event in this block;
        # presumably left over from an explicit login callback — confirm before removing.
        profile_state = gr.State()
        login_status = gr.Markdown(visible=True)

        # Progress display and result display areas
        progress_info = gr.HTML()
        submission_result = gr.HTML()

        def show_progress(step, message, total_steps=4):
            """Return an HTML progress card for step `step` of `total_steps`.

            Args:
                step: Current step number.
                message: Text shown next to the step counter (inserted verbatim).
                total_steps: Total number of steps; a non-positive value is
                    rendered as 0% instead of raising ZeroDivisionError.

            Returns:
                The HTML string of the progress card.
            """
            if total_steps > 0:
                # Clamp so an out-of-range step cannot produce a negative or >100% bar.
                progress_percentage = max(0, min(100, int((step / total_steps) * 100)))
            else:
                progress_percentage = 0

            # The in-bar label is only drawn once the bar is wide enough to hold it.
            bar_label = (
                f'<span style="color: white; font-size: 12px; font-weight: bold;">{progress_percentage}%</span>'
                if progress_percentage > 20
                else ''
            )
            status_hint = (
                '✨ Almost done, please wait...'
                if step >= total_steps
                else '📤 Please wait, processing your submission...'
            )
            progress_html = f"""
<div style="background-color: #e7f3ff; border: 1px solid #4dabf7; border-radius: 5px; padding: 15px; margin: 10px 0;">
<div style="display: flex; align-items: center; margin-bottom: 10px;">
<h4 style="color: #1971c2; margin: 0; flex-grow: 1;">⏳ Processing submission...</h4>
<span style="color: #1971c2; font-weight: bold;">{progress_percentage}%</span>
</div>
<p style="color: #1971c2; margin: 5px 0;"><strong>Step {step}/{total_steps}:</strong> {message}</p>
<div style="background-color: #fff; border-radius: 10px; height: 20px; margin: 10px 0; border: 1px solid #dee2e6;">
<div style="background: linear-gradient(90deg, #4dabf7, #74c0fc); height: 100%; width: {progress_percentage}%; border-radius: 10px; transition: width 0.5s ease; display: flex; align-items: center; justify-content: center;">
{bar_label}
</div>
</div>
<p style="color: #495057; font-size: 14px; margin: 5px 0;">
{status_hint}
</p>
</div>
"""
            return progress_html

        def handle_submission(file_upload, model_name, org_name, email, user_profile: gr.OAuthProfile):
            """Validate and process one submission, streaming progress updates.

            This is a generator: every yield is a `(progress_html, result_html)`
            pair for the `progress_info` and `submission_result` components.

            Args:
                file_upload: Path of the uploaded results file (falsy when missing).
                model_name: Model name entered by the user.
                org_name: Organization name entered by the user.
                email: Contact email entered by the user.
                user_profile: OAuth profile of the current user. It is not part
                    of the click `inputs`.
                    NOTE(review): presumably injected by Gradio from the type
                    annotation; with a non-Optional annotation Gradio may reject
                    anonymous calls before this function runs, which would make
                    the login check below a fallback only — confirm against the
                    Gradio version in use.
            """
            import html
            import time

            try:
                # Step 1: basic validation
                yield show_progress(1, "Validating submission info"), ""

                # Verify login
                if user_profile is None or getattr(user_profile, "name", None) is None:
                    yield "", format_error("Please log in with Hugging Face before submitting")
                    return
                print(f"user_profile: {user_profile}")
                print(f"user_profile.name: {user_profile.name}")

                if not file_upload:
                    yield "", format_error("Please select a file to upload")
                    return
                if not model_name or not model_name.strip():
                    yield "", format_error("Please enter model name")
                    return
                if not org_name or not org_name.strip():
                    yield "", format_error("Please enter organization name")
                    return
                if not email or not email.strip():
                    yield "", format_error("Please enter email address")
                    return

                # Validate email format (only checks that an "@" is present)
                _, parsed_email = parseaddr(email)
                if "@" not in parsed_email:
                    yield "", format_warning("Please provide a valid email address")
                    return

                # Step 2: file validation and reading
                yield show_progress(2, "Validating file format and content"), ""
                time.sleep(0.5)  # allow users to see progress update

                # User eligibility check (account age / submission frequency)
                eligible, msg = check_user_submission_eligibility(user_profile, org_name)
                if not eligible:
                    yield "", format_error(msg)
                    return

                # Step 3: upload to OSS
                yield show_progress(3, "Uploading file to OSS storage"), ""

                # Process the file submission
                from src.submission.submit import process_sage_submission_simple
                result = process_sage_submission_simple(file_upload, model_name, org_name, email)

                # Step 4: done
                yield show_progress(4, "Submission completed, preparing evaluation"), ""
                time.sleep(0.5)  # allow users to see completion state

                # Record submission history. Best effort: a failure here must
                # not fail the submission, but it is logged instead of being
                # silently dropped.
                try:
                    if not record_user_submission(user_profile, model_name, org_name, email):
                        print("⚠️ Submission processed but submission history was not recorded")
                except Exception as record_error:
                    print(f"⚠️ Failed to record submission history: {record_error}")

                # Build the success message. User-supplied values are escaped so
                # they are shown as text rather than interpreted as HTML.
                safe_model = html.escape(model_name)
                safe_org = html.escape(org_name)
                safe_email = html.escape(email)
                success_info = f"""
<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px; padding: 15px; margin: 10px 0;">
<h4 style="color: #155724; margin-top: 0;">🎉 Submission successful!</h4>
<p style="color: #155724; margin: 5px 0;"><strong>Model:</strong> {safe_model}</p>
<p style="color: #155724; margin: 5px 0;"><strong>Organization:</strong> {safe_org}</p>
<p style="color: #155724; margin: 5px 0;"><strong>Email:</strong> {safe_email}</p>
<p style="color: #155724; margin: 5px 0;"><strong>Submitted at:</strong> {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
<p style="color: #155724; margin-bottom: 0;">Your results have been submitted via OSS. LLM evaluation will complete in 5-10 minutes and the leaderboard will be updated.</p>
</div>
"""
                # Clear the progress bar and show the final result.
                # NOTE(review): `result` is appended as-is; presumably it is
                # already HTML produced by process_sage_submission_simple — confirm.
                yield "", success_info + result
            except ImportError as e:
                yield "", format_error(f"Submission system modules unavailable: {e}")
            except Exception as e:
                import traceback
                traceback.print_exc()
                yield "", format_error(f"An error occurred during submission: {str(e)}")

        submit_button.click(
            handle_submission,
            # user_profile is not listed here; see the handler's docstring.
            inputs=[file_upload, model_textbox, org_textbox, email_textbox],
            outputs=[progress_info, submission_result],
        )
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    # SSR mode is disabled for better OAuth compatibility.
    # Note: OAuth is handled internally via gr.LoginButton, not at launch level.
    demo.launch(
        ssr_mode=False,
    )