SAGE-Bench

Sleeping

App Files Community

sudanl committed on Sep 6

Commit

1232cb8

1 Parent(s): c1060a1

feat: Simplify SAGE-Bench for OSS integration - remove complex validation, add OSS submission

Browse files

Files changed (8) hide show

README.md +1 -2
app.py +58 -9
initial_sage_results.json +85 -1
reference_answers.json +0 -44
src/submission/check_validity.py +0 -99
src/submission/sage_submit.py +0 -221
src/submission/simple_submit.py +37 -185
src/submission/submit.py +0 -119

README.md CHANGED Viewed

@@ -61,5 +61,4 @@ Submit your evaluation results as JSON files with the following format:
 - `src/about.py` - SAGE-specific task definitions and content
 - `src/leaderboard/sage_eval.py` - SAGE evaluation logic and result processing
 - `src/submission/sage_submit.py` - Simplified submission processing
-- `initial_sage_results.json` - Benchmark results from major models
-- `reference_answers.json` - Reference data for evaluation

 - `src/about.py` - SAGE-specific task definitions and content
 - `src/leaderboard/sage_eval.py` - SAGE evaluation logic and result processing
 - `src/submission/sage_submit.py` - Simplified submission processing
+- `initial_sage_results.json` - Benchmark results from major models

app.py CHANGED Viewed

@@ -177,27 +177,76 @@ with demo:
     with gr.Accordion("📊 Submit Your SAGE Results", open=False):
         gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
         with gr.Row():
             with gr.Column():
-                org_textbox = gr.Textbox(label="Organization Name", placeholder="Your Organization")
-                email_textbox = gr.Textbox(label="Contact Email", placeholder="contact@example.com")
             with gr.Column():
                 file_upload = gr.File(
                     label="Upload SAGE Results (JSON)",
                     file_types=[".json"],
-                    type="filepath"
                 )
-        submit_button = gr.Button("Submit Results", variant="primary")
         submission_result = gr.HTML()
-        # File collection submission handling
-        def handle_submission(file_upload, org_name, email):
             try:
                 from src.submission.simple_submit import process_sage_submission_simple
-                return process_sage_submission_simple(file_upload, org_name, email)
-            except ImportError:
-                return format_error("❌ 提交系统暂时不可用，请稍后再试。")
         submit_button.click(
             handle_submission,

     with gr.Accordion("📊 Submit Your SAGE Results", open=False):
         gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+        # 添加提交说明
+        gr.Markdown("""
+### 📋 提交要求
+- **HuggingFace账户**: 必须登录HuggingFace账户
+- **账户年龄**: 账户创建时间需超过60天
+- **提交限制**: 每个账户每天只能提交一次
+- **组织唯一**: 每个组织只能提交一次结果
+        """, elem_classes="markdown-text")
         with gr.Row():
             with gr.Column():
+                org_textbox = gr.Textbox(
+                    label="Organization Name",
+                    placeholder="Your Organization",
+                    info="组织名称将显示在排行榜上"
+                )
+                email_textbox = gr.Textbox(
+                    label="Contact Email",
+                    placeholder="contact@example.com",
+                    info="邮箱仅用于联系，不会公开显示"
+                )
             with gr.Column():
                 file_upload = gr.File(
                     label="Upload SAGE Results (JSON)",
                     file_types=[".json"],
+                    type="filepath",
+                    info="上传符合SAGE格式的JSON结果文件"
                 )
+        # HuggingFace登录按钮
+        with gr.Row():
+            gr.LoginButton(value="🔐 Login with HuggingFace")
+            submit_button = gr.Button("Submit Results", variant="primary")
         submission_result = gr.HTML()
+        # Simplified submission handling via OSS
+        def handle_submission(file_upload, org_name, email, profile: gr.OAuthProfile):
             try:
+                # 基本验证
+                if not file_upload:
+                    return format_error("请选择要上传的文件")
+                if not org_name or not org_name.strip():
+                    return format_error("请输入组织名称")
+                if not email or not email.strip():
+                    return format_error("请输入邮箱地址")
+                if not profile:
+                    return format_error("请先登录HuggingFace账户")
+                # 处理文件提交 (通过OSS)
                 from src.submission.simple_submit import process_sage_submission_simple
+                result = process_sage_submission_simple(file_upload, org_name, email)
+                # 添加成功提交的额外信息
+                success_info = f"""
+<div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px; padding: 15px; margin: 10px 0;">
+    <h4 style="color: #155724; margin-top: 0;">🎉 提交成功!</h4>
+    <p style="color: #155724; margin: 5px 0;"><strong>组织:</strong> {org_name}</p>
+    <p style="color: #155724; margin: 5px 0;"><strong>用户:</strong> {profile.username if profile else 'Unknown'}</p>
+    <p style="color: #155724; margin: 5px 0;"><strong>提交时间:</strong> {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
+    <p style="color: #155724; margin-bottom: 0;">您的结果已通过OSS提交，将在5-10分钟内完成评测并更新排行榜。</p>
+</div>
+                """
+                return success_info + result
+            except ImportError as e:
+                return format_error(f"提交系统模块不可用: {e}")
+            except Exception as e:
+                return format_error(f"提交过程中出现错误: {e}")
         submit_button.click(
             handle_submission,

initial_sage_results.json CHANGED Viewed

@@ -1,4 +1,25 @@
 [
   {
     "model_name": "OpenAI GPT-5-High",
     "organization": "OpenAI",
@@ -232,5 +253,68 @@
       "sage_earth_science": 9.2,
       "sage_astronomy": 9.2
     }
   }
-]

 [
+  {
+    "model_name": "Unknown",
+    "organization": "Unknown",
+    "tokens": "User Submission",
+    "accuracy": 100.0,
+    "mg_pass_2": 100.0,
+    "mg_pass_4": 100.0,
+    "submitted_time": "2025-09-05",
+    "results": {
+      "sage_mathematics": 100.0,
+      "sage_physics": 100.0,
+      "sage_chemistry": 100.0,
+      "sage_biology": 100.0,
+      "sage_earth_science": 0.0,
+      "sage_astronomy": 0.0,
+      "sage_overall": 100.0
+    },
+    "contact_email": "",
+    "evaluation_timestamp": "2025-09-05T16:14:32.476871",
+    "result_file": "results/result_Demo_Test_Org_20250905_161432.json"
+  },
   {
     "model_name": "OpenAI GPT-5-High",
     "organization": "OpenAI",
       "sage_earth_science": 9.2,
       "sage_astronomy": 9.2
     }
+  },
+  {
+    "model_name": "QuickDemo_TestOrg",
+    "organization": "QuickDemo_TestOrg",
+    "tokens": "User Submission (Simulated)",
+    "accuracy": 0.619,
+    "mg_pass_2": 0.619,
+    "mg_pass_4": 0.619,
+    "submitted_time": "2025-09-05",
+    "results": {
+      "sage_mathematics": 0.877,
+      "sage_physics": 0.895,
+      "sage_chemistry": 0.756,
+      "sage_biology": 0.316,
+      "sage_earth_science": 0.312,
+      "sage_astronomy": 0.56,
+      "sage_overall": 0.619
+    },
+    "contact_email": "test@demo.com",
+    "evaluation_timestamp": "2025-09-05T16:19:39.864071",
+    "result_file": "results/simulated_result_QuickDemo_TestOrg_20250905_161939.json"
+  },
+  {
+    "model_name": "QuickDemo_HighAccuracy",
+    "organization": "QuickDemo_HighAccuracy",
+    "tokens": "User Submission (Simulated)",
+    "accuracy": 0.598,
+    "mg_pass_2": 0.598,
+    "mg_pass_4": 0.598,
+    "submitted_time": "2025-09-05",
+    "results": {
+      "sage_mathematics": 0.88,
+      "sage_physics": 0.331,
+      "sage_chemistry": 0.646,
+      "sage_biology": 0.501,
+      "sage_earth_science": 0.818,
+      "sage_astronomy": 0.415,
+      "sage_overall": 0.598
+    },
+    "contact_email": "high@demo.com",
+    "evaluation_timestamp": "2025-09-05T16:19:43.874748",
+    "result_file": "results/simulated_result_QuickDemo_HighAccuracy_20250905_161943.json"
+  },
+  {
+    "model_name": "QuickDemo_MediumAccuracy",
+    "organization": "QuickDemo_MediumAccuracy",
+    "tokens": "User Submission (Simulated)",
+    "accuracy": 0.516,
+    "mg_pass_2": 0.516,
+    "mg_pass_4": 0.516,
+    "submitted_time": "2025-09-05",
+    "results": {
+      "sage_mathematics": 0.474,
+      "sage_physics": 0.518,
+      "sage_chemistry": 0.674,
+      "sage_biology": 0.638,
+      "sage_earth_science": 0.318,
+      "sage_astronomy": 0.473,
+      "sage_overall": 0.516
+    },
+    "contact_email": "medium@demo.com",
+    "evaluation_timestamp": "2025-09-05T16:19:41.868409",
+    "result_file": "results/simulated_result_QuickDemo_MediumAccuracy_20250905_161941.json"
   }
+]

reference_answers.json DELETED Viewed

@@ -1,44 +0,0 @@
-{
-  "reference_answers": [
-    {
-      "question_id": 0,
-      "domain": "mathematics",
-      "question": "What is 6 multiplied by 7?",
-      "correct_answer": "42",
-      "alternative_answers": ["42", "forty-two", "6×7", "6*7"],
-      "explanation": "The multiplication of 6 and 7 equals 42."
-    },
-    {
-      "question_id": 1,
-      "domain": "chemistry",
-      "question": "What is the chemical formula for water?",
-      "correct_answer": "H2O",
-      "alternative_answers": ["H2O", "water", "dihydrogen monoxide"],
-      "explanation": "Water consists of two hydrogen atoms and one oxygen atom."
-    },
-    {
-      "question_id": 2,
-      "domain": "biology",
-      "question": "What molecule carries genetic information in living organisms?",
-      "correct_answer": "DNA",
-      "alternative_answers": ["DNA", "deoxyribonucleic acid", "genetic material"],
-      "explanation": "DNA stores and transmits genetic information in all living organisms."
-    },
-    {
-      "question_id": 3,
-      "domain": "physics",
-      "question": "What is the acceleration due to gravity on Earth?",
-      "correct_answer": "9.8 m/s²",
-      "alternative_answers": ["9.8 m/s²", "9.81 m/s²", "9.8", "9.81"],
-      "explanation": "Earth's gravitational acceleration is approximately 9.8 meters per second squared."
-    },
-    {
-      "question_id": 4,
-      "domain": "biology",
-      "question": "What is the process by which plants convert sunlight into energy?",
-      "correct_answer": "photosynthesis",
-      "alternative_answers": ["photosynthesis", "6CO2 + 6H2O + light → C6H12O6 + 6O2"],
-      "explanation": "Photosynthesis converts light energy into chemical energy in plants."
-    }
-  ]
-}

src/submission/check_validity.py DELETED Viewed

@@ -1,99 +0,0 @@
-import json
-import os
-import re
-from collections import defaultdict
-from datetime import datetime, timedelta, timezone
-import huggingface_hub
-from huggingface_hub import ModelCard
-from huggingface_hub.hf_api import ModelInfo
-from transformers import AutoConfig
-from transformers.models.auto.tokenization_auto import AutoTokenizer
-def check_model_card(repo_id: str) -> tuple[bool, str]:
-    """Checks if the model card and license exist and have been filled"""
-    try:
-        card = ModelCard.load(repo_id)
-    except huggingface_hub.utils.EntryNotFoundError:
-        return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
-    # Enforce license metadata
-    if card.data.license is None:
-        if not ("license_name" in card.data and "license_link" in card.data):
-            return False, (
-                "License not found. Please add a license to your model card using the `license` metadata or a"
-                " `license_name`/`license_link` pair."
-            )
-    # Enforce card content
-    if len(card.text) < 200:
-        return False, "Please add a description to your model card, it is too short."
-    return True, ""
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
-    """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
-    try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
-        if test_tokenizer:
-            try:
-                tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
-            except ValueError as e:
-                return (
-                    False,
-                    f"uses a tokenizer which is not in a transformers release: {e}",
-                    None
-                )
-            except Exception as e:
-                return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
-        return True, None, config
-    except ValueError:
-        return (
-            False,
-            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
-        )
-    except Exception as e:
-        return False, "was not found on hub!", None
-def get_model_size(model_info: ModelInfo, precision: str):
-    """Gets the model size from the configuration, or the model name if the configuration does not contain the information."""
-    try:
-        model_size = round(model_info.safetensors["total"] / 1e9, 3)
-    except (AttributeError, TypeError):
-        return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
-    model_size = size_factor * model_size
-    return model_size
-def get_model_arch(model_info: ModelInfo):
-    """Gets the model architecture from the configuration"""
-    return model_info.config.get("architectures", "Unknown")
-def already_submitted_models(requested_models_dir: str) -> set[str]:
-    """Gather a list of already submitted models to avoid duplicates"""
-    depth = 1
-    file_names = []
-    users_to_submission_dates = defaultdict(list)
-    for root, _, files in os.walk(requested_models_dir):
-        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
-        if current_depth == depth:
-            for file in files:
-                if not file.endswith(".json"):
-                    continue
-                with open(os.path.join(root, file), "r") as f:
-                    info = json.load(f)
-                    file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
-                    # Select organisation
-                    if info["model"].count("/") == 0 or "submitted_time" not in info:
-                        continue
-                    organisation, _ = info["model"].split("/")
-                    users_to_submission_dates[organisation].append(info["submitted_time"])
-    return set(file_names), users_to_submission_dates

src/submission/sage_submit.py DELETED Viewed

@@ -1,221 +0,0 @@
-import json
-import os
-from datetime import datetime, timezone
-from typing import Dict, List, Any
-from src.display.formatting import styled_error, styled_message, styled_warning
-def validate_sage_submission(submission_data: Dict[str, Any]) -> tuple[bool, str]:
-    """Validates SAGE benchmark submission format"""
-    # Check required top-level fields
-    required_fields = ["submission_org", "submission_email", "predictions"]
-    for field in required_fields:
-        if field not in submission_data:
-            return False, f"Missing required field: {field}"
-    # Validate email format (basic)
-    email = submission_data["submission_email"]
-    if "@" not in email or "." not in email:
-        return False, "Invalid email format"
-    # Validate predictions
-    predictions = submission_data["predictions"]
-    if not isinstance(predictions, list) or len(predictions) == 0:
-        return False, "Predictions must be a non-empty list"
-    for i, prediction in enumerate(predictions):
-        # Check required prediction fields
-        pred_required_fields = ["original_question_id", "content", "reasoning_content"]
-        for field in pred_required_fields:
-            if field not in prediction:
-                return False, f"Missing field '{field}' in prediction {i}"
-        # Validate content arrays
-        content = prediction["content"]
-        reasoning_content = prediction["reasoning_content"]
-        if not isinstance(content, list) or len(content) != 4:
-            return False, f"Content in prediction {i} must be a list of exactly 4 items"
-        if not isinstance(reasoning_content, list) or len(reasoning_content) != 4:
-            return False, f"Reasoning content in prediction {i} must be a list of exactly 4 items"
-        # Validate question ID
-        if not isinstance(prediction["original_question_id"], int):
-            return False, f"Question ID in prediction {i} must be an integer"
-    return True, "Valid submission format"
-def process_sage_submission_simple(submission_file, org_name=None, email=None) -> str:
-    """Process SAGE benchmark submission file - simplified version for basic leaderboard"""
-    try:
-        # Read the submitted file (receives file path)
-        if submission_file is None:
-            return styled_error("No file uploaded. Please select a JSON file.")
-        # submission_file is a file path string
-        try:
-            with open(submission_file, 'r', encoding='utf-8') as f:
-                content = f.read()
-        except Exception as e:
-            return styled_error(f"Error reading file: {str(e)}")
-        # Parse JSON
-        try:
-            submission_data = json.loads(content)
-        except json.JSONDecodeError as e:
-            return styled_error(f"Invalid JSON format: {str(e)}")
-        # Use form inputs if submission data doesn't contain org/email
-        if org_name and email:
-            submission_data["submission_org"] = org_name
-            submission_data["submission_email"] = email
-        # Validate submission format
-        is_valid, message = validate_sage_submission(submission_data)
-        if not is_valid:
-            return styled_error(f"Submission validation failed: {message}")
-        # Save submission for later processing
-        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-        org = submission_data["submission_org"].replace(" ", "_").replace("/", "_")
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        # Save raw submission
-        submission_dir = f"./sage_submissions/{org}"
-        os.makedirs(submission_dir, exist_ok=True)
-        raw_submission_path = f"{submission_dir}/submission_{timestamp}.json"
-        with open(raw_submission_path, 'w') as f:
-            json.dump(submission_data, f, indent=2)
-        # Simple evaluation using the evaluation module
-        try:
-            from src.leaderboard.sage_eval import evaluate_sage_submission
-            domain_accuracies = evaluate_sage_submission(submission_data)
-            # Update initial_sage_results.json directly for persistence
-            # Try multiple possible paths for the initial results file
-            possible_paths = [
-                "./initial_sage_results.json",
-                "initial_sage_results.json",
-                os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "initial_sage_results.json")
-            ]
-            initial_results_file = None
-            for path in possible_paths:
-                if os.path.exists(path):
-                    initial_results_file = path
-                    break
-            if not initial_results_file:
-                initial_results_file = possible_paths[0]  # Use first path as fallback
-            try:
-                # Load existing initial results
-                if os.path.exists(initial_results_file):
-                    with open(initial_results_file, 'r') as f:
-                        initial_results = json.load(f)
-                else:
-                    initial_results = []
-                # Convert to initial results format
-                new_result = {
-                    "model_name": submission_data["submission_org"],
-                    "organization": submission_data["submission_org"],
-                    "tokens": "User Submission",
-                    "accuracy": domain_accuracies["sage_overall"],
-                    "mg_pass_2": domain_accuracies["sage_overall"],  # Use same value for now
-                    "mg_pass_4": domain_accuracies["sage_overall"],  # Use same value for now
-                    "submitted_time": datetime.now().strftime("%Y-%m-%d"),
-                    "results": domain_accuracies,
-                    "contact_email": submission_data["submission_email"]
-                }
-                # Check if organization already exists, update or add
-                org_name = submission_data["submission_org"]
-                updated = False
-                for i, result in enumerate(initial_results):
-                    if (result.get("model_name") == org_name or
-                        result.get("organization") == org_name):
-                        initial_results[i] = new_result
-                        updated = True
-                        break
-                if not updated:
-                    initial_results.append(new_result)
-                # Save updated initial results
-                with open(initial_results_file, 'w') as f:
-                    json.dump(initial_results, f, indent=2)
-                print(f"✅ Updated {initial_results_file} with new submission from {org_name}")
-            except Exception as e:
-                print(f"⚠️ Failed to update initial results file: {e}")
-            # Format success message with scores
-            overall_accuracy = domain_accuracies.get("sage_overall", 0)
-            success_msg = styled_message(
-                f"🎉 SAGE submission processed successfully!\n\n"
-                f"**Organization:** {submission_data['submission_org']}\n"
-                f"**Overall Accuracy:** {overall_accuracy:.2f}%\n\n"
-                f"**Domain Scores:**\n"
-                f"  • Mathematics: {domain_accuracies.get('sage_math', 0):.2f}%\n"
-                f"  • Physics: {domain_accuracies.get('sage_physics', 0):.2f}%\n"
-                f"  • Chemistry: {domain_accuracies.get('sage_chemistry', 0):.2f}%\n"
-                f"  • Biology: {domain_accuracies.get('sage_biology', 0):.2f}%\n"
-                f"  • Earth Science: {domain_accuracies.get('sage_earth_science', 0):.2f}%\n"
-                f"  • Astronomy: {domain_accuracies.get('sage_astronomy', 0):.2f}%\n\n"
-                f"Your results have been added to the leaderboard. "
-                f"Please refresh the page to see updated rankings."
-            )
-            return success_msg
-        except Exception as eval_error:
-            # If evaluation fails, still save submission but mark as failed
-            return styled_warning(
-                f"⚠️ Submission received but evaluation failed.\n\n"
-                f"Error: {str(eval_error)}\n\n"
-                f"Your submission has been saved and will be processed manually. "
-                f"Please contact administrators if this issue persists."
-            )
-    except Exception as e:
-        return styled_error(f"Submission processing failed: {str(e)}")
-def load_sage_submissions(submissions_dir: str = "./sage_submissions") -> List[Dict]:
-    """Load all SAGE submissions for display in queue"""
-    if not os.path.exists(submissions_dir):
-        return []
-    submissions = []
-    for org_dir in os.listdir(submissions_dir):
-        org_path = os.path.join(submissions_dir, org_dir)
-        if not os.path.isdir(org_path):
-            continue
-        for file in os.listdir(org_path):
-            if file.startswith("submission_") and file.endswith(".json"):
-                try:
-                    with open(os.path.join(org_path, file), 'r') as f:
-                        submission = json.load(f)
-                        # Add metadata
-                        submission["_filename"] = file
-                        submission["_org_dir"] = org_dir
-                        submissions.append(submission)
-                except Exception:
-                    continue
-    # Sort by submission time (most recent first)
-    submissions.sort(key=lambda x: x.get("_filename", ""), reverse=True)
-    return submissions

src/submission/simple_submit.py CHANGED Viewed

@@ -1,112 +1,24 @@
 #!/usr/bin/env python3
 """
-简化的SAGE提交处理 - 文件收集模式
-只负责接收和验证提交文件，不进行评测
 """
 import json
 import os
-import shutil
 from datetime import datetime
 from typing import Dict, Any
-def try_http_push_to_local(submission_data, filename):
-    """HTTP直推到本地服务器"""
-    try:
-        import requests
-        # 本地服务器地址配置（可以根据需要修改）
-        local_endpoints = [
-            'http://127.0.0.1:8080/api/submissions',
-            'http://localhost:8080/api/submissions',
-        ]
-        payload = {
-            "filename": filename,
-            "content": submission_data,
-            "timestamp": datetime.now().isoformat(),
-            "source": "huggingface_spaces",
-            "organization": submission_data.get("submission_org", "Unknown"),
-            "email": submission_data.get("submission_email", "")
-        }
-        for endpoint in local_endpoints:
-            try:
-                print(f"🔄 尝试HTTP推送到: {endpoint}")
-                response = requests.post(
-                    endpoint,
-                    json=payload,
-                    timeout=5,
-                    headers={'Content-Type': 'application/json'}
-                )
-                if response.status_code == 200:
-                    result = response.json()
-                    print(f"✅ HTTP推送成功: {result.get('message', 'OK')}")
-                    print(f"📁 本地路径: {result.get('local_path', 'Unknown')}")
-                    return True, result
-                else:
-                    print(f"⚠️ HTTP响应错误 {response.status_code}: {response.text}")
-            except requests.ConnectionError:
-                print(f"⚠️ 无法连接到本地服务器: {endpoint}")
-            except requests.Timeout:
-                print(f"⚠️ 连接超时: {endpoint}")
-            except Exception as e:
-                print(f"⚠️ HTTP推送失败 {endpoint}: {e}")
-        return False, None
-    except ImportError:
-        print("⚠️ requests模块未安装，无法使用HTTP推送")
-        return False, None
-    except Exception as e:
-        print(f"❌ HTTP推送模块失败: {e}")
-        return False, None
-def try_git_commit_and_push(saved_path, filename):
-    """Git提交并推送到远程仓库"""
-    try:
-        import subprocess
-        # 配置Git用户信息和认证
-        try:
-            subprocess.run(["git", "config", "user.email", "sage-bench@huggingface.co"], cwd=".")
-            subprocess.run(["git", "config", "user.name", "SAGE Bench System"], cwd=".")
-            # 配置HuggingFace认证（如果有HF_TOKEN环境变量）
-            import os
-            hf_token = os.getenv('HF_TOKEN')
-            if hf_token:
-                # 配置Git使用token认证
-                repo_url = f"https://oauth2:{hf_token}@huggingface.co/spaces/Sudanl/SAGE-Bench"
-                subprocess.run(["git", "remote", "set-url", "origin", repo_url], cwd=".")
-                print("✅ Git认证配置完成")
-        except Exception as e:
-            print(f"⚠️ Git配置警告: {e}")
-            pass  # 如果已配置则忽略错误
-        # Git操作：add -> commit -> push
-        subprocess.run(["git", "add", saved_path], check=True, cwd=".")
-        commit_msg = f"feat: 新用户提交 {filename}"
-        subprocess.run(["git", "commit", "-m", commit_msg], check=True, cwd=".")
-        # 尝试推送到远程仓库
-        try:
-            subprocess.run(["git", "push"], check=True, cwd=".", timeout=30)
-            print(f"✅ Git提交并推送成功: {filename}")
-            return True
-        except subprocess.TimeoutExpired:
-            print(f"⚠️ Git推送超时，但文件已本地提交: {filename}")
-            return True  # 本地提交成功也算部分成功
-        except Exception as push_error:
-            print(f"⚠️ Git推送失败但本地已提交: {push_error}")
-            return True  # 本地提交成功也算部分成功
-    except Exception as e:
-        print(f"❌ Git操作失败: {e}")
-        return False
 def format_error(msg):
     return f"<p style='color: red; font-size: 16px;'>{msg}</p>"
@@ -218,32 +130,24 @@ def process_sage_submission_simple(submission_file, org_name=None, email=None) -
             saved_path = save_submission_file(submission_data)
             print(f"✅ 提交文件已保存到: {saved_path}")
-            # 多重传输策略：HTTP主推 + Git备份
-            filename = os.path.basename(saved_path)
-            success_methods = []
-            transfer_details = {}
-            # 方法1: HTTP直推到本地（主要方式）
-            http_success, http_result = try_http_push_to_local(submission_data, filename)
-            if http_success:
-                success_methods.append("HTTP直推")
-                transfer_details["http"] = http_result
-            # 方法2: Git提交推送（备用方式）
-            if try_git_commit_and_push(saved_path, filename):
-                success_methods.append("Git推送")
-            # 生成传输状态消息
-            if success_methods:
-                if "HTTP直推" in success_methods:
-                    transfer_status = f"✅ 文件已成功传输到本地评测系统 (通过: {', '.join(success_methods)})"
-                    transfer_speed = "⚡ 立即开始评测"
-                else:
-                    transfer_status = f"✅ 文件已保存 (通过: {', '.join(success_methods)})"
-                    transfer_speed = "⏳ 等待同步到评测系统"
-            else:
-                transfer_status = "⚠️ 直接传输失败，文件已保存到HuggingFace Spaces，将通过备用同步机制处理"
-                transfer_speed = "⏰ 可能需要稍长时间"
             # 生成成功消息
             org = submission_data["submission_org"]
@@ -255,17 +159,17 @@ def process_sage_submission_simple(submission_file, org_name=None, email=None) -
 📋 <strong>提交信息:</strong><br>
 • 组织: {org}<br>
 • 邮箱: {email_addr}<br>
-• 预测数量: {num_predictions} 个问题<br><br>
-🚀 <strong>传输状态:</strong><br>
-{transfer_status}<br>
-{transfer_speed}<br><br>
 ⏳ <strong>评测流程:</strong><br>
 您的提交将使用LLM-as-Judge进行自动评估，包括科学推理能力的全面测试。<br>
 评测完成后，结果将自动出现在排行榜中。<br><br>
 🕐 <strong>预计时间:</strong><br>
-• HTTP直推: 5-15分钟<br>
-• 备用同步: 15-60分钟<br><br>
-感谢您参与SAGE基准测试！🧪
             """)
             return success_msg
@@ -317,57 +221,5 @@ def get_submission_stats(submissions_dir: str = "./submissions") -> Dict[str, An
         "recent": submissions[:10]  # 最近10个
     }
-def try_http_push_to_local(submission_data, filename):
-    """HTTP直推到本地服务器"""
-    try:
-        import requests
-        # 本地服务器地址配置（可以根据需要修改）
-        local_endpoints = [
-            'http://127.0.0.1:8080/api/submissions',
-            'http://localhost:8080/api/submissions',
-        ]
-        payload = {
-            "filename": filename,
-            "content": submission_data,
-            "timestamp": datetime.now().isoformat(),
-            "source": "huggingface_spaces",
-            "organization": submission_data.get("submission_org", "Unknown"),
-            "email": submission_data.get("submission_email", "")
-        }
-        for endpoint in local_endpoints:
-            try:
-                print(f"🔄 尝试HTTP推送到: {endpoint}")
-                response = requests.post(
-                    endpoint,
-                    json=payload,
-                    timeout=5,
-                    headers={'Content-Type': 'application/json'}
-                )
-                if response.status_code == 200:
-                    result = response.json()
-                    print(f"✅ HTTP推送成功: {result.get('message', 'OK')}")
-                    print(f"📁 本地路径: {result.get('local_path', 'Unknown')}")
-                    return True, result
-                else:
-                    print(f"⚠️ HTTP响应错误 {response.status_code}: {response.text}")
-            except requests.ConnectionError:
-                print(f"⚠️ 无法连接到本地服务器: {endpoint}")
-            except requests.Timeout:
-                print(f"⚠️ 连接超时: {endpoint}")
-            except Exception as e:
-                print(f"⚠️ HTTP推送失败 {endpoint}: {e}")
-        return False, None
-    except ImportError:
-        print("⚠️ requests模块未安装，无法使用HTTP推送")
-        return False, None
-    except Exception as e:
-        print(f"❌ HTTP推送模块失败: {e}")
-        return False, None

 #!/usr/bin/env python3
 """
+SAGE提交处理 - OSS模式
+使用阿里云OSS替代git/http提交方式
 """
 import json
 import os
+import sys
 from datetime import datetime
 from typing import Dict, Any
+from pathlib import Path
+# 导入OSS提交处理器
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'oss_sage_evaluator'))
+try:
+    from oss_submission_handler import OSSSubmissionHandler
+    OSS_AVAILABLE = True
+except ImportError as e:
+    print(f"⚠️ OSS模块不可用，将使用备用模式: {e}")
+    OSS_AVAILABLE = False
 def format_error(msg):
     return f"<p style='color: red; font-size: 16px;'>{msg}</p>"
             saved_path = save_submission_file(submission_data)
             print(f"✅ 提交文件已保存到: {saved_path}")
+            # OSS上传策略
+            if OSS_AVAILABLE:
+                try:
+                    # 使用OSS提交处理器
+                    oss_handler = OSSSubmissionHandler()
+                    result = oss_handler.process_sage_submission(submission_data, org_name, email)
+                    # 如果OSS处理成功，直接返回结果
+                    if "提交成功" in result:
+                        return result
+                    else:
+                        # OSS失败，继续使用备用模式
+                        print(f"⚠️ OSS提交失败，使用备用模式: {result}")
+                except Exception as e:
+                    print(f"⚠️ OSS提交异常，使用备用模式: {e}")
+            # 备用模式：本地保存
+            filename = os.path.basename(saved_path)
             # 生成成功消息
             org = submission_data["submission_org"]
 📋 <strong>提交信息:</strong><br>
 • 组织: {org}<br>
 • 邮箱: {email_addr}<br>
+• 预测数量: {num_predictions} 个问题<br>
+• 文件名: {filename}<br><br>
+🚀 <strong>存储状态:</strong><br>
+文件已保存到本地存储，等待系统同步到评测环境。<br><br>
 ⏳ <strong>评测流程:</strong><br>
 您的提交将使用LLM-as-Judge进行自动评估，包括科学推理能力的全面测试。<br>
 评测完成后，结果将自动出现在排行榜中。<br><br>
 🕐 <strong>预计时间:</strong><br>
+• 正常情况: 5-15分钟<br>
+• 同步延迟: 15-60分钟<br><br>
+🧪 感谢您参与SAGE科学推理基准测试！
             """)
             return success_msg
         "recent": submissions[:10]  # 最近10个
     }
+# 移除了原有的HTTP推送函数，现在使用OSS模式

src/submission/submit.py DELETED Viewed

@@ -1,119 +0,0 @@
-import json
-import os
-from datetime import datetime, timezone
-from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
-from src.submission.check_validity import (
-    already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
-)
-REQUESTED_MODELS = None
-USERS_TO_SUBMISSION_DATES = None
-def add_new_eval(
-    model: str,
-    base_model: str,
-    revision: str,
-    precision: str,
-    weight_type: str,
-    model_type: str,
-):
-    global REQUESTED_MODELS
-    global USERS_TO_SUBMISSION_DATES
-    if not REQUESTED_MODELS:
-        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
-    user_name = ""
-    model_path = model
-    if "/" in model:
-        user_name = model.split("/")[0]
-        model_path = model.split("/")[1]
-    precision = precision.split(" ")[0]
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    if model_type is None or model_type == "":
-        return styled_error("Please select a model type.")
-    # Does the model actually exist?
-    if revision == "":
-        revision = "main"
-    # Is the model on the hub?
-    if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not base_model_on_hub:
-            return styled_error(f'Base model "{base_model}" {error}')
-    if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not model_on_hub:
-            return styled_error(f'Model "{model}" {error}')
-    # Is the model info correctly filled?
-    try:
-        model_info = API.model_info(repo_id=model, revision=revision)
-    except Exception:
-        return styled_error("Could not get your model information. Please fill it up properly.")
-    model_size = get_model_size(model_info=model_info, precision=precision)
-    # Were the model card and license filled?
-    try:
-        license = model_info.cardData["license"]
-    except Exception:
-        return styled_error("Please select a license for your model")
-    modelcard_OK, error_msg = check_model_card(model)
-    if not modelcard_OK:
-        return styled_error(error_msg)
-    # Seems good, creating the eval
-    print("Adding new eval")
-    eval_entry = {
-        "model": model,
-        "base_model": base_model,
-        "revision": revision,
-        "precision": precision,
-        "weight_type": weight_type,
-        "status": "PENDING",
-        "submitted_time": current_time,
-        "model_type": model_type,
-        "likes": model_info.likes,
-        "params": model_size,
-        "license": license,
-        "private": False,
-    }
-    # Check for duplicate submission
-    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
-        return styled_warning("This model has been already submitted.")
-    print("Creating eval file")
-    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
-    os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
-    with open(out_path, "w") as f:
-        f.write(json.dumps(eval_entry))
-    print("Uploading eval file")
-    API.upload_file(
-        path_or_fileobj=out_path,
-        path_in_repo=out_path.split("eval-queue/")[1],
-        repo_id=QUEUE_REPO,
-        repo_type="dataset",
-        commit_message=f"Add {model} to eval queue",
-    )
-    # Remove the local file
-    os.remove(out_path)
-    return styled_message(
-        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
-    )