File size: 9,839 Bytes
2086543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f24b2b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2086543
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import json
import os
from datetime import datetime, timezone
from typing import Dict, List, Any

from src.display.formatting import styled_error, styled_message, styled_warning


def validate_sage_submission(submission_data: Dict[str, Any]) -> tuple[bool, str]:
    """Validate the structure of a SAGE benchmark submission.

    The checks are purely structural; prediction contents are not evaluated.
    Malformed input of any type yields a ``(False, message)`` result rather
    than an exception, so callers can always show the message to the user.

    Args:
        submission_data: Parsed JSON payload. Expected to be a dict with the
            keys ``submission_org``, ``submission_email`` and ``predictions``.

    Returns:
        ``(True, "Valid submission format")`` when every check passes,
        otherwise ``(False, <reason>)`` describing the first failed check.
    """
    # A JSON document may be a list/number/string; only objects are acceptable.
    if not isinstance(submission_data, dict):
        return False, "Submission must be a JSON object"

    # Check required top-level fields
    for field in ("submission_org", "submission_email", "predictions"):
        if field not in submission_data:
            return False, f"Missing required field: {field}"

    # The organization name is later used for naming, so it must be real text.
    org = submission_data["submission_org"]
    if not isinstance(org, str) or not org.strip():
        return False, "Organization name must be a non-empty string"

    # Validate email format (basic); non-strings would make `in` raise.
    email = submission_data["submission_email"]
    if not isinstance(email, str) or "@" not in email or "." not in email:
        return False, "Invalid email format"

    # Validate predictions
    predictions = submission_data["predictions"]
    if not isinstance(predictions, list) or not predictions:
        return False, "Predictions must be a non-empty list"

    pred_required_fields = ("original_question_id", "content", "reasoning_content")
    for i, prediction in enumerate(predictions):
        # Each entry must be an object before its keys can be inspected.
        if not isinstance(prediction, dict):
            return False, f"Prediction {i} must be an object"

        for field in pred_required_fields:
            if field not in prediction:
                return False, f"Missing field '{field}' in prediction {i}"

        # Validate content arrays: both must hold exactly 4 items.
        content = prediction["content"]
        if not isinstance(content, list) or len(content) != 4:
            return False, f"Content in prediction {i} must be a list of exactly 4 items"

        reasoning_content = prediction["reasoning_content"]
        if not isinstance(reasoning_content, list) or len(reasoning_content) != 4:
            return False, f"Reasoning content in prediction {i} must be a list of exactly 4 items"

        # Validate question ID; bool is a subclass of int, so exclude it explicitly.
        question_id = prediction["original_question_id"]
        if isinstance(question_id, bool) or not isinstance(question_id, int):
            return False, f"Question ID in prediction {i} must be an integer"

    return True, "Valid submission format"


def _safe_org_dirname(org) -> str:
    """Return a filesystem-safe directory name derived from an organization name."""
    # Keep alphanumerics plus "-", "_" and "."; everything else (spaces, "/",
    # "\\", control characters, ...) becomes "_" so no path separator survives.
    cleaned = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(org))
    # Leading/trailing dots would permit "." / ".." (directory traversal).
    cleaned = cleaned.strip(".")
    return cleaned or "unknown_org"


def _update_initial_results(submission_data, domain_accuracies) -> bool:
    """Insert or replace the submitter's entry in ``initial_sage_results.json``.

    The update is best-effort: any failure while reading, building or writing
    the results is printed and reported through the return value.

    Returns:
        True when the results file was written, False otherwise.
    """
    # Try multiple possible paths for the initial results file; fall back to
    # the first one (which is then created) when none exists yet.
    possible_paths = [
        "./initial_sage_results.json",
        "initial_sage_results.json",
        os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "initial_sage_results.json")
    ]
    initial_results_file = next(
        (path for path in possible_paths if os.path.exists(path)),
        possible_paths[0],
    )

    try:
        # Load existing initial results
        if os.path.exists(initial_results_file):
            with open(initial_results_file, 'r') as f:
                initial_results = json.load(f)
        else:
            initial_results = []

        submitter = submission_data["submission_org"]

        # Convert to initial results format
        new_result = {
            "model_name": submitter,
            "organization": submitter,
            "tokens": "User Submission",
            "accuracy": domain_accuracies["sage_overall"],
            "mg_pass_2": domain_accuracies["sage_overall"],  # Use same value for now
            "mg_pass_4": domain_accuracies["sage_overall"],  # Use same value for now
            "submitted_time": datetime.now().strftime("%Y-%m-%d"),
            "results": domain_accuracies,
            "contact_email": submission_data["submission_email"]
        }

        # Replace the first entry belonging to this organization, else append.
        for i, result in enumerate(initial_results):
            if (result.get("model_name") == submitter or
                    result.get("organization") == submitter):
                initial_results[i] = new_result
                break
        else:
            initial_results.append(new_result)

        # Save updated initial results
        with open(initial_results_file, 'w') as f:
            json.dump(initial_results, f, indent=2)

        print(f"✅ Updated {initial_results_file} with new submission from {submitter}")
        return True

    except Exception as e:
        # Deliberately broad: a leaderboard-file problem must not hide the scores.
        print(f"⚠️ Failed to update initial results file: {e}")
        return False


def _format_success_message(submission_data, domain_accuracies, leaderboard_updated: bool) -> str:
    """Build the styled success message listing overall and per-domain scores."""
    overall_accuracy = domain_accuracies.get("sage_overall", 0)

    if leaderboard_updated:
        closing = (
            f"Your results have been added to the leaderboard. "
            f"Please refresh the page to see updated rankings."
        )
    else:
        # Do not claim a leaderboard update that did not happen.
        closing = (
            "Your submission was evaluated, but the leaderboard could not be updated. "
            "Please contact administrators."
        )

    return styled_message(
        f"🎉 SAGE submission processed successfully!\n\n"
        f"**Organization:** {submission_data['submission_org']}\n"
        f"**Overall Accuracy:** {overall_accuracy:.2f}%\n\n"
        f"**Domain Scores:**\n"
        f"  • Mathematics: {domain_accuracies.get('sage_math', 0):.2f}%\n"
        f"  • Physics: {domain_accuracies.get('sage_physics', 0):.2f}%\n"
        f"  • Chemistry: {domain_accuracies.get('sage_chemistry', 0):.2f}%\n"
        f"  • Biology: {domain_accuracies.get('sage_biology', 0):.2f}%\n"
        f"  • Earth Science: {domain_accuracies.get('sage_earth_science', 0):.2f}%\n"
        f"  • Astronomy: {domain_accuracies.get('sage_astronomy', 0):.2f}%\n\n"
        + closing
    )


def process_sage_submission_simple(submission_file, org_name=None, email=None) -> str:
    """Process a SAGE benchmark submission file - simplified version for basic leaderboard.

    Steps: read and parse the JSON file, optionally override org/email with
    the form inputs, validate the structure, store the raw submission under
    ``./sage_submissions/<org>/``, evaluate it and update the initial results
    file.

    Args:
        submission_file: Path of the uploaded JSON file, or None when nothing
            was uploaded.
        org_name: Organization name from the form. Applied only when ``email``
            is also provided; it then overrides the value in the file.
        email: Contact email from the form. Applied only together with
            ``org_name``.

    Returns:
        A styled message (error, warning or success) to show to the user.
        Failures are reported through the returned message, not raised.
    """
    try:
        if submission_file is None:
            return styled_error("No file uploaded. Please select a JSON file.")

        # submission_file is a file path string
        try:
            with open(submission_file, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, ValueError, TypeError) as e:
            # ValueError also covers undecodable (non-UTF-8) content.
            return styled_error(f"Error reading file: {str(e)}")

        # Parse JSON
        try:
            submission_data = json.loads(content)
        except json.JSONDecodeError as e:
            return styled_error(f"Invalid JSON format: {str(e)}")

        # Valid JSON may still be a list/scalar; the steps below need an object.
        if not isinstance(submission_data, dict):
            return styled_error("Submission validation failed: Submission must be a JSON object")

        # Form inputs take precedence over the file's values, but only when
        # both organization and email were supplied.
        if org_name and email:
            submission_data["submission_org"] = org_name
            submission_data["submission_email"] = email

        # Validate submission format
        is_valid, message = validate_sage_submission(submission_data)
        if not is_valid:
            return styled_error(f"Submission validation failed: {message}")

        # Save raw submission for later processing. The organization name is
        # user-controlled, so sanitize it before using it as a path component.
        org_dir = _safe_org_dirname(submission_data["submission_org"])
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        submission_dir = f"./sage_submissions/{org_dir}"
        os.makedirs(submission_dir, exist_ok=True)
        raw_submission_path = f"{submission_dir}/submission_{timestamp}.json"

        with open(raw_submission_path, 'w', encoding='utf-8') as f:
            json.dump(submission_data, f, indent=2)

        # Simple evaluation using the evaluation module
        try:
            # Imported lazily so an import failure is reported as a warning
            # after the raw submission has already been saved.
            from src.leaderboard.sage_eval import evaluate_sage_submission
            # NOTE(review): assumed to return a dict of scores keyed by
            # "sage_overall", "sage_math", ... - confirm against sage_eval.
            domain_accuracies = evaluate_sage_submission(submission_data)

            # Update initial_sage_results.json directly for persistence
            leaderboard_updated = _update_initial_results(submission_data, domain_accuracies)

            return _format_success_message(submission_data, domain_accuracies, leaderboard_updated)

        except Exception as eval_error:
            # If evaluation fails, the submission is already saved on disk.
            return styled_warning(
                f"⚠️ Submission received but evaluation failed.\n\n"
                f"Error: {str(eval_error)}\n\n"
                f"Your submission has been saved and will be processed manually. "
                f"Please contact administrators if this issue persists."
            )

    except Exception as e:
        # Top-level boundary: always answer with a styled error instead of raising.
        return styled_error(f"Submission processing failed: {str(e)}")


def load_sage_submissions(submissions_dir: str = "./sage_submissions") -> List[Dict]:
    """Load all SAGE submissions for display in queue.

    Scans every sub-directory of ``submissions_dir`` for files named
    ``submission_*.json``. Files that cannot be read or parsed, or whose JSON
    is not an object, are skipped with a printed warning.

    Args:
        submissions_dir: Root directory holding one sub-directory per
            organization.

    Returns:
        The loaded submissions, each extended with ``_filename`` and
        ``_org_dir``, sorted by file name in descending order. An empty list
        is returned when ``submissions_dir`` is not an existing directory.
    """
    # isdir rather than exists: a plain file here would make os.listdir raise.
    if not os.path.isdir(submissions_dir):
        return []

    submissions = []

    for org_dir in os.listdir(submissions_dir):
        org_path = os.path.join(submissions_dir, org_dir)
        if not os.path.isdir(org_path):
            continue

        for filename in os.listdir(org_path):
            if not (filename.startswith("submission_") and filename.endswith(".json")):
                continue

            file_path = os.path.join(org_path, filename)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    submission = json.load(f)
            except (OSError, ValueError) as exc:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                print(f"⚠️ Skipping unreadable submission {file_path}: {exc}")
                continue

            if not isinstance(submission, dict):
                # Metadata keys can only be attached to JSON objects.
                print(f"⚠️ Skipping submission {file_path}: top-level JSON is not an object")
                continue

            # Add metadata
            submission["_filename"] = filename
            submission["_org_dir"] = org_dir
            submissions.append(submission)

    # Sort by file name, descending; the org dir breaks ties so the order does
    # not depend on os.listdir's arbitrary ordering.
    submissions.sort(key=lambda s: (s["_filename"], s["_org_dir"]), reverse=True)
    return submissions