SAGE OSS Evaluator committed on
Commit
bb10451
·
1 Parent(s): f235878
Files changed (1) hide show
  1. app.py +72 -72
app.py CHANGED
@@ -64,12 +64,12 @@ def model_hyperlink(link, model_name):
64
  return model_name
65
 
66
  def load_submission_history():
67
- """OSS加载用户提交历史"""
68
  try:
69
  from src.oss.oss_file_manager import OSSFileManager
70
  oss_manager = OSSFileManager()
71
 
72
- # 尝试下载提交历史文件
73
  history_content = oss_manager.download_file_content(
74
  SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE
75
  )
@@ -77,20 +77,20 @@ def load_submission_history():
77
  if history_content:
78
  return json.loads(history_content)
79
  else:
80
- print("📝 创建新的提交历史记录")
81
  return {}
82
 
83
  except Exception as e:
84
- print(f"⚠️ 加载提交历史失败: {e}")
85
  return {}
86
 
87
  def save_submission_history(history):
88
- """保存用户提交历史到OSS"""
89
  try:
90
  from src.oss.oss_file_manager import OSSFileManager
91
  oss_manager = OSSFileManager()
92
 
93
- # 上传提交历史
94
  history_json = json.dumps(history, indent=2, ensure_ascii=False)
95
  success = oss_manager.upload_file_content(
96
  content=history_json,
@@ -100,53 +100,53 @@ def save_submission_history(history):
100
  return success
101
 
102
  except Exception as e:
103
- print(f"❌ 保存提交历史失败: {e}")
104
  return False
105
 
106
  def check_user_submission_eligibility(profile: gr.OAuthProfile, org_name: str):
107
- """检查用户提交资格"""
108
  try:
109
- # 1. 检查账号年龄限制 (60)
110
  user_data = requests.get(f"https://huggingface.co/api/users/{profile.username}/overview")
111
  if user_data.status_code == 200:
112
  creation_date = json.loads(user_data.content)["createdAt"]
113
  account_age = datetime.datetime.now() - datetime.datetime.strptime(creation_date, '%Y-%m-%dT%H:%M:%S.%fZ')
114
 
115
  if account_age < datetime.timedelta(days=60):
116
- return False, "此账号不满足提交要求。账号创建时间必须超过60天。"
117
  else:
118
- return False, "无法验证账号信息,请稍后再试。"
119
 
120
- # 2. 检查每日提交限制
121
  submission_history = load_submission_history()
122
  user_submissions = submission_history.get(profile.username, [])
123
 
124
  today = datetime.datetime.today().strftime('%Y-%m-%d')
125
  today_submissions = [s for s in user_submissions if s.get("date", "") == today]
126
 
127
- if len(today_submissions) > 0:
128
- return False, "您今天已经提交过一次,请明天再试。"
129
 
130
- # 3. 检查重复提交 (组织名+用户名组合)
131
  for submission in user_submissions:
132
  if submission.get("organization", "").lower() == org_name.lower():
133
- return False, f"您已经为组织 '{org_name}' 提交过结果。如需更新,请联系管理员。"
134
 
135
- return True, "检查通过"
136
 
137
  except Exception as e:
138
- print(f"❌ 用户资格检查失败: {e}")
139
- return False, f"系统检查出现错误,请稍后再试: {str(e)}"
140
 
141
  def record_user_submission(profile: gr.OAuthProfile, org_name: str, email: str):
142
- """记录用户提交"""
143
  try:
144
  submission_history = load_submission_history()
145
 
146
  if profile.username not in submission_history:
147
  submission_history[profile.username] = []
148
 
149
- # 记录本次提交
150
  submission_record = {
151
  "date": datetime.datetime.today().strftime('%Y-%m-%d'),
152
  "time": datetime.datetime.now().strftime('%H:%M:%S'),
@@ -157,11 +157,11 @@ def record_user_submission(profile: gr.OAuthProfile, org_name: str, email: str):
157
 
158
  submission_history[profile.username].append(submission_record)
159
 
160
- # 保存历史记录
161
  return save_submission_history(submission_history)
162
 
163
  except Exception as e:
164
- print(f"❌ 记录提交历史失败: {e}")
165
  return False
166
 
167
  def get_leaderboard_dataframe():
@@ -231,7 +231,7 @@ with demo:
231
  # Main leaderboard table
232
  gr.Markdown("## 🏆 SAGE Benchmark Results", elem_classes="markdown-text")
233
 
234
- # Debug information - 设为动态更新组件
235
  results_count = gr.Markdown(f"📊 **Showing {len(leaderboard_df)} results**")
236
 
237
  leaderboard_table = gr.Dataframe(
@@ -246,7 +246,7 @@ with demo:
246
  refresh_button = gr.Button("🔄 Refresh Leaderboard")
247
 
248
  def refresh_leaderboard_with_count():
249
- """刷新排行榜并更新计数显示"""
250
  df = refresh_leaderboard()
251
  count_text = f"📊 **Showing {len(df)} results**"
252
  return df, count_text
@@ -263,39 +263,39 @@ with demo:
263
 
264
  # 添加提交说明(登录要求暂时注释)
265
  gr.Markdown("""
266
- ### 📋 提交要求
267
  <!--
268
- - **登录要求**: 必须使用 HuggingFace 账号登录
269
- - **账号限制**: 账号创建时间必须超过 60
270
- - **提交频率**: 每个用户每天只能提交 1
271
  -->
272
- - **文件格式**: 上传符合SAGE格式的JSON文件
273
- - **组织信息**: 填写准确的组织名称(将显示在排行榜)
274
- - **联系邮箱**: 提供有效邮箱用于结果通知
275
- - **自动评测**: 提交后将自动进行LLM评测并更新排行榜
276
 
277
  <!--
278
- ### 🔐 安全政策
279
- 为防止垃圾提交和确保评测质量,我们采用了严格的提交限制:
280
- - 新账号需要等待60天才能提交(防止恶意注册)
281
- - 每日提交限制确保排行榜质量和服务器稳定性
282
- - 重复提交检查避免同一组织多次提交相同结果
283
  -->
284
  """, elem_classes="markdown-text")
285
 
286
  with gr.Row():
287
  with gr.Column():
288
  org_textbox = gr.Textbox(
289
- label="Organization Name - 组织名称将显示在排行榜上",
290
  placeholder="Your Organization"
291
  )
292
  email_textbox = gr.Textbox(
293
- label="Contact Email - 邮箱仅用于联系,不会公开显示",
294
  placeholder="contact@example.com"
295
  )
296
  with gr.Column():
297
  file_upload = gr.File(
298
- label="Upload SAGE Results (JSON) - 上传符合SAGE格式的JSON结果文件",
299
  file_types=[".json"],
300
  type="filepath"
301
  )
@@ -312,35 +312,35 @@ with demo:
312
  try:
313
  if profile and getattr(profile, "username", None):
314
  name = profile.username
315
- text = f"✅ 已登录: **{name}**"
316
  else:
317
- text = "❌ 登录失败,请重试"
318
  return profile, text
319
  except Exception:
320
- return None, "❌ 登录失败,请重试"
321
  login_button.click(on_login, inputs=[], outputs=[profile_state, login_status])
322
 
323
  # 进度显示和结果显示区域
324
- progress_info = gr.HTML() # 进度显示区域
325
- submission_result = gr.HTML() # 结果显示区域
326
 
327
  def show_progress(step, message, total_steps=4):
328
- """显示进度信息"""
329
  progress_percentage = int((step / total_steps) * 100)
330
  progress_html = f"""
331
  <div style="background-color: #e7f3ff; border: 1px solid #4dabf7; border-radius: 5px; padding: 15px; margin: 10px 0;">
332
  <div style="display: flex; align-items: center; margin-bottom: 10px;">
333
- <h4 style="color: #1971c2; margin: 0; flex-grow: 1;">⏳ 提交处理中...</h4>
334
  <span style="color: #1971c2; font-weight: bold;">{progress_percentage}%</span>
335
  </div>
336
- <p style="color: #1971c2; margin: 5px 0;"><strong>步骤 {step}/{total_steps}:</strong> {message}</p>
337
  <div style="background-color: #fff; border-radius: 10px; height: 20px; margin: 10px 0; border: 1px solid #dee2e6;">
338
  <div style="background: linear-gradient(90deg, #4dabf7, #74c0fc); height: 100%; width: {progress_percentage}%; border-radius: 10px; transition: width 0.5s ease; display: flex; align-items: center; justify-content: center;">
339
  {f'<span style="color: white; font-size: 12px; font-weight: bold;">{progress_percentage}%</span>' if progress_percentage > 20 else ''}
340
  </div>
341
  </div>
342
  <p style="color: #495057; font-size: 14px; margin: 5px 0;">
343
- {'✨ 即将完成,请稍候...' if step >= total_steps else '📤 请稍候,正在处理您的提交...'}
344
  </p>
345
  </div>
346
  """
@@ -349,36 +349,36 @@ with demo:
349
  def handle_submission(file_upload, org_name, email, user_profile, progress=gr.Progress()):
350
  try:
351
  # 步骤1: 基本验证
352
- progress(0.1, desc="验证提交信息...")
353
- yield show_progress(1, "验证提交信息"), ""
354
 
355
  # 校验登录
356
  if user_profile is None or getattr(user_profile, "username", None) is None:
357
- yield "", format_error("请先登录 HuggingFace 账号后再提交")
358
  return
359
 
360
  if not file_upload:
361
- yield "", format_error("请选择要上传的文件")
362
  return
363
  if not org_name or not org_name.strip():
364
- yield "", format_error("请输入组织名称")
365
  return
366
  if not email or not email.strip():
367
- yield "", format_error("请输入邮箱地址")
368
  return
369
 
370
  # 验证邮箱格式
371
  _, parsed_email = parseaddr(email)
372
  if "@" not in parsed_email:
373
- yield "", format_warning("请提供有效的邮箱地址")
374
  return
375
 
376
  # 步骤2: 文件验证和读取
377
- progress(0.3, desc="验证文件格式...")
378
- yield show_progress(2, "验证文件格式和内容"), ""
379
 
380
  import time
381
- time.sleep(0.5) # 让用户看到进度更新
382
 
383
  # 用户资格检查(账号年龄/频率/重复提交)
384
  eligible, msg = check_user_submission_eligibility(user_profile, org_name)
@@ -387,18 +387,18 @@ with demo:
387
  return
388
 
389
  # 步骤3: 上传到OSS
390
- progress(0.6, desc="上传文件到OSS...")
391
- yield show_progress(3, "上传文件到OSS存储"), ""
392
 
393
  # 处理文件提交
394
  from src.submission.simple_submit import process_sage_submission_simple
395
  result = process_sage_submission_simple(file_upload, org_name, email)
396
 
397
  # 步骤4: 完成
398
- progress(1.0, desc="提交完成!")
399
- yield show_progress(4, "提交完成,准备评测"), ""
400
 
401
- time.sleep(0.5) # 让用户看到完成状态
402
 
403
  # 记录提交历史
404
  try:
@@ -409,11 +409,11 @@ with demo:
409
  # 生成成功信息
410
  success_info = f"""
411
  <div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px; padding: 15px; margin: 10px 0;">
412
- <h4 style="color: #155724; margin-top: 0;">🎉 提交成功!</h4>
413
- <p style="color: #155724; margin: 5px 0;"><strong>组织:</strong> {org_name}</p>
414
- <p style="color: #155724; margin: 5px 0;"><strong>邮箱:</strong> {email}</p>
415
- <p style="color: #155724; margin: 5px 0;"><strong>提交时间:</strong> {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
416
- <p style="color: #155724; margin-bottom: 0;">您的结果已通过OSS提交,将在5-10分钟内完成LLM评测并更新排行榜。</p>
417
  </div>
418
  """
419
 
@@ -421,11 +421,11 @@ with demo:
421
  yield "", success_info + result
422
 
423
  except ImportError as e:
424
- yield "", format_error(f"提交系统模块不可用: {e}")
425
  except Exception as e:
426
  import traceback
427
  traceback.print_exc()
428
- yield "", format_error(f"提交过程中出现错误: {str(e)}")
429
 
430
  submit_button.click(
431
  handle_submission,
 
64
  return model_name
65
 
66
  def load_submission_history():
67
+ """Load user submission history from OSS"""
68
  try:
69
  from src.oss.oss_file_manager import OSSFileManager
70
  oss_manager = OSSFileManager()
71
 
72
+ # Try to download submission history file
73
  history_content = oss_manager.download_file_content(
74
  SUBMISSION_TRACKING_PATH + SUBMISSION_HISTORY_FILE
75
  )
 
77
  if history_content:
78
  return json.loads(history_content)
79
  else:
80
+ print("📝 Creating new submission history")
81
  return {}
82
 
83
  except Exception as e:
84
+ print(f"⚠️ Failed to load submission history: {e}")
85
  return {}
86
 
87
  def save_submission_history(history):
88
+ """Save user submission history to OSS"""
89
  try:
90
  from src.oss.oss_file_manager import OSSFileManager
91
  oss_manager = OSSFileManager()
92
 
93
+ # Upload submission history
94
  history_json = json.dumps(history, indent=2, ensure_ascii=False)
95
  success = oss_manager.upload_file_content(
96
  content=history_json,
 
100
  return success
101
 
102
  except Exception as e:
103
+ print(f"❌ Failed to save submission history: {e}")
104
  return False
105
 
106
  def check_user_submission_eligibility(profile: gr.OAuthProfile, org_name: str):
107
+ """Check user submission eligibility"""
108
  try:
109
+ # 1. Check account age limit (60 days)
110
  user_data = requests.get(f"https://huggingface.co/api/users/{profile.username}/overview")
111
  if user_data.status_code == 200:
112
  creation_date = json.loads(user_data.content)["createdAt"]
113
  account_age = datetime.datetime.now() - datetime.datetime.strptime(creation_date, '%Y-%m-%dT%H:%M:%S.%fZ')
114
 
115
  if account_age < datetime.timedelta(days=60):
116
+ return False, "This account does not meet the submission requirement. Account age must exceed 60 days."
117
  else:
118
+ return False, "Unable to verify account information. Please try again later."
119
 
120
+ # 2. Check daily submission limit
121
  submission_history = load_submission_history()
122
  user_submissions = submission_history.get(profile.username, [])
123
 
124
  today = datetime.datetime.today().strftime('%Y-%m-%d')
125
  today_submissions = [s for s in user_submissions if s.get("date", "") == today]
126
 
127
+ if len(today_submissions) > 1:
128
+ return False, "You have already submitted twice today. Please try again tomorrow."
129
 
130
+ # 3. Check duplicate submission (organization + username)
131
  for submission in user_submissions:
132
  if submission.get("organization", "").lower() == org_name.lower():
133
+ return False, f"You have already submitted results for organization '{org_name}'. To update, please contact the administrator."
134
 
135
+ return True, "Eligibility check passed"
136
 
137
  except Exception as e:
138
+ print(f"❌ User eligibility check failed: {e}")
139
+ return False, f"System check error, please try again later: {str(e)}"
140
 
141
  def record_user_submission(profile: gr.OAuthProfile, org_name: str, email: str):
142
+ """Record user submission"""
143
  try:
144
  submission_history = load_submission_history()
145
 
146
  if profile.username not in submission_history:
147
  submission_history[profile.username] = []
148
 
149
+ # Record this submission
150
  submission_record = {
151
  "date": datetime.datetime.today().strftime('%Y-%m-%d'),
152
  "time": datetime.datetime.now().strftime('%H:%M:%S'),
 
157
 
158
  submission_history[profile.username].append(submission_record)
159
 
160
+ # Save submission history
161
  return save_submission_history(submission_history)
162
 
163
  except Exception as e:
164
+ print(f"❌ Failed to record submission history: {e}")
165
  return False
166
 
167
  def get_leaderboard_dataframe():
 
231
  # Main leaderboard table
232
  gr.Markdown("## 🏆 SAGE Benchmark Results", elem_classes="markdown-text")
233
 
234
+ # Debug information - dynamic component
235
  results_count = gr.Markdown(f"📊 **Showing {len(leaderboard_df)} results**")
236
 
237
  leaderboard_table = gr.Dataframe(
 
246
  refresh_button = gr.Button("🔄 Refresh Leaderboard")
247
 
248
  def refresh_leaderboard_with_count():
249
+ """Refresh leaderboard and update count display"""
250
  df = refresh_leaderboard()
251
  count_text = f"📊 **Showing {len(df)} results**"
252
  return df, count_text
 
263
 
264
  # Add submission instructions (login requirements are temporarily commented out)
265
  gr.Markdown("""
266
+ ### 📋 Submission Requirements
267
  <!--
268
+ - Login required: You must log in with a Hugging Face account
269
+ - Account age: Account must be older than 60 days
270
+ - Submission frequency: Each user can submit up to 2 times per day
271
  -->
272
+ - File format: Upload a JSON file in the SAGE format
273
+ - Organization: Provide the exact organization name (shown on the leaderboard)
274
+ - Contact email: Provide a valid email for notifications
275
+ - Auto evaluation: After submission, the system will run LLM-based evaluation and update the leaderboard
276
 
277
  <!--
278
+ ### 🔐 Security Policy
279
+ To prevent spam and ensure evaluation quality, we enforce:
280
+ - New accounts must wait 60 days before submitting (prevents abuse)
281
+ - Daily submission limits to ensure leaderboard quality and system stability
282
+ - Duplicate checks to avoid multiple submissions for the same organization
283
  -->
284
  """, elem_classes="markdown-text")
285
 
286
  with gr.Row():
287
  with gr.Column():
288
  org_textbox = gr.Textbox(
289
+ label="Organization Name - will be shown on the leaderboard",
290
  placeholder="Your Organization"
291
  )
292
  email_textbox = gr.Textbox(
293
+ label="Contact Email - used for contact, not publicly visible",
294
  placeholder="contact@example.com"
295
  )
296
  with gr.Column():
297
  file_upload = gr.File(
298
+ label="Upload SAGE Results (JSON)",
299
  file_types=[".json"],
300
  type="filepath"
301
  )
 
312
  try:
313
  if profile and getattr(profile, "username", None):
314
  name = profile.username
315
+ text = f"✅ Logged in as: **{name}**"
316
  else:
317
+ text = "❌ Login failed, please try again"
318
  return profile, text
319
  except Exception:
320
+ return None, "❌ Login failed, please try again"
321
  login_button.click(on_login, inputs=[], outputs=[profile_state, login_status])
322
 
323
  # Progress display and result display areas
324
+ progress_info = gr.HTML()
325
+ submission_result = gr.HTML()
326
 
327
  def show_progress(step, message, total_steps=4):
328
+ """Show progress information"""
329
  progress_percentage = int((step / total_steps) * 100)
330
  progress_html = f"""
331
  <div style="background-color: #e7f3ff; border: 1px solid #4dabf7; border-radius: 5px; padding: 15px; margin: 10px 0;">
332
  <div style="display: flex; align-items: center; margin-bottom: 10px;">
333
+ <h4 style="color: #1971c2; margin: 0; flex-grow: 1;">⏳ Processing submission...</h4>
334
  <span style="color: #1971c2; font-weight: bold;">{progress_percentage}%</span>
335
  </div>
336
+ <p style="color: #1971c2; margin: 5px 0;"><strong>Step {step}/{total_steps}:</strong> {message}</p>
337
  <div style="background-color: #fff; border-radius: 10px; height: 20px; margin: 10px 0; border: 1px solid #dee2e6;">
338
  <div style="background: linear-gradient(90deg, #4dabf7, #74c0fc); height: 100%; width: {progress_percentage}%; border-radius: 10px; transition: width 0.5s ease; display: flex; align-items: center; justify-content: center;">
339
  {f'<span style="color: white; font-size: 12px; font-weight: bold;">{progress_percentage}%</span>' if progress_percentage > 20 else ''}
340
  </div>
341
  </div>
342
  <p style="color: #495057; font-size: 14px; margin: 5px 0;">
343
+ {'✨ Almost done, please wait...' if step >= total_steps else '📤 Please wait, processing your submission...'}
344
  </p>
345
  </div>
346
  """
 
349
  def handle_submission(file_upload, org_name, email, user_profile, progress=gr.Progress()):
350
  try:
351
  # 步骤1: 基本验证
352
+ progress(0.1, desc="Validating submission info...")
353
+ yield show_progress(1, "Validating submission info"), ""
354
 
355
  # 校验登录
356
  if user_profile is None or getattr(user_profile, "username", None) is None:
357
+ yield "", format_error("Please log in with Hugging Face before submitting")
358
  return
359
 
360
  if not file_upload:
361
+ yield "", format_error("Please select a file to upload")
362
  return
363
  if not org_name or not org_name.strip():
364
+ yield "", format_error("Please enter organization name")
365
  return
366
  if not email or not email.strip():
367
+ yield "", format_error("Please enter email address")
368
  return
369
 
370
  # 验证邮箱格式
371
  _, parsed_email = parseaddr(email)
372
  if "@" not in parsed_email:
373
+ yield "", format_warning("Please provide a valid email address")
374
  return
375
 
376
  # 步骤2: 文件验证和读取
377
+ progress(0.3, desc="Validating file format...")
378
+ yield show_progress(2, "Validating file format and content"), ""
379
 
380
  import time
381
+ time.sleep(0.5) # allow users to see progress update
382
 
383
  # 用户资格检查(账号年龄/频率/重复提交)
384
  eligible, msg = check_user_submission_eligibility(user_profile, org_name)
 
387
  return
388
 
389
  # 步骤3: 上传到OSS
390
+ progress(0.6, desc="Uploading file to OSS...")
391
+ yield show_progress(3, "Uploading file to OSS storage"), ""
392
 
393
  # 处理文件提交
394
  from src.submission.simple_submit import process_sage_submission_simple
395
  result = process_sage_submission_simple(file_upload, org_name, email)
396
 
397
  # 步骤4: 完成
398
+ progress(1.0, desc="Submission completed!")
399
+ yield show_progress(4, "Submission completed, preparing evaluation"), ""
400
 
401
+ time.sleep(0.5) # allow users to see completion state
402
 
403
  # 记录提交历史
404
  try:
 
409
  # 生成成功信息
410
  success_info = f"""
411
  <div style="background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 5px; padding: 15px; margin: 10px 0;">
412
+ <h4 style="color: #155724; margin-top: 0;">🎉 Submission successful!</h4>
413
+ <p style="color: #155724; margin: 5px 0;"><strong>Organization:</strong> {org_name}</p>
414
+ <p style="color: #155724; margin: 5px 0;"><strong>Email:</strong> {email}</p>
415
+ <p style="color: #155724; margin: 5px 0;"><strong>Submitted at:</strong> {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
416
+ <p style="color: #155724; margin-bottom: 0;">Your results have been submitted via OSS. LLM evaluation will complete in 5-10 minutes and the leaderboard will be updated.</p>
417
  </div>
418
  """
419
 
 
421
  yield "", success_info + result
422
 
423
  except ImportError as e:
424
+ yield "", format_error(f"Submission system modules unavailable: {e}")
425
  except Exception as e:
426
  import traceback
427
  traceback.print_exc()
428
+ yield "", format_error(f"An error occurred during submission: {str(e)}")
429
 
430
  submit_button.click(
431
  handle_submission,