Maria Castellanos committed on
Commit
b52c947
·
1 Parent(s): abd9b44

order and duplicate fix

Browse files
Files changed (2) hide show
  1. intermediate_leaderboard.py +11 -11
  2. utils.py +4 -2
intermediate_leaderboard.py CHANGED
@@ -13,7 +13,7 @@ import pandas as pd
13
  def validate_hf_username(username):
14
  username = str(username).strip()
15
  hf_url = f"https://huggingface.co/{username}"
16
- return check_page_exists(hf_url, delay=1)
17
  # return True # For testing purposes, assume all usernames are valid
18
 
19
  def validate_model_details(tag):
@@ -37,6 +37,8 @@ def make_intermediate_lb():
37
  test_repo=results_repo_test
38
  )
39
 
 
 
40
 
41
  # HF username validation
42
  hf_usernames = df_latest_raw["hf_username"].unique()
@@ -53,11 +55,12 @@ def make_intermediate_lb():
53
  # make sure to only keep the latest submission per user for the 'Average' endpoint
54
  df_latest_raw["submission_time"] = pd.to_datetime(df_latest_raw["submission_time"])
55
  df_latest_raw = df_latest_raw.query("Endpoint == 'Average'")
56
- df_latest_raw['latest_time_per_user'] = df_latest_raw.groupby('user')['submission_time'].transform('max')
57
  latest_submissions_df = df_latest_raw[df_latest_raw['submission_time'] == df_latest_raw['latest_time_per_user']].copy()
58
-
 
59
  latest_submissions_df = latest_submissions_df.sort_values(
60
- ['RAE'], ascending=True
61
  ).reset_index(drop=True)
62
 
63
  # Get the unique users in the order of their first appearance
@@ -75,9 +78,6 @@ def make_intermediate_lb():
75
  # Apply the mapping to create a new column with prefixed usernames
76
  latest_submissions_df['user'] = latest_submissions_df['user'].map(user_mapping)
77
 
78
-
79
-
80
-
81
  # Perform Tukey's HSD test
82
  tukey = pairwise_tukeyhsd(endog=latest_submissions_df['RAE'], groups=latest_submissions_df['user'], alpha=0.05)
83
  tukey_df = pd.DataFrame(data=tukey._results_table.data[1:],
@@ -89,7 +89,6 @@ def make_intermediate_lb():
89
  cld_df = pd.DataFrame(cld_dict.items(),columns=["group","letter"]).sort_values("group")
90
  cld_df.letter = [",".join(x) for x in cld_df.letter]
91
  cld_df["user"] = cld_df.group
92
- cld_df["user_fixed"] = cld_df.group.str.split("___").str[1]
93
 
94
  # clean up CLD letters for extended alphabet (i.e with @ symbols)
95
  def clean_up(ser):
@@ -113,14 +112,15 @@ def make_intermediate_lb():
113
  metric_stats[f"{metric}_display"] = metric_stats.apply(
114
  lambda row: f"{row[f'{metric}_mean']:.4f} ± {row[f'{metric}_std']:.4f}", axis=1
115
  )
116
- cld_df = cld_df.merge(metric_stats[['user', f'{metric}_mean', f'{metric}_std', f'{metric}_display']], on='user', how='left')
117
 
118
  # re-sort by RAE mean, lowest is best
119
  cld_df = cld_df.sort_values(by='RAE_mean', ascending=True).reset_index(drop=True)
 
120
 
 
 
121
 
122
- cld_subset = cld_df[['user_fixed', 'fixed_letter'] + [f'{metric}_display' for metric in METRICS]]
123
- cld_subset = cld_subset.rename(columns={'user_fixed': 'user', 'fixed_letter': 'CLD'})
124
  print(cld_subset.head())
125
  cld_subset.to_csv("leaderboard_cld_results.csv", index=False)
126
 
 
13
  def validate_hf_username(username):
14
  username = str(username).strip()
15
  hf_url = f"https://huggingface.co/{username}"
16
+ return check_page_exists(hf_url, delay=1, max_retries=10)
17
  # return True # For testing purposes, assume all usernames are valid
18
 
19
  def validate_model_details(tag):
 
37
  test_repo=results_repo_test
38
  )
39
 
40
+ # Make all usernames lowercase
41
+ df_latest_raw["hf_username"] = df_latest_raw["hf_username"].str.lower()
42
 
43
  # HF username validation
44
  hf_usernames = df_latest_raw["hf_username"].unique()
 
55
  # make sure to only keep the latest submission per user for the 'Average' endpoint
56
  df_latest_raw["submission_time"] = pd.to_datetime(df_latest_raw["submission_time"])
57
  df_latest_raw = df_latest_raw.query("Endpoint == 'Average'")
58
+ df_latest_raw['latest_time_per_user'] = df_latest_raw.groupby('hf_username')['submission_time'].transform('max')
59
  latest_submissions_df = df_latest_raw[df_latest_raw['submission_time'] == df_latest_raw['latest_time_per_user']].copy()
60
+ # Fix to order by the mean RAE and not the RAE of all samples (slight missmatch for some users)
61
+ latest_submissions_df['mean_RAE'] = latest_submissions_df.groupby('hf_username')['RAE'].transform('mean')
62
  latest_submissions_df = latest_submissions_df.sort_values(
63
+ by=['mean_RAE', 'Sample'], ascending=True
64
  ).reset_index(drop=True)
65
 
66
  # Get the unique users in the order of their first appearance
 
78
  # Apply the mapping to create a new column with prefixed usernames
79
  latest_submissions_df['user'] = latest_submissions_df['user'].map(user_mapping)
80
 
 
 
 
81
  # Perform Tukey's HSD test
82
  tukey = pairwise_tukeyhsd(endog=latest_submissions_df['RAE'], groups=latest_submissions_df['user'], alpha=0.05)
83
  tukey_df = pd.DataFrame(data=tukey._results_table.data[1:],
 
89
  cld_df = pd.DataFrame(cld_dict.items(),columns=["group","letter"]).sort_values("group")
90
  cld_df.letter = [",".join(x) for x in cld_df.letter]
91
  cld_df["user"] = cld_df.group
 
92
 
93
  # clean up CLD letters for extended alphabet (i.e with @ symbols)
94
  def clean_up(ser):
 
112
  metric_stats[f"{metric}_display"] = metric_stats.apply(
113
  lambda row: f"{row[f'{metric}_mean']:.4f} ± {row[f'{metric}_std']:.4f}", axis=1
114
  )
115
+ cld_df = metric_stats[['user', f'{metric}_mean', f'{metric}_std', f'{metric}_display']].merge(cld_df, on='user', how='left')
116
 
117
  # re-sort by RAE mean, lowest is best
118
  cld_df = cld_df.sort_values(by='RAE_mean', ascending=True).reset_index(drop=True)
119
+ cld_df['user'] = cld_df['user'].str.split('___').str[1]
120
 
121
+ cld_subset = cld_df[['user', 'fixed_letter'] + [f'{metric}_display' for metric in METRICS]]
122
+ cld_subset = cld_subset.rename(columns={'user': 'user', 'fixed_letter': 'CLD'})
123
 
 
 
124
  print(cld_subset.head())
125
  cld_subset.to_csv("leaderboard_cld_results.csv", index=False)
126
 
utils.py CHANGED
@@ -43,8 +43,10 @@ def check_page_exists(url: str, delay=0.2, max_retries=3, current_retries=0):
43
  # Check for Rate Limit Error and retry if under the limit
44
  if response.status_code == 429:
45
  if current_retries < max_retries:
46
- print(f"Warning: Rate limit hit on {safe_url}. Attempt {current_retries + 1}/{max_retries}. Waiting for 5 seconds...")
47
- time.sleep(5)
 
 
48
  # Recurse with an incremented retry counter
49
  return check_page_exists(safe_url, delay=delay, max_retries=max_retries, current_retries=current_retries + 1)
50
  else:
 
43
  # Check for Rate Limit Error and retry if under the limit
44
  if response.status_code == 429:
45
  if current_retries < max_retries:
46
+ # Make wait time exponential
47
+ wait_time = 5 * (2 ** current_retries)
48
+ print(f"Warning: Rate limit hit on {safe_url}. Attempt {current_retries + 1}/{max_retries}. Waiting for {wait_time} seconds...")
49
+ time.sleep(wait_time)
50
  # Recurse with an incremented retry counter
51
  return check_page_exists(safe_url, delay=delay, max_retries=max_retries, current_retries=current_retries + 1)
52
  else: