uparekh01151 committed
Commit 9271fde · 1 Parent(s): b62caf4

Reorder leaderboard columns with comprehensive display

Files changed (2):
  1. app.py +9 -3
  2. config/app.yaml +7 -1
app.py CHANGED
@@ -168,9 +168,15 @@ def run_evaluation(dataset_name: str, dialect: str, case_selection: str,
                 model_name,
                 formatting["composite_score"].format(result['composite_score']),
                 formatting["correctness_exact"].format(result['correctness_exact']),
-                formatting["exec_success"].format(result['exec_success']),
                 formatting["result_match_f1"].format(result['result_match_f1']),
-                formatting["latency_ms"].format(result['latency_ms'])
+                formatting["exec_success"].format(result['exec_success']),
+                formatting["latency_ms"].format(result['latency_ms']),
+                result['dataset_name'],
+                result['case_id'],
+                result['question'][:100] + "..." if len(result['question']) > 100 else result['question'],
+                result['reference_sql'][:100] + "..." if len(result['reference_sql']) > 100 else result['reference_sql'],
+                result['candidate_sql'][:100] + "..." if len(result['candidate_sql']) > 100 else result['candidate_sql'],
+                formatting["dialect_ok"].format(result['dialect_ok'])
             ])

             detailed_results.append(f"""
@@ -195,7 +201,7 @@ def run_evaluation(dataset_name: str, dialect: str, case_selection: str,
         except Exception as e:
             error_msg = f"Error evaluating {model_name}: {str(e)}"
             print(error_msg)
-            results.append([len(results) + 1, model_name, "ERROR", "ERROR", "ERROR", "ERROR", "ERROR"])
+            results.append([len(results) + 1, model_name, "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR"])
             detailed_results.append(f"**Error with {model_name}:** {error_msg}\n\n---\n")

     # Create results DataFrame using config
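The entries appended to each `results` row have to line up, position for position, with the leaderboard column names declared in `config/app.yaml` (reordered below), because the rows are later turned into the leaderboard DataFrame. A minimal sketch of that final step, assuming the config is loaded with PyYAML into a dict and the column list sits under a `leaderboard.columns` key; the key path and loading code are assumptions, only the file path and column labels come from this commit:

```python
import pandas as pd
import yaml

# Load the leaderboard column names from the app config.
# The "leaderboard" -> "columns" key path is an assumption; the labels match config/app.yaml.
with open("config/app.yaml") as f:
    config = yaml.safe_load(f)

columns = config["leaderboard"]["columns"]

# `results` is the list of rows built above; each row must carry exactly
# one entry per configured column, in the same order.
results_df = pd.DataFrame(results, columns=columns)
```

pandas raises a ValueError when a row's length does not match `columns`, which is why the error fallback row above is widened to match the new column count.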
config/app.yaml CHANGED
@@ -34,9 +34,15 @@ leaderboard:
     - "Model"
     - "Composite Score"
     - "Correctness"
-    - "Exec Success"
     - "Result F1"
+    - "Exec Success"
     - "Latency"
+    - "Dataset"
+    - "Case ID"
+    - "Question"
+    - "Reference SQL"
+    - "Generated SQL"
+    - "Dialect OK"

 # Available SQL Dialects
 dialects:
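With six more columns in play, every row, including the error fallback in app.py, has to keep the same width as this configured column list. A small pre-flight check along these lines (purely a suggested sketch, reusing the `results` and `columns` names from the sketch above) would surface a mismatch before pandas does:

```python
# Fail fast if any appended row drifts out of sync with config/app.yaml's column list
for i, row in enumerate(results):
    if len(row) != len(columns):
        raise ValueError(
            f"Row {i} has {len(row)} entries, but {len(columns)} leaderboard columns are configured"
        )
```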