Spaces:
Sleeping
Sleeping
Commit
·
9271fde
1
Parent(s):
b62caf4
Reorder leaderboard columns with comprehensive display
Browse files
- app.py +9 -3
- config/app.yaml +7 -1
app.py
CHANGED
|
@@ -168,9 +168,15 @@ def run_evaluation(dataset_name: str, dialect: str, case_selection: str,
|
|
| 168 |
model_name,
|
| 169 |
formatting["composite_score"].format(result['composite_score']),
|
| 170 |
formatting["correctness_exact"].format(result['correctness_exact']),
|
| 171 |
-
formatting["exec_success"].format(result['exec_success']),
|
| 172 |
formatting["result_match_f1"].format(result['result_match_f1']),
|
| 173 |
-
formatting["latency_ms"].format(result['latency_ms'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
])
|
| 175 |
|
| 176 |
detailed_results.append(f"""
|
|
@@ -195,7 +201,7 @@ def run_evaluation(dataset_name: str, dialect: str, case_selection: str,
|
|
| 195 |
except Exception as e:
|
| 196 |
error_msg = f"Error evaluating {model_name}: {str(e)}"
|
| 197 |
print(error_msg)
|
| 198 |
-
results.append([len(results) + 1, model_name, "ERROR", "ERROR", "ERROR", "ERROR", "ERROR"])
|
| 199 |
detailed_results.append(f"**Error with {model_name}:** {error_msg}\n\n---\n")
|
| 200 |
|
| 201 |
# Create results DataFrame using config
|
|
|
|
| 168 |
model_name,
|
| 169 |
formatting["composite_score"].format(result['composite_score']),
|
| 170 |
formatting["correctness_exact"].format(result['correctness_exact']),
|
|
|
|
| 171 |
formatting["result_match_f1"].format(result['result_match_f1']),
|
| 172 |
+
formatting["exec_success"].format(result['exec_success']),
|
| 173 |
+
formatting["latency_ms"].format(result['latency_ms']),
|
| 174 |
+
result['dataset_name'],
|
| 175 |
+
result['case_id'],
|
| 176 |
+
result['question'][:100] + "..." if len(result['question']) > 100 else result['question'],
|
| 177 |
+
result['reference_sql'][:100] + "..." if len(result['reference_sql']) > 100 else result['reference_sql'],
|
| 178 |
+
result['candidate_sql'][:100] + "..." if len(result['candidate_sql']) > 100 else result['candidate_sql'],
|
| 179 |
+
formatting["dialect_ok"].format(result['dialect_ok'])
|
| 180 |
])
|
| 181 |
|
| 182 |
detailed_results.append(f"""
|
|
|
|
| 201 |
except Exception as e:
|
| 202 |
error_msg = f"Error evaluating {model_name}: {str(e)}"
|
| 203 |
print(error_msg)
|
| 204 |
+
results.append([len(results) + 1, model_name, "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR"])
|
| 205 |
detailed_results.append(f"**Error with {model_name}:** {error_msg}\n\n---\n")
|
| 206 |
|
| 207 |
# Create results DataFrame using config
|
config/app.yaml
CHANGED
|
@@ -34,9 +34,15 @@ leaderboard:
|
|
| 34 |
- "Model"
|
| 35 |
- "Composite Score"
|
| 36 |
- "Correctness"
|
| 37 |
-
- "Exec Success"
|
| 38 |
- "Result F1"
|
|
|
|
| 39 |
- "Latency"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
# Available SQL Dialects
|
| 42 |
dialects:
|
|
|
|
| 34 |
- "Model"
|
| 35 |
- "Composite Score"
|
| 36 |
- "Correctness"
|
|
|
|
| 37 |
- "Result F1"
|
| 38 |
+
- "Exec Success"
|
| 39 |
- "Latency"
|
| 40 |
+
- "Dataset"
|
| 41 |
+
- "Case ID"
|
| 42 |
+
- "Question"
|
| 43 |
+
- "Reference SQL"
|
| 44 |
+
- "Generated SQL"
|
| 45 |
+
- "Dialect OK"
|
| 46 |
|
| 47 |
# Available SQL Dialects
|
| 48 |
dialects:
|