DataEngEval

Sleeping

uparekh01151 committed on Sep 20

Commit

9271fde

1 Parent(s): b62caf4

Reorder leaderboard columns with comprehensive display

Files changed (2) hide show

app.py CHANGED Viewed

@@ -168,9 +168,15 @@ def run_evaluation(dataset_name: str, dialect: str, case_selection: str,
                 model_name,
                 formatting["composite_score"].format(result['composite_score']),
                 formatting["correctness_exact"].format(result['correctness_exact']),
-                formatting["exec_success"].format(result['exec_success']),
                 formatting["result_match_f1"].format(result['result_match_f1']),
-                formatting["latency_ms"].format(result['latency_ms'])
             ])
             detailed_results.append(f"""
@@ -195,7 +201,7 @@ def run_evaluation(dataset_name: str, dialect: str, case_selection: str,
         except Exception as e:
             error_msg = f"Error evaluating {model_name}: {str(e)}"
             print(error_msg)
-            results.append([len(results) + 1, model_name, "ERROR", "ERROR", "ERROR", "ERROR", "ERROR"])
             detailed_results.append(f"**Error with {model_name}:** {error_msg}\n\n---\n")
     # Create results DataFrame using config

                 model_name,
                 formatting["composite_score"].format(result['composite_score']),
                 formatting["correctness_exact"].format(result['correctness_exact']),
                 formatting["result_match_f1"].format(result['result_match_f1']),
+                formatting["exec_success"].format(result['exec_success']),
+                formatting["latency_ms"].format(result['latency_ms']),
+                result['dataset_name'],
+                result['case_id'],
+                result['question'][:100] + "..." if len(result['question']) > 100 else result['question'],
+                result['reference_sql'][:100] + "..." if len(result['reference_sql']) > 100 else result['reference_sql'],
+                result['candidate_sql'][:100] + "..." if len(result['candidate_sql']) > 100 else result['candidate_sql'],
+                formatting["dialect_ok"].format(result['dialect_ok'])
             ])
             detailed_results.append(f"""
         except Exception as e:
             error_msg = f"Error evaluating {model_name}: {str(e)}"
             print(error_msg)
+            results.append([len(results) + 1, model_name, "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR", "ERROR"])
             detailed_results.append(f"**Error with {model_name}:** {error_msg}\n\n---\n")
     # Create results DataFrame using config

config/app.yaml CHANGED Viewed

@@ -34,9 +34,15 @@ leaderboard:
       - "Model"
       - "Composite Score"
       - "Correctness"
-      - "Exec Success"
       - "Result F1"
       - "Latency"
 # Available SQL Dialects
 dialects:

       - "Model"
       - "Composite Score"
       - "Correctness"
       - "Result F1"
+      - "Exec Success"
       - "Latency"
+      - "Dataset"
+      - "Case ID"
+      - "Question"
+      - "Reference SQL"
+      - "Generated SQL"
+      - "Dialect OK"
 # Available SQL Dialects
 dialects: