Spaces:
Running
Running
Refactor evaluation interface in evaluation_interface.py: Improve code structure and readability
Browse files - web/evaluation_interface.py +31 -30
web/evaluation_interface.py
CHANGED
|
@@ -111,37 +111,38 @@ def load_qa_pair_for_evaluation(conversation_id: str, evaluator: ChatEvaluator)
|
|
| 111 |
# Get all QA pairs
|
| 112 |
qa_pairs = evaluator.get_qa_pairs_for_evaluation(limit=1000)
|
| 113 |
|
| 114 |
-
#
|
| 115 |
-
|
| 116 |
-
if pair.get("conversation_id") == conversation_id:
|
| 117 |
-
question = pair.get("question", "")
|
| 118 |
-
original_answer = pair.get("original_answer", "")
|
| 119 |
-
|
| 120 |
-
# Check if there's an existing annotation
|
| 121 |
-
annotation = evaluator.get_annotation_by_conversation_id(conversation_id)
|
| 122 |
-
|
| 123 |
-
if annotation:
|
| 124 |
-
ratings = annotation.get("ratings", {})
|
| 125 |
-
improved_answer = annotation.get("improved_answer", original_answer)
|
| 126 |
-
notes = annotation.get("notes", "")
|
| 127 |
-
|
| 128 |
-
# Get individual ratings with default value of 3
|
| 129 |
-
accuracy = ratings.get("accuracy", 3)
|
| 130 |
-
completeness = ratings.get("completeness", 3)
|
| 131 |
-
relevance = ratings.get("relevance", 3)
|
| 132 |
-
clarity = ratings.get("clarity", 3)
|
| 133 |
-
legal_correctness = ratings.get("legal_correctness", 3)
|
| 134 |
-
|
| 135 |
-
return (question, original_answer, improved_answer,
|
| 136 |
-
accuracy, completeness, relevance, clarity,
|
| 137 |
-
legal_correctness, notes)
|
| 138 |
-
|
| 139 |
-
# Return default values for new evaluation
|
| 140 |
-
return (question, original_answer, original_answer,
|
| 141 |
-
3, 3, 3, 3, 3, "") # Default rating of 3 for all criteria
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
def save_evaluation(
|
| 147 |
conversation_id: str,
|
|
|
|
| 111 |
# Get all QA pairs
|
| 112 |
qa_pairs = evaluator.get_qa_pairs_for_evaluation(limit=1000)
|
| 113 |
|
| 114 |
+
# Get existing annotation if any
|
| 115 |
+
annotation = evaluator.get_annotation(conversation_id) # Changed from get_annotation_by_conversation_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
if annotation:
|
| 118 |
+
return (
|
| 119 |
+
annotation.get("question", ""),
|
| 120 |
+
annotation.get("original_answer", ""),
|
| 121 |
+
annotation.get("improved_answer", ""),
|
| 122 |
+
annotation.get("ratings", {}).get("accuracy", 1),
|
| 123 |
+
annotation.get("ratings", {}).get("completeness", 1),
|
| 124 |
+
annotation.get("ratings", {}).get("relevance", 1),
|
| 125 |
+
annotation.get("ratings", {}).get("clarity", 1),
|
| 126 |
+
annotation.get("ratings", {}).get("legal_correctness", 1),
|
| 127 |
+
annotation.get("notes", "")
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
# If no annotation exists, find the conversation in QA pairs
|
| 131 |
+
for qa_pair in qa_pairs:
|
| 132 |
+
if qa_pair.get("conversation_id") == conversation_id:
|
| 133 |
+
return (
|
| 134 |
+
qa_pair.get("question", ""),
|
| 135 |
+
qa_pair.get("answer", ""),
|
| 136 |
+
"", # Empty improved answer
|
| 137 |
+
1, # Default ratings
|
| 138 |
+
1,
|
| 139 |
+
1,
|
| 140 |
+
1,
|
| 141 |
+
1,
|
| 142 |
+
"" # Empty notes
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
return "", "", "", 1, 1, 1, 1, 1, "" # Return empty values if not found
|
| 146 |
|
| 147 |
def save_evaluation(
|
| 148 |
conversation_id: str,
|