toxic-comment-classifier_rlhf

Paused

App Files Files Community

JanviMl committed on Mar 25

Commit

685f359

verified ·

1 Parent(s): b69b713

Update metrics.py

Browse files

Files changed (1) hide show

metrics.py +101 -44

metrics.py CHANGED Viewed

@@ -1,59 +1,116 @@
 # metrics.py
-import torch
-from sentence_transformers import SentenceTransformer, util
-from transformers import pipeline
-# Load Sentence-BERT model for semantic similarity
-sentence_bert_model = SentenceTransformer('all-MiniLM-L6-v2')
-# Load a pre-trained emotion classifier
-emotion_classifier = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion", top_k=None)
-def compute_semantic_similarity(original_comment, paraphrased_comment):
     """
-    Compute the semantic similarity between the original and paraphrased comments using Sentence-BERT.
-    Returns a score between 0 and 1 (higher is better).
     """
-    original_embedding = sentence_bert_model.encode(original_comment, convert_to_tensor=True)
-    paraphrased_embedding = sentence_bert_model.encode(paraphrased_comment, convert_to_tensor=True)
-    similarity_score = util.cos_sim(original_embedding, paraphrased_embedding)[0][0].item()
-    return round(similarity_score, 2)
-def compute_emotion_shift(original_comment, paraphrased_comment):
     """
-    Compute the shift in emotional tone between the original and paraphrased comments.
-    Returns the dominant emotion labels for both comments and a flag indicating if the shift is positive.
     """
-    # Classify emotions in the original comment
-    original_emotions = emotion_classifier(original_comment)
-    # Since pipeline returns a list of lists, take the first (and only) inner list
-    original_emotions = original_emotions[0] if isinstance(original_emotions, list) and original_emotions else []
-    original_dominant_emotion = max(original_emotions, key=lambda x: x['score'])['label'] if original_emotions else "unknown"
-    # Classify emotions in the paraphrased comment
-    paraphrased_emotions = emotion_classifier(paraphrased_comment)
-    paraphrased_emotions = paraphrased_emotions[0] if isinstance(paraphrased_emotions, list) and paraphrased_emotions else []
-    paraphrased_dominant_emotion = max(paraphrased_emotions, key=lambda x: x['score'])['label'] if paraphrased_emotions else "unknown"
-    # Define negative and positive emotions
-    negative_emotions = ['anger', 'sadness', 'fear']
-    positive_emotions = ['joy', 'love']
-    # Check if the shift is positive (e.g., from a negative emotion to a neutral/positive one)
-    is_positive_shift = (
-        original_dominant_emotion in negative_emotions and
-        (paraphrased_dominant_emotion in positive_emotions or paraphrased_dominant_emotion not in negative_emotions)
-    )
-    return original_dominant_emotion, paraphrased_dominant_emotion, is_positive_shift
-def compute_empathy_score(paraphrased_comment):
     """
-    Compute a proxy empathy score based on politeness keywords.
-    Returns a score between 0 and 1 (higher indicates more empathy).
     """
-    empathy_keywords = ['please', 'thank you', 'appreciate', 'understand', 'sorry', 'consider', 'kindly', 'help', 'support']
-    comment_lower = paraphrased_comment.lower()
-    keyword_count = sum(1 for keyword in empathy_keywords if keyword in comment_lower)
-    empathy_score = min(keyword_count / 3, 1.0)
-    return round(empathy_score, 2)

 # metrics.py
+import nltk
+from nltk.translate.bleu_score import sentence_bleu
+from rouge_score import rouge_scorer
+from model_loader import metrics_models
+# Download the tokenizer data used by nltk.word_tokenize in compute_bleu_score.
+# Older NLTK releases look up 'punkt'; recent releases (3.9+) look up
+# 'punkt_tab' instead, so fetch both. An unknown package id is reported by
+# nltk.download() without raising, so this stays safe on older versions.
+nltk.download('punkt')
+nltk.download('punkt_tab')
+def compute_semantic_similarity(original, paraphrased):
     """
+    Compute the cosine similarity between the original and paraphrased comment
+    using the Sentence-BERT model supplied by ``metrics_models``.
+
+    Args:
+        original: The original comment text.
+        paraphrased: The paraphrased comment text.
+
+    Returns:
+        The cosine similarity rounded to 2 decimals (1.0 means the embeddings
+        point the same way; the value can drop below 0 for opposed embeddings),
+        0.0 if either embedding has zero norm, or None if loading or encoding
+        raises (the error is printed).
     """
+    try:
+        sentence_bert = metrics_models.load_sentence_bert()
+        original_vec, paraphrased_vec = sentence_bert.encode([original, paraphrased])
+        # A raw dot product equals cosine similarity only for unit-length
+        # vectors, so divide by the norms explicitly instead of relying on the
+        # model to normalise its output.
+        norm_product = (
+            float(original_vec @ original_vec) * float(paraphrased_vec @ paraphrased_vec)
+        ) ** 0.5
+        if norm_product == 0.0:
+            return 0.0
+        similarity = float(original_vec @ paraphrased_vec) / norm_product
+        return round(similarity, 2)
+    except Exception as e:
+        print(f"Error computing semantic similarity: {str(e)}")
+        return None
+def compute_emotion_shift(original, paraphrased):
     """
+    Classify the dominant emotion of both comments and report whether the
+    paraphrase moved away from a negative emotion.
+    Returns (original emotion, paraphrased emotion, shift-is-positive flag),
+    or (None, None, None) if any step raises (the error is printed).
     """
+    try:
+        classify = metrics_models.load_emotion_classifier()
+        original_scores = classify(original)
+        paraphrased_scores = classify(paraphrased)
+
+        def by_score(entry):
+            return entry['score']
+
+        # The first element of each result holds the scored labels for the text
+        original_emotion = max(original_scores[0], key=by_score)['label']
+        paraphrased_emotion = max(paraphrased_scores[0], key=by_score)['label']
+        negative = ('anger', 'sadness', 'fear')
+        # Any label outside the negative set (joy, love, surprise, ...) counts
+        # as an improvement, so a separate positive list is not needed.
+        improved = original_emotion in negative and paraphrased_emotion not in negative
+        return original_emotion, paraphrased_emotion, improved
+    except Exception as e:
+        print(f"Error computing emotion shift: {str(e)}")
+        return None, None, None
+def compute_empathy_score(paraphrased):
+    """
+    Compute a placeholder empathy score for the paraphrased comment.
+
+    The score is the fraction of whitespace-separated tokens that, after
+    lower-casing and stripping surrounding punctuation, are empathy words.
+
+    Args:
+        paraphrased: The paraphrased comment text.
+
+    Returns:
+        A score between 0 and 1 rounded to 2 decimals, 0 for empty text, or
+        None if the input cannot be processed (the error is printed).
+    """
+    try:
+        empathy_words = {"sorry", "understand", "care", "help", "support"}
+        # Strip surrounding punctuation so "sorry," or "understand." still match
+        words = [token.strip(".,!?;:\"'()[]{}") for token in paraphrased.lower().split()]
+        empathy_count = sum(1 for word in words if word in empathy_words)
+        score = empathy_count / len(words) if words else 0
+        return round(score, 2)
+    except Exception as e:
+        print(f"Error computing empathy score: {str(e)}")
+        return None
+def compute_bleu_score(original, paraphrased):
+    """
+    Compute the smoothed sentence-level BLEU score of the paraphrased comment
+    against the original comment (both lower-cased and word-tokenized).
+
+    Args:
+        original: The original comment text, used as the single reference.
+        paraphrased: The paraphrased comment text, used as the candidate.
+
+    Returns:
+        A score between 0 and 1 rounded to 2 decimals, or None if tokenizing
+        or scoring raises (the error is printed).
+    """
+    try:
+        # Imported locally so this function does not depend on the file header
+        from nltk.translate.bleu_score import SmoothingFunction
+        reference = [nltk.word_tokenize(original.lower())]
+        candidate = nltk.word_tokenize(paraphrased.lower())
+        # Unsmoothed sentence-level BLEU collapses to ~0 whenever any n-gram
+        # order has no overlap, which is the common case for short paraphrased
+        # comments; smoothing keeps the metric informative.
+        score = sentence_bleu(
+            reference,
+            candidate,
+            weights=(0.25, 0.25, 0.25, 0.25),
+            smoothing_function=SmoothingFunction().method1,
+        )
+        return round(score, 2)
+    except Exception as e:
+        print(f"Error computing BLEU score: {str(e)}")
+        return None
+def compute_rouge_score(original, paraphrased):
+    """
+    Score the paraphrased comment against the original with ROUGE-1, ROUGE-2
+    and ROUGE-L (stemming enabled).
+    Returns a dict mapping 'rouge1', 'rouge2' and 'rougeL' to the F-measure
+    rounded to 2 decimals, or None if scoring raises (the error is printed).
+    """
+    rouge_types = ['rouge1', 'rouge2', 'rougeL']
+    try:
+        scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True)
+        results = scorer.score(original, paraphrased)
+        # Keep only the F-measure of each metric, in the order listed above
+        return {name: round(results[name].fmeasure, 2) for name in rouge_types}
+    except Exception as e:
+        print(f"Error computing ROUGE scores: {str(e)}")
+        return None
+def compute_entailment_score(original, paraphrased):
     """
+    Compute the entailment score to check factual consistency using an NLI model.
+    Returns the score paired with the "entailment" label, rounded to 2 decimals,
+    or None if any step raises (the error is printed).
     """
+    try:
+        nli_classifier = metrics_models.load_nli_classifier()
+        # NOTE(review): the 'labels'/'scores' keys read below match the output
+        # of a transformers zero-shot-classification pipeline. If that is what
+        # the loader returns, passing `paraphrased` positionally together with
+        # the `candidate_labels` keyword is rejected with a ValueError, so this
+        # function would always return None -- confirm against model_loader.
+        result = nli_classifier(
+            original,
+            paraphrased,
+            candidate_labels=["entailment", "contradiction", "neutral"]
+        )
+        # Pick the score whose label is "entailment" from the parallel lists
+        entailment_score = next(score for label, score in zip(result['labels'], result['scores']) if label == "entailment")
+        return round(entailment_score, 2)
+    except Exception as e:
+        print(f"Error computing entailment score: {str(e)}")
+        return None