Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Community

ApsidalSolid4 committed on Feb 20

Commit

dd69342

verified ·

1 Parent(s): 17941c3

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -67

app.py CHANGED Viewed

@@ -176,8 +176,8 @@ class TextClassifier:
             'num_windows': len(predictions)
         }
-    def predict_with_local_context(self, text: str) -> Dict:
-        """Enhanced prediction that maintains high confidence while preventing bleeding"""
         if self.model is None or self.tokenizer is None:
             self.load_model()
@@ -186,19 +186,21 @@ class TextClassifier:
         if not sentences:
             return {}
-        # Initialize scores for each sentence
-        sentence_predictions = []
-        # First pass: Get base predictions for each sentence
-        for i in range(len(sentences)):
-            # Get a small window around the current sentence
-            start_idx = max(0, i - 1)
-            end_idx = min(len(sentences), i + 2)
-            window = sentences[start_idx:end_idx]
-            # Get model prediction for this window
             inputs = self.tokenizer(
-                " ".join(window),
                 truncation=True,
                 padding=True,
                 max_length=MAX_LENGTH,
@@ -208,11 +210,51 @@ class TextClassifier:
             with torch.no_grad():
                 outputs = self.model(**inputs)
                 probs = F.softmax(outputs.logits, dim=-1)
-                # Extract probabilities
-                human_prob = probs[0][1].item()
-                ai_prob = probs[0][0].item()
                 sentence_predictions.append({
                     'sentence': sentences[i],
                     'human_prob': human_prob,
@@ -221,55 +263,11 @@ class TextClassifier:
                     'confidence': max(human_prob, ai_prob)
                 })
-            del inputs, outputs, probs
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-        # Second pass: Minimal smoothing only at significant prediction boundaries
-        smoothed_predictions = []
-        for i in range(len(sentence_predictions)):
-            pred = sentence_predictions[i].copy()
-            # Only apply smoothing if this sentence is at a prediction boundary
-            if i > 0 and i < len(sentence_predictions) - 1:
-                prev_pred = sentence_predictions[i-1]
-                next_pred = sentence_predictions[i+1]
-                # Check if we're at a prediction boundary
-                at_boundary = (
-                    pred['prediction'] != prev_pred['prediction'] or
-                    pred['prediction'] != next_pred['prediction']
-                )
-                if at_boundary:
-                    # Calculate average confidence of neighbors
-                    neighbor_conf = (prev_pred['confidence'] + next_pred['confidence']) / 2
-                    # If neighbors are very confident and different from current prediction,
-                    # slightly adjust current prediction
-                    if neighbor_conf > 0.85 and pred['confidence'] < 0.75:
-                        # Adjust probabilities slightly toward neighbors
-                        weight = 0.15  # Small adjustment weight
-                        pred['human_prob'] = (
-                            pred['human_prob'] * (1 - weight) +
-                            ((prev_pred['human_prob'] + next_pred['human_prob']) / 2) * weight
-                        )
-                        pred['ai_prob'] = (
-                            pred['ai_prob'] * (1 - weight) +
-                            ((prev_pred['ai_prob'] + next_pred['ai_prob']) / 2) * weight
-                        )
-                        # Update prediction and confidence
-                        pred['prediction'] = 'human' if pred['human_prob'] > pred['ai_prob'] else 'ai'
-                        pred['confidence'] = max(pred['human_prob'], pred['ai_prob'])
-            smoothed_predictions.append(pred)
         return {
-            'sentence_predictions': smoothed_predictions,
-            'highlighted_text': self.format_predictions_html(smoothed_predictions),
             'full_text': text,
-            'overall_prediction': self.aggregate_predictions(smoothed_predictions)
         }
     def detailed_scan(self, text: str) -> Dict:
@@ -436,7 +434,7 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
             quick_analysis
         )
     else:
-        analysis = classifier.predict_with_local_context(text)
         detailed_analysis = []
         for pred in analysis['sentence_predictions']:

             'num_windows': len(predictions)
         }
+    def detailed_scan(self, text: str) -> Dict:
+        """Original prediction method with modified window handling"""
         if self.model is None or self.tokenizer is None:
             self.load_model()
         if not sentences:
             return {}
+        # Create centered windows for each sentence
+        windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
+        # Track scores for each sentence
+        sentence_appearances = {i: 0 for i in range(len(sentences))}
+        sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
+        # Process windows in batches
+        batch_size = 16
+        for i in range(0, len(windows), batch_size):
+            batch_windows = windows[i:i + batch_size]
+            batch_indices = window_sentence_indices[i:i + batch_size]
             inputs = self.tokenizer(
+                batch_windows,
                 truncation=True,
                 padding=True,
                 max_length=MAX_LENGTH,
             with torch.no_grad():
                 outputs = self.model(**inputs)
                 probs = F.softmax(outputs.logits, dim=-1)
+                # Attribute predictions more carefully
+                for window_idx, indices in enumerate(batch_indices):
+                    center_idx = len(indices) // 2
+                    center_weight = 0.7  # Higher weight for center sentence
+                    edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
+                    for pos, sent_idx in enumerate(indices):
+                        # Apply higher weight to center sentence
+                        weight = center_weight if pos == center_idx else edge_weight
+                        sentence_appearances[sent_idx] += weight
+                        sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
+                        sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
+            del inputs, outputs, probs
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        # Calculate final predictions
+        sentence_predictions = []
+        for i in range(len(sentences)):
+            if sentence_appearances[i] > 0:
+                human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
+                ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
+                # Only apply minimal smoothing at prediction boundaries
+                if i > 0 and i < len(sentences) - 1:
+                    prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
+                    prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
+                    next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
+                    next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
+                    # Check if we're at a prediction boundary
+                    current_pred = 'human' if human_prob > ai_prob else 'ai'
+                    prev_pred = 'human' if prev_human > prev_ai else 'ai'
+                    next_pred = 'human' if next_human > next_ai else 'ai'
+                    if current_pred != prev_pred or current_pred != next_pred:
+                        # Small adjustment at boundaries
+                        smooth_factor = 0.1
+                        human_prob = (human_prob * (1 - smooth_factor) +
+                                    (prev_human + next_human) * smooth_factor / 2)
+                        ai_prob = (ai_prob * (1 - smooth_factor) +
+                                (prev_ai + next_ai) * smooth_factor / 2)
                 sentence_predictions.append({
                     'sentence': sentences[i],
                     'human_prob': human_prob,
                     'confidence': max(human_prob, ai_prob)
                 })
         return {
+            'sentence_predictions': sentence_predictions,
+            'highlighted_text': self.format_predictions_html(sentence_predictions),
             'full_text': text,
+            'overall_prediction': self.aggregate_predictions(sentence_predictions)
         }
     def detailed_scan(self, text: str) -> Dict:
             quick_analysis
         )
     else:
+                    analysis = classifier.predict_with_local_context(text)
         detailed_analysis = []
         for pred in analysis['sentence_predictions']: