Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Files Community

ApsidalSolid4 committed on Feb 20

Commit

59eee68

verified ·

1 Parent(s): 53f5f55

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -2

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 # Constants
 MAX_LENGTH = 512
 MODEL_NAME = "microsoft/deberta-v3-small"
-WINDOW_SIZE = 17
 WINDOW_OVERLAP = 2
 CONFIDENCE_THRESHOLD = 0.65
 BATCH_SIZE = 8  # Reduced batch size for CPU
@@ -176,6 +176,102 @@ class TextClassifier:
             'num_windows': len(predictions)
         }
     def detailed_scan(self, text: str) -> Dict:
         """Perform a detailed scan with improved sentence-level analysis."""
         if not text.strip():
@@ -340,7 +436,7 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
             quick_analysis
         )
     else:
-        analysis = classifier.detailed_scan(text)
         detailed_analysis = []
         for pred in analysis['sentence_predictions']:

 # Constants
 MAX_LENGTH = 512  # passed as max_length (with truncation=True) to the tokenizer in predict_with_local_context
 MODEL_NAME = "microsoft/deberta-v3-small"  # presumably the checkpoint loaded by load_model (not shown here) -- confirm
+WINDOW_SIZE = 6  # usage and unit (sentences vs. tokens) are not visible in this view -- TODO confirm
 WINDOW_OVERLAP = 2  # NOTE(review): presumably shared between adjacent windows; usage not visible here
 CONFIDENCE_THRESHOLD = 0.65  # NOTE(review): consumer of this threshold is not visible here
 BATCH_SIZE = 8  # Reduced batch size for CPU
             'num_windows': len(predictions)
         }
+    def predict_with_local_context(self, text: str) -> Dict:
+        """Enhanced prediction that maintains high confidence while preventing bleeding"""
+        if self.model is None or self.tokenizer is None:
+            self.load_model()
+        self.model.eval()
+        sentences = self.processor.split_into_sentences(text)
+        if not sentences:
+            return {}
+        # Initialize scores for each sentence
+        sentence_predictions = []
+        # First pass: Get base predictions for each sentence
+        for i in range(len(sentences)):
+            # Get a small window around the current sentence
+            start_idx = max(0, i - 1)
+            end_idx = min(len(sentences), i + 2)
+            window = sentences[start_idx:end_idx]
+            # Get model prediction for this window
+            inputs = self.tokenizer(
+                " ".join(window),
+                truncation=True,
+                padding=True,
+                max_length=MAX_LENGTH,
+                return_tensors="pt"
+            ).to(self.device)
+            with torch.no_grad():
+                outputs = self.model(**inputs)
+                probs = F.softmax(outputs.logits, dim=-1)
+                # Extract probabilities
+                human_prob = probs[0][1].item()
+                ai_prob = probs[0][0].item()
+                sentence_predictions.append({
+                    'sentence': sentences[i],
+                    'human_prob': human_prob,
+                    'ai_prob': ai_prob,
+                    'prediction': 'human' if human_prob > ai_prob else 'ai',
+                    'confidence': max(human_prob, ai_prob)
+                })
+            del inputs, outputs, probs
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        # Second pass: Minimal smoothing only at significant prediction boundaries
+        smoothed_predictions = []
+        for i in range(len(sentence_predictions)):
+            pred = sentence_predictions[i].copy()
+            # Only apply smoothing if this sentence is at a prediction boundary
+            if i > 0 and i < len(sentence_predictions) - 1:
+                prev_pred = sentence_predictions[i-1]
+                next_pred = sentence_predictions[i+1]
+                # Check if we're at a prediction boundary
+                at_boundary = (
+                    pred['prediction'] != prev_pred['prediction'] or
+                    pred['prediction'] != next_pred['prediction']
+                )
+                if at_boundary:
+                    # Calculate average confidence of neighbors
+                    neighbor_conf = (prev_pred['confidence'] + next_pred['confidence']) / 2
+                    # If neighbors are very confident and different from current prediction,
+                    # slightly adjust current prediction
+                    if neighbor_conf > 0.85 and pred['confidence'] < 0.75:
+                        # Adjust probabilities slightly toward neighbors
+                        weight = 0.15  # Small adjustment weight
+                        pred['human_prob'] = (
+                            pred['human_prob'] * (1 - weight) +
+                            ((prev_pred['human_prob'] + next_pred['human_prob']) / 2) * weight
+                        )
+                        pred['ai_prob'] = (
+                            pred['ai_prob'] * (1 - weight) +
+                            ((prev_pred['ai_prob'] + next_pred['ai_prob']) / 2) * weight
+                        )
+                        # Update prediction and confidence
+                        pred['prediction'] = 'human' if pred['human_prob'] > pred['ai_prob'] else 'ai'
+                        pred['confidence'] = max(pred['human_prob'], pred['ai_prob'])
+            smoothed_predictions.append(pred)
+        return {
+            'sentence_predictions': smoothed_predictions,
+            'highlighted_text': self.format_predictions_html(smoothed_predictions),
+            'full_text': text,
+            'overall_prediction': self.aggregate_predictions(smoothed_predictions)
+        }
     def detailed_scan(self, text: str) -> Dict:
         """Perform a detailed scan with improved sentence-level analysis."""
         if not text.strip():
             quick_analysis
         )
     else:
+                    analysis = classifier.predict_with_local_context(text)
         detailed_analysis = []
         for pred in analysis['sentence_predictions']: