import os
import json
import ast
import streamlit as st
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import re
import math
import logging
import pandas as pd

st.set_page_config(
    page_title="AI Article Detection by DEJAN",
    page_icon="🧠",
    layout="wide"
)

st.logo(
    image="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png",
    link="https://dejan.ai/",
)

# --- Load heuristic weights from environment secrets, with JSON→Python fallback ---
def load_heuristic_weights():
    def _load(env_key):
        raw = os.environ[env_key]
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return ast.literal_eval(raw)
    ai = _load("AI_WEIGHTS_JSON")
    og = _load("OG_WEIGHTS_JSON")
    return ai, og

AI_WEIGHTS, OG_WEIGHTS = load_heuristic_weights()
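# AI_WEIGHTS / OG_WEIGHTS are flat token→weight dictionaries, e.g. (illustrative
# values only): {"delve": 1.8, "tapestry": 1.2}. Each secret may be stored either as
# strict JSON or as a Python dict literal, hence the ast.literal_eval fallback above.
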
SIGMOID_K = 0.5

def tokenize(text):
    return re.findall(r'\b[a-z]{2,}\b', text.lower())

def classify_text_likelihood(text: str) -> float:
    tokens = tokenize(text)
    if not tokens:
        return 0.5
    ai_score = og_score = matched = 0
    for t in tokens:
        aw = AI_WEIGHTS.get(t, 0)
        ow = OG_WEIGHTS.get(t, 0)
        if aw or ow:
            matched += 1
            ai_score += aw
            og_score += ow
    if matched == 0:
        return 0.5
    net = ai_score - og_score
    return 1 / (1 + math.exp(-SIGMOID_K * net))
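
# The heuristic squashes the net lexical evidence (ai_score - og_score) through a
# logistic curve, P(AI) = 1 / (1 + e^(-SIGMOID_K * net)), and stays neutral at 0.5
# when no weighted tokens match; SIGMOID_K controls how quickly the score saturates
# toward 0 or 1 as evidence accumulates.
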
# --- Logging & Streamlit setup ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

st.markdown("""
<link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
<style>
html, body, [class*="css"] {
    font-family: 'Roboto', sans-serif;
}
</style>
""", unsafe_allow_html=True)

# Cache the tokenizer/model across Streamlit reruns so the checkpoint is only
# loaded once per session (st.cache_resource, available in Streamlit >= 1.18).
@st.cache_resource
def load_model_and_tokenizer(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Prefer bfloat16 on GPUs that support it, otherwise fall back to float32.
    dtype = torch.bfloat16 if (device.type == "cuda" and torch.cuda.is_bf16_supported()) else torch.float32
    model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=dtype)
    model.to(device).eval()
    return tokenizer, model, device

MODEL_NAME = "dejanseo/ai-cop"

try:
    tokenizer, model, device = load_model_and_tokenizer(MODEL_NAME)
except Exception as e:
    st.error(f"Error loading model: {e}")
    logger.error(f"Failed to load model: {e}", exc_info=True)
    st.stop()

def sent_tokenize(text):
    return [s for s in re.split(r'(?<=[\.!?])\s+', text.strip()) if s]
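# Note: sent_tokenize is a lightweight regex splitter (break after ., ! or ? followed
# by whitespace), not a full linguistic tokenizer, so abbreviations such as "e.g. "
# will also trigger a split.
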
st.title("AI Article Detection")
text = st.text_area("Enter text to classify", height=200, placeholder="Paste your text here…")

if st.button("Classify", type="primary"):
    if not text.strip():
        st.warning("Please enter some text.")
    else:
        with st.spinner("Analyzing…"):
            sentences = sent_tokenize(text)
            if not sentences:
                st.warning("No sentences detected.")
                st.stop()

            inputs = tokenizer(
                sentences,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=model.config.max_position_embeddings
            ).to(device)

            with torch.no_grad():
                logits = model(**inputs).logits
            probs = F.softmax(logits, dim=-1).cpu()
            preds = torch.argmax(probs, dim=-1)
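
            # probs has shape (num_sentences, num_labels); preds holds the argmax
            # class index per sentence and drives the per-sentence table below
            # (class 0 is treated as "AI").
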
            # Create dataframe for sentences
            sentences_data = []
            for i, s in enumerate(sentences):
                p = preds[i].item()
                conf = probs[i, p].item()
                label = "AI" if p == 0 else "Human"
                sentences_data.append({
                    "sentence": s,
                    "classification": label,
                    "confidence": conf
                })

            # Display as dataframe with progress column
            df = pd.DataFrame(sentences_data)
            st.dataframe(
                df,
                column_config={
                    "sentence": st.column_config.TextColumn("Sentence"),
                    "classification": st.column_config.TextColumn("Classification"),
                    "confidence": st.column_config.ProgressColumn(
                        "Confidence",
                        help="Model's confidence in the classification",
                        format="%.2f",
                        min_value=0,
                        max_value=1,
                    ),
                },
                hide_index=True,
            )
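
            # Document-level score: mean probability of class 0 ("AI") across all
            # sentences, blended with the lexical heuristic and capped at 1.0.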
            avg = torch.mean(probs, dim=0)
            model_ai = avg[0].item()
            heuristic_ai = classify_text_likelihood(text)
            combined = min(model_ai + heuristic_ai, 1.0)

            st.subheader(f"⚖️ AI Likelihood: {combined*100:.1f}%")
            st.write(f"🤖 Model: {model_ai*100:.1f}%")
            st.write(f"🛠️ Heuristic: {heuristic_ai*100:.1f}%")