Spaces:

dejanseo
/

ai-detection-small

Running

App Files Files Community

dejanseo committed on Apr 17

Commit

bd98692

verified ·

1 Parent(s): 68d1553

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -35

app.py CHANGED Viewed

@@ -3,9 +3,9 @@ import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
-import logging
-# Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -16,34 +16,38 @@ st.set_page_config(
     layout="wide"
 )
-# Logo
-st.markdown(
-    """
-    <a href="https://dejan.ai/" target="_blank">
-      <img src="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png" alt="DEJAN logo">
-    </a>
-    """,
-    unsafe_allow_html=True
 )
-# Custom font
 st.markdown("""
 <link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
 <style>
-  html, body, [class*="css"] {
-    font-family: 'Roboto', sans-serif;
-  }
 </style>
 """, unsafe_allow_html=True)
-@st.cache_resource
 def load_model_and_tokenizer(model_name):
     """Load the tokenizer and sequence-classification model for *model_name*.
     The model is placed on CUDA when available (CPU otherwise), loaded as
     bfloat16 only on a CUDA device that reports bf16 support (float32 in
     every other case), and switched to eval mode.
     Returns a ``(tokenizer, model, device)`` tuple.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     if torch.cuda.is_available():
         device = torch.device("cuda")
     else:
         device = torch.device("cpu")
     # Default to float32; upgrade to bfloat16 only where the GPU supports it.
     dtype = torch.float32
     if device.type == "cuda" and torch.cuda.is_bf16_supported():
         dtype = torch.bfloat16
     model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=dtype)
     model.to(device)
     model.eval()
     return tokenizer, model, device
 MODEL_NAME = "dejanseo/ai-detection-small"
@@ -51,33 +55,33 @@ try:
     tokenizer, model, device = load_model_and_tokenizer(MODEL_NAME)
 except Exception as e:
     st.error(f"Error loading model: {e}")
-    logger.error("Failed to load model or tokenizer", exc_info=True)
     st.stop()
 # Labels
 LABELS = ["AI Content", "Human Content"]
-# Sentence splitter
 def sent_tokenize(text):
     """Split *text* into sentences.
     The text is stripped, then cut at every run of whitespace that directly
     follows '.', '!' or '?'. Empty pieces are discarded, so blank input
     yields an empty list.
     """
     pieces = re.split(r'(?<=[\.!?])\s+', text.strip())
     return list(filter(None, pieces))
 # UI
 st.title("AI Article Detection")
-text = st.text_area("Enter text to classify", height=200)
 if st.button("Classify", type="primary"):
-    if not text.strip():
         st.warning("Please enter some text.")
     else:
-        with st.spinner("Analyzing..."):
             try:
                 sentences = sent_tokenize(text)
                 if not sentences:
                     st.warning("No sentences detected.")
                     st.stop()
-                # Tokenize each sentence
                 inputs = tokenizer(
                     sentences,
                     return_tensors="pt",
@@ -90,21 +94,17 @@ if st.button("Classify", type="primary"):
                 with torch.no_grad():
                     outputs = model(**inputs)
                     logits = outputs.logits
-                    probs = F.softmax(logits, dim=-1).cpu()  # shape [n_sentences, 2]
                     preds = torch.argmax(probs, dim=-1).cpu()
                 # Build inline styled text
                 styled_chunks = []
                 for i, sent in enumerate(sentences):
                     pred = preds[i].item()
-                    # select color channel
-                    if pred == 0:
-                        r, g = 255, 0   # red for AI
-                    else:
-                        r, g = 0, 255   # green for Human
-                    confidence = probs[i, pred].item()  # between 0 and 1
-                    alpha = confidence  # drive opacity directly
-                    # wrap sentence in span
                     span = (
                         f"<span "
                         f"style='background-color: rgba({r},{g},0,{alpha:.2f}); "
@@ -114,15 +114,14 @@ if st.button("Classify", type="primary"):
                     )
                     styled_chunks.append(span)
-                # join all sentences inline
                 full_text_html = "".join(styled_chunks)
                 st.markdown(full_text_html, unsafe_allow_html=True)
-                # Overall AI likelihood
                 avg_probs = torch.mean(probs, dim=0)
-                ai_likelihood = avg_probs[0].item() * 100  # class 0 is AI
                 st.subheader(f"🤖 AI Likelihood: {ai_likelihood:.1f}%")
             except Exception as e:
-                st.error(f"Analysis error: {e}")
-                logger.error("Classification failed", exc_info=True)

 import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
+import logging  # Optional: Add logging for better debugging
+# Set up logging (optional but helpful)
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
     layout="wide"
 )
+# Logo as provided
+st.logo(
+    image="https://dejan.ai/wp-content/uploads/2024/02/dejan-300x103.png",
+    link="https://dejan.ai/",
 )
+# Font styling
 st.markdown("""
 <link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
 <style>
+    html, body, [class*="css"] {
+        font-family: 'Roboto', sans-serif;
+    }
 </style>
 """, unsafe_allow_html=True)
+@st.cache_resource  # Cache the model and tokenizer to avoid reloading on every interaction
 def load_model_and_tokenizer(model_name):
+    """Load the tokenizer and sequence-classification model for `model_name`.
+    Uses CUDA when available (CPU otherwise) and bfloat16 only on a CUDA
+    device that reports bf16 support (float32 otherwise). The model is moved
+    to the chosen device and put in eval mode; progress is logged at INFO.
+    Returns a (tokenizer, model, device) tuple.
+    """
+    logger.info(f"Loading tokenizer: {model_name}")
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     dtype = torch.bfloat16 if (device.type == "cuda" and torch.cuda.is_bf16_supported()) else torch.float32
+    logger.info(f"Using device: {device} with dtype: {dtype}")
+    logger.info(f"Loading model: {model_name}")
     model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=dtype)
     model.to(device)
     model.eval()
+    logger.info("Model loaded successfully.")
     return tokenizer, model, device
 MODEL_NAME = "dejanseo/ai-detection-small"
     tokenizer, model, device = load_model_and_tokenizer(MODEL_NAME)
 except Exception as e:
     st.error(f"Error loading model: {e}")
+    logger.error(f"Failed to load model or tokenizer: {e}", exc_info=True)
     st.stop()
 # Labels
 LABELS = ["AI Content", "Human Content"]
+# Regex-based sentence splitter
 def sent_tokenize(text):
     """Split *text* into sentences.
     The text is stripped, then cut at every run of whitespace that directly
     follows '.', '!' or '?'. Empty pieces are discarded, so blank input
     yields an empty list.
     """
     pieces = re.split(r'(?<=[\.!?])\s+', text.strip())
     return list(filter(None, pieces))
 # UI
 st.title("AI Article Detection")
+text = st.text_area("Enter text to classify", height=200, placeholder="Paste your text here...")
 if st.button("Classify", type="primary"):
+    if not text or not text.strip():
         st.warning("Please enter some text.")
     else:
+        with st.spinner("Analyzing... Please wait."):
             try:
                 sentences = sent_tokenize(text)
                 if not sentences:
                     st.warning("No sentences detected.")
                     st.stop()
+                # Tokenize sentences
                 inputs = tokenizer(
                     sentences,
                     return_tensors="pt",
                 with torch.no_grad():
                     outputs = model(**inputs)
                     logits = outputs.logits
+                    probs = F.softmax(logits, dim=-1).cpu()  # [n_sentences, 2]
                     preds = torch.argmax(probs, dim=-1).cpu()
                 # Build inline styled text
                 styled_chunks = []
                 for i, sent in enumerate(sentences):
                     pred = preds[i].item()
+                    # red for AI (class 0), green for Human (class 1)
+                    r, g = (255, 0) if pred == 0 else (0, 255)
+                    confidence = probs[i, pred].item()  # 0.0–1.0
+                    alpha = confidence  # opacity
                     span = (
                         f"<span "
                         f"style='background-color: rgba({r},{g},0,{alpha:.2f}); "
                     )
                     styled_chunks.append(span)
                 full_text_html = "".join(styled_chunks)
                 st.markdown(full_text_html, unsafe_allow_html=True)
+                # Overall AI likelihood (class 0)
                 avg_probs = torch.mean(probs, dim=0)
+                ai_likelihood = avg_probs[0].item() * 100
                 st.subheader(f"🤖 AI Likelihood: {ai_likelihood:.1f}%")
             except Exception as e:
+                st.error(f"An error occurred during analysis: {e}")
+                logger.error("Analysis failed", exc_info=True)