from __future__ import annotations import os from pathlib import Path from typing import Optional, Dict, Any, Tuple import numpy as np import pandas as pd import streamlit as st import joblib from apify_client import ApifyClient # ---------- Page setup ---------- st.set_page_config( page_title="Fake Instagram Profile Detector", page_icon="🕵️‍♂️", layout="centered", ) # ---------- Minimal styling ---------- st.markdown(""" """, unsafe_allow_html=True) # ---------- Config & Secrets ---------- def get_apify_token(): import os, streamlit as st # 1) try env var env_token = os.getenv("APIFY_TOKEN") # 2) try secrets.toml, but don't crash if it's missing secret_token = None try: # access st.secrets inside try/except because it raises if file is missing secret_token = st.secrets.get("APIFY_TOKEN", None) # type: ignore except FileNotFoundError: secret_token = None except Exception: secret_token = None return secret_token or env_token APIFY_ACTOR_ID = "dSCLg0C3YEZ83HzYX" # your actor id # If your actor expects a different input shape, adjust below. 
DEFAULT_RUN_INPUT_KEY = "usernames"


# ---------- Model loading ----------
@st.cache_resource(show_spinner=False)
def load_model() -> Any:
    """Load the pickled classifier that sits next to this source file.

    The path is resolved relative to ``__file__`` so the app works
    regardless of the process's current working directory.

    Raises:
        FileNotFoundError: if ``classifier.pkl`` is not present.
    """
    here = Path(__file__).resolve().parent
    model_path = here / "classifier.pkl"  # place classifier.pkl inside src/
    if not model_path.exists():
        raise FileNotFoundError(f"Model not found at: {model_path}")
    return joblib.load(model_path)


# Load once at import time; on failure remember the message and surface it
# in the UI below instead of crashing the whole app.
model: Any = None
model_load_error: Optional[str] = None
try:
    with st.spinner("Loading model..."):
        model = load_model()
except Exception as e:
    model_load_error = str(e)


# ---------- Apify helpers ----------
@st.cache_data(show_spinner=False, ttl=60)  # cache for a minute
def fetch_instagram_profile(username: str, token: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Run the Apify actor for *username* and return ``(data, error)``.

    Exactly one element of the pair is non-None: either a dict with the
    raw profile signals used by the model, or a human-readable error
    string suitable for displaying in the UI.
    """
    try:
        client = ApifyClient(token)
        run_input = {DEFAULT_RUN_INPUT_KEY: [username]}
        run = client.actor(APIFY_ACTOR_ID).call(run_input=run_input)
        dataset = client.dataset(run["defaultDatasetId"])
        # We'll take the first item that matches.
        for item in dataset.iterate_items():
            # Normalize to only the keys we care about.
            out = {
                "postsCount": item.get("postsCount"),
                "followersCount": item.get("followersCount"),
                "followsCount": item.get("followsCount"),
                "private": item.get("private"),
                "verified": item.get("verified"),
            }
            return out, None
        return None, "No data returned for this username."
    except Exception as e:
        return None, f"Apify error: {e}"


def to_numeric_features(raw: Dict[str, Any]) -> Optional[np.ndarray]:
    """Build the 1x5 float feature row the classifier expects.

    Feature order: posts, followers, follows, private(0/1), verified(0/1).
    Returns None when any of the three count fields is missing or cannot
    be coerced to int.
    """
    try:
        posts = int(raw.get("postsCount")) if raw.get("postsCount") is not None else None
        followers = int(raw.get("followersCount")) if raw.get("followersCount") is not None else None
        follows = int(raw.get("followsCount")) if raw.get("followsCount") is not None else None
        private = 1 if bool(raw.get("private")) else 0
        verified = 1 if bool(raw.get("verified")) else 0
        if None in (posts, followers, follows):
            return None
        return np.array([[posts, followers, follows, private, verified]], dtype=np.float64)
    except Exception:
        return None


def predict_with_model(X: np.ndarray) -> Dict[str, Any]:
    """Run the module-level ``model`` on feature row *X*.

    Returns a dict with keys ``pred`` (int class label) and
    ``proba_real`` / ``proba_fake`` (floats, or None when the model does
    not expose ``predict_proba``).
    """
    result: Dict[str, Any] = {}
    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(X)
        # Assume class 1 = Real, class 0 = Fake (adjust if reversed in your
        # model). Use classes_ when available to map columns to labels.
        label_index = getattr(model, "classes_", np.array([0, 1]))
        probs = dict(zip(label_index.tolist(), proba[0].tolist()))
        result["proba_real"] = probs.get(1, None)
        result["proba_fake"] = probs.get(0, None)
        result["pred"] = int(model.predict(X)[0])
    else:
        y = int(model.predict(X)[0])
        result["pred"] = y
        result["proba_real"] = None
        result["proba_fake"] = None
    return result


# ---------- Header ----------
st.title("🕵️‍♂️ Fake Instagram Profile Detector")
st.write("Enter a username and we’ll fetch basic public signals, then run a trained model to estimate risk.")

if model_load_error:
    st.error(f"Model failed to load: {model_load_error}")
    st.stop()

# ---------- Main Form ----------
with st.form("username_form", clear_on_submit=False):
    username = st.text_input("Instagram Username", placeholder="e.g., nasa")
    submitted = st.form_submit_button("Analyze")

if not submitted:
    st.info("Enter a username and click **Analyze** to get started.")
    st.stop()

# ---------- Validation ----------
# ---------- Validation ----------
if not username.strip():
    st.warning("Please provide a username.")
    st.stop()

# BUG FIX: `token` was read here without ever being assigned anywhere in
# the file (get_apify_token was defined but never called), which raised a
# NameError on every submit. Resolve it explicitly before the check.
token = get_apify_token()
if not token:
    st.error("Missing Apify token. Add it to `.streamlit/secrets.toml` as `APIFY_TOKEN` or set the env var.")
    st.stop()

# ---------- Fetch & Predict ----------
with st.spinner("Fetching profile data..."):
    raw_data, fetch_err = fetch_instagram_profile(username.strip(), token)

if fetch_err:
    st.error(fetch_err)
    st.stop()
if not raw_data:
    st.warning("No data found. Double-check the username.")
    st.stop()

# KPIs
st.subheader(f"Profile Signals — @{username}")
c1, c2, c3 = st.columns(3)
c4, c5 = st.columns(2)

# NOTE(review): the original custom HTML for these KPI cards was stripped
# from the file and cannot be recovered; st.metric is used as a functional
# reconstruction showing the same values.
with c1:
    st.metric("Posts", raw_data["postsCount"] if raw_data["postsCount"] is not None else "—")
with c2:
    st.metric("Followers", raw_data["followersCount"] if raw_data["followersCount"] is not None else "—")
with c3:
    st.metric("Following", raw_data["followsCount"] if raw_data["followsCount"] is not None else "—")
with c4:
    private_pill = 'Private' if raw_data.get("private") else 'Public'
    st.metric("Privacy", private_pill)
with c5:
    verified_pill = 'Verified' if raw_data.get("verified") else 'Not Verified'
    st.metric("Verification", verified_pill)

# Prepare features
X = to_numeric_features(raw_data)
if X is None:
    st.error("Insufficient numeric data to run the classifier (missing posts/followers/following).")
    st.stop()

with st.spinner("Running prediction..."):
    out = predict_with_model(X)

pred = out["pred"]
proba_real = out.get("proba_real")
proba_fake = out.get("proba_fake")

# ---------- Verdict ----------
st.subheader("Verdict")
if raw_data.get("postsCount") is None:
    # Defensive: in practice unreachable, because to_numeric_features
    # already returned None above when postsCount is missing.
    st.error("The user may not exist or data could not be fetched.")
elif pred == 0:
    st.error("The account is **likely to be Fake**.")
else:
    st.success("The account is **likely to be Real**.")

# ---------- Confidence ----------
if (proba_real is not None) or (proba_fake is not None):
    st.write("**Confidence**")
    cc1, cc2 = st.columns(2)
    with cc1:
        st.metric("Probability: Real", f"{(proba_real or 0)*100:0.1f}%")
    with cc2:
        st.metric("Probability: Fake", f"{(proba_fake or 0)*100:0.1f}%")

# ---------- Raw data (expandable) ----------
with st.expander("See fetched features"):
    st.json(raw_data)

st.caption("⚠️ This tool provides an indicative score. Use responsibly and verify via additional checks.")