from __future__ import annotations import os from pathlib import Path from typing import Optional, Dict, Any, Tuple import numpy as np import pandas as pd import streamlit as st import joblib from apify_client import ApifyClient # ---------- Page setup ---------- st.set_page_config( page_title="Fake Instagram Profile Detector", page_icon="🕵️‍♂️", layout="centered", initial_sidebar_state="expanded", ) # ---------- Minimal styling ---------- st.markdown(""" """, unsafe_allow_html=True) # ---------- Config & Secrets ---------- def get_apify_token() -> Optional[str]: # Prefer Streamlit secrets; fallback to env var; last resort None token = st.secrets.get("APIFY_TOKEN", None) if hasattr(st, "secrets") else None return token or os.getenv("APIFY_TOKEN") # don't hardcode into source code APIFY_ACTOR_ID = "dSCLg0C3YEZ83HzYX" # your actor id # If your actor expects a different input shape, adjust below. DEFAULT_RUN_INPUT_KEY = "usernames" # ---------- Model loading ---------- @st.cache_resource(show_spinner=False) def load_model() -> Any: # Load relative to this file to avoid CWD issues here = Path(__file__).resolve().parent model_path = here / "classifier.pkl" # place classifier.pkl inside src/ if not model_path.exists(): raise FileNotFoundError(f"Model not found at: {model_path}") return joblib.load(model_path) model = None model_load_error = None try: with st.spinner("Loading model..."): model = load_model() except Exception as e: model_load_error = str(e) # ---------- Apify helpers ---------- @st.cache_data(show_spinner=False, ttl=60) # cache for a minute def fetch_instagram_profile(username: str, token: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]: try: client = ApifyClient(token) run_input = {DEFAULT_RUN_INPUT_KEY: [username]} run = client.actor(APIFY_ACTOR_ID).call(run_input=run_input) dataset = client.dataset(run["defaultDatasetId"]) # We'll take the first item that matches for item in dataset.iterate_items(): # normalize keys we care about out = { "postsCount": item.get("postsCount"), "followersCount": item.get("followersCount"), "followsCount": item.get("followsCount"), "private": item.get("private"), "verified": item.get("verified"), } return out, None return None, "No data returned for this username." except Exception as e: return None, f"Apify error: {e}" def to_numeric_features(raw: Dict[str, Any]) -> Optional[np.ndarray]: try: posts = int(raw.get("postsCount")) if raw.get("postsCount") is not None else None followers = int(raw.get("followersCount")) if raw.get("followersCount") is not None else None follows = int(raw.get("followsCount")) if raw.get("followsCount") is not None else None private = 1 if bool(raw.get("private")) else 0 verified = 1 if bool(raw.get("verified")) else 0 if None in (posts, followers, follows): return None return np.array([[posts, followers, follows, private, verified]], dtype=np.float64) except Exception: return None def predict_with_model(X: np.ndarray) -> Dict[str, Any]: # Try to get probabilities if available; else binary prediction result: Dict[str, Any] = {} if hasattr(model, "predict_proba"): proba = model.predict_proba(X) # Assume class 1 = Real, class 0 = Fake (adjust if reversed in your model) # Try to find mapping if model has classes_ label_index = getattr(model, "classes_", np.array([0, 1])) # map probabilities to classes probs = dict(zip(label_index.tolist(), proba[0].tolist())) result["proba_real"] = probs.get(1, None) result["proba_fake"] = probs.get(0, None) result["pred"] = int(model.predict(X)[0]) else: y = int(model.predict(X)[0]) result["pred"] = y result["proba_real"] = None result["proba_fake"] = None return result # ---------- Sidebar ---------- with st.sidebar: st.header("⚙️ Settings") st.caption("Configure how the app connects and behaves.") token = get_apify_token() if not token: token = st.text_input("Apify API token (not saved)", type="password", placeholder="APIFY_...") st.divider() st.markdown("**About**") st.write( "This app checks basic Instagram profile signals " "and runs a classifier to estimate whether an account looks fake or real." ) st.markdown( 'For demo/educational purposes only. ' 'Always verify results with additional signals.', unsafe_allow_html=True ) # ---------- Header ---------- st.title("🕵️‍♂️ Fake Instagram Profile Detector") st.write("Enter a username and we’ll fetch basic public signals, then run a trained model to estimate risk.") if model_load_error: st.error(f"Model failed to load: {model_load_error}") st.stop() # ---------- Main Form ---------- with st.form("username_form", clear_on_submit=False): username = st.text_input("Instagram Username", placeholder="e.g., nasa") submitted = st.form_submit_button("Analyze") if not submitted: st.info("Enter a username and click **Analyze** to get started.") st.stop() # ---------- Validation ---------- if not username.strip(): st.warning("Please provide a username.") st.stop() if not token: st.error("Missing Apify token. Add it to `.streamlit/secrets.toml` as `APIFY_TOKEN` or set the env var.") st.stop() # ---------- Fetch & Predict ---------- with st.spinner("Fetching profile data..."): raw_data, fetch_err = fetch_instagram_profile(username.strip(), token) if fetch_err: st.error(fetch_err) st.stop() if not raw_data: st.warning("No data found. Double-check the username.") st.stop() # KPIs st.subheader(f"Profile Signals — @{username}") c1, c2, c3 = st.columns(3) c4, c5 = st.columns(2) with c1: st.markdown('

Posts

' f'

{raw_data["postsCount"] if raw_data["postsCount"] is not None else "—"}

', unsafe_allow_html=True) with c2: st.markdown('

Followers

' f'

{raw_data["followersCount"] if raw_data["followersCount"] is not None else "—"}

', unsafe_allow_html=True) with c3: st.markdown('

Following

' f'

{raw_data["followsCount"] if raw_data["followsCount"] is not None else "—"}

', unsafe_allow_html=True) with c4: private_pill = 'Private' if raw_data.get("private") else 'Public' st.markdown(f'

Privacy

{private_pill}

', unsafe_allow_html=True) with c5: verified_pill = 'Verified' if raw_data.get("verified") else 'Not Verified' st.markdown(f'

Verification

{verified_pill}

', unsafe_allow_html=True) # Prepare features X = to_numeric_features(raw_data) if X is None: st.error("Insufficient numeric data to run the classifier (missing posts/followers/following).") st.stop() with st.spinner("Running prediction..."): out = predict_with_model(X) pred = out["pred"] proba_real = out.get("proba_real") proba_fake = out.get("proba_fake") # ---------- Verdict ---------- st.subheader("Verdict") if raw_data.get("postsCount") is None: st.error("The user may not exist or data could not be fetched.") elif pred == 0: st.error("The account is **likely to be Fake**.") else: st.success("The account is **likely to be Real**.") # ---------- Confidence ---------- if (proba_real is not None) or (proba_fake is not None): st.write("**Confidence**") cc1, cc2 = st.columns(2) with cc1: st.metric("Probability: Real", f"{(proba_real or 0)*100:0.1f}%") with cc2: st.metric("Probability: Fake", f"{(proba_fake or 0)*100:0.1f}%") # ---------- Raw data (expandable) ---------- with st.expander("See fetched features"): st.json(raw_data) st.caption("⚠️ This tool provides an indicative score. Use responsibly and verify via additional checks.")