|
|
from __future__ import annotations |
|
|
import os |
|
|
from pathlib import Path |
|
|
from typing import Optional, Dict, Any, Tuple |
|
|
|
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import streamlit as st |
|
|
import joblib |
|
|
from apify_client import ApifyClient |
|
|
|
|
|
|
|
|
# Configure the browser tab title, tab icon, and page layout.
st.set_page_config(
    page_title="Fake Instagram Profile Detector",
    # Detective emoji; the previous value was a mis-decoded byte sequence
    # rather than a valid emoji.
    page_icon="🕵️‍♂️",
    layout="centered",
)
|
|
|
|
|
|
|
|
# Stylesheet injected into the page: container padding plus the helper classes
# (.small-muted, .kpi, .status-pill, .status-ok/.status-bad/.status-warn)
# referenced by the raw HTML snippets rendered later in this script.
# NOTE(review): `.reportview-container` looks like a legacy Streamlit class
# name - confirm the padding rule still matches on the deployed version.
_APP_CSS = """
<style>
/* Make it feel app-like */
.reportview-container .main .block-container {padding-top: 2rem; padding-bottom: 2rem;}
.small-muted {font-size: 0.9rem; color: rgba(0,0,0,0.55);}
.kpi {padding: 0.75rem 1rem; border-radius: 0.75rem; border: 1px solid rgba(0,0,0,0.08);}
.status-pill {display:inline-block; padding: .25rem .6rem; border-radius: 999px; font-weight:600;}
.status-ok {background:#E7F6EC; color:#137333;}
.status-bad {background:#FCE8E6; color:#B3261E;}
.status-warn {background:#FFF4E5; color:#8A4D00;}
</style>
"""

# unsafe_allow_html is required for the <style> tag to be emitted as markup.
st.markdown(_APP_CSS, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
def get_apify_token() -> Optional[str]:
    """Return the configured Apify API token, if any.

    The Streamlit secret ``APIFY_TOKEN`` takes precedence; the ``APIFY_TOKEN``
    environment variable is the fallback. A missing or empty secret falls
    through to the environment value.

    Returns:
        The token string, or a falsy value (``None`` or ``""``) when neither
        source provides one.
    """
    env_token = os.getenv("APIFY_TOKEN")
    try:
        secret_token = st.secrets.get("APIFY_TOKEN", None)
    except Exception:
        # Deliberate best effort: reading st.secrets can raise (for example
        # FileNotFoundError when no secrets file exists). Any such failure
        # simply means "no secret configured", so fall back to the env var.
        secret_token = None
    return secret_token or env_token
|
|
|
|
|
# ID of the Apify actor that is run to fetch profile data.
APIFY_ACTOR_ID: str = "dSCLg0C3YEZ83HzYX"

# Key of the actor run input under which the list of usernames is sent.
DEFAULT_RUN_INPUT_KEY: str = "usernames"
|
|
|
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def load_model() -> Any:
    """Load the classifier stored as ``classifier.pkl`` next to this script.

    The call is wrapped in ``st.cache_resource`` with Streamlit's own spinner
    disabled.

    Returns:
        Whatever object ``joblib.load`` deserializes from the file.

    Raises:
        FileNotFoundError: If ``classifier.pkl`` does not exist beside this
            script.
    """
    model_path = Path(__file__).resolve().with_name("classifier.pkl")
    if model_path.exists():
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code - only ship a classifier.pkl from a trusted source.
        return joblib.load(model_path)
    raise FileNotFoundError(f"Model not found at: {model_path}")
|
|
|
|
|
# Load the classifier up front, recording a failure instead of raising so the
# UI further down can report it and stop cleanly.
model: Any = None
model_load_error: Optional[str] = None
try:
    with st.spinner("Loading model..."):
        model = load_model()
except Exception as e:
    # str(e) is empty for exceptions raised without a message, which would
    # make the later truthiness check on model_load_error miss the failure
    # while `model` is still None. Including the exception type guarantees a
    # non-empty, more informative message.
    model_load_error = f"{type(e).__name__}: {e}"
|
|
|
|
|
|
|
|
@st.cache_data(show_spinner=False, ttl=60)
def fetch_instagram_profile(username: str, token: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Run the Apify actor for one username and return its first result.

    The call is wrapped in ``st.cache_data`` with ``ttl=60``, so the returned
    tuple (including an error tuple) is what gets cached.

    Args:
        username: Instagram username sent to the actor.
        token: Apify API token used to build the client.

    Returns:
        ``(profile, None)`` on success, where ``profile`` holds the keys
        ``postsCount``, ``followersCount``, ``followsCount``, ``private`` and
        ``verified`` copied from the first dataset item (``None`` for any key
        the item lacks); ``(None, message)`` when the run yields no dataset,
        no items, or any exception is raised. This function never raises.
    """
    try:
        client = ApifyClient(token)
        run = client.actor(APIFY_ACTOR_ID).call(
            run_input={DEFAULT_RUN_INPUT_KEY: [username]}
        )

        # call() may yield no run object; indexing it directly would surface
        # as an opaque "'NoneType' object is not subscriptable" message.
        dataset_id = run.get("defaultDatasetId") if run else None
        if not dataset_id:
            return None, "Apify error: the actor run did not return a dataset."

        # Only the first item is used; one username was requested.
        item = next(iter(client.dataset(dataset_id).iterate_items()), None)
        if item is None:
            return None, "No data returned for this username."

        wanted_keys = ("postsCount", "followersCount", "followsCount", "private", "verified")
        return {key: item.get(key) for key in wanted_keys}, None
    except Exception as e:
        # UI boundary: report every client/network failure as a message
        # instead of crashing the Streamlit script.
        return None, f"Apify error: {e}"
|
|
|
|
|
def to_numeric_features(raw: Dict[str, Any]) -> Optional[np.ndarray]:
    """Build the 1x5 float64 feature row from a fetched profile mapping.

    Column order is: postsCount, followersCount, followsCount, private flag,
    verified flag. The three counts are converted with ``int``; the two flags
    become 1 or 0 according to their truthiness.

    Args:
        raw: Mapping with the keys listed above.

    Returns:
        An array of shape ``(1, 5)``, or ``None`` when any count is missing
        (``None``) or when anything raises during conversion.
    """
    try:
        counts = []
        for count_key in ("postsCount", "followersCount", "followsCount"):
            count_value = raw.get(count_key)
            counts.append(None if count_value is None else int(count_value))

        flags = [int(bool(raw.get(flag_key))) for flag_key in ("private", "verified")]

        if any(count is None for count in counts):
            return None
        return np.array([counts + flags], dtype=np.float64)
    except Exception:
        # Any malformed input (non-numeric count, non-mapping `raw`, ...) is
        # reported as "no features" rather than raised.
        return None
|
|
|
|
|
def predict_with_model(X: np.ndarray) -> Dict[str, Any]:
    """Run the module-level ``model`` on ``X`` and summarize the first row.

    Args:
        X: Feature array passed unchanged to the model.

    Returns:
        A dict with ``pred`` (``int`` of the first ``model.predict`` output),
        ``proba_real`` (probability listed for class label ``1``) and
        ``proba_fake`` (probability listed for class label ``0``). Both
        probabilities are ``None`` when the model has no ``predict_proba``;
        either is ``None`` when its label is absent from the model's classes.
    """
    if not hasattr(model, "predict_proba"):
        return {
            "pred": int(model.predict(X)[0]),
            "proba_real": None,
            "proba_fake": None,
        }

    class_probabilities = model.predict_proba(X)
    # Fall back to labels [0, 1] when the model does not expose `classes_`.
    class_labels = getattr(model, "classes_", np.array([0, 1]))
    probability_by_label = dict(
        zip(class_labels.tolist(), class_probabilities[0].tolist())
    )
    return {
        "proba_real": probability_by_label.get(1, None),
        "proba_fake": probability_by_label.get(0, None),
        "pred": int(model.predict(X)[0]),
    }
|
|
|
|
|
|
|
|
|
|
|
# Page header. The emoji and the apostrophe were mis-decoded byte sequences in
# the previous text and are restored here.
st.title("🕵️‍♂️ Fake Instagram Profile Detector")
st.write("Enter a username and we’ll fetch basic public signals, then run a trained model to estimate risk.")

# Nothing below can work without a model, so stop the script run here. The
# `model is None` check also covers a load failure whose message was empty.
if model_load_error or model is None:
    st.error(f"Model failed to load: {model_load_error or 'model is unavailable'}")
    st.stop()
|
|
|
|
|
|
|
|
# Collect the username through a form with an explicit "Analyze" submit button.
with st.form("username_form", clear_on_submit=False):
    username = st.text_input("Instagram Username", placeholder="e.g., nasa")
    submitted = st.form_submit_button("Analyze")

# Nothing submitted yet: show the hint and end this script run.
if not submitted:
    st.info("Enter a username and click **Analyze** to get started.")
    st.stop()

# Reject empty / whitespace-only input.
if not username.strip():
    st.warning("Please provide a username.")
    st.stop()

# `token` was previously read without ever being assigned, which raised
# NameError on every submit; resolve it from secrets / environment first.
token = get_apify_token()
if not token:
    st.error("Missing Apify token. Add it to `.streamlit/secrets.toml` as `APIFY_TOKEN` or set the env var.")
    st.stop()
|
|
|
|
|
|
|
|
# Fetch the profile signals for the trimmed username while showing a spinner.
lookup_name = username.strip()
with st.spinner("Fetching profile data..."):
    fetch_result = fetch_instagram_profile(lookup_name, token)
raw_data, fetch_err = fetch_result

# Either failure mode ends the script run after telling the user why.
if fetch_err:
    st.error(fetch_err)
    st.stop()
elif not raw_data:
    st.warning("No data found. Double-check the username.")
    st.stop()
|
|
|
|
|
|
|
|
# Profile signal cards. The dash in the heading and the "missing value"
# placeholder were mis-decoded byte sequences and are restored to an em dash.
st.subheader(f"Profile Signals — @{username.strip()}")
c1, c2, c3 = st.columns(3)
c4, c5 = st.columns(2)

# One KPI card per numeric count; the three cards share identical markup.
# NOTE(review): values are interpolated into raw HTML - they are expected to
# be numeric counts; escape them if the data source can return free text.
for column, label, count_key in (
    (c1, "Posts", "postsCount"),
    (c2, "Followers", "followersCount"),
    (c3, "Following", "followsCount"),
):
    count_value = raw_data.get(count_key)
    shown_value = "—" if count_value is None else count_value
    with column:
        st.markdown(
            f'<div class="kpi"><div class="small-muted">{label}</div>'
            f'<h3>{shown_value}</h3></div>',
            unsafe_allow_html=True,
        )

with c4:
    private_pill = (
        '<span class="status-pill status-warn">Private</span>'
        if raw_data.get("private")
        else '<span class="status-pill status-ok">Public</span>'
    )
    st.markdown(
        f'<div class="kpi"><div class="small-muted">Privacy</div><div>{private_pill}</div></div>',
        unsafe_allow_html=True,
    )

with c5:
    verified_pill = (
        '<span class="status-pill status-ok">Verified</span>'
        if raw_data.get("verified")
        else '<span class="status-pill status-bad">Not Verified</span>'
    )
    st.markdown(
        f'<div class="kpi"><div class="small-muted">Verification</div><div>{verified_pill}</div></div>',
        unsafe_allow_html=True,
    )
|
|
|
|
|
|
|
|
# Turn the fetched signals into the model's feature row; without all three
# counts there is nothing to classify, so end the script run.
X = to_numeric_features(raw_data)
if X is None:
    st.error("Insufficient numeric data to run the classifier (missing posts/followers/following).")
    st.stop()

with st.spinner("Running prediction..."):
    out = predict_with_model(X)

# Unpack the prediction summary; the probabilities may be None.
pred = out["pred"]
proba_real, proba_fake = (out.get(name) for name in ("proba_real", "proba_fake"))
|
|
|
|
|
|
|
|
st.subheader("Verdict")

# Pick the banner first, then render it once. A predicted label of 0 is
# reported as fake; any other label is reported as real.
if raw_data.get("postsCount") is None:
    show_verdict, verdict_text = st.error, "The user may not exist or data could not be fetched."
elif pred == 0:
    show_verdict, verdict_text = st.error, "The account is **likely to be Fake**."
else:
    show_verdict, verdict_text = st.success, "The account is **likely to be Real**."
show_verdict(verdict_text)

# Show confidence metrics when at least one probability is available; a
# missing probability is displayed as 0.0%.
if not (proba_real is None and proba_fake is None):
    st.write("**Confidence**")
    metric_rows = zip(
        st.columns(2),
        ("Probability: Real", "Probability: Fake"),
        (proba_real, proba_fake),
    )
    for metric_column, metric_label, probability in metric_rows:
        with metric_column:
            st.metric(metric_label, f"{(probability or 0)*100:0.1f}%")
|
|
|
|
|
|
|
|
# Raw fetched values, collapsed by default, for transparency.
with st.expander("See fetched features"):
    st.json(raw_data)

# The warning-sign emoji was a mis-decoded byte sequence and is restored here.
st.caption("⚠️ This tool provides an indicative score. Use responsibly and verify via additional checks.")
|
|
|