from __future__ import annotations
import os
from pathlib import Path
from typing import Optional, Dict, Any, Tuple
import numpy as np
import pandas as pd
import streamlit as st
import joblib
from apify_client import ApifyClient
# ---------- Page setup ----------
st.set_page_config(
page_title="Fake Instagram Profile Detector",
    page_icon="🕵️‍♂️",
layout="centered",
initial_sidebar_state="expanded",
)
# ---------- Minimal styling ----------
st.markdown("""
<style>
/* Make it feel app-like */
.reportview-container .main .block-container {padding-top: 2rem; padding-bottom: 2rem;}
.small-muted {font-size: 0.9rem; color: rgba(0,0,0,0.55);}
.kpi {padding: 0.75rem 1rem; border-radius: 0.75rem; border: 1px solid rgba(0,0,0,0.08);}
.status-pill {display:inline-block; padding: .25rem .6rem; border-radius: 999px; font-weight:600;}
.status-ok {background:#E7F6EC; color:#137333;}
.status-bad {background:#FCE8E6; color:#B3261E;}
.status-warn {background:#FFF4E5; color:#8A4D00;}
</style>
""", unsafe_allow_html=True)
# ---------- Config & Secrets ----------
def get_apify_token() -> Optional[str]:
    # Prefer Streamlit secrets, then the APIFY_TOKEN env var; otherwise None.
    # st.secrets can raise if no secrets.toml exists, so guard the lookup.
    try:
        token = st.secrets.get("APIFY_TOKEN", None)
    except Exception:
        token = None
    return token or os.getenv("APIFY_TOKEN")  # never hardcode the token in source
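# Example .streamlit/secrets.toml entry (assumed layout):
#   APIFY_TOKEN = "<your-apify-token>"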
APIFY_ACTOR_ID = "dSCLg0C3YEZ83HzYX" # your actor id
# If your actor expects a different input shape, adjust below.
DEFAULT_RUN_INPUT_KEY = "usernames"
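# Example run_input as built in fetch_instagram_profile (assumed shape):
#   {"usernames": ["nasa"]}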
# ---------- Model loading ----------
@st.cache_resource(show_spinner=False)
def load_model() -> Any:
# Load relative to this file to avoid CWD issues
here = Path(__file__).resolve().parent
model_path = here / "classifier.pkl" # place classifier.pkl inside src/
if not model_path.exists():
raise FileNotFoundError(f"Model not found at: {model_path}")
return joblib.load(model_path)
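# st.cache_resource loads the pickle once per server process and reuses the same
# estimator object across reruns and sessions.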
model = None
model_load_error = None
try:
with st.spinner("Loading model..."):
model = load_model()
except Exception as e:
model_load_error = str(e)
# ---------- Apify helpers ----------
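# Lookups are cached for 60 seconds per (username, token) pair, so repeating the same
# username within that window reuses the cached result instead of re-running the actor.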
@st.cache_data(show_spinner=False, ttl=60) # cache for a minute
def fetch_instagram_profile(username: str, token: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
try:
client = ApifyClient(token)
run_input = {DEFAULT_RUN_INPUT_KEY: [username]}
run = client.actor(APIFY_ACTOR_ID).call(run_input=run_input)
dataset = client.dataset(run["defaultDatasetId"])
        # Take the first item returned by the actor run
for item in dataset.iterate_items():
# normalize keys we care about
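            # Expected item fields (assumed actor output): postsCount, followersCount,
            # followsCount, private, verified; any other fields are ignored.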
out = {
"postsCount": item.get("postsCount"),
"followersCount": item.get("followersCount"),
"followsCount": item.get("followsCount"),
"private": item.get("private"),
"verified": item.get("verified"),
}
return out, None
return None, "No data returned for this username."
except Exception as e:
return None, f"Apify error: {e}"
def to_numeric_features(raw: Dict[str, Any]) -> Optional[np.ndarray]:
try:
posts = int(raw.get("postsCount")) if raw.get("postsCount") is not None else None
followers = int(raw.get("followersCount")) if raw.get("followersCount") is not None else None
follows = int(raw.get("followsCount")) if raw.get("followsCount") is not None else None
private = 1 if bool(raw.get("private")) else 0
verified = 1 if bool(raw.get("verified")) else 0
if None in (posts, followers, follows):
return None
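        # Feature order must match the order used to train classifier.pkl
        # (assumed here: posts, followers, follows, private, verified).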
return np.array([[posts, followers, follows, private, verified]], dtype=np.float64)
except Exception:
return None
def predict_with_model(X: np.ndarray) -> Dict[str, Any]:
# Try to get probabilities if available; else binary prediction
result: Dict[str, Any] = {}
if hasattr(model, "predict_proba"):
proba = model.predict_proba(X)
# Assume class 1 = Real, class 0 = Fake (adjust if reversed in your model)
# Try to find mapping if model has classes_
label_index = getattr(model, "classes_", np.array([0, 1]))
# map probabilities to classes
probs = dict(zip(label_index.tolist(), proba[0].tolist()))
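        # probs maps each class label to its probability; if the labels are not the
        # plain integers 0 and 1, both .get() lookups below fall back to None.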
result["proba_real"] = probs.get(1, None)
result["proba_fake"] = probs.get(0, None)
result["pred"] = int(model.predict(X)[0])
else:
y = int(model.predict(X)[0])
result["pred"] = y
result["proba_real"] = None
result["proba_fake"] = None
return result
# ---------- Sidebar ----------
with st.sidebar:
    st.header("⚙️ Settings")
st.caption("Configure how the app connects and behaves.")
token = get_apify_token()
if not token:
        token = st.text_input("Apify API token (not saved)", type="password", placeholder="apify_api_...")
st.divider()
st.markdown("**About**")
st.write(
"This app checks basic Instagram profile signals "
"and runs a classifier to estimate whether an account looks fake or real."
)
st.markdown(
'<span class="small-muted">For demo/educational purposes only. '
'Always verify results with additional signals.</span>',
unsafe_allow_html=True
)
# ---------- Header ----------
st.title("🕵️‍♂️ Fake Instagram Profile Detector")
st.write("Enter a username and we’ll fetch basic public signals, then run a trained model to estimate risk.")
if model_load_error:
st.error(f"Model failed to load: {model_load_error}")
st.stop()
# ---------- Main Form ----------
with st.form("username_form", clear_on_submit=False):
username = st.text_input("Instagram Username", placeholder="e.g., nasa")
submitted = st.form_submit_button("Analyze")
if not submitted:
st.info("Enter a username and click **Analyze** to get started.")
st.stop()
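# Past this point the form has been submitted; otherwise st.stop() above ends the run.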
# ---------- Validation ----------
if not username.strip():
st.warning("Please provide a username.")
st.stop()
if not token:
st.error("Missing Apify token. Add it to `.streamlit/secrets.toml` as `APIFY_TOKEN` or set the env var.")
st.stop()
# ---------- Fetch & Predict ----------
with st.spinner("Fetching profile data..."):
raw_data, fetch_err = fetch_instagram_profile(username.strip(), token)
if fetch_err:
st.error(fetch_err)
st.stop()
if not raw_data:
st.warning("No data found. Double-check the username.")
st.stop()
# KPIs
st.subheader(f"Profile Signals β€” @{username}")
c1, c2, c3 = st.columns(3)
c4, c5 = st.columns(2)
with c1:
st.markdown('<div class="kpi"><div class="small-muted">Posts</div>'
f'<h3>{raw_data["postsCount"] if raw_data["postsCount"] is not None else "β€”"}</h3></div>', unsafe_allow_html=True)
with c2:
st.markdown('<div class="kpi"><div class="small-muted">Followers</div>'
f'<h3>{raw_data["followersCount"] if raw_data["followersCount"] is not None else "β€”"}</h3></div>', unsafe_allow_html=True)
with c3:
st.markdown('<div class="kpi"><div class="small-muted">Following</div>'
f'<h3>{raw_data["followsCount"] if raw_data["followsCount"] is not None else "β€”"}</h3></div>', unsafe_allow_html=True)
with c4:
private_pill = '<span class="status-pill status-warn">Private</span>' if raw_data.get("private") else '<span class="status-pill status-ok">Public</span>'
st.markdown(f'<div class="kpi"><div class="small-muted">Privacy</div><div>{private_pill}</div></div>', unsafe_allow_html=True)
with c5:
verified_pill = '<span class="status-pill status-ok">Verified</span>' if raw_data.get("verified") else '<span class="status-pill status-bad">Not Verified</span>'
st.markdown(f'<div class="kpi"><div class="small-muted">Verification</div><div>{verified_pill}</div></div>', unsafe_allow_html=True)
# Prepare features
X = to_numeric_features(raw_data)
if X is None:
st.error("Insufficient numeric data to run the classifier (missing posts/followers/following).")
st.stop()
with st.spinner("Running prediction..."):
out = predict_with_model(X)
pred = out["pred"]
proba_real = out.get("proba_real")
proba_fake = out.get("proba_fake")
# ---------- Verdict ----------
st.subheader("Verdict")
if raw_data.get("postsCount") is None:
st.error("The user may not exist or data could not be fetched.")
elif pred == 0:
st.error("The account is **likely to be Fake**.")
else:
st.success("The account is **likely to be Real**.")
# ---------- Confidence ----------
if (proba_real is not None) or (proba_fake is not None):
st.write("**Confidence**")
cc1, cc2 = st.columns(2)
with cc1:
st.metric("Probability: Real", f"{(proba_real or 0)*100:0.1f}%")
with cc2:
st.metric("Probability: Fake", f"{(proba_fake or 0)*100:0.1f}%")
# ---------- Raw data (expandable) ----------
with st.expander("See fetched features"):
st.json(raw_data)
st.caption("⚠️ This tool provides an indicative score. Use responsibly and verify via additional checks.")