from __future__ import annotations
import os
from pathlib import Path
from typing import Optional, Dict, Any, Tuple
import numpy as np
import pandas as pd
import streamlit as st
import joblib
from apify_client import ApifyClient
# ---------- Page setup ----------
st.set_page_config(
page_title="Fake Instagram Profile Detector",
    page_icon="🕵️‍♂️",
layout="centered",
initial_sidebar_state="expanded",
)
# ---------- Minimal styling ----------
st.markdown("""
<style>
/* Make it feel app-like */
.reportview-container .main .block-container {padding-top: 2rem; padding-bottom: 2rem;}
.small-muted {font-size: 0.9rem; color: rgba(0,0,0,0.55);}
.kpi {padding: 0.75rem 1rem; border-radius: 0.75rem; border: 1px solid rgba(0,0,0,0.08);}
.status-pill {display:inline-block; padding: .25rem .6rem; border-radius: 999px; font-weight:600;}
.status-ok {background:#E7F6EC; color:#137333;}
.status-bad {background:#FCE8E6; color:#B3261E;}
.status-warn {background:#FFF4E5; color:#8A4D00;}
</style>
""", unsafe_allow_html=True)
# ---------- Config & Secrets ----------
def get_apify_token() -> Optional[str]:
    # Prefer Streamlit secrets, then the APIFY_TOKEN env var; otherwise None.
    # st.secrets can raise if no secrets.toml exists, so guard the lookup.
    try:
        token = st.secrets.get("APIFY_TOKEN", None)
    except Exception:
        token = None
    return token or os.getenv("APIFY_TOKEN")  # never hardcode the token in source
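# Example .streamlit/secrets.toml entry (assumed layout):
#   APIFY_TOKEN = "<your-apify-token>"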
APIFY_ACTOR_ID = "dSCLg0C3YEZ83HzYX" # your actor id
# If your actor expects a different input shape, adjust below.
DEFAULT_RUN_INPUT_KEY = "usernames"
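# Example run_input as built in fetch_instagram_profile (assumed shape):
#   {"usernames": ["nasa"]}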
# ---------- Model loading ----------
@st.cache_resource(show_spinner=False)
def load_model() -> Any:
# Load relative to this file to avoid CWD issues
here = Path(__file__).resolve().parent
model_path = here / "classifier.pkl" # place classifier.pkl inside src/
if not model_path.exists():
raise FileNotFoundError(f"Model not found at: {model_path}")
return joblib.load(model_path)
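# st.cache_resource loads the pickle once per server process and reuses the same
# estimator object across reruns and sessions.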
model = None
model_load_error = None
try:
with st.spinner("Loading model..."):
model = load_model()
except Exception as e:
model_load_error = str(e)
# ---------- Apify helpers ----------
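# Lookups are cached for 60 seconds per (username, token) pair, so repeating the same
# username within that window reuses the cached result instead of re-running the actor.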
@st.cache_data(show_spinner=False, ttl=60) # cache for a minute
def fetch_instagram_profile(username: str, token: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
try:
client = ApifyClient(token)
run_input = {DEFAULT_RUN_INPUT_KEY: [username]}
run = client.actor(APIFY_ACTOR_ID).call(run_input=run_input)
dataset = client.dataset(run["defaultDatasetId"])
        # Take the first item returned by the actor run
for item in dataset.iterate_items():
# normalize keys we care about
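            # Expected item fields (assumed actor output): postsCount, followersCount,
            # followsCount, private, verified; any other fields are ignored.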
out = {
"postsCount": item.get("postsCount"),
"followersCount": item.get("followersCount"),
"followsCount": item.get("followsCount"),
"private": item.get("private"),
"verified": item.get("verified"),
}
return out, None
return None, "No data returned for this username."
except Exception as e:
return None, f"Apify error: {e}"
def to_numeric_features(raw: Dict[str, Any]) -> Optional[np.ndarray]:
try:
posts = int(raw.get("postsCount")) if raw.get("postsCount") is not None else None
followers = int(raw.get("followersCount")) if raw.get("followersCount") is not None else None
follows = int(raw.get("followsCount")) if raw.get("followsCount") is not None else None
private = 1 if bool(raw.get("private")) else 0
verified = 1 if bool(raw.get("verified")) else 0
if None in (posts, followers, follows):
return None
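        # Feature order must match the order used to train classifier.pkl
        # (assumed here: posts, followers, follows, private, verified).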
return np.array([[posts, followers, follows, private, verified]], dtype=np.float64)
except Exception:
return None
def predict_with_model(X: np.ndarray) -> Dict[str, Any]:
# Try to get probabilities if available; else binary prediction
result: Dict[str, Any] = {}
if hasattr(model, "predict_proba"):
proba = model.predict_proba(X)
# Assume class 1 = Real, class 0 = Fake (adjust if reversed in your model)
# Try to find mapping if model has classes_
label_index = getattr(model, "classes_", np.array([0, 1]))
# map probabilities to classes
probs = dict(zip(label_index.tolist(), proba[0].tolist()))
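        # probs maps each class label to its probability; if the labels are not the
        # plain integers 0 and 1, both .get() lookups below fall back to None.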
result["proba_real"] = probs.get(1, None)
result["proba_fake"] = probs.get(0, None)
result["pred"] = int(model.predict(X)[0])
else:
y = int(model.predict(X)[0])
result["pred"] = y
result["proba_real"] = None
result["proba_fake"] = None
return result
# ---------- Sidebar ----------
with st.sidebar:
    st.header("⚙️ Settings")
st.caption("Configure how the app connects and behaves.")
token = get_apify_token()
if not token:
        token = st.text_input("Apify API token (not saved)", type="password", placeholder="apify_api_...")
st.divider()
st.markdown("**About**")
st.write(
"This app checks basic Instagram profile signals "
"and runs a classifier to estimate whether an account looks fake or real."
)
st.markdown(
'<span class="small-muted">For demo/educational purposes only. '
'Always verify results with additional signals.</span>',
unsafe_allow_html=True
)
# ---------- Header ----------
st.title("🕵️‍♂️ Fake Instagram Profile Detector")
st.write("Enter a username and we’ll fetch basic public signals, then run a trained model to estimate risk.")
if model_load_error:
st.error(f"Model failed to load: {model_load_error}")
st.stop()
# ---------- Main Form ----------
with st.form("username_form", clear_on_submit=False):
username = st.text_input("Instagram Username", placeholder="e.g., nasa")
submitted = st.form_submit_button("Analyze")
if not submitted:
st.info("Enter a username and click **Analyze** to get started.")
st.stop()
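# Past this point the form has been submitted; otherwise st.stop() above ends the run.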
# ---------- Validation ----------
if not username.strip():
st.warning("Please provide a username.")
st.stop()
if not token:
st.error("Missing Apify token. Add it to `.streamlit/secrets.toml` as `APIFY_TOKEN` or set the env var.")
st.stop()
# ---------- Fetch & Predict ----------
with st.spinner("Fetching profile data..."):
raw_data, fetch_err = fetch_instagram_profile(username.strip(), token)
if fetch_err:
st.error(fetch_err)
st.stop()
if not raw_data:
st.warning("No data found. Double-check the username.")
st.stop()
# KPIs
st.subheader(f"Profile Signals β€” @{username}")
c1, c2, c3 = st.columns(3)
c4, c5 = st.columns(2)
with c1:
st.markdown('<div class="kpi"><div class="small-muted">Posts</div>'
f'<h3>{raw_data["postsCount"] if raw_data["postsCount"] is not None else "β€”"}</h3></div>', unsafe_allow_html=True)
with c2:
st.markdown('<div class="kpi"><div class="small-muted">Followers</div>'
f'<h3>{raw_data["followersCount"] if raw_data["followersCount"] is not None else "β€”"}</h3></div>', unsafe_allow_html=True)
with c3:
st.markdown('<div class="kpi"><div class="small-muted">Following</div>'
f'<h3>{raw_data["followsCount"] if raw_data["followsCount"] is not None else "β€”"}</h3></div>', unsafe_allow_html=True)
with c4:
private_pill = '<span class="status-pill status-warn">Private</span>' if raw_data.get("private") else '<span class="status-pill status-ok">Public</span>'
st.markdown(f'<div class="kpi"><div class="small-muted">Privacy</div><div>{private_pill}</div></div>', unsafe_allow_html=True)
with c5:
verified_pill = '<span class="status-pill status-ok">Verified</span>' if raw_data.get("verified") else '<span class="status-pill status-bad">Not Verified</span>'
st.markdown(f'<div class="kpi"><div class="small-muted">Verification</div><div>{verified_pill}</div></div>', unsafe_allow_html=True)
# Prepare features
X = to_numeric_features(raw_data)
if X is None:
st.error("Insufficient numeric data to run the classifier (missing posts/followers/following).")
st.stop()
with st.spinner("Running prediction..."):
out = predict_with_model(X)
pred = out["pred"]
proba_real = out.get("proba_real")
proba_fake = out.get("proba_fake")
# ---------- Verdict ----------
st.subheader("Verdict")
if raw_data.get("postsCount") is None:
st.error("The user may not exist or data could not be fetched.")
elif pred == 0:
st.error("The account is **likely to be Fake**.")
else:
st.success("The account is **likely to be Real**.")
# ---------- Confidence ----------
if (proba_real is not None) or (proba_fake is not None):
st.write("**Confidence**")
cc1, cc2 = st.columns(2)
with cc1:
st.metric("Probability: Real", f"{(proba_real or 0)*100:0.1f}%")
with cc2:
st.metric("Probability: Fake", f"{(proba_fake or 0)*100:0.1f}%")
# ---------- Raw data (expandable) ----------
with st.expander("See fetched features"):
st.json(raw_data)
st.caption("⚠️ This tool provides an indicative score. Use responsibly and verify via additional checks.")