# Space: Shashank2k3/Fake-Profile-Detection-Instagram
# Status: Sleeping
# File size: 8,637 bytes

from __future__ import annotations

import html
import os
from pathlib import Path
from typing import Optional, Dict, Any, Tuple

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from apify_client import ApifyClient

# ---------- Page setup ----------
# Browser-tab title/icon and a centered layout for the page.
st.set_page_config(
    layout="centered",
    page_icon="🕵️‍♂️",
    page_title="Fake Instagram Profile Detector",
)

# ---------- Minimal styling ----------
# Stylesheet defining the helper classes (.small-muted, .kpi, .status-*)
# that the raw-HTML snippets in this script refer to.
_APP_CSS = """
<style>
/* Make it feel app-like */
.reportview-container .main .block-container {padding-top: 2rem; padding-bottom: 2rem;}
.small-muted {font-size: 0.9rem; color: rgba(0,0,0,0.55);}
.kpi {padding: 0.75rem 1rem; border-radius: 0.75rem; border: 1px solid rgba(0,0,0,0.08);}
.status-pill {display:inline-block; padding: .25rem .6rem; border-radius: 999px; font-weight:600;}
.status-ok {background:#E7F6EC; color:#137333;}
.status-bad {background:#FCE8E6; color:#B3261E;}
.status-warn {background:#FFF4E5; color:#8A4D00;}
</style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as markup.
st.markdown(_APP_CSS, unsafe_allow_html=True)

# ---------- Config & Secrets ----------
def get_apify_token():
    """Return the configured Apify token, or a falsy value if none is set.

    Two sources are consulted: the ``APIFY_TOKEN`` entry in Streamlit
    secrets and the ``APIFY_TOKEN`` environment variable. A truthy secret
    wins; otherwise the environment value (possibly ``None``) is returned.
    """
    from_env = os.getenv("APIFY_TOKEN")
    try:
        # Reading st.secrets is kept inside the try because it raises when
        # no secrets file is present.
        from_secrets = st.secrets.get("APIFY_TOKEN", None)  # type: ignore
    except Exception:
        # Best effort: any failure to read secrets falls back to the env var.
        from_secrets = None
    return from_secrets or from_env

# Identifier of the Apify actor that is invoked to fetch profile data.
APIFY_ACTOR_ID = "dSCLg0C3YEZ83HzYX"  # your actor id
# Key under which the list of usernames is placed in the actor's run input.
# If your actor expects a different input shape, adjust below.
DEFAULT_RUN_INPUT_KEY = "usernames"

# ---------- Model loading ----------
@st.cache_resource(show_spinner=False)
def load_model() -> Any:
    """Load ``classifier.pkl`` from the directory containing this file.

    The path is resolved relative to this source file rather than the
    current working directory.

    Returns:
        Whatever object ``joblib.load`` deserializes from the file.

    Raises:
        FileNotFoundError: if ``classifier.pkl`` is not next to this file.
    """
    model_path = Path(__file__).resolve().parent / "classifier.pkl"  # place classifier.pkl inside src/
    if model_path.exists():
        # NOTE(review): joblib.load unpickles the file; only ship a trusted classifier.pkl.
        return joblib.load(model_path)
    raise FileNotFoundError(f"Model not found at: {model_path}")

# Load the classifier while the script runs. A failure is recorded in
# `model_load_error` instead of being raised, so the page can show a
# readable error message later on.
model = None
model_load_error = None
try:
    with st.spinner("Loading model..."):
        model = load_model()
except Exception as e:
    # Some exceptions (e.g. a bare MemoryError) stringify to "", which a
    # truthiness check on the message would mistake for "no error".
    # Fall back to repr() so a failure always yields a non-empty message.
    model_load_error = str(e) or repr(e)

# ---------- Apify helpers ----------
@st.cache_data(show_spinner=False, ttl=60)  # cache for a minute
def fetch_instagram_profile(username: str, token: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Run the Apify actor for *username* and extract the first item's signals.

    Args:
        username: Username placed (as a one-element list) in the actor input.
        token: Apify API token used to build the client.

    Returns:
        ``(profile, None)`` on success, where *profile* holds the keys
        ``postsCount``, ``followersCount``, ``followsCount``, ``private`` and
        ``verified`` (each ``None`` when absent from the item);
        ``(None, message)`` when the dataset is empty or any exception occurs.

    NOTE(review): failures are returned rather than raised, so they are
    presumably cached for the TTL as well — confirm this is intended.
    """
    signal_keys = ("postsCount", "followersCount", "followsCount", "private", "verified")
    nothing = object()  # sentinel distinguishing "no items" from any real item
    try:
        apify = ApifyClient(token)
        actor_run = apify.actor(APIFY_ACTOR_ID).call(
            run_input={DEFAULT_RUN_INPUT_KEY: [username]}
        )
        items = apify.dataset(actor_run["defaultDatasetId"]).iterate_items()

        # Only the first dataset item is considered; it is not checked
        # against the requested username.
        first = next(iter(items), nothing)
        if first is nothing:
            return None, "No data returned for this username."

        # normalize keys we care about
        return {key: first.get(key) for key in signal_keys}, None
    except Exception as exc:
        return None, f"Apify error: {exc}"

def to_numeric_features(raw: Dict[str, Any]) -> Optional[np.ndarray]:
    """Convert fetched profile signals into a single-row feature matrix.

    Args:
        raw: Mapping with ``postsCount``, ``followersCount``, ``followsCount``,
            ``private`` and ``verified`` entries.

    Returns:
        A ``float64`` array of shape ``(1, 5)`` ordered as
        ``[posts, followers, follows, private, verified]``, or ``None`` when
        any of the three counts is missing or the conversion fails.
    """
    try:
        counts = []
        for key in ("postsCount", "followersCount", "followsCount"):
            value = raw.get(key)
            counts.append(None if value is None else int(value))

        # Booleans are encoded as 0/1; a missing flag counts as False.
        flags = [int(bool(raw.get(key))) for key in ("private", "verified")]

        if any(count is None for count in counts):
            return None
        return np.array([counts + flags], dtype=np.float64)
    except Exception:
        # Any conversion problem (non-numeric text, wrong container type, ...)
        # is reported as "no usable features".
        return None

def predict_with_model(X: np.ndarray) -> Dict[str, Any]:
    """Run the module-level ``model`` on *X* and package the outcome.

    Args:
        X: Feature matrix passed to the model; only the first row's
            prediction is reported.

    Returns:
        A dict with ``pred`` (the predicted label as ``int``) plus
        ``proba_real`` / ``proba_fake``: the probabilities for class ``1``
        and class ``0`` when the model offers ``predict_proba``, else ``None``.
    """
    if not hasattr(model, "predict_proba"):
        # Binary prediction only: no probabilities to report.
        label = int(model.predict(X)[0])
        return {"pred": label, "proba_real": None, "proba_fake": None}

    class_probs = model.predict_proba(X)
    # Assume class 1 = Real, class 0 = Fake (adjust if reversed in your model).
    # Use the model's own class ordering when it exposes one.
    class_labels = getattr(model, "classes_", np.array([0, 1]))
    prob_by_class = {
        label: prob
        for label, prob in zip(class_labels.tolist(), class_probs[0].tolist())
    }
    return {
        "proba_real": prob_by_class.get(1, None),
        "proba_fake": prob_by_class.get(0, None),
        "pred": int(model.predict(X)[0]),
    }


# ---------- Header ----------
st.title("🕵️‍♂️ Fake Instagram Profile Detector")
st.write("Enter a username and we’ll fetch basic public signals, then run a trained model to estimate risk.")

# Nothing below can work without a classifier. Check the model object as well
# as the message, since an exception can stringify to an empty message.
if model is None or model_load_error:
    st.error(f"Model failed to load: {model_load_error}")
    st.stop()

# ---------- Main Form ----------
with st.form("username_form", clear_on_submit=False):
    username = st.text_input("Instagram Username", placeholder="e.g., nasa")
    submitted = st.form_submit_button("Analyze")

if not submitted:
    st.info("Enter a username and click **Analyze** to get started.")
    st.stop()

# ---------- Validation ----------
# Normalize once: trim whitespace and drop a leading "@" so "@nasa" and
# "nasa" are treated alike and the heading does not show "@@nasa".
handle = username.strip().lstrip("@")
if not handle:
    st.warning("Please provide a username.")
    st.stop()

# Resolve the token here; it was previously referenced without ever being
# assigned, which raised NameError on every submission.
token = get_apify_token()
if not token:
    st.error("Missing Apify token. Add it to `.streamlit/secrets.toml` as `APIFY_TOKEN` or set the env var.")
    st.stop()

# ---------- Fetch & Predict ----------
with st.spinner("Fetching profile data..."):
    raw_data, fetch_err = fetch_instagram_profile(handle, token)

if fetch_err:
    st.error(fetch_err)
    st.stop()
if not raw_data:
    st.warning("No data found. Double-check the username.")
    st.stop()

# KPIs
st.subheader(f"Profile Signals — @{handle}")
c1, c2, c3 = st.columns(3)
c4, c5 = st.columns(2)

# The three count cards share one template; only the label and key differ.
count_cards = (
    (c1, "Posts", "postsCount"),
    (c2, "Followers", "followersCount"),
    (c3, "Following", "followsCount"),
)
for column, label, key in count_cards:
    value = raw_data[key]
    # Values come from an external service and are injected as raw HTML,
    # so escape them before interpolation.
    shown = "—" if value is None else html.escape(str(value))
    with column:
        st.markdown(
            f'<div class="kpi"><div class="small-muted">{label}</div><h3>{shown}</h3></div>',
            unsafe_allow_html=True,
        )

with c4:
    if raw_data.get("private"):
        private_pill = '<span class="status-pill status-warn">Private</span>'
    else:
        private_pill = '<span class="status-pill status-ok">Public</span>'
    st.markdown(f'<div class="kpi"><div class="small-muted">Privacy</div><div>{private_pill}</div></div>', unsafe_allow_html=True)

with c5:
    if raw_data.get("verified"):
        verified_pill = '<span class="status-pill status-ok">Verified</span>'
    else:
        verified_pill = '<span class="status-pill status-bad">Not Verified</span>'
    st.markdown(f'<div class="kpi"><div class="small-muted">Verification</div><div>{verified_pill}</div></div>', unsafe_allow_html=True)

# Prepare features
X = to_numeric_features(raw_data)
if X is None:
    st.error("Insufficient numeric data to run the classifier (missing posts/followers/following).")
    st.stop()

with st.spinner("Running prediction..."):
    out = predict_with_model(X)

pred = out["pred"]
proba_real = out.get("proba_real")
proba_fake = out.get("proba_fake")

# ---------- Verdict ----------
# A missing postsCount already stopped the script above (X is None), so only
# the two model outcomes remain. Label 0 is treated as "Fake".
st.subheader("Verdict")
if pred == 0:
    st.error("The account is **likely to be Fake**.")
else:
    st.success("The account is **likely to be Real**.")

# ---------- Confidence ----------
if (proba_real is not None) or (proba_fake is not None):
    st.write("**Confidence**")
    cc1, cc2 = st.columns(2)
    with cc1:
        st.metric("Probability: Real", f"{(proba_real or 0)*100:0.1f}%")
    with cc2:
        st.metric("Probability: Fake", f"{(proba_fake or 0)*100:0.1f}%")

# ---------- Raw data (expandable) ----------
with st.expander("See fetched features"):
    st.json(raw_data)

st.caption("⚠️ This tool provides an indicative score. Use responsibly and verify via additional checks.")