File size: 8,637 Bytes
f702bfb
 
 
 
 
2eeec36
 
f702bfb
e11575a
 
f702bfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38f2027
 
 
 
 
 
 
 
 
 
 
 
 
 
f702bfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e11575a
f702bfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e11575a
f702bfb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional, Dict, Any, Tuple

import numpy as np
import pandas as pd
import streamlit as st
import joblib
from apify_client import ApifyClient

# ---------- Page setup ----------
# The icon is the "man detective" emoji; the previous literal was a
# mis-decoded (mojibake) rendering of the same UTF-8 bytes.
st.set_page_config(
    page_title="Fake Instagram Profile Detector",
    page_icon="🕵️‍♂️",
    layout="centered",
)

# ---------- Minimal styling ----------
# Injects the CSS classes (.kpi, .small-muted, .status-pill, .status-*) that
# the raw-HTML KPI cards rendered further down rely on.
st.markdown("""
<style>
/* Make it feel app-like */
.reportview-container .main .block-container {padding-top: 2rem; padding-bottom: 2rem;}
.small-muted {font-size: 0.9rem; color: rgba(0,0,0,0.55);}
.kpi {padding: 0.75rem 1rem; border-radius: 0.75rem; border: 1px solid rgba(0,0,0,0.08);}
.status-pill {display:inline-block; padding: .25rem .6rem; border-radius: 999px; font-weight:600;}
.status-ok {background:#E7F6EC; color:#137333;}
.status-bad {background:#FCE8E6; color:#B3261E;}
.status-warn {background:#FFF4E5; color:#8A4D00;}
</style>
""", unsafe_allow_html=True)

# ---------- Config & Secrets ----------
def get_apify_token() -> Optional[str]:
    """Return the Apify API token, or ``None`` when none is configured.

    The ``APIFY_TOKEN`` entry in Streamlit secrets takes precedence; the
    ``APIFY_TOKEN`` environment variable is used when the secret is missing
    or empty.
    """
    env_token = os.getenv("APIFY_TOKEN")

    try:
        # Reading st.secrets raises when no secrets file exists. This lookup is
        # deliberately best-effort: any failure simply means "no secret set".
        secret_token = st.secrets.get("APIFY_TOKEN", None)  # type: ignore
    except Exception:
        secret_token = None

    return secret_token or env_token

# Identifier of the Apify actor that is called to fetch a profile.
APIFY_ACTOR_ID = "dSCLg0C3YEZ83HzYX"  # your actor id
# Key under which the username list is placed in the actor's run input.
# If your actor expects a different input shape, adjust this key (and the
# run_input built in fetch_instagram_profile).
DEFAULT_RUN_INPUT_KEY = "usernames"

# ---------- Model loading ----------
@st.cache_resource(show_spinner=False)
def load_model() -> Any:
    """Load the classifier stored as ``classifier.pkl`` next to this file.

    The path is resolved from ``__file__`` rather than the working directory,
    so the app can be launched from anywhere.

    Raises:
        FileNotFoundError: if ``classifier.pkl`` is not present.
    """
    model_path = Path(__file__).resolve().parent / "classifier.pkl"
    if model_path.exists():
        return joblib.load(model_path)
    raise FileNotFoundError(f"Model not found at: {model_path}")

# Load the classifier while the script runs. A failure is captured as text
# instead of propagating, so the page can show it as an error message.
model: Any = None
model_load_error: Optional[str] = None
try:
    with st.spinner("Loading model..."):
        model = load_model()
except Exception as exc:
    model_load_error = str(exc)

# ---------- Apify helpers ----------
@st.cache_data(show_spinner=False, ttl=60)  # cache for a minute
def fetch_instagram_profile(username: str, token: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
    """Run the Apify actor for ``username`` and return ``(profile, error)``.

    Exactly one element of the returned pair is ``None``:

    * on success, ``profile`` is a dict with the keys ``postsCount``,
      ``followersCount``, ``followsCount``, ``private`` and ``verified``
      (each value may itself be ``None`` if the actor did not supply it);
    * on failure, ``error`` is a human-readable message.

    Any exception raised while talking to Apify is converted into an error
    message rather than propagated.
    """
    wanted_keys = ("postsCount", "followersCount", "followsCount", "private", "verified")
    nothing = object()  # sentinel: distinguishes "empty dataset" from any real item

    try:
        apify = ApifyClient(token)
        run = apify.actor(APIFY_ACTOR_ID).call(run_input={DEFAULT_RUN_INPUT_KEY: [username]})
        items = apify.dataset(run["defaultDatasetId"]).iterate_items()

        # Only the first dataset item is used; later items are never read.
        first = next(iter(items), nothing)
        if first is nothing:
            return None, "No data returned for this username."

        # Keep just the fields the rest of the app reads.
        return {key: first.get(key) for key in wanted_keys}, None
    except Exception as exc:
        return None, f"Apify error: {exc}"

def to_numeric_features(raw: Dict[str, Any]) -> Optional[np.ndarray]:
    """Convert a fetched profile dict into a ``(1, 5)`` float64 feature row.

    Column order is: posts, followers, follows, private, verified. The three
    counts are coerced with ``int()``; the two flags become 1 or 0 according
    to their truthiness.

    Returns ``None`` when any of the three counts is missing (``None``) or
    when any conversion fails.
    """
    try:
        counts = []
        for key in ("postsCount", "followersCount", "followsCount"):
            value = raw.get(key)
            if value is None:
                return None
            counts.append(int(value))

        flags = [int(bool(raw.get(name))) for name in ("private", "verified")]
        return np.array([counts + flags], dtype=np.float64)
    except Exception:
        return None

def predict_with_model(X: np.ndarray) -> Dict[str, Any]:
    """Classify feature row ``X`` with the module-level ``model``.

    Returns a dict with:

    * ``pred`` - the predicted label as an ``int``;
    * ``proba_real`` / ``proba_fake`` - the probabilities reported for class
      labels ``1`` and ``0`` respectively, or ``None`` when the model has no
      ``predict_proba`` or does not expose that label.

    NOTE: label 1 = Real and label 0 = Fake is an assumption about how the
    model was trained; swap the lookups if your model uses the reverse.
    """
    if not hasattr(model, "predict_proba"):
        # Hard-label-only estimator: there are no probabilities to report.
        return {
            "pred": int(model.predict(X)[0]),
            "proba_real": None,
            "proba_fake": None,
        }

    proba = model.predict_proba(X)
    # Pair each probability with its class label; fall back to [0, 1] when
    # the estimator does not publish classes_.
    labels = getattr(model, "classes_", np.array([0, 1])).tolist()
    by_label = dict(zip(labels, proba[0].tolist()))
    return {
        "proba_real": by_label.get(1, None),
        "proba_fake": by_label.get(0, None),
        "pred": int(model.predict(X)[0]),
    }


# ---------- Header ----------
st.title("🕵️‍♂️ Fake Instagram Profile Detector")
st.write("Enter a username and we’ll fetch basic public signals, then run a trained model to estimate risk.")

# Without a model nothing below can work, so report the failure and halt.
if model_load_error:
    st.error(f"Model failed to load: {model_load_error}")
    st.stop()

# ---------- Main Form ----------
with st.form("username_form", clear_on_submit=False):
    username = st.text_input("Instagram Username", placeholder="e.g., nasa")
    submitted = st.form_submit_button("Analyze")

if not submitted:
    st.info("Enter a username and click **Analyze** to get started.")
    st.stop()

# ---------- Validation ----------
# Normalize once so the fetch and the heading use the same value; a leading
# "@" (as people commonly type handles) is not part of the username.
username = username.strip().lstrip("@").strip()
if not username:
    st.warning("Please provide a username.")
    st.stop()

# Resolve the token here: it was previously referenced without ever being
# assigned, which raised NameError on every submission.
token = get_apify_token()
if not token:
    st.error("Missing Apify token. Add it to `.streamlit/secrets.toml` as `APIFY_TOKEN` or set the env var.")
    st.stop()

# ---------- Fetch & Predict ----------
with st.spinner("Fetching profile data..."):
    raw_data, fetch_err = fetch_instagram_profile(username, token)

if fetch_err:
    st.error(fetch_err)
    st.stop()
if not raw_data:
    st.warning("No data found. Double-check the username.")
    st.stop()

# KPIs
st.subheader(f"Profile Signals — @{username}")
c1, c2, c3 = st.columns(3)
c4, c5 = st.columns(2)

# The three count cards share one template; a missing count shows an em dash.
for column, label, key in (
    (c1, "Posts", "postsCount"),
    (c2, "Followers", "followersCount"),
    (c3, "Following", "followsCount"),
):
    value = raw_data[key] if raw_data[key] is not None else "—"
    with column:
        st.markdown(
            f'<div class="kpi"><div class="small-muted">{label}</div>'
            f'<h3>{value}</h3></div>',
            unsafe_allow_html=True,
        )

with c4:
    if raw_data.get("private"):
        private_pill = '<span class="status-pill status-warn">Private</span>'
    else:
        private_pill = '<span class="status-pill status-ok">Public</span>'
    st.markdown(
        f'<div class="kpi"><div class="small-muted">Privacy</div><div>{private_pill}</div></div>',
        unsafe_allow_html=True,
    )

with c5:
    if raw_data.get("verified"):
        verified_pill = '<span class="status-pill status-ok">Verified</span>'
    else:
        verified_pill = '<span class="status-pill status-bad">Not Verified</span>'
    st.markdown(
        f'<div class="kpi"><div class="small-muted">Verification</div><div>{verified_pill}</div></div>',
        unsafe_allow_html=True,
    )

# Prepare features
X = to_numeric_features(raw_data)
if X is None:
    st.error("Insufficient numeric data to run the classifier (missing posts/followers/following).")
    st.stop()

with st.spinner("Running prediction..."):
    out = predict_with_model(X)

pred = out["pred"]
proba_real = out.get("proba_real")
proba_fake = out.get("proba_fake")

# ---------- Verdict ----------
# All three counts are known to be present here (X would be None otherwise),
# so only the model's label decides the verdict. Label 0 is treated as Fake.
st.subheader("Verdict")
if pred == 0:
    st.error("The account is **likely to be Fake**.")
else:
    st.success("The account is **likely to be Real**.")

# ---------- Confidence ----------
# Shown only when the model reported at least one class probability; a
# missing probability is displayed as 0.0%.
if (proba_real is not None) or (proba_fake is not None):
    st.write("**Confidence**")
    cc1, cc2 = st.columns(2)
    with cc1:
        st.metric("Probability: Real", f"{(proba_real or 0)*100:0.1f}%")
    with cc2:
        st.metric("Probability: Fake", f"{(proba_fake or 0)*100:0.1f}%")

# ---------- Raw data (expandable) ----------
with st.expander("See fetched features"):
    st.json(raw_data)

st.caption("⚠️ This tool provides an indicative score. Use responsibly and verify via additional checks.")