Spaces:
Sleeping
Sleeping
import logging
import os
import re
from typing import Optional

import requests
import torch
from fastapi import FastAPI
from huggingface_hub import hf_hub_download
from pydantic import BaseModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer
| logger = logging.getLogger("app") | |
| logging.basicConfig(level=logging.INFO) | |
# ===========================
# CONFIG
# ===========================
HF_MODEL_REPO = "gaidasalsaa/model-indobertweet-terbaru"   # fine-tuned weights repo
BASE_MODEL = "indolem/indobertweet-base-uncased"           # architecture + tokenizer
PT_FILE = "model_indobertweet.pth"                         # state_dict filename inside the repo

# SECURITY FIX: the X/Twitter bearer token was previously hard-coded here.
# A credential committed to source must be treated as leaked — revoke it and
# provide a fresh token via the TWITTER_BEARER_TOKEN environment variable.
BEARER_TOKEN = os.environ.get("TWITTER_BEARER_TOKEN", "")

# ===========================
# GLOBAL MODEL
# ===========================
# Both are populated lazily by load_model_once(); None means "not loaded yet".
tokenizer = None
model = None
| # =========================== | |
| # TEXT CLEANING | |
| # =========================== | |
def clean_text(t):
    """Normalise raw tweet text for the classifier.

    Lowercases, strips URLs and @mentions, unwraps hashtags to their bare
    word, and trims surrounding whitespace. Interior whitespace left by the
    removals is preserved.
    """
    lowered = t.lower()
    without_urls = re.sub(r"http\S+|www\.\S+", "", lowered)
    without_mentions = re.sub(r"@\w+", "", without_urls)
    unwrapped_hashtags = re.sub(r"#(\w+)", r"\1", without_mentions)
    return unwrapped_hashtags.strip()
| # =========================== | |
| # LOAD MODEL | |
| # =========================== | |
def load_model_once():
    """Initialise the module-level tokenizer and model exactly once.

    Idempotent: returns immediately if both globals are already set.
    Downloads the fine-tuned state_dict from the Hub and loads it into a
    freshly constructed 2-label IndoBERTweet classifier in eval mode.
    """
    global tokenizer, model

    # Already initialised — nothing to do.
    if not (tokenizer is None or model is None):
        logger.info("Model already loaded.")
        return

    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

    logger.info("Downloading model weights...")
    weights_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=PT_FILE)

    logger.info("Loading IndoBERTweet architecture...")
    model = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL, num_labels=2)

    logger.info("Loading state_dict...")
    # map_location="cpu" keeps loading device-independent.
    model.load_state_dict(torch.load(weights_path, map_location="cpu"))
    model.eval()

    logger.info("MODEL READY")
| # =========================== | |
| # FASTAPI | |
| # =========================== | |
app = FastAPI(title="Stress Detection API")

# BUG FIX: startup_event was defined but never registered with FastAPI, so
# load_model_once() was never called and the model stayed unloaded. Register
# it as a startup hook so the model is ready before the first request.
@app.on_event("startup")
def startup_event():
    """Load the tokenizer and model once when the server starts."""
    load_model_once()
class StressResponse(BaseModel):
    """Uniform API response envelope: a status message plus optional payload."""
    # Short human-readable status, e.g. "Analysis complete" or a failure reason.
    message: str
    # Result payload on success (username, total_tweets, stress_level,
    # keywords, stress_status); None when the analysis could not be performed.
    data: Optional[dict] = None
| # =========================== | |
| # TWITTER API | |
| # =========================== | |
def get_user_id(username):
    """Resolve an X/Twitter username to its numeric user id.

    Returns (user_id, raw_json) on success, or (None, payload) where payload
    is either the API's error JSON or a local {"error": ...} dict when the
    request itself failed.
    """
    url = f"https://api.x.com/2/users/by/username/{username}"
    headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
    try:
        r = requests.get(url, headers=headers, timeout=10)
        # Parse once instead of the original's three r.json() calls.
        payload = r.json()
    # FIX: the original bare `except:` also swallowed KeyboardInterrupt and
    # SystemExit; catch only request/decoding failures.
    except requests.RequestException:
        return None, {"error": "Request failed"}
    except ValueError:  # response body was not valid JSON
        return None, {"error": "Request failed"}
    if r.status_code != 200:
        return None, payload
    try:
        return payload["data"]["id"], payload
    except (KeyError, TypeError):
        # 200 response with an unexpected shape (e.g. error object, no "data").
        return None, payload
def fetch_tweets(user_id, limit=25):
    """Fetch up to `limit` recent tweets for a user id.

    Returns (list_of_tweet_texts, raw_json) on success — the list may be
    empty if the user has no tweets — or (None, payload) on failure, where
    payload is the API's error JSON or a local {"error": ...} dict.
    Note: the X API v2 requires max_results in [5, 100].
    """
    url = f"https://api.x.com/2/users/{user_id}/tweets"
    params = {"max_results": limit, "tweet.fields": "id,text,created_at"}
    headers = {"Authorization": f"Bearer {BEARER_TOKEN}"}
    try:
        r = requests.get(url, headers=headers, params=params, timeout=10)
        # Parse once instead of the original's three r.json() calls.
        payload = r.json()
    # FIX: replaced bare `except:` (which also caught KeyboardInterrupt /
    # SystemExit) with the specific failure modes.
    except requests.RequestException:
        return None, {"error": "Request failed"}
    except ValueError:  # response body was not valid JSON
        return None, {"error": "Request failed"}
    if r.status_code != 200:
        return None, payload
    data = payload.get("data", [])
    return [t["text"] for t in data], payload
| # =========================== | |
| # KEYWORDS | |
| # =========================== | |
# Stress-indicative Indonesian keywords. Hoisted to a module-level frozenset
# so the collection is built once (the original rebuilt a 47-item list on
# every call) and membership checks stay O(1).
_STRESS_WORDS = frozenset({
    "gelisah", "cemas", "tidur", "takut", "hati", "resah", "sampe", "tenang",
    "suka", "mulu", "sedih", "ngerasa", "gimana", "gatau", "perasaan",
    "nangis", "deg", "khawatir", "pikiran", "harap", "gabisa", "bener",
    "pengen", "sakit", "susah", "bangun", "biar", "jam", "kaya", "bingung",
    "mikir", "tuhan", "mikirin", "bawaannya", "marah", "tbtb", "anjir",
    "cape", "panik", "enak", "kali", "pusing", "semoga", "kadang", "langsung",
    "kemarin", "tugas", "males",
})

def extract_keywords(tweets):
    """Return the stress keywords that appear in any of the given tweets.

    Matching is case-insensitive *substring* matching (as in the original),
    so e.g. "jam" also matches inside longer words. Returns a list of the
    distinct keywords found; order is unspecified (set-derived).
    """
    found = set()
    for t in tweets:
        lower = t.lower()
        for w in _STRESS_WORDS:
            if w in lower:
                found.add(w)
    return list(found)
| # =========================== | |
| # INFERENCE | |
| # =========================== | |
def predict_stress(text):
    """Classify one tweet with the loaded model.

    Returns (label, p_stress): the argmax class index (0 or 1) and the
    softmax probability of class 1. Requires load_model_once() to have
    populated the module-level tokenizer and model.
    """
    cleaned = clean_text(text)
    encoded = tokenizer(
        cleaned,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )
    with torch.no_grad():
        logits = model(**encoded).logits
    # Softmax over the two classes for the single (batch-size-1) example.
    distribution = torch.softmax(logits, dim=1)[0]
    predicted = int(distribution.argmax().item())
    return predicted, float(distribution[1])
| # =========================== | |
| # ROUTE | |
| # =========================== | |
# BUG FIX: this function is commented as the "ROUTE" but was never registered
# with the FastAPI app, so it was unreachable over HTTP. Register it as a GET
# endpoint. NOTE(review): the path "/analyze/{username}" is a reasonable
# choice inferred from the signature — confirm against the intended API spec.
@app.get("/analyze/{username}", response_model=StressResponse)
def analyze(username: str):
    """Analyse a user's recent tweets and return a stress summary.

    Fetches the profile and timeline, classifies each tweet, and reports the
    percentage of tweets labelled as stress plus a 4-level status and the
    stress keywords found.
    """
    user_id, _ = get_user_id(username)
    if user_id is None:
        return StressResponse(message="Failed to fetch profile", data=None)

    tweets, _ = fetch_tweets(user_id)
    if not tweets:
        return StressResponse(message="No tweets available", data=None)

    # Fraction of tweets classified as stress (label 1), as a percentage.
    # `tweets` is non-empty here, so the division is safe.
    labels = [predict_stress(t)[0] for t in tweets]
    stress_percentage = round(sum(labels) / len(labels) * 100, 2)

    # 4-level status: 0 (<=25%), 1 (<=50%), 2 (<=75%), 3 (>75%).
    if stress_percentage <= 25:
        status = 0
    elif stress_percentage <= 50:
        status = 1
    elif stress_percentage <= 75:
        status = 2
    else:
        status = 3

    keywords = extract_keywords(tweets)
    return StressResponse(
        message="Analysis complete",
        data={
            "username": username,
            "total_tweets": len(tweets),
            "stress_level": stress_percentage,
            "keywords": keywords,
            "stress_status": status,
        },
    )