# File size: 2,904 Bytes
# 88b3899
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import joblib
import numpy as np
import torch
import os
from transformers import AutoTokenizer, AutoModel
from sklearn.preprocessing import StandardScaler

# Module-wide runtime state. The four registries start empty and are filled by
# initialize_models(), keyed by the lower-cased model name.
_device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
_models, _tokenizers = {}, {}
_classifiers, _scalers = {}, {}

def initialize_models():
    """Pre-load all models, tokenizers, classifiers and scalers at startup.

    For every supported model the HuggingFace tokenizer and encoder are loaded
    with ``from_pretrained`` (the encoder is moved to ``_device`` and put in
    eval mode), then the ``<Name>_xgboost_model.pkl`` classifier and the
    optional ``<Name>_scaler.pkl`` scaler are loaded with joblib. Paths are
    relative to the current working directory. Everything is stored in the
    module-level registries under the lower-cased model name.

    Raises:
        FileNotFoundError: If the classifier file of any model is missing.
            This is checked before any model is loaded so a bad deployment
            fails fast instead of after the heavyweight encoder loads.
    """
    model_configs = {
        'Distilbert': 'distilbert-base-uncased',
        'Roberta': 'roberta-base',
        'Longformer': 'allenai/longformer-base-4096'
    }
    # Feature count used to fit the pass-through scaler; matches the size of
    # the zero vector get_embedding returns on failure.
    embedding_dim = 768

    # Exact file names (case-sensitive). Validate them all up front.
    for name in model_configs:
        clf_path = f"{name}_xgboost_model.pkl"
        if not os.path.exists(clf_path):
            raise FileNotFoundError(f"Missing classifier: {clf_path}")

    for name, path in model_configs.items():
        key = name.lower()
        print(f"Loading {name}...")

        # Load tokenizer and model from HuggingFace
        _tokenizers[key] = AutoTokenizer.from_pretrained(path)
        _models[key] = AutoModel.from_pretrained(path).to(_device).eval()

        # NOTE(security): joblib.load unpickles arbitrary objects; these files
        # must come from a trusted source.
        _classifiers[key] = joblib.load(f"{name}_xgboost_model.pkl")

        scaler_path = f"{name}_scaler.pkl"
        if os.path.exists(scaler_path):
            _scalers[key] = joblib.load(scaler_path)
        else:
            # Fallback must leave embeddings untouched. A StandardScaler fitted
            # on an identity matrix is NOT neutral (it shifts by 1/768 and
            # divides by ~0.036), so use an explicit pass-through scaler.
            print(f"Warning: {scaler_path} not found; {name} embeddings will not be scaled")
            _scalers[key] = StandardScaler(with_mean=False, with_std=False).fit(
                np.zeros((1, embedding_dim))
            )

def get_embedding(text, model_name):
    """Return the scaled, mean-pooled encoder embedding of *text*.

    The text is tokenized (truncated to 512 tokens), run through the encoder
    registered under the lower-cased ``model_name``, averaged over the tokens
    selected by the attention mask, and passed through that model's scaler.

    Any exception (unknown model, tokenizer or encoder failure, ...) is
    printed and swallowed; in that case a zero vector of length 768 is
    returned instead of raising.
    """
    try:
        key = model_name.lower()
        if key not in _models:
            raise ValueError(f"Model {model_name} not initialized")

        tokenizer = _tokenizers[key]
        encoder = _models[key]

        encoded = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )
        encoded = encoded.to(_device)

        with torch.no_grad():
            hidden_states = encoder(**encoded).last_hidden_state
            # Broadcast the mask over the hidden dimension so padded
            # positions contribute nothing to the sum.
            mask = encoded["attention_mask"].unsqueeze(-1)
            token_sum = (hidden_states * mask).sum(1)
            token_count = mask.sum(1)
            mean_pooled = token_sum / token_count

        # squeeze(0) only succeeds when the leading dimension has size 1.
        vector = mean_pooled.cpu().numpy().squeeze(0)
        scaled = _scalers[key].transform(vector.reshape(1, -1))
        return scaled[0]

    except Exception as e:
        print(f"Embedding error: {str(e)}")
        return np.zeros(768)

def get_prediction(text, model_name, threshold=0.5):
    """Classify *text* as jailbreak or benign with the named model.

    Args:
        text: Input forwarded to ``get_embedding``.
        model_name: Model name; matched case-insensitively against the
            loaded classifiers.
        threshold: Positive-class probability above which the text is
            labelled a jailbreak. Defaults to 0.5.

    Returns:
        ``{"prediction": label}`` on success, where label is
        ``"🔒 Jailbreak"`` or ``"✅ Benign"``; ``{"error": message}`` if
        anything fails. Exceptions are printed and never propagated.
    """
    try:
        model_key = model_name.lower()
        if model_key not in _classifiers:
            raise ValueError(f"Classifier for {model_name} not loaded")

        embedding = get_embedding(text, model_name)
        # get_embedding reports failure by returning an all-zero vector.
        # Classifying that placeholder would turn an embedding failure into a
        # confident-looking verdict, so surface it as an error instead.
        if not embedding.any():
            raise RuntimeError(f"Embedding failed for model {model_name}")

        proba = _classifiers[model_key].predict_proba(embedding.reshape(1, -1))[0][1]

        # Labels are written with escapes so the emoji survive any source
        # re-encoding (U+1F512 lock, U+2705 check mark).
        label = "\U0001F512 Jailbreak" if proba > threshold else "\u2705 Benign"
        return {
            "prediction": label,
        }

    except Exception as e:
        print(f"Prediction error: {str(e)}")
        return {"error": str(e)}

# Run on import: populate the tokenizer, encoder, classifier and scaler
# registries before any prediction function is called. All model loading
# therefore happens at import time, and the import fails with
# FileNotFoundError if a classifier file is missing.
initialize_models()