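"""
Hybrid LLM client for RAG-based phishing analysis.

Backend resolution order (handled by HybridLLM):
  1. Groq API (used when GROQ_API_KEY is set)
  2. Remote Colab API (used when COLAB_API_URL is set)
  3. Local llama-server running a Qwen3-0.6B GGUF model

The local fallback downloads prebuilt llama.cpp binaries and the GGUF model,
then serves them through llama-server; analysis runs through LangChain's
RetrievalQA against the provided vector store.
"""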
import os
import requests
import subprocess
import tarfile
import stat
import time
import atexit
from huggingface_hub import hf_hub_download
from langchain_core.language_models import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping

# --- Helper to Setup llama-server ---
def setup_llama_binaries():
    """
    Download and extract llama-server binary and libs from official releases
    """
    # Prebuilt llama.cpp release for Linux x64 (pinned to tag b7312)
    CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
    LOCAL_TAR = "llama-cli.tar.gz"
    BIN_DIR = "./llama_bin" 
    SERVER_BIN = os.path.join(BIN_DIR, "bin/llama-server") # Look for server binary

    if os.path.exists(SERVER_BIN):
        return SERVER_BIN, BIN_DIR

    try:
        print("⬇️ Downloading llama.cpp binaries...")
        response = requests.get(CLI_URL, stream=True)
        if response.status_code == 200:
            with open(LOCAL_TAR, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            
            print("πŸ“¦ Extracting binaries...")
            os.makedirs(BIN_DIR, exist_ok=True)
            
            with tarfile.open(LOCAL_TAR, "r:gz") as tar:
                tar.extractall(path=BIN_DIR)
            
            # Locate llama-server
            found_bin = None
            for root, dirs, files in os.walk(BIN_DIR):
                if "llama-server" in files:
                    found_bin = os.path.join(root, "llama-server")
                    break
            
            if not found_bin:
                print("❌ Could not find llama-server in extracted files.")
                return None, None

            # Make executable
            st = os.stat(found_bin)
            os.chmod(found_bin, st.st_mode | stat.S_IEXEC)
            print(f"βœ… llama-server binary ready at {found_bin}!")
            return found_bin, BIN_DIR
        else:
            print(f"❌ Failed to download binaries: {response.status_code}")
            return None, None
    except Exception as e:
        print(f"❌ Error setting up llama-server: {e}")
        return None, None

# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
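    """
    LangChain LLM wrapper that cascades across backends on failure:
    Groq API -> remote Colab API -> local llama-server (/completion endpoint).
    """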
    groq_client: Any = None
    groq_model: str = "qwen/qwen3-32b"
    api_url: str = ""
    local_server_url: str = "http://localhost:8080"
    
    @property
    def _llm_type(self) -> str:
        return "hybrid_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try Groq API (Highest Priority)
        if self.groq_client:
            try:
                print("⚑ Using Groq API...")
                stop_seq = (stop or []) + ["<|im_end|>", "Input:", "Context:"]
                
                chat_completion = self.groq_client.chat.completions.create(
                    messages=[
                        {"role": "user", "content": prompt}
                    ],
                    model=self.groq_model,
                    temperature=0.3,
                    max_tokens=1024,
                    stop=stop_seq
                )
                return chat_completion.choices[0].message.content
            except Exception as e:
                print(f"⚠️ Groq API Failed: {e}")
                # Continue to next fallback

        # 2. Try Colab API
        if self.api_url:
            try:
                print(f"🌐 Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"⚠️ API Error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"⚠️ API Connection Failed: {e}")
        
        # 3. Fallback to Local Server
        print("πŸ’» Using Local llama-server Fallback...")
        try:
            # OpenAI-compatible completion endpoint
            payload = {
                "prompt": prompt,
                "n_predict": 1024,
                "temperature": 0.3,
                "stop": (stop or []) + ["<|im_end|>", "Input:", "Context:"]
            }
            response = requests.post(
                f"{self.local_server_url}/completion",
                json=payload,
                timeout=300
            )
            if response.status_code == 200:
                return response.json()["content"]
            else:
                return f"❌ Local Server Error: {response.text}"
        except Exception as e:
            return f"❌ Local Inference Failed: {e}"
        
        return "❌ Error: No working LLM available."

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {
            "groq_enabled": self.groq_client is not None,
            "groq_model": self.groq_model,
            "api_url": self.api_url, 
            "local_server_url": self.local_server_url
        }

class LLMClient:
    def __init__(self, vector_store=None):
        """
        Initialize Hybrid LLM Client with Persistent Server
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")
        self.server_process = None
        self.server_port = 8080
        self.groq_client = None

        # 1. Setup Groq Client
        groq_api_key = os.environ.get("GROQ_API_KEY")
        self.groq_model = "qwen/qwen3-32b" 

        if groq_api_key:
            try:
                from groq import Groq
                print(f"⚑ Initializing Native Groq Client ({self.groq_model})...")
                self.groq_client = Groq(api_key=groq_api_key)
                print("βœ… Groq Client ready.")
            except Exception as e:
                print(f"⚠️ Groq Init Failed: {e}")
        
        # 2. Setup Local Fallback (Always setup as requested)
        try:
            # Setup Binary
            self.server_bin, self.lib_path = setup_llama_binaries()
            
            # Download Model (Qwen3-0.6B)
            print("οΏ½ Loading Local Qwen3-0.6B (GGUF)...")
            model_repo = "Qwen/Qwen3-0.6B-GGUF" 
            filename = "Qwen3-0.6B-Q8_0.gguf"
            
            self.model_path = hf_hub_download(
                repo_id=model_repo, 
                filename=filename
            )
            print(f"βœ… Model downloaded to: {self.model_path}")
            
            # Start Server
            self.start_local_server()
            
        except Exception as e:
            print(f"⚠️ Could not setup local fallback: {e}")

        # Create Hybrid LangChain Wrapper
        self.llm = HybridLLM(
            groq_client=self.groq_client,
            groq_model=self.groq_model,
            api_url=self.api_url,
            local_server_url=f"http://localhost:{self.server_port}"
        )

    def start_local_server(self):
        """Start llama-server in background"""
        if not self.server_bin or not self.model_path:
            return

        print("πŸš€ Starting llama-server...")
        
        # Setup Env
        env = os.environ.copy()
        lib_paths = [os.path.dirname(self.server_bin)]
        lib_subdir = os.path.join(self.lib_path, "lib")
        if os.path.exists(lib_subdir):
            lib_paths.append(lib_subdir)
        env["LD_LIBRARY_PATH"] = ":".join(lib_paths) + ":" + env.get("LD_LIBRARY_PATH", "")

        cmd = [
            self.server_bin,
            "-m", self.model_path,
            "--port", str(self.server_port),
            "-c", "2048",
            "--host", "0.0.0.0" # Bind to all interfaces for container
        ]
        
        # Launch process
        self.server_process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL, # Suppress noisy logs
            stderr=subprocess.DEVNULL,
            env=env
        )
        
        # Register cleanup
        atexit.register(self.stop_server)
        
        # Wait for server to be ready
        print("⏳ Waiting for server to be ready...")
        for _ in range(20):  # Poll the health endpoint for up to ~20s
            try:
                health = requests.get(f"http://localhost:{self.server_port}/health", timeout=1)
                if health.status_code == 200:
                    print("βœ… llama-server is ready!")
                    return
            except requests.exceptions.RequestException:
                pass
            time.sleep(1)
        
        print("⚠️ Server start timed out (but might still be loading).")

    def stop_server(self):
        """Kill the server process"""
        if self.server_process:
            print("πŸ›‘ Stopping llama-server...")
            self.server_process.terminate()
            self.server_process = None

    def analyze(self, text, context_chunks=None):
        """
        Analyze text using LangChain RetrievalQA
        """
        if not self.vector_store:
            return "❌ Vector Store not initialized."

        # Custom prompt template enforcing a strict output format
        template = """<|im_start|>system
You are CyberGuard - an AI specialized in Phishing Detection.
Task: Analyze the provided URL and HTML snippet to classify the website as 'PHISHING' or 'BENIGN'.
Check specifically for BRAND IMPERSONATION (e.g. Facebook, Google, Banks).
Classification Rules:
- PHISHING: Typosquatting URLs (e.g., paypa1.com), hidden login forms, obfuscated javascript, mismatched branding vs URL.
- BENIGN: Legitimate website, clean code, URL matches the content/brand.

RETURN THE RESULT IN THE EXACT FOLLOWING FORMAT (NO PREAMBLE):

CLASSIFICATION: [PHISHING or BENIGN]
CONFIDENCE SCORE: [0-100]%
EXPLANATION: [Write 3-4 concise sentences explaining the main reason]
<|im_end|>
<|im_start|>user
Context from knowledge base:
{context}

Input to analyze:
{question}
<|im_end|>
<|im_start|>assistant
"""
        
        PROMPT = PromptTemplate(
            template=template, 
            input_variables=["context", "question"]
        )

        # Create QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(
                search_type="mmr",
                search_kwargs={"k": 3, "fetch_k": 10}
            ),
            chain_type_kwargs={"prompt": PROMPT}
        )

        try:
            print("πŸ€– Generating response...")
            response = qa_chain.invoke(text)
            return response['result']
        except Exception as e:
            return f"❌ Error: {str(e)}"