import os
import requests
from huggingface_hub import hf_hub_download
from langchain_core.language_models.llms import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping

# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
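    """
    LangChain LLM wrapper that tries a remote Colab-hosted API first and falls
    back to a local llama-cpp GGUF model when the API is unreachable or errors.

    The remote endpoint is expected to accept POST {api_url}/generate with a
    JSON body {"prompt": ..., "max_tokens": ...} and return {"response": ...},
    matching the request issued in `_call` below.
    """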
    api_url: str = ""
    local_llm: Any = None
    
    @property
    def _llm_type(self) -> str:
        return "hybrid_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try Colab API first
        if self.api_url:
            try:
                print(f"🌐 Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30  # 30s timeout
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"⚠️ API Error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"⚠️ API Connection Failed: {e}")
        
        # 2. Fallback to Local GGUF
        if self.local_llm:
            print("πŸ’» Using Local GGUF Fallback...")
            # The prompt arrives as one pre-formatted string, so pass it to
            # llama-cpp-python directly as a raw completion prompt.
            output = self.local_llm(
                prompt, 
                max_tokens=512, 
                stop=["<|im_end|>", "User:", "Input:"], 
                echo=False
            )
            return output['choices'][0]['text']
        
        return "❌ Error: No working LLM available (API failed and no local model)."

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"api_url": self.api_url}

class LLMClient:
    def __init__(self, vector_store=None):
        """
        Initialize Hybrid LLM Client
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "") # Get from Env Var
        self.local_llm = None
        
        # Load the local GGUF model eagerly so it is always available as a
        # fallback (or as the only model when no API URL is set). A 2B model
        # in Q4 quantization fits comfortably in 16 GB of RAM.
        try:
            print("📂 Loading Local Qwen3-VL-2B-Thinking (GGUF)...")
            from llama_cpp import Llama
            
            # Repo and filename for the requested Qwen3-VL-2B-Thinking GGUF
            # (Q4_K_M quantization). Adjust these if the model is published
            # under a different repo or filename on the Hugging Face Hub.
            model_repo = "Qwen/Qwen3-VL-2B-Thinking-GGUF"
            filename = "Qwen3VL-2B-Thinking-Q4_K_M.gguf"
            
            model_path = hf_hub_download(
                repo_id=model_repo, 
                filename=filename
            )
            
            self.local_llm = Llama(
                model_path=model_path,
                n_ctx=2048,
                n_threads=2, # Use 2 vCPUs
                verbose=False
            )
            print("βœ… Local GGUF Model Ready!")
            
        except Exception as e:
            print(f"⚠️ Could not load local GGUF: {e}")

        # Create Hybrid LangChain Wrapper
        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)

    def analyze(self, text, context_chunks=None):
        """
        Analyze text using LangChain RetrievalQA
        """
        if not self.vector_store:
            return "❌ Vector Store not initialized."

        # Custom Prompt Template
        template = """<|im_start|>system
You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe). 
Respond in the following format:
LABEL: [PHISHING or BENIGN]
EXPLANATION: [A brief Vietnamese explanation]

Context:
{context}
<|im_end|>
<|im_start|>user
Input:
{question}

Short Analysis:
<|im_end|>
<|im_start|>assistant
"""
        
        PROMPT = PromptTemplate(
            template=template, 
            input_variables=["context", "question"]
        )

        # Create QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT}
        )

        try:
            print("πŸ€– Generating response...")
            response = qa_chain.invoke({"query": text})
            return response['result']
        except Exception as e:
            return f"❌ Error: {str(e)}"