PhishingTest

Paused

App Files Community

dungeon29 committed 11 days ago

Commit

c800b50

verified ·

1 Parent(s): 6c05eaf

Using GGUF model

Browse files

Files changed (1) hide show

llm_client.py +103 -55

llm_client.py CHANGED Viewed

@@ -1,96 +1,144 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from langchain_huggingface import HuggingFacePipeline
 from langchain.chains import RetrievalQA
 from langchain_core.prompts import PromptTemplate
-import torch
-class LLMClient:
-    def __init__(self, vector_store=None):
-        """
-        Initialize Qwen2.5-3B-Instruct with LangChain
-        """
-        print("🔷 Loading Qwen2.5-3B-Instruct (LangChain)...")
-        model_name = "Qwen/Qwen2.5-1.5B-Instruct"
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.bfloat16,
-            device_map="cpu",
-            low_cpu_mem_usage=True,
-            trust_remote_code=True
-        )
-        # Create HF Pipeline
-        pipe = pipeline(
-            "text-generation",
-            model=self.model,
-            tokenizer=self.tokenizer,
-            max_new_tokens=256,
-            temperature=0.3,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            do_sample=True,
-            return_full_text=False
-        )
-        self.llm = HuggingFacePipeline(pipeline=pipe)
         self.vector_store = vector_store
-        print("✅ LLM Client Ready!")
     def analyze(self, text, context_chunks=None):
         """
-        Analyze text using LangChain RetrievalQA with MMR
         """
         if not self.vector_store:
             return "❌ Vector Store not initialized."
         # Custom Prompt Template
-        template = """You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe). Respond in the following format:
 LABEL: [PHISHING or BENIGN]
-PREDICTION: [Probability of LABEL]
-EXPLANATION: [A brief Vietnamese explanation of a few sentences for reason]
 Context:
 {context}
 Input:
 {question}
-Short Analysis:"""
         PROMPT = PromptTemplate(
             template=template,
             input_variables=["context", "question"]
         )
-        retriever_config = {
-            "k": 3,
-            "fetch_k": 10,
-            "lambda_mult": 0.6
-        }
         # Create QA Chain
         qa_chain = RetrievalQA.from_chain_type(
             llm=self.llm,
             chain_type="stuff",
-            retriever=self.vector_store.as_retriever(
-                search_type="mmr",
-                search_kwargs=retriever_config
-            ),
             chain_type_kwargs={"prompt": PROMPT}
         )
         try:
-            print("🤖 Generating response (LangChain + MMR)...")
             response = qa_chain.invoke(text)
-            # Explicit Garbage Collection
-            import gc
-            gc.collect()
             return response['result']
         except Exception as e:
-            return f"❌ Error: {str(e)}"

+import os
+import requests
+from huggingface_hub import hf_hub_download
+from langchain.llms.base import LLM
 from langchain.chains import RetrievalQA
 from langchain_core.prompts import PromptTemplate
+from typing import Any, List, Optional, Mapping
+# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
+class HybridLLM(LLM):
+    """LangChain LLM that tries a remote HTTP endpoint first, then a local model.
+
+    ``_call`` POSTs the prompt to ``<api_url>/generate`` when ``api_url`` is
+    set. If that request fails or returns a non-200 status, it falls back to
+    ``local_llm``. If neither produces text, an error string is returned
+    instead of raising.
+    """
+    # Base URL of the remote generation service; an empty string disables it.
+    api_url: str = ""
+    # Local fallback model; invoked as a callable and expected to return a
+    # mapping with output['choices'][0]['text'] (see _call).
+    local_llm: Any = None
+    @property
+    def _llm_type(self) -> str:
+        """Return the type identifier string for this LLM."""
+        return "hybrid_llm"
+    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
+        """Generate a completion for ``prompt``.
+
+        Args:
+            prompt: Fully formatted prompt text.
+            stop: Optional extra stop sequences. They are added to the built-in
+                ones for the local model; the remote request is unchanged.
+            **kwargs: Accepted for interface compatibility; not used.
+
+        Returns:
+            The generated text, or an error message string when neither the
+            API nor the local model is usable.
+        """
+        # 1. Try Colab API first
+        if self.api_url:
+            # Strip trailing slashes so "http://host/" does not become
+            # "http://host//generate".
+            endpoint = f"{self.api_url.rstrip('/')}/generate"
+            try:
+                print(f"🌐 Calling Colab API: {self.api_url}")
+                response = requests.post(
+                    endpoint,
+                    json={"prompt": prompt, "max_tokens": 512},
+                    timeout=30,  # 30s timeout
+                )
+                if response.status_code == 200:
+                    return response.json()["response"]
+                print(f"⚠️ API Error {response.status_code}: {response.text}")
+            except Exception as e:
+                # Deliberately broad: any failure here (network error, invalid
+                # JSON, missing "response" key) must fall through to the local
+                # model rather than abort the call.
+                print(f"⚠️ API Connection Failed: {e}")
+        # 2. Fallback to Local GGUF
+        if self.local_llm:
+            print("💻 Using Local GGUF Fallback...")
+            stop_sequences = ["<|im_end|>", "User:", "Input:"]
+            # Honor caller-supplied stop sequences in addition to the defaults.
+            stop_sequences.extend(s for s in (stop or []) if s not in stop_sequences)
+            # The prompt is passed through as-is; the caller's template already
+            # carries the chat markers.
+            output = self.local_llm(
+                prompt,
+                max_tokens=512,
+                stop=stop_sequences,
+                echo=False
+            )
+            return output['choices'][0]['text']
+        return "❌ Error: No working LLM available (API failed and no local model)."
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Return the parameters that identify this LLM instance."""
+        return {"api_url": self.api_url}
+class LLMClient:
+    """Classify text as PHISHING or BENIGN with a RetrievalQA chain.
+
+    Generation goes through a ``HybridLLM`` built from the ``COLAB_API_URL``
+    environment variable and a local GGUF model loaded with llama-cpp-python.
+    """
+    def __init__(self, vector_store=None):
+        """
+        Initialize Hybrid LLM Client
+
+        Args:
+            vector_store: Object providing ``as_retriever(search_kwargs=...)``.
+                While it is falsy, ``analyze`` returns an error string.
+        """
         self.vector_store = vector_store
+        self.api_url = os.environ.get("COLAB_API_URL", "") # Get from Env Var
+        self.local_llm = None
+        # Always try to load the local GGUF model so a fallback exists when
+        # the API is missing or unreachable. Loading is best-effort: any
+        # failure (llama_cpp not installed, download error, unreadable file)
+        # is printed and the client continues without a local model.
+        try:
+            print("📂 Loading Local Qwen3-VL-2B-Thinking (GGUF)...")
+            from llama_cpp import Llama
+            model_repo = "Qwen/Qwen3-VL-2B-Thinking-GGUF"
+            filename = "Qwen3VL-2B-Thinking-Q4_K_M.gguf"
+            model_path = hf_hub_download(
+                repo_id=model_repo,
+                filename=filename
+            )
+            self.local_llm = Llama(
+                model_path=model_path,
+                n_ctx=2048,
+                n_threads=2, # Use 2 vCPUs
+                verbose=False
+            )
+            print("✅ Local GGUF Model Ready!")
+        except Exception as e:
+            print(f"⚠️ Could not load local GGUF: {e}")
+        # Create Hybrid LangChain Wrapper
+        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)
     def analyze(self, text, context_chunks=None):
         """
+        Analyze text using LangChain RetrievalQA
+
+        Args:
+            text: Input to classify; it is passed to the chain as the query.
+            context_chunks: Unused; kept so existing callers keep working.
+
+        Returns:
+            The chain's ``result`` string, or a string starting with "❌" when
+            the vector store is missing or the chain raises.
         """
         if not self.vector_store:
             return "❌ Vector Store not initialized."
         # Custom Prompt Template
+        template = """<|im_start|>system
+You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
+Respond in the following format:
 LABEL: [PHISHING or BENIGN]
+EXPLANATION: [A brief Vietnamese explanation]
 Context:
 {context}
+<|im_end|>
+<|im_start|>user
 Input:
 {question}
+Short Analysis:
+<|im_end|>
+<|im_start|>assistant
+"""
         PROMPT = PromptTemplate(
             template=template,
             input_variables=["context", "question"]
         )
         # Create QA Chain
         qa_chain = RetrievalQA.from_chain_type(
             llm=self.llm,
             chain_type="stuff",
+            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
             chain_type_kwargs={"prompt": PROMPT}
         )
         try:
+            print("🤖 Generating response...")
             response = qa_chain.invoke(text)
             return response['result']
         except Exception as e:
+            return f"❌ Error: {e}"