import os
import requests
from huggingface_hub import hf_hub_download
from langchain_core.language_models.llms import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping


# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
    api_url: str = ""
    local_llm: Any = None

    @property
    def _llm_type(self) -> str:
        return "hybrid_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try the Colab API first
        if self.api_url:
            try:
                print(f"🌐 Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30,  # 30s timeout
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"⚠️ API Error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"⚠️ API Connection Failed: {e}")

        # 2. Fall back to the local GGUF model
        if self.local_llm:
            print("💻 Using Local GGUF Fallback...")
            # llama-cpp-python accepts a raw prompt string, so we pass the prompt directly
            output = self.local_llm(
                prompt,
                max_tokens=512,
                stop=["<|im_end|>", "User:", "Input:"],
                echo=False,
            )
            return output["choices"][0]["text"]

        return "❌ Error: No working LLM available (API failed and no local model)."

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"api_url": self.api_url}

class LLMClient:
    def __init__(self, vector_store=None):
        """
        Initialize the Hybrid LLM Client.
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")  # Get from env var
        self.local_llm = None

        # Initialize the local GGUF model (always loaded, as a backup or as the
        # primary model if the API URL is missing). With 16 GB RAM, a 2B model
        # loads comfortably.
        try:
            print("🔄 Loading Local Qwen3-VL-2B-Thinking (GGUF)...")
            from llama_cpp import Llama

            # User-specified repo: Qwen/Qwen3-VL-2B-Thinking-GGUF. If this GGUF is
            # not available, fall back to a known working GGUF repo
            # (e.g. Qwen/Qwen2.5-VL-3B-Thinking-GGUF).
            repo_id = "Qwen/Qwen3-VL-2B-Thinking-GGUF"
            filename = "Qwen3VL-2B-Thinking-Q4_K_M.gguf"
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
            )
            self.local_llm = Llama(
                model_path=model_path,
                n_ctx=2048,
                n_threads=2,  # Use 2 vCPUs
                verbose=False,
            )
            print("✅ Local GGUF Model Ready!")
        except Exception as e:
            print(f"⚠️ Could not load local GGUF: {e}")

        # Create the hybrid LangChain wrapper
        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)
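
    # analyze() retrieves the top-3 most similar chunks from the vector store and asks
    # the hybrid LLM for a LABEL/EXPLANATION verdict. The `context_chunks` argument is
    # accepted but not used here; retrieval is delegated entirely to the RetrievalQA chain.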
    def analyze(self, text, context_chunks=None):
        """
        Analyze text using LangChain RetrievalQA.
        """
        if not self.vector_store:
            return "❌ Vector Store not initialized."

        # Custom prompt template
        template = """<|im_start|>system
You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
Respond in the following format:
LABEL: [PHISHING or BENIGN]
EXPLANATION: [A brief Vietnamese explanation]
Context:
{context}
<|im_end|>
<|im_start|>user
Input:
{question}
Short Analysis:
<|im_end|>
<|im_start|>assistant
"""
        PROMPT = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

        # Create the QA chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT},
        )

        try:
            print("🤖 Generating response...")
            response = qa_chain.invoke({"query": text})  # RetrievalQA's input key is "query"
            return response["result"]
        except Exception as e:
            return f"❌ Error: {str(e)}"
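

# --- Usage sketch (illustrative only) ---
# Minimal smoke test assuming a LangChain-compatible vector store. FAISS and the
# sentence-transformers embedding model below are assumptions for illustration; the
# application may wire up a different store/embedding when constructing LLMClient.
if __name__ == "__main__":
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    store = FAISS.from_texts(
        [
            "Phishing emails often create urgency and ask for credentials via unfamiliar links.",
            "Legitimate notifications rarely ask for passwords or OTP codes by email.",
        ],
        embeddings,
    )

    client = LLMClient(vector_store=store)
    print(client.analyze("Your account is locked! Verify now at http://secure-login.example"))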