import os
import requests
from huggingface_hub import hf_hub_download
from langchain.llms.base import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping
# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
    api_url: str = ""
    local_llm: Any = None

    @property
    def _llm_type(self) -> str:
        return "hybrid_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try the Colab API first
        if self.api_url:
            try:
                print(f"Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30,  # 30s timeout
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"API error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"API connection failed: {e}")

        # 2. Fall back to the local GGUF model
        if self.local_llm:
            print("Using local GGUF fallback...")
            # llama-cpp-python accepts a formatted or raw prompt string;
            # we pass the prompt through directly.
            output = self.local_llm(
                prompt,
                max_tokens=512,
                stop=["<|im_end|>", "User:", "Input:"],
                echo=False,
            )
            return output["choices"][0]["text"]

        return "Error: No working LLM available (API failed and no local model)."

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"api_url": self.api_url}


class LLMClient:
    def __init__(self, vector_store=None):
        """
        Initialize the hybrid LLM client.
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")  # Read from environment variable
        self.local_llm = None

        # Eagerly load the local GGUF model as a backup (or as the primary model
        # when no API URL is set). With 16 GB of RAM a 2B model loads comfortably.
        try:
            print("Loading local Qwen3-VL-2B-Thinking (GGUF)...")
            from llama_cpp import Llama
            # User-specified GGUF repo; adjust repo_id/filename if the weights are
            # published elsewhere (a known-working Qwen2.5-VL GGUF could be
            # substituted if the Qwen3 repo is unavailable).
            repo_id = "Qwen/Qwen3-VL-2B-Thinking-GGUF"
            filename = "Qwen3VL-2B-Thinking-Q4_K_M.gguf"
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
            )
            self.local_llm = Llama(
                model_path=model_path,
                n_ctx=2048,
                n_threads=2,  # Use 2 vCPUs
                verbose=False,
            )
            print("Local GGUF model ready!")
        except Exception as e:
            print(f"Could not load local GGUF: {e}")

        # Create the hybrid LangChain wrapper
        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)
    def analyze(self, text, context_chunks=None):
        """
        Analyze text using a LangChain RetrievalQA chain.
        """
        if not self.vector_store:
            return "Error: Vector store not initialized."

        # Custom prompt template
        template = """<|im_start|>system
You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
Respond in the following format:
LABEL: [PHISHING or BENIGN]
EXPLANATION: [A brief Vietnamese explanation]
Context:
{context}
<|im_end|>
<|im_start|>user
Input:
{question}
Short Analysis:
<|im_end|>
<|im_start|>assistant
"""
        PROMPT = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )
        # Create the QA chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT},
        )

        try:
            print("Generating response...")
            response = qa_chain.invoke({"query": text})
            return response["result"]
        except Exception as e:
            return f"Error: {str(e)}"