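"""
Hybrid LLM client for CyberGuard's phishing-detection RAG pipeline.

Inference is attempted in priority order:
1. Groq API (qwen/qwen3-32b), when GROQ_API_KEY is set.
2. A remote Colab inference API, when COLAB_API_URL is set.
3. A local llama-server running Qwen3-0.6B (GGUF), started in the background
   as a last-resort fallback.
"""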
import os
import requests
import subprocess
import tarfile
import stat
import time
import atexit
from huggingface_hub import hf_hub_download
from langchain_core.language_models import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping
# --- Helper to Setup llama-server ---
def setup_llama_binaries():
    """
    Download and extract the llama-server binary and libs from the official releases.
    """
    # Latest release URL for Linux x64 (b4991 equivalent or newer)
    CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
    LOCAL_TAR = "llama-cli.tar.gz"
    BIN_DIR = "./llama_bin"
    SERVER_BIN = os.path.join(BIN_DIR, "bin/llama-server")  # Look for server binary

    if os.path.exists(SERVER_BIN):
        return SERVER_BIN, BIN_DIR

    try:
        print("⬇️ Downloading llama.cpp binaries...")
        response = requests.get(CLI_URL, stream=True)
        if response.status_code == 200:
            with open(LOCAL_TAR, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

            print("📦 Extracting binaries...")
            os.makedirs(BIN_DIR, exist_ok=True)
            with tarfile.open(LOCAL_TAR, "r:gz") as tar:
                tar.extractall(path=BIN_DIR)

            # Locate llama-server
            found_bin = None
            for root, dirs, files in os.walk(BIN_DIR):
                if "llama-server" in files:
                    found_bin = os.path.join(root, "llama-server")
                    break

            if not found_bin:
                print("❌ Could not find llama-server in extracted files.")
                return None, None

            # Make executable
            st = os.stat(found_bin)
            os.chmod(found_bin, st.st_mode | stat.S_IEXEC)

            print(f"✅ llama-server binary ready at {found_bin}!")
            return found_bin, BIN_DIR
        else:
            print(f"❌ Failed to download binaries: {response.status_code}")
            return None, None
    except Exception as e:
        print(f"❌ Error setting up llama-server: {e}")
        return None, None
# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
    groq_client: Any = None
    groq_model: str = "qwen/qwen3-32b"
    api_url: str = ""
    local_server_url: str = "http://localhost:8080"

    @property
    def _llm_type(self) -> str:
        return "hybrid_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        # 1. Try Groq API (Highest Priority)
        if self.groq_client:
            try:
                print("⚡ Using Groq API...")
                stop_seq = (stop or []) + ["<|im_end|>", "Input:", "Context:"]
                chat_completion = self.groq_client.chat.completions.create(
                    messages=[
                        {"role": "user", "content": prompt}
                    ],
                    model=self.groq_model,
                    temperature=0.3,
                    max_tokens=1024,
                    stop=stop_seq
                )
                return chat_completion.choices[0].message.content
            except Exception as e:
                print(f"⚠️ Groq API Failed: {e}")
                # Continue to next fallback

        # 2. Try Colab API
        if self.api_url:
            try:
                print(f"🌐 Calling Colab API: {self.api_url}")
                response = requests.post(
                    f"{self.api_url}/generate",
                    json={"prompt": prompt, "max_tokens": 512},
                    timeout=30
                )
                if response.status_code == 200:
                    return response.json()["response"]
                else:
                    print(f"⚠️ API Error {response.status_code}: {response.text}")
            except Exception as e:
                print(f"⚠️ API Connection Failed: {e}")

        # 3. Fallback to Local Server
        print("💻 Using Local llama-server Fallback...")
        try:
            # llama-server's native /completion endpoint
            payload = {
                "prompt": prompt,
                "n_predict": 1024,
                "temperature": 0.3,
                "stop": (stop or []) + ["<|im_end|>", "Input:", "Context:"]
            }
            response = requests.post(
                f"{self.local_server_url}/completion",
                json=payload,
                timeout=300
            )
            if response.status_code == 200:
                return response.json()["content"]
            else:
                return f"❌ Local Server Error: {response.text}"
        except Exception as e:
            return f"❌ Local Inference Failed: {e}"

        return "❌ Error: No working LLM available."

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {
            "groq_enabled": self.groq_client is not None,
            "groq_model": self.groq_model,
            "api_url": self.api_url,
            "local_server_url": self.local_server_url
        }
class LLMClient:
    def __init__(self, vector_store=None):
        """
        Initialize the Hybrid LLM Client with a persistent local server.
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")
        self.server_process = None
        self.server_port = 8080
        self.groq_client = None

        # 1. Setup Groq Client
        groq_api_key = os.environ.get("GROQ_API_KEY")
        self.groq_model = "qwen/qwen3-32b"
        if groq_api_key:
            try:
                from groq import Groq
                print(f"⚡ Initializing Native Groq Client ({self.groq_model})...")
                self.groq_client = Groq(api_key=groq_api_key)
                print("✅ Groq Client ready.")
            except Exception as e:
                print(f"⚠️ Groq Init Failed: {e}")

        # 2. Setup Local Fallback (always set up, as requested)
        try:
            # Setup binary
            self.server_bin, self.lib_path = setup_llama_binaries()

            # Download model (Qwen3-0.6B)
            print("📥 Loading Local Qwen3-0.6B (GGUF)...")
            model_repo = "Qwen/Qwen3-0.6B-GGUF"
            filename = "Qwen3-0.6B-Q8_0.gguf"
            self.model_path = hf_hub_download(
                repo_id=model_repo,
                filename=filename
            )
            print(f"✅ Model downloaded to: {self.model_path}")

            # Start server
            self.start_local_server()
        except Exception as e:
            print(f"⚠️ Could not setup local fallback: {e}")

        # Create Hybrid LangChain Wrapper
        self.llm = HybridLLM(
            groq_client=self.groq_client,
            groq_model=self.groq_model,
            api_url=self.api_url,
            local_server_url=f"http://localhost:{self.server_port}"
        )

    def start_local_server(self):
        """Start llama-server in the background."""
        if not self.server_bin or not self.model_path:
            return

        print("🚀 Starting llama-server...")

        # Setup environment so the binary can find its bundled shared libraries
        env = os.environ.copy()
        lib_paths = [os.path.dirname(self.server_bin)]
        lib_subdir = os.path.join(self.lib_path, "lib")
        if os.path.exists(lib_subdir):
            lib_paths.append(lib_subdir)
        env["LD_LIBRARY_PATH"] = ":".join(lib_paths) + ":" + env.get("LD_LIBRARY_PATH", "")

        cmd = [
            self.server_bin,
            "-m", self.model_path,
            "--port", str(self.server_port),
            "-c", "2048",
            "--host", "0.0.0.0"  # Bind to all interfaces for the container
        ]

        # Launch process
        self.server_process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,  # Suppress noisy logs
            stderr=subprocess.DEVNULL,
            env=env
        )

        # Register cleanup
        atexit.register(self.stop_server)

        # Wait for server to be ready
        print("⏳ Waiting for server to be ready...")
        for _ in range(20):  # Wait up to 20s
            try:
                health = requests.get(f"http://localhost:{self.server_port}/health", timeout=1)
                if health.status_code == 200:
                    print("✅ llama-server is ready!")
                    return
            except requests.exceptions.RequestException:
                pass
            time.sleep(1)
        print("⚠️ Server start timed out (but it might still be loading).")

    def stop_server(self):
        """Kill the server process."""
        if self.server_process:
            print("🛑 Stopping llama-server...")
            self.server_process.terminate()
            self.server_process = None
    def analyze(self, text, context_chunks=None):
        """
        Analyze text using a LangChain RetrievalQA chain.
        """
        if not self.vector_store:
            return "❌ Vector Store not initialized."

        # Custom prompt template - strict output format
        template = """<|im_start|>system
You are CyberGuard - an AI specialized in Phishing Detection.
Task: Analyze the provided URL and HTML snippet to classify the website as 'PHISHING' or 'BENIGN'.
Check specifically for BRAND IMPERSONATION (e.g. Facebook, Google, Banks).
Classification Rules:
- PHISHING: Typosquatting URLs (e.g., paypa1.com), hidden login forms, obfuscated javascript, mismatched branding vs URL.
- BENIGN: Legitimate website, clean code, URL matches the content/brand.
RETURN THE RESULT IN THE EXACT FOLLOWING FORMAT (NO PREAMBLE):
CLASSIFICATION: [PHISHING or BENIGN]
CONFIDENCE SCORE: [0-100]%
EXPLANATION: [Write 3-4 concise sentences explaining the main reason]
<|im_end|>
<|im_start|>user
Context from knowledge base:
{context}
Input to analyze:
{question}
<|im_end|>
<|im_start|>assistant
"""

        PROMPT = PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )

        # Create QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(
                search_type="mmr",
                search_kwargs={"k": 3, "fetch_k": 10}
            ),
            chain_type_kwargs={"prompt": PROMPT}
        )

        try:
            print("🤖 Generating response...")
            response = qa_chain.invoke(text)
            return response['result']
        except Exception as e:
            return f"❌ Error: {str(e)}"