PhishingTest

Paused

App Files Files Community

PhishingTest / llm_client.py

dungeon29

Update llm_client.py

a3facd6 verified 12 days ago

raw

history blame

8.56 kB

	import os
	import requests
	import subprocess
	import tarfile
	import stat
	import time
	import atexit
	from huggingface_hub import hf_hub_download
	from langchain_core.language_models import LLM
	from langchain.chains import RetrievalQA
	from langchain_core.prompts import PromptTemplate
	from typing import Any, List, Optional, Mapping

	# --- Helper to Setup llama-server ---
	def setup_llama_binaries():
	"""
	Download and extract llama-server binary and libs from official releases
	"""
	# Latest release URL for Linux x64 (b4991 equivalent or newer)
	CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
	LOCAL_TAR = "llama-cli.tar.gz"
	BIN_DIR = "./llama_bin"
	SERVER_BIN = os.path.join(BIN_DIR, "bin/llama-server") # Look for server binary

	if os.path.exists(SERVER_BIN):
	return SERVER_BIN, BIN_DIR

	try:
	print("⬇️ Downloading llama.cpp binaries...")
	response = requests.get(CLI_URL, stream=True)
	if response.status_code == 200:
	with open(LOCAL_TAR, 'wb') as f:
	for chunk in response.iter_content(chunk_size=8192):
	f.write(chunk)

	print("📦 Extracting binaries...")
	os.makedirs(BIN_DIR, exist_ok=True)

	with tarfile.open(LOCAL_TAR, "r:gz") as tar:
	tar.extractall(path=BIN_DIR)

	# Locate llama-server
	found_bin = None
	for root, dirs, files in os.walk(BIN_DIR):
	if "llama-server" in files:
	found_bin = os.path.join(root, "llama-server")
	break

	if not found_bin:
	print("❌ Could not find llama-server in extracted files.")
	return None, None

	# Make executable
	st = os.stat(found_bin)
	os.chmod(found_bin, st.st_mode \| stat.S_IEXEC)
	print(f"✅ llama-server binary ready at {found_bin}!")
	return found_bin, BIN_DIR
	else:
	print(f"❌ Failed to download binaries: {response.status_code}")
	return None, None
	except Exception as e:
	print(f"❌ Error setting up llama-server: {e}")
	return None, None

	# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
	class HybridLLM(LLM):
	api_url: str = ""
	local_server_url: str = "http://localhost:8080"

	@property
	def _llm_type(self) -> str:
	return "hybrid_llm"

	def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
	# 1. Try Colab API first
	if self.api_url:
	try:
	print(f"🌐 Calling Colab API: {self.api_url}")
	response = requests.post(
	f"{self.api_url}/generate",
	json={"prompt": prompt, "max_tokens": 512},
	timeout=30
	)
	if response.status_code == 200:
	return response.json()["response"]
	else:
	print(f"⚠️ API Error {response.status_code}: {response.text}")
	except Exception as e:
	print(f"⚠️ API Connection Failed: {e}")

	# 2. Fallback to Local Server
	print("💻 Using Local llama-server Fallback...")
	try:
	# OpenAI-compatible completion endpoint
	payload = {
	"prompt": prompt,
	"n_predict": 256,
	"temperature": 0.3,
	"stop": stop or []
	}
	response = requests.post(
	f"{self.local_server_url}/completion",
	json=payload,
	timeout=120
	)
	if response.status_code == 200:
	return response.json()["content"]
	else:
	return f"❌ Local Server Error: {response.text}"
	except Exception as e:
	return f"❌ Local Inference Failed: {e}"

	return "❌ Error: No working LLM available."

	@property
	def _identifying_params(self) -> Mapping[str, Any]:
	return {"api_url": self.api_url, "local_server_url": self.local_server_url}

	class LLMClient:
	def __init__(self, vector_store=None):
	"""
	Initialize Hybrid LLM Client with Persistent Server
	"""
	self.vector_store = vector_store
	self.api_url = os.environ.get("COLAB_API_URL", "")
	self.server_process = None
	self.server_port = 8080

	# Setup Local Fallback
	try:
	# 1. Setup Binary
	self.server_bin, self.lib_path = setup_llama_binaries()

	# 2. Download Model (Qwen3-0.6B)
	print("📂 Loading Local Qwen3-0.6B (GGUF)...")
	model_repo = "Qwen/Qwen3-0.6B-GGUF"
	filename = "Qwen3-0.6B-Q8_0.gguf"

	self.model_path = hf_hub_download(
	repo_id=model_repo,
	filename=filename
	)
	print(f"✅ Model downloaded to: {self.model_path}")

	# 3. Start Server
	self.start_local_server()

	except Exception as e:
	print(f"⚠️ Could not setup local fallback: {e}")

	# Create Hybrid LangChain Wrapper
	self.llm = HybridLLM(
	api_url=self.api_url,
	local_server_url=f"http://localhost:{self.server_port}"
	)

	def start_local_server(self):
	"""Start llama-server in background"""
	if not self.server_bin or not self.model_path:
	return

	print("🚀 Starting llama-server...")

	# Setup Env
	env = os.environ.copy()
	lib_paths = [os.path.dirname(self.server_bin)]
	lib_subdir = os.path.join(self.lib_path, "lib")
	if os.path.exists(lib_subdir):
	lib_paths.append(lib_subdir)
	env["LD_LIBRARY_PATH"] = ":".join(lib_paths) + ":" + env.get("LD_LIBRARY_PATH", "")

	cmd = [
	self.server_bin,
	"-m", self.model_path,
	"--port", str(self.server_port),
	"-c", "2048",
	"--host", "0.0.0.0" # Bind to all interfaces for container
	]

	# Launch process
	self.server_process = subprocess.Popen(
	cmd,
	stdout=subprocess.DEVNULL, # Suppress noisy logs
	stderr=subprocess.DEVNULL,
	env=env
	)

	# Register cleanup
	atexit.register(self.stop_server)

	# Wait for server to be ready
	print("⏳ Waiting for server to be ready...")
	for _ in range(20): # Wait up to 20s
	try:
	requests.get(f"http://localhost:{self.server_port}/health", timeout=1)
	print("✅ llama-server is ready!")
	return
	except:
	time.sleep(1)

	print("⚠️ Server start timed out (but might still be loading).")

	def stop_server(self):
	"""Kill the server process"""
	if self.server_process:
	print("🛑 Stopping llama-server...")
	self.server_process.terminate()
	self.server_process = None

	def analyze(self, text, context_chunks=None):
	"""
	Analyze text using LangChain RetrievalQA
	"""
	if not self.vector_store:
	return "❌ Vector Store not initialized."

	# Custom Prompt Template
	template = """<\|im_start\|>system
	You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
	Respond in the following format:
	LABEL: [PHISHING or BENIGN]
	EXPLANATION: [A brief Vietnamese explanation]

	Context:
	{context}
	<\|im_end\|>
	<\|im_start\|>user
	Input:
	{question}

	Short Analysis:
	<\|im_end\|>
	<\|im_start\|>assistant
	"""

	PROMPT = PromptTemplate(
	template=template,
	input_variables=["context", "question"]
	)

	# Create QA Chain
	qa_chain = RetrievalQA.from_chain_type(
	llm=self.llm,
	chain_type="stuff",
	retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
	chain_type_kwargs={"prompt": PROMPT}
	)

	try:
	print("🤖 Generating response...")
	response = qa_chain.invoke(text)
	return response['result']
	except Exception as e:
	return f"❌ Error: {str(e)}"