Spaces:
Paused
Paused
File size: 8,563 Bytes
c800b50 f5de035 3c22ddb f5de035 47c8ad8 3c22ddb f5de035 3c22ddb f5de035 3c22ddb 5c74992 3c22ddb cfcd4e2 f5de035 3c22ddb f5de035 3c22ddb f5de035 3c22ddb f5de035 3c22ddb f5de035 3c22ddb f5de035 c800b50 f5de035 3c22ddb f5de035 e0883f0 3c22ddb f5de035 3c22ddb f5de035 47c8ad8 f5de035 3c22ddb f5de035 e0883f0 f5de035 e0883f0 f5de035 3c22ddb a3facd6 3c22ddb f952f21 3c22ddb f5de035 3c22ddb f5de035 3c22ddb f5de035 3c22ddb f5de035 3c22ddb f5de035 3c22ddb e0883f0 f5de035 3c22ddb f5de035 c800b50 f5de035 3c22ddb f5de035 47c8ad8 3c22ddb f5de035 47c8ad8 f5de035 47c8ad8 f5de035 47c8ad8 f5de035 |
|
import os
import requests
import subprocess
import tarfile
import stat
import time
import atexit
from huggingface_hub import hf_hub_download
from langchain_core.language_models import LLM
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from typing import Any, List, Optional, Mapping
# --- Helper to Setup llama-server ---
def _find_llama_server(bin_dir):
    """Return the path of the first ``llama-server`` file under *bin_dir*, or None."""
    for root, _dirs, files in os.walk(bin_dir):
        if "llama-server" in files:
            return os.path.join(root, "llama-server")
    return None


def setup_llama_binaries():
    """
    Download and extract the llama-server binary and libs from an official release.

    A previously extracted binary is reused, wherever it sits inside the
    extraction directory, so the archive is only downloaded once.

    Returns:
        tuple: ``(server_binary_path, bin_dir)`` on success, or ``(None, None)``
        when the download, the extraction, or the binary lookup fails. Errors
        are printed, never raised.
    """
    # NOTE(review): the prefixes of most status messages look like mis-decoded
    # emoji; they are kept as found because the intended glyphs cannot all be
    # recovered with certainty.

    # Pinned llama.cpp release for Linux x64 (build b7312).
    CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
    LOCAL_TAR = "llama-cli.tar.gz"
    BIN_DIR = "./llama_bin"
    SERVER_BIN = os.path.join(BIN_DIR, "bin/llama-server")  # Conventional location

    # Fast path: binary at the conventional location.
    if os.path.exists(SERVER_BIN):
        return SERVER_BIN, BIN_DIR

    # The archive layout is not guaranteed to match SERVER_BIN, so look for an
    # earlier extraction the same way a fresh one is located below.
    cached_bin = _find_llama_server(BIN_DIR)
    if cached_bin:
        return cached_bin, BIN_DIR

    try:
        print("β¬οΈ Downloading llama.cpp binaries...")
        # (connect, read) timeouts keep a stalled connection from hanging
        # start-up forever; the context manager releases the connection.
        with requests.get(CLI_URL, stream=True, timeout=(10, 60)) as response:
            if response.status_code != 200:
                print(f"β Failed to download binaries: {response.status_code}")
                return None, None
            with open(LOCAL_TAR, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print("π¦ Extracting binaries...")
        os.makedirs(BIN_DIR, exist_ok=True)
        with tarfile.open(LOCAL_TAR, "r:gz") as tar:
            if hasattr(tarfile, "data_filter"):
                # Refuse members that would escape BIN_DIR (absolute paths,
                # "..", links pointing outside) on interpreters that support
                # extraction filters.
                tar.extractall(path=BIN_DIR, filter="data")
            else:
                tar.extractall(path=BIN_DIR)

        # Locate llama-server
        found_bin = _find_llama_server(BIN_DIR)
        if not found_bin:
            print("β Could not find llama-server in extracted files.")
            return None, None

        # Make executable
        st = os.stat(found_bin)
        os.chmod(found_bin, st.st_mode | stat.S_IEXEC)
        print(f"✅ llama-server binary ready at {found_bin}!")
        return found_bin, BIN_DIR
    except Exception as e:
        # Best-effort setup: callers treat (None, None) as "no local fallback".
        print(f"β Error setting up llama-server: {e}")
        return None, None
# --- Custom LangChain LLM Wrapper for Hybrid Approach ---
class HybridLLM(LLM):
    """LangChain LLM that tries a remote HTTP API first, then a local llama-server.

    Attributes:
        api_url: Base URL of the remote API; an empty string disables it.
        local_server_url: Base URL of the local llama-server fallback.

    ``_call`` never raises: when the local fallback fails too, the returned
    text is a message describing the failure.
    """

    # NOTE(review): the prefixes of the status messages below look like
    # mis-decoded emoji; they are kept exactly as found.

    api_url: str = ""
    local_server_url: str = "http://localhost:8080"

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "hybrid_llm"

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
        """Generate a completion for *prompt*.

        Args:
            prompt: Full prompt text to send.
            stop: Optional stop sequences. They are forwarded to the local
                server only; the remote request carries just the prompt and a
                token limit.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The generated text, or an error message if the local fallback fails.
        """
        # 1. Try Colab API first
        remote_text = self._call_remote_api(prompt)
        if remote_text is not None:
            return remote_text

        # 2. Fallback to Local Server
        return self._call_local_server(prompt, stop)

    def _call_remote_api(self, prompt: str) -> Optional[str]:
        """Return the remote API's answer, or None if it is disabled or failed."""
        if not self.api_url:
            return None
        # Tolerate a configured URL with a trailing slash ("https://host/").
        endpoint = self.api_url.rstrip("/") + "/generate"
        try:
            print(f"π Calling Colab API: {self.api_url}")
            response = requests.post(
                endpoint,
                json={"prompt": prompt, "max_tokens": 512},
                timeout=30
            )
            if response.status_code == 200:
                return response.json()["response"]
            print(f"β οΈ API Error {response.status_code}: {response.text}")
        except Exception as e:
            # Any failure (network, bad JSON, missing key) triggers the fallback.
            print(f"β οΈ API Connection Failed: {e}")
        return None

    def _call_local_server(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Return the local llama-server's completion, or an error message."""
        print("π» Using Local llama-server Fallback...")
        try:
            # llama-server native completion endpoint (takes "n_predict").
            payload = {
                "prompt": prompt,
                "n_predict": 256,
                "temperature": 0.3,
                "stop": stop or []
            }
            response = requests.post(
                self.local_server_url.rstrip("/") + "/completion",
                json=payload,
                timeout=120
            )
            if response.status_code == 200:
                return response.json()["content"]
            return f"β Local Server Error: {response.text}"
        except Exception as e:
            return f"β Local Inference Failed: {e}"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Parameters that identify this LLM configuration."""
        return {"api_url": self.api_url, "local_server_url": self.local_server_url}
class LLMClient:
    """Phishing-analysis client backed by ``HybridLLM`` and a local llama-server.

    On construction it tries to set up a local fallback (llama.cpp binaries,
    a GGUF model, a background ``llama-server`` process). Any failure there is
    printed and the client is still created.
    """

    # NOTE(review): the prefixes of most status messages look like mis-decoded
    # emoji; they are kept as found because the intended glyphs cannot all be
    # recovered with certainty.

    # ChatML-style prompt filled by the retrieval chain in ``analyze``.
    _ANALYSIS_TEMPLATE = """<|im_start|>system
You are a cybersecurity expert. Task: Determine whether the input is 'PHISHING' or 'BENIGN' (Safe).
Respond in the following format:
LABEL: [PHISHING or BENIGN]
EXPLANATION: [A brief Vietnamese explanation]
Context:
{context}
<|im_end|>
<|im_start|>user
Input:
{question}
Short Analysis:
<|im_end|>
<|im_start|>assistant
"""

    def __init__(self, vector_store=None):
        """
        Initialize Hybrid LLM Client with Persistent Server

        Args:
            vector_store: Store used by ``analyze`` for retrieval; it must
                provide ``as_retriever``. ``analyze`` returns an error message
                when it is missing.
        """
        self.vector_store = vector_store
        self.api_url = os.environ.get("COLAB_API_URL", "")
        self.server_process = None
        self.server_port = 8080
        # Defined up front so a failed setup leaves usable attributes behind
        # instead of an AttributeError in start_local_server().
        self.server_bin = None
        self.lib_path = None
        self.model_path = None

        # Setup Local Fallback
        try:
            # 1. Setup Binary
            self.server_bin, self.lib_path = setup_llama_binaries()

            # 2. Download Model (Qwen3-0.6B)
            print("π Loading Local Qwen3-0.6B (GGUF)...")
            model_repo = "Qwen/Qwen3-0.6B-GGUF"
            filename = "Qwen3-0.6B-Q8_0.gguf"
            self.model_path = hf_hub_download(
                repo_id=model_repo,
                filename=filename
            )
            print(f"✅ Model downloaded to: {self.model_path}")

            # 3. Start Server
            self.start_local_server()
        except Exception as e:
            # Best effort: the remote API may still work without the fallback.
            print(f"β οΈ Could not setup local fallback: {e}")

        # Create Hybrid LangChain Wrapper
        self.llm = HybridLLM(
            api_url=self.api_url,
            local_server_url=f"http://localhost:{self.server_port}"
        )

    def start_local_server(self):
        """Start llama-server in background and wait up to ~20s for it to be ready.

        Does nothing when the binary or the model is unavailable, or when a
        server started by this client is still running.
        """
        if not self.server_bin or not self.model_path:
            return
        if self.server_process is not None and self.server_process.poll() is None:
            # Already running; a second instance would fight over the port.
            return

        print("π Starting llama-server...")

        # Setup Env: let the loader find the shared libraries shipped next to
        # the binary (and in <bin_dir>/lib when present).
        env = os.environ.copy()
        lib_paths = [os.path.dirname(self.server_bin)]
        lib_subdir = os.path.join(self.lib_path, "lib")
        if os.path.exists(lib_subdir):
            lib_paths.append(lib_subdir)
        inherited = env.get("LD_LIBRARY_PATH", "")
        if inherited:
            # Only append a non-empty value: a trailing ":" adds an empty
            # entry, which the loader treats as the current directory.
            lib_paths.append(inherited)
        env["LD_LIBRARY_PATH"] = ":".join(lib_paths)

        cmd = [
            self.server_bin,
            "-m", self.model_path,
            "--port", str(self.server_port),
            "-c", "2048",
            # NOTE(review): binds to all interfaces (for container use) even
            # though this client only connects through localhost - confirm the
            # port is not reachable from untrusted networks.
            "--host", "0.0.0.0"
        ]

        # Launch process
        self.server_process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,  # Suppress noisy logs
            stderr=subprocess.DEVNULL,
            env=env
        )

        # Register cleanup
        atexit.register(self.stop_server)

        # Wait for server to be ready
        print("β³ Waiting for server to be ready...")
        health_url = f"http://localhost:{self.server_port}/health"
        for _ in range(20):  # Wait up to 20s
            exit_code = self.server_process.poll()
            if exit_code is not None:
                # The process died (bad model, missing library, busy port):
                # waiting any longer cannot help.
                print(f"llama-server exited during startup (exit code {exit_code}).")
                self.server_process = None
                return
            try:
                reply = requests.get(health_url, timeout=1)
            except requests.RequestException:
                reply = None
            # Only HTTP 200 means ready; the server can already answer with an
            # error status while the model is still loading.
            if reply is not None and reply.status_code == 200:
                print("✅ llama-server is ready!")
                return
            time.sleep(1)
        print("β οΈ Server start timed out (but might still be loading).")

    def stop_server(self):
        """Stop the server process, if any, and wait for it to exit."""
        proc = self.server_process
        if not proc:
            return
        print("π Stopping llama-server...")
        self.server_process = None
        proc.terminate()
        try:
            # Reap the child so it does not linger as a zombie.
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait()

    def analyze(self, text, context_chunks=None):
        """
        Analyze text using LangChain RetrievalQA

        Args:
            text: Input to classify; passed to the chain as the question.
            context_chunks: Unused; context comes from the vector store
                retriever (top 3 matches).

        Returns:
            The chain's ``result`` text, or an error message when the vector
            store is missing or the chain raises.
        """
        if not self.vector_store:
            return "β Vector Store not initialized."

        # Custom Prompt Template
        PROMPT = PromptTemplate(
            template=self._ANALYSIS_TEMPLATE,
            input_variables=["context", "question"]
        )

        # Create QA Chain
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 3}),
            chain_type_kwargs={"prompt": PROMPT}
        )

        try:
            print("π€ Generating response...")
            response = qa_chain.invoke(text)
            return response['result']
        except Exception as e:
            return f"β Error: {str(e)}"
|