PhishingTest

Paused

App Files Files Community

dungeon29 committed 16 days ago

Commit

3c22ddb

verified ·

1 Parent(s): 4479af2

Update llm_client.py

Browse files

Files changed (1) hide show

llm_client.py +104 -75

llm_client.py CHANGED Viewed

@@ -3,72 +3,69 @@ import requests
 import subprocess
 import tarfile
 import stat
 from huggingface_hub import hf_hub_download
 from langchain_core.language_models import LLM
 from langchain.chains import RetrievalQA
 from langchain_core.prompts import PromptTemplate
 from typing import Any, List, Optional, Mapping
-# --- Helper to Setup llama-cli ---
-def setup_llama_cli():
     """
-    Download and extract llama-cli binary and libs from official releases
     """
     # Latest release URL for Linux x64 (b4991 equivalent or newer)
-    # Using the one found: b7312
     CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
     LOCAL_TAR = "llama-cli.tar.gz"
-    BIN_DIR = "./llama_bin" # Extract to a subdirectory
-    CLI_BIN = os.path.join(BIN_DIR, "bin/llama-cli") # Standard structure usually has bin/
-    if os.path.exists(CLI_BIN):
-        return CLI_BIN, BIN_DIR
     try:
-        print("⬇️ Downloading llama-cli binary...")
         response = requests.get(CLI_URL, stream=True)
         if response.status_code == 200:
             with open(LOCAL_TAR, 'wb') as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     f.write(chunk)
-            print("📦 Extracting llama-cli...")
-            # Create dir
             os.makedirs(BIN_DIR, exist_ok=True)
             with tarfile.open(LOCAL_TAR, "r:gz") as tar:
                 tar.extractall(path=BIN_DIR)
-            # Locate the binary (it might be in bin/ or root of tar)
-            # We search for it
             found_bin = None
             for root, dirs, files in os.walk(BIN_DIR):
-                if "llama-cli" in files:
-                    found_bin = os.path.join(root, "llama-cli")
                     break
             if not found_bin:
-                print("❌ Could not find llama-cli in extracted files.")
                 return None, None
             # Make executable
             st = os.stat(found_bin)
             os.chmod(found_bin, st.st_mode | stat.S_IEXEC)
-            print(f"✅ llama-cli binary ready at {found_bin}!")
             return found_bin, BIN_DIR
         else:
-            print(f"❌ Failed to download binary: {response.status_code}")
             return None, None
     except Exception as e:
-        print(f"❌ Error setting up llama-cli: {e}")
         return None, None
 # --- Custom LangChain LLM Wrapper for Hybrid Approach ---
 class HybridLLM(LLM):
     api_url: str = ""
-    model_path: str = ""
-    cli_path: str = ""
-    lib_path: str = "" # Path to folder containing .so files
     @property
     def _llm_type(self) -> str:
@@ -91,70 +88,48 @@ class HybridLLM(LLM):
             except Exception as e:
                 print(f"⚠️ API Connection Failed: {e}")
-        # 2. Fallback to Local llama-cli
-        if self.model_path and self.cli_path and os.path.exists(self.cli_path):
-            print("💻 Using Local llama-cli Fallback...")
-            try:
-                # Construct command
-                cmd = [
-                    self.cli_path,
-                    "-m", self.model_path,
-                    "-p", prompt,
-                    "-n", "512",
-                    "--temp", "0.7",
-                    "--no-display-prompt", # Don't echo prompt
-                    "-c", "2048" # Context size
-                ]
-                # Setup Environment with LD_LIBRARY_PATH
-                env = os.environ.copy()
-                # Add the directory containing the binary (and likely libs) to LD_LIBRARY_PATH
-                # Also check 'lib' subdir if it exists
-                lib_paths = [os.path.dirname(self.cli_path)]
-                lib_subdir = os.path.join(self.lib_path, "lib")
-                if os.path.exists(lib_subdir):
-                    lib_paths.append(lib_subdir)
-                env["LD_LIBRARY_PATH"] = ":".join(lib_paths) + ":" + env.get("LD_LIBRARY_PATH", "")
-                # Run binary
-                result = subprocess.run(
-                    cmd,
-                    capture_output=True,
-                    text=True,
-                    encoding='utf-8',
-                    errors='replace',
-                    env=env
-                )
-                if result.returncode == 0:
-                    return result.stdout.strip()
-                else:
-                    return f"❌ llama-cli Error: {result.stderr}"
-            except Exception as e:
-                return f"❌ Local Inference Failed: {e}"
-        return "❌ Error: No working LLM available (API failed and no local model)."
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {"api_url": self.api_url, "model_path": self.model_path}
 class LLMClient:
     def __init__(self, vector_store=None):
         """
-        Initialize Hybrid LLM Client with Binary Wrapper
         """
         self.vector_store = vector_store
         self.api_url = os.environ.get("COLAB_API_URL", "")
-        self.model_path = None
-        self.cli_path = None
-        self.lib_path = None
         # Setup Local Fallback
         try:
             # 1. Setup Binary
-            self.cli_path, self.lib_path = setup_llama_cli()
             # 2. Download Model (Qwen3-0.6B)
             print("📂 Loading Local Qwen3-0.6B (GGUF)...")
@@ -167,17 +142,71 @@ class LLMClient:
             )
             print(f"✅ Model downloaded to: {self.model_path}")
         except Exception as e:
             print(f"⚠️ Could not setup local fallback: {e}")
         # Create Hybrid LangChain Wrapper
         self.llm = HybridLLM(
-            api_url=self.api_url,
-            model_path=self.model_path,
-            cli_path=self.cli_path,
-            lib_path=self.lib_path
         )
     def analyze(self, text, context_chunks=None):
         """
         Analyze text using LangChain RetrievalQA

 import subprocess
 import tarfile
 import stat
+import time
+import atexit
 from huggingface_hub import hf_hub_download
 from langchain_core.language_models import LLM
 from langchain.chains import RetrievalQA
 from langchain_core.prompts import PromptTemplate
 from typing import Any, List, Optional, Mapping
+# --- Helper to Setup llama-server ---
+def setup_llama_binaries():
     """
+    Download and extract llama-server binary and libs from official releases
     """
     # Latest release URL for Linux x64 (b4991 equivalent or newer)
     CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
     LOCAL_TAR = "llama-cli.tar.gz"
+    BIN_DIR = "./llama_bin"
+    SERVER_BIN = os.path.join(BIN_DIR, "bin/llama-server") # Look for server binary
+    if os.path.exists(SERVER_BIN):
+        return SERVER_BIN, BIN_DIR
     try:
+        print("⬇️ Downloading llama.cpp binaries...")
         response = requests.get(CLI_URL, stream=True)
         if response.status_code == 200:
             with open(LOCAL_TAR, 'wb') as f:
                 for chunk in response.iter_content(chunk_size=8192):
                     f.write(chunk)
+            print("📦 Extracting binaries...")
             os.makedirs(BIN_DIR, exist_ok=True)
             with tarfile.open(LOCAL_TAR, "r:gz") as tar:
                 tar.extractall(path=BIN_DIR)
+            # Locate llama-server
             found_bin = None
             for root, dirs, files in os.walk(BIN_DIR):
+                if "llama-server" in files:
+                    found_bin = os.path.join(root, "llama-server")
                     break
             if not found_bin:
+                print("❌ Could not find llama-server in extracted files.")
                 return None, None
             # Make executable
             st = os.stat(found_bin)
             os.chmod(found_bin, st.st_mode | stat.S_IEXEC)
+            print(f"✅ llama-server binary ready at {found_bin}!")
             return found_bin, BIN_DIR
         else:
+            print(f"❌ Failed to download binaries: {response.status_code}")
             return None, None
     except Exception as e:
+        print(f"❌ Error setting up llama-server: {e}")
         return None, None
 # --- Custom LangChain LLM Wrapper for Hybrid Approach ---
 class HybridLLM(LLM):
     api_url: str = ""
+    local_server_url: str = "http://localhost:8080"
     @property
     def _llm_type(self) -> str:
             except Exception as e:
                 print(f"⚠️ API Connection Failed: {e}")
+        # 2. Fallback to Local Server
+        print("💻 Using Local llama-server Fallback...")
+        try:
+            # llama-server native completion endpoint (POST /completion)
+            payload = {
+                "prompt": prompt,
+                "n_predict": 512,
+                "temperature": 0.7,
+                "stop": stop or []
+            }
+            response = requests.post(
+                f"{self.local_server_url}/completion",
+                json=payload,
+                timeout=60
+            )
+            if response.status_code == 200:
+                return response.json()["content"]
+            else:
+                return f"❌ Local Server Error: {response.text}"
+        except Exception as e:
+            return f"❌ Local Inference Failed: {e}"
+        return "❌ Error: No working LLM available."
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
+        return {"api_url": self.api_url, "local_server_url": self.local_server_url}
 class LLMClient:
     def __init__(self, vector_store=None):
         """
+        Initialize Hybrid LLM Client with Persistent Server
         """
         self.vector_store = vector_store
         self.api_url = os.environ.get("COLAB_API_URL", "")
+        self.server_process = None
+        self.server_port = 8080
         # Setup Local Fallback
         try:
             # 1. Setup Binary
+            self.server_bin, self.lib_path = setup_llama_binaries()
             # 2. Download Model (Qwen3-0.6B)
             print("📂 Loading Local Qwen3-0.6B (GGUF)...")
             )
             print(f"✅ Model downloaded to: {self.model_path}")
+            # 3. Start Server
+            self.start_local_server()
         except Exception as e:
             print(f"⚠️ Could not setup local fallback: {e}")
         # Create Hybrid LangChain Wrapper
         self.llm = HybridLLM(
+            api_url=self.api_url,
+            local_server_url=f"http://localhost:{self.server_port}"
         )
+    def start_local_server(self):
+        """Start llama-server in background"""
+        if not self.server_bin or not self.model_path:
+            return
+        print("🚀 Starting llama-server...")
+        # Setup Env
+        env = os.environ.copy()
+        lib_paths = [os.path.dirname(self.server_bin)]
+        lib_subdir = os.path.join(self.lib_path, "lib")
+        if os.path.exists(lib_subdir):
+            lib_paths.append(lib_subdir)
+        env["LD_LIBRARY_PATH"] = ":".join(lib_paths) + ":" + env.get("LD_LIBRARY_PATH", "")
+        cmd = [
+            self.server_bin,
+            "-m", self.model_path,
+            "--port", str(self.server_port),
+            "-c", "2048",
+            "--host", "0.0.0.0" # Bind to all interfaces for container
+        ]
+        # Launch process
+        self.server_process = subprocess.Popen(
+            cmd,
+            stdout=subprocess.DEVNULL, # Suppress noisy logs
+            stderr=subprocess.DEVNULL,
+            env=env
+        )
+        # Register cleanup
+        atexit.register(self.stop_server)
+        # Wait for server to be ready
+        print("⏳ Waiting for server to be ready...")
+        for _ in range(20): # Wait up to 20s
+            try:
+                requests.get(f"http://localhost:{self.server_port}/health", timeout=1)
+                print("✅ llama-server is ready!")
+                return
+            except:
+                time.sleep(1)
+        print("⚠️ Server start timed out (but might still be loading).")
+    def stop_server(self):
+        """Kill the server process"""
+        if self.server_process:
+            print("🛑 Stopping llama-server...")
+            self.server_process.terminate()
+            self.server_process = None
     def analyze(self, text, context_chunks=None):
         """
         Analyze text using LangChain RetrievalQA