PhishingTest

Paused

App Files Files Community

dungeon29 committed 20 days ago

Commit

cfcd4e2

verified ·

1 Parent(s): 779a353

Update llm_client.py

Browse files

Files changed (1) hide show

llm_client.py +97 -36

llm_client.py CHANGED Viewed

@@ -1,15 +1,62 @@
 import os
 import requests
 from huggingface_hub import hf_hub_download
 from langchain.llms.base import LLM
 from langchain.chains import RetrievalQA
 from langchain_core.prompts import PromptTemplate
 from typing import Any, List, Optional, Mapping
 # --- Custom LangChain LLM Wrapper for Hybrid Approach ---
 class HybridLLM(LLM):
     api_url: str = ""
-    local_llm: Any = None
     @property
     def _llm_type(self) -> str:
@@ -23,7 +70,7 @@ class HybridLLM(LLM):
                 response = requests.post(
                     f"{self.api_url}/generate",
                     json={"prompt": prompt, "max_tokens": 512},
-                    timeout=30  # 30s timeout
                 )
                 if response.status_code == 200:
                     return response.json()["response"]
@@ -32,64 +79,78 @@ class HybridLLM(LLM):
             except Exception as e:
                 print(f"⚠️ API Connection Failed: {e}")
-        # 2. Fallback to Local GGUF
-        if self.local_llm:
-            print("💻 Using Local GGUF Fallback...")
-            # Llama-cpp-python expects prompt in specific format or raw
-            # We'll pass the prompt directly
-            output = self.local_llm(
-                prompt,
-                max_tokens=512,
-                stop=["<|im_end|>", "User:", "Input:"],
-                echo=False
-            )
-            return output['choices'][0]['text']
         return "❌ Error: No working LLM available (API failed and no local model)."
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {"api_url": self.api_url}
 class LLMClient:
     def __init__(self, vector_store=None):
         """
-        Initialize Hybrid LLM Client
         """
         self.vector_store = vector_store
-        self.api_url = os.environ.get("COLAB_API_URL", "") # Get from Env Var
-        self.local_llm = None
-        # Initialize Local GGUF (always load as backup or if API missing)
         try:
-            print("📂 Loading Local Qwen3-0.6B (GGUF)...")
-            from llama_cpp import Llama
-            # User selected Qwen3-0.6B-GGUF
             model_repo = "Qwen/Qwen3-0.6B-GGUF"
             filename = "Qwen3-0.6B-Q8_0.gguf"
-            model_path = hf_hub_download(
                 repo_id=model_repo,
                 filename=filename
             )
-            self.local_llm = Llama(
-                model_path=model_path,
-                n_ctx=2048,
-                n_threads=2, # Use 2 vCPUs
-                verbose=True # Enable verbose to see C++ logs
-            )
-            print("✅ Local GGUF Model Ready!")
         except Exception as e:
-            import traceback
-            print(f"❌ Detailed Error Traceback:")
-            traceback.print_exc()
-            print(f"⚠️ Could not load local GGUF: {e}")
         # Create Hybrid LangChain Wrapper
-        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)
     def analyze(self, text, context_chunks=None):
         """

 import os
 import requests
+import subprocess
+import tarfile
+import stat
 from huggingface_hub import hf_hub_download
 from langchain.llms.base import LLM
 from langchain.chains import RetrievalQA
 from langchain_core.prompts import PromptTemplate
 from typing import Any, List, Optional, Mapping
+# --- Helper to Setup llama-cli ---
+def setup_llama_cli():
+    """
+    Download and extract llama-cli binary from official releases
+    """
+    # Latest release URL for Linux x64 (b4991 equivalent or newer)
+    # Using the one found: b7312
+    CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
+    LOCAL_TAR = "llama-cli.tar.gz"
+    CLI_BIN = "./llama-cli"
+    if os.path.exists(CLI_BIN):
+        return CLI_BIN
+    try:
+        print("⬇️ Downloading llama-cli binary...")
+        response = requests.get(CLI_URL, stream=True)
+        if response.status_code == 200:
+            with open(LOCAL_TAR, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+            print("📦 Extracting llama-cli...")
+            with tarfile.open(LOCAL_TAR, "r:gz") as tar:
+                # Find the llama-cli binary inside the tar
+                for member in tar.getmembers():
+                    if member.name.endswith("llama-cli"):
+                        member.name = "llama-cli" # Extract to current dir as 'llama-cli'
+                        tar.extract(member, path=".")
+                        break
+            # Make executable
+            st = os.stat(CLI_BIN)
+            os.chmod(CLI_BIN, st.st_mode | stat.S_IEXEC)
+            print("✅ llama-cli binary ready!")
+            return CLI_BIN
+        else:
+            print(f"❌ Failed to download binary: {response.status_code}")
+            return None
+    except Exception as e:
+        print(f"❌ Error setting up llama-cli: {e}")
+        return None
 # --- Custom LangChain LLM Wrapper for Hybrid Approach ---
 class HybridLLM(LLM):
     api_url: str = ""
+    model_path: str = ""
+    cli_path: str = ""
     @property
     def _llm_type(self) -> str:
                 response = requests.post(
                     f"{self.api_url}/generate",
                     json={"prompt": prompt, "max_tokens": 512},
+                    timeout=30
                 )
                 if response.status_code == 200:
                     return response.json()["response"]
             except Exception as e:
                 print(f"⚠️ API Connection Failed: {e}")
+        # 2. Fallback to Local llama-cli
+        if self.model_path and self.cli_path and os.path.exists(self.cli_path):
+            print("💻 Using Local llama-cli Fallback...")
+            try:
+                # Construct command
+                cmd = [
+                    self.cli_path,
+                    "-m", self.model_path,
+                    "-p", prompt,
+                    "-n", "512",
+                    "--temp", "0.7",
+                    "--no-display-prompt", # Don't echo prompt
+                    "-c", "2048" # Context size
+                ]
+                # Run binary
+                result = subprocess.run(
+                    cmd,
+                    capture_output=True,
+                    text=True,
+                    encoding='utf-8',
+                    errors='replace'
+                )
+                if result.returncode == 0:
+                    return result.stdout.strip()
+                else:
+                    return f"❌ llama-cli Error: {result.stderr}"
+            except Exception as e:
+                return f"❌ Local Inference Failed: {e}"
         return "❌ Error: No working LLM available (API failed and no local model)."
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
+        return {"api_url": self.api_url, "model_path": self.model_path}
 class LLMClient:
     def __init__(self, vector_store=None):
         """
+        Initialize Hybrid LLM Client with Binary Wrapper
         """
         self.vector_store = vector_store
+        self.api_url = os.environ.get("COLAB_API_URL", "")
+        self.model_path = None
+        self.cli_path = None
+        # Setup Local Fallback
         try:
+            # 1. Setup Binary
+            self.cli_path = setup_llama_cli()
+            # 2. Download Model (Qwen3-0.6B)
+            print("📂 Loading Local Qwen3-0.6B (GGUF)...")
             model_repo = "Qwen/Qwen3-0.6B-GGUF"
             filename = "Qwen3-0.6B-Q8_0.gguf"
+            self.model_path = hf_hub_download(
                 repo_id=model_repo,
                 filename=filename
             )
+            print(f"✅ Model downloaded to: {self.model_path}")
         except Exception as e:
+            print(f"⚠️ Could not setup local fallback: {e}")
         # Create Hybrid LangChain Wrapper
+        self.llm = HybridLLM(
+            api_url=self.api_url,
+            model_path=self.model_path,
+            cli_path=self.cli_path
+        )
     def analyze(self, text, context_chunks=None):
         """