dungeon29 committed
Commit cfcd4e2 · verified · 1 Parent(s): 779a353

Update llm_client.py

Files changed (1)
  1. llm_client.py +97 -36
llm_client.py CHANGED
@@ -1,15 +1,62 @@
 import os
 import requests
+import subprocess
+import tarfile
+import stat
 from huggingface_hub import hf_hub_download
 from langchain.llms.base import LLM
 from langchain.chains import RetrievalQA
 from langchain_core.prompts import PromptTemplate
 from typing import Any, List, Optional, Mapping
 
+# --- Helper to Setup llama-cli ---
+def setup_llama_cli():
+    """
+    Download and extract llama-cli binary from official releases
+    """
+    # Latest release URL for Linux x64 (b4991 equivalent or newer)
+    # Using the one found: b7312
+    CLI_URL = "https://github.com/ggml-org/llama.cpp/releases/download/b7312/llama-b7312-bin-ubuntu-x64.tar.gz"
+    LOCAL_TAR = "llama-cli.tar.gz"
+    CLI_BIN = "./llama-cli"
+
+    if os.path.exists(CLI_BIN):
+        return CLI_BIN
+
+    try:
+        print("⬇️ Downloading llama-cli binary...")
+        response = requests.get(CLI_URL, stream=True)
+        if response.status_code == 200:
+            with open(LOCAL_TAR, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+
+            print("📦 Extracting llama-cli...")
+            with tarfile.open(LOCAL_TAR, "r:gz") as tar:
+                # Find the llama-cli binary inside the tar
+                for member in tar.getmembers():
+                    if member.name.endswith("llama-cli"):
+                        member.name = "llama-cli"  # Extract to current dir as 'llama-cli'
+                        tar.extract(member, path=".")
+                        break
+
+            # Make executable
+            st = os.stat(CLI_BIN)
+            os.chmod(CLI_BIN, st.st_mode | stat.S_IEXEC)
+            print("✅ llama-cli binary ready!")
+            return CLI_BIN
+        else:
+            print(f"❌ Failed to download binary: {response.status_code}")
+            return None
+    except Exception as e:
+        print(f"❌ Error setting up llama-cli: {e}")
+        return None
+
 # --- Custom LangChain LLM Wrapper for Hybrid Approach ---
 class HybridLLM(LLM):
     api_url: str = ""
-    local_llm: Any = None
+    model_path: str = ""
+    cli_path: str = ""
 
     @property
     def _llm_type(self) -> str:
@@ -23,7 +70,7 @@ class HybridLLM(LLM):
             response = requests.post(
                 f"{self.api_url}/generate",
                 json={"prompt": prompt, "max_tokens": 512},
-                timeout=30 # 30s timeout
+                timeout=30
             )
             if response.status_code == 200:
                 return response.json()["response"]
@@ -32,64 +79,78 @@ class HybridLLM(LLM):
         except Exception as e:
             print(f"⚠️ API Connection Failed: {e}")
 
-        # 2. Fallback to Local GGUF
-        if self.local_llm:
-            print("💻 Using Local GGUF Fallback...")
-            # Llama-cpp-python expects prompt in specific format or raw
-            # We'll pass the prompt directly
-            output = self.local_llm(
-                prompt,
-                max_tokens=512,
-                stop=["<|im_end|>", "User:", "Input:"],
-                echo=False
-            )
-            return output['choices'][0]['text']
+        # 2. Fallback to Local llama-cli
+        if self.model_path and self.cli_path and os.path.exists(self.cli_path):
+            print("💻 Using Local llama-cli Fallback...")
+            try:
+                # Construct command
+                cmd = [
+                    self.cli_path,
+                    "-m", self.model_path,
+                    "-p", prompt,
+                    "-n", "512",
+                    "--temp", "0.7",
+                    "--no-display-prompt",  # Don't echo prompt
+                    "-c", "2048"  # Context size
+                ]
+
+                # Run binary
+                result = subprocess.run(
+                    cmd,
+                    capture_output=True,
+                    text=True,
+                    encoding='utf-8',
+                    errors='replace'
+                )
+
+                if result.returncode == 0:
+                    return result.stdout.strip()
+                else:
+                    return f"❌ llama-cli Error: {result.stderr}"
+            except Exception as e:
+                return f"❌ Local Inference Failed: {e}"
 
         return "❌ Error: No working LLM available (API failed and no local model)."
 
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
-        return {"api_url": self.api_url}
+        return {"api_url": self.api_url, "model_path": self.model_path}
 
 class LLMClient:
     def __init__(self, vector_store=None):
         """
-        Initialize Hybrid LLM Client
+        Initialize Hybrid LLM Client with Binary Wrapper
         """
         self.vector_store = vector_store
-        self.api_url = os.environ.get("COLAB_API_URL", "") # Get from Env Var
-        self.local_llm = None
+        self.api_url = os.environ.get("COLAB_API_URL", "")
+        self.model_path = None
+        self.cli_path = None
 
-        # Initialize Local GGUF (always load as backup or if API missing)
+        # Setup Local Fallback
         try:
-            print("📂 Loading Local Qwen3-0.6B (GGUF)...")
-            from llama_cpp import Llama
+            # 1. Setup Binary
+            self.cli_path = setup_llama_cli()
 
-            # User selected Qwen3-0.6B-GGUF
+            # 2. Download Model (Qwen3-0.6B)
+            print("📂 Loading Local Qwen3-0.6B (GGUF)...")
             model_repo = "Qwen/Qwen3-0.6B-GGUF"
             filename = "Qwen3-0.6B-Q8_0.gguf"
 
-            model_path = hf_hub_download(
+            self.model_path = hf_hub_download(
                 repo_id=model_repo,
                 filename=filename
            )
-
-            self.local_llm = Llama(
-                model_path=model_path,
-                n_ctx=2048,
-                n_threads=2, # Use 2 vCPUs
-                verbose=True # Enable verbose to see C++ logs
-            )
-            print("✅ Local GGUF Model Ready!")
+            print(f"✅ Model downloaded to: {self.model_path}")
 
         except Exception as e:
-            import traceback
-            print(f"❌ Detailed Error Traceback:")
-            traceback.print_exc()
-            print(f"⚠️ Could not load local GGUF: {e}")
+            print(f"⚠️ Could not setup local fallback: {e}")
 
         # Create Hybrid LangChain Wrapper
-        self.llm = HybridLLM(api_url=self.api_url, local_llm=self.local_llm)
+        self.llm = HybridLLM(
+            api_url=self.api_url,
+            model_path=self.model_path,
+            cli_path=self.cli_path
+        )
 
     def analyze(self, text, context_chunks=None):
         """