Bi Yoo committed on
Commit
38bd823
·
1 Parent(s): ddacd95

update model download sequence

Browse files
Files changed (3) hide show
  1. .gitignore +9 -0
  2. app.py +44 -1
  3. requirements.txt +1 -4
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ venv/
9
+ ENV/
app.py CHANGED
@@ -3,9 +3,12 @@ CV Chatbot with RAG (Retrieval-Augmented Generation)
3
  FastAPI backend that uses semantic search to answer questions about your CV
4
  """
5
 
 
6
  import json
7
  import os
8
  import re
 
 
9
  import threading
10
  import time
11
  from typing import List, Dict, Optional, Tuple
@@ -94,6 +97,12 @@ from config import (
94
  SYSTEM_PROMPT
95
  )
96
 
 
 
 
 
 
 
97
  # Initialize FastAPI
98
  app = FastAPI(title="CV Chatbot RAG API")
99
 
@@ -128,6 +137,39 @@ local_model_lock = threading.Lock()
128
  _session_serializer: Optional[URLSafeTimedSerializer] = None
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  def get_session_serializer() -> URLSafeTimedSerializer:
132
  """Lazily initialize the session token serializer."""
133
  global _session_serializer
@@ -356,11 +398,12 @@ def initialize_llm():
356
  raise ValueError("HUGGINGFACE_API_KEY not set in environment variables")
357
  print(f"Initialized HuggingFace Inference API with model: {HUGGINGFACE_MODEL}")
358
  elif LLM_PROVIDER == "local":
 
359
  try:
360
  from llama_cpp import Llama # type: ignore[import]
361
  except ImportError as import_err:
362
  raise ValueError(
363
- "llama-cpp-python is not installed. Ensure requirements are up to date."
364
  ) from import_err
365
 
366
  auth_token = LOCAL_MODEL_HF_TOKEN or None
 
3
  FastAPI backend that uses semantic search to answer questions about your CV
4
  """
5
 
6
+ import importlib
7
  import json
8
  import os
9
  import re
10
+ import subprocess
11
+ import sys
12
  import threading
13
  import time
14
  from typing import List, Dict, Optional, Tuple
 
97
  SYSTEM_PROMPT
98
  )
99
 
100
# Where to fetch the prebuilt CPU wheel for llama-cpp-python, and which
# version to pin; both are overridable through the environment.
LLAMA_CPP_WHEEL_INDEX = os.environ.get(
    "LLAMA_CPP_WHEEL_INDEX",
    "https://abetlen.github.io/llama-cpp-python/whl/cpu",
)
LLAMA_CPP_VERSION = os.environ.get("LLAMA_CPP_VERSION", "0.3.16")
106
  # Initialize FastAPI
107
  app = FastAPI(title="CV Chatbot RAG API")
108
 
 
137
  _session_serializer: Optional[URLSafeTimedSerializer] = None
138
 
139
 
140
def ensure_llama_cpp_installed() -> None:
    """Install the prebuilt CPU wheel for llama-cpp-python when needed.

    No-op when LLM_PROVIDER is not "local" or when ``llama_cpp`` is already
    importable. Otherwise shells out to ``pip`` (same interpreter via
    ``sys.executable``) to install the pinned version, preferring the
    prebuilt CPU wheel index when one is configured.

    Raises:
        RuntimeError: if the pip install subprocess exits non-zero.
    """
    if LLM_PROVIDER != "local":
        return

    try:
        importlib.import_module("llama_cpp")
        return  # already available, nothing to do
    except ImportError:
        pass

    package_spec = f"llama-cpp-python=={LLAMA_CPP_VERSION}"
    print(f"Installing {package_spec} from llama.cpp CPU wheel index...")
    # Run pip through the current interpreter so the package lands in the
    # environment this process is importing from.
    cmd = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "--no-cache-dir",
    ]
    index_url = LLAMA_CPP_WHEEL_INDEX.strip()
    if index_url:
        # --extra-index-url keeps PyPI as the primary index; the extra index
        # only supplies the prebuilt CPU binary wheel.
        cmd.extend(["--extra-index-url", index_url])
    cmd.append(package_spec)

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as install_err:
        raise RuntimeError(
            f"Failed to install {package_spec} from {index_url or 'PyPI'}"
        ) from install_err

    # The import system caches path-finder results; without invalidating
    # them, the freshly installed package may be invisible to the caller's
    # subsequent `from llama_cpp import Llama`.
    importlib.invalidate_caches()
173
  def get_session_serializer() -> URLSafeTimedSerializer:
174
  """Lazily initialize the session token serializer."""
175
  global _session_serializer
 
398
  raise ValueError("HUGGINGFACE_API_KEY not set in environment variables")
399
  print(f"Initialized HuggingFace Inference API with model: {HUGGINGFACE_MODEL}")
400
  elif LLM_PROVIDER == "local":
401
+ ensure_llama_cpp_installed()
402
  try:
403
  from llama_cpp import Llama # type: ignore[import]
404
  except ImportError as import_err:
405
  raise ValueError(
406
+ "llama-cpp-python could not be imported even after attempting installation."
407
  ) from import_err
408
 
409
  auth_token = LOCAL_MODEL_HF_TOKEN or None
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- --extra-index-url https://download.pytorch.org/whl/cpu
2
  fastapi==0.104.1
3
  uvicorn[standard]==0.24.0
4
  sentence-transformers==2.2.2
@@ -7,7 +6,5 @@ faiss-cpu==1.8.0
7
  pydantic==2.5.0
8
  numpy>=1.26.4,<2
9
  python-multipart==0.0.6
10
- llama-cpp-python>=0.3.6
11
- torch==2.3.1
12
  itsdangerous==2.2.0
13
- requests==2.31.0
 
 
1
  fastapi==0.104.1
2
  uvicorn[standard]==0.24.0
3
  sentence-transformers==2.2.2
 
6
  pydantic==2.5.0
7
  numpy>=1.26.4,<2
8
  python-multipart==0.0.6
 
 
9
  itsdangerous==2.2.0
10
+ requests==2.31.0