Commit 38bd823 · Bi Yoo committed
Parent(s): ddacd95

update model download sequence

Files changed:
- .gitignore  +9 -0
- app.py  +44 -1
- requirements.txt  +1 -4
.gitignore (ADDED)

@@ -0,0 +1,9 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
app.py (CHANGED)

@@ -3,9 +3,12 @@ CV Chatbot with RAG (Retrieval-Augmented Generation)
 FastAPI backend that uses semantic search to answer questions about your CV
 """
 
+import importlib
 import json
 import os
 import re
+import subprocess
+import sys
 import threading
 import time
 from typing import List, Dict, Optional, Tuple
@@ -94,6 +97,12 @@ from config import (
     SYSTEM_PROMPT
 )
 
+LLAMA_CPP_WHEEL_INDEX = os.getenv(
+    "LLAMA_CPP_WHEEL_INDEX",
+    "https://abetlen.github.io/llama-cpp-python/whl/cpu",
+)
+LLAMA_CPP_VERSION = os.getenv("LLAMA_CPP_VERSION", "0.3.16")
+
 # Initialize FastAPI
 app = FastAPI(title="CV Chatbot RAG API")
 
@@ -128,6 +137,39 @@ local_model_lock = threading.Lock()
 _session_serializer: Optional[URLSafeTimedSerializer] = None
 
 
+def ensure_llama_cpp_installed() -> None:
+    """Install the prebuilt CPU wheel for llama-cpp-python when needed."""
+    if LLM_PROVIDER != "local":
+        return
+
+    try:
+        importlib.import_module("llama_cpp")
+        return
+    except ImportError:
+        pass
+
+    package_spec = f"llama-cpp-python=={LLAMA_CPP_VERSION}"
+    print(f"Installing {package_spec} from llama.cpp CPU wheel index...")
+    cmd = [
+        sys.executable,
+        "-m",
+        "pip",
+        "install",
+        "--no-cache-dir",
+    ]
+    index_url = LLAMA_CPP_WHEEL_INDEX.strip()
+    if index_url:
+        cmd.extend(["--extra-index-url", index_url])
+    cmd.append(package_spec)
+
+    try:
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as install_err:
+        raise RuntimeError(
+            f"Failed to install {package_spec} from {index_url or 'PyPI'}"
+        ) from install_err
+
+
 def get_session_serializer() -> URLSafeTimedSerializer:
     """Lazily initialize the session token serializer."""
     global _session_serializer
@@ -356,11 +398,12 @@ def initialize_llm():
             raise ValueError("HUGGINGFACE_API_KEY not set in environment variables")
         print(f"Initialized HuggingFace Inference API with model: {HUGGINGFACE_MODEL}")
     elif LLM_PROVIDER == "local":
+        ensure_llama_cpp_installed()
         try:
            from llama_cpp import Llama  # type: ignore[import]
         except ImportError as import_err:
             raise ValueError(
-                "llama-cpp-python ...
+                "llama-cpp-python could not be imported even after attempting installation."
             ) from import_err
 
         auth_token = LOCAL_MODEL_HF_TOKEN or None
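The net effect of the app.py changes: llama-cpp-python is no longer a build-time dependency but is fetched as a prebuilt CPU wheel the first time the local provider initializes. A quick way to exercise the new path is the smoke test below; it is a sketch, not part of the commit, and assumes it runs from the Space's working directory and that config resolves LLM_PROVIDER and LLAMA_CPP_VERSION from the environment.

# Minimal smoke test for the lazy install path (a sketch, not part of
# the commit); run from the directory containing app.py.
import os

# Assumption: config reads these settings from the environment.
os.environ.setdefault("LLM_PROVIDER", "local")
os.environ.setdefault("LLAMA_CPP_VERSION", "0.3.16")

from app import ensure_llama_cpp_installed

ensure_llama_cpp_installed()   # installs the CPU wheel only if the import fails
import llama_cpp               # should now import cleanly

print("llama_cpp", llama_cpp.__version__)

Note that the helper passes --extra-index-url rather than --index-url, so PyPI stays available as a fallback: if the wheel index has no build matching the runtime's Python version, pip can still resolve the package from PyPI.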
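Because the helper shells out to pip, the command construction is worth a regression test that never touches the network. The pytest sketch below stubs the import probe and subprocess.run; the test name and structure are hypothetical, not part of this commit.

# Hypothetical pytest test: check the pip argv that
# ensure_llama_cpp_installed() composes, without a real install.
import app

def test_install_command_uses_wheel_index(monkeypatch):
    calls = []
    real_import = app.importlib.import_module

    def fake_import(name, *args, **kwargs):
        if name == "llama_cpp":
            raise ImportError(name)  # force the install branch
        return real_import(name, *args, **kwargs)

    monkeypatch.setattr(app, "LLM_PROVIDER", "local")
    monkeypatch.setattr(app.importlib, "import_module", fake_import)
    monkeypatch.setattr(app.subprocess, "run",
                        lambda cmd, check: calls.append(cmd))

    app.ensure_llama_cpp_installed()

    assert calls, "expected one pip invocation"
    assert "--extra-index-url" in calls[0]
    assert calls[0][-1] == f"llama-cpp-python=={app.LLAMA_CPP_VERSION}"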
requirements.txt (CHANGED)

@@ -1,4 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/cpu
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
 sentence-transformers==2.2.2
@@ -7,7 +6,5 @@ faiss-cpu==1.8.0
 pydantic==2.5.0
 numpy>=1.26.4,<2
 python-multipart==0.0.6
-llama-cpp-python>=0.3.6
-torch==2.3.1
 itsdangerous==2.2.0
-requests==2.31.0
+requests==2.31.0