Commit 38bd823 · Bi Yoo committed
Parent(s): ddacd95

update model download sequence

Files changed:
- .gitignore  +9 -0
- app.py  +44 -1
- requirements.txt  +1 -4
.gitignore (ADDED)

@@ -0,0 +1,9 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
app.py (CHANGED)

@@ -3,9 +3,12 @@ CV Chatbot with RAG (Retrieval-Augmented Generation)
 FastAPI backend that uses semantic search to answer questions about your CV
 """
 
+import importlib
 import json
 import os
 import re
+import subprocess
+import sys
 import threading
 import time
 from typing import List, Dict, Optional, Tuple
@@ -94,6 +97,12 @@ from config import (
     SYSTEM_PROMPT
 )
 
+LLAMA_CPP_WHEEL_INDEX = os.getenv(
+    "LLAMA_CPP_WHEEL_INDEX",
+    "https://abetlen.github.io/llama-cpp-python/whl/cpu",
+)
+LLAMA_CPP_VERSION = os.getenv("LLAMA_CPP_VERSION", "0.3.16")
+
 # Initialize FastAPI
 app = FastAPI(title="CV Chatbot RAG API")
 
@@ -128,6 +137,39 @@ local_model_lock = threading.Lock()
 _session_serializer: Optional[URLSafeTimedSerializer] = None
 
 
+def ensure_llama_cpp_installed() -> None:
+    """Install the prebuilt CPU wheel for llama-cpp-python when needed."""
+    if LLM_PROVIDER != "local":
+        return
+
+    try:
+        importlib.import_module("llama_cpp")
+        return
+    except ImportError:
+        pass
+
+    package_spec = f"llama-cpp-python=={LLAMA_CPP_VERSION}"
+    print(f"Installing {package_spec} from llama.cpp CPU wheel index...")
+    cmd = [
+        sys.executable,
+        "-m",
+        "pip",
+        "install",
+        "--no-cache-dir",
+    ]
+    index_url = LLAMA_CPP_WHEEL_INDEX.strip()
+    if index_url:
+        cmd.extend(["--extra-index-url", index_url])
+    cmd.append(package_spec)
+
+    try:
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as install_err:
+        raise RuntimeError(
+            f"Failed to install {package_spec} from {index_url or 'PyPI'}"
+        ) from install_err
+
+
 def get_session_serializer() -> URLSafeTimedSerializer:
     """Lazily initialize the session token serializer."""
     global _session_serializer
@@ -356,11 +398,12 @@ def initialize_llm():
             raise ValueError("HUGGINGFACE_API_KEY not set in environment variables")
         print(f"Initialized HuggingFace Inference API with model: {HUGGINGFACE_MODEL}")
     elif LLM_PROVIDER == "local":
+        ensure_llama_cpp_installed()
         try:
            from llama_cpp import Llama  # type: ignore[import]
         except ImportError as import_err:
             raise ValueError(
-                "llama-cpp-python ...
+                "llama-cpp-python could not be imported even after attempting installation."
             ) from import_err
 
         auth_token = LOCAL_MODEL_HF_TOKEN or None
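The net effect of the app.py changes: llama-cpp-python is no longer a build-time dependency but is fetched as a prebuilt CPU wheel the first time the local provider initializes. A quick way to exercise the new path is the smoke test below; it is a sketch, not part of the commit, and assumes it runs from the Space's working directory and that config resolves LLM_PROVIDER and LLAMA_CPP_VERSION from the environment.

# Minimal smoke test for the lazy install path (a sketch, not part of
# the commit); run from the directory containing app.py.
import os

# Assumption: config reads these settings from the environment.
os.environ.setdefault("LLM_PROVIDER", "local")
os.environ.setdefault("LLAMA_CPP_VERSION", "0.3.16")

from app import ensure_llama_cpp_installed

ensure_llama_cpp_installed()   # installs the CPU wheel only if the import fails
import llama_cpp               # should now import cleanly

print("llama_cpp", llama_cpp.__version__)

Note that the helper passes --extra-index-url rather than --index-url, so PyPI stays available as a fallback: if the wheel index has no build matching the runtime's Python version, pip can still resolve the package from PyPI.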
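Because the helper shells out to pip, the command construction is worth a regression test that never touches the network. The pytest sketch below stubs the import probe and subprocess.run; the test name and structure are hypothetical, not part of this commit.

# Hypothetical pytest test: check the pip argv that
# ensure_llama_cpp_installed() composes, without a real install.
import app

def test_install_command_uses_wheel_index(monkeypatch):
    calls = []
    real_import = app.importlib.import_module

    def fake_import(name, *args, **kwargs):
        if name == "llama_cpp":
            raise ImportError(name)  # force the install branch
        return real_import(name, *args, **kwargs)

    monkeypatch.setattr(app, "LLM_PROVIDER", "local")
    monkeypatch.setattr(app.importlib, "import_module", fake_import)
    monkeypatch.setattr(app.subprocess, "run",
                        lambda cmd, check: calls.append(cmd))

    app.ensure_llama_cpp_installed()

    assert calls, "expected one pip invocation"
    assert "--extra-index-url" in calls[0]
    assert calls[0][-1] == f"llama-cpp-python=={app.LLAMA_CPP_VERSION}"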
requirements.txt (CHANGED)

@@ -1,4 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/cpu
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
 sentence-transformers==2.2.2
@@ -7,7 +6,5 @@ faiss-cpu==1.8.0
 pydantic==2.5.0
 numpy>=1.26.4,<2
 python-multipart==0.0.6
-llama-cpp-python>=0.3.6
-torch==2.3.1
 itsdangerous==2.2.0
-requests==2.31.0
+requests==2.31.0