Update app.py
app.py CHANGED

@@ -14,6 +14,11 @@ import sys
 from llama_cpp import Llama
 from tqdm import tqdm
 
+# At the top of your script
+os.environ['LLAMA_CPP_THREADS'] = '4'
+os.environ['LLAMA_CPP_BATCH_SIZE'] = '512'
+os.environ['LLAMA_CPP_MODEL_PATH'] = os.path.join("models", "mistral-7b-v0.1.Q4_K_M.gguf")
+
 # Set page config first
 st.set_page_config(
     page_title="The Sport Chatbot",
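
The three environment variables added above duplicate values that the new get_llama_model() (next hunk) hardcodes again: four threads, batch size 512, and the same GGUF path. As far as this diff shows, nothing reads them back. A minimal sketch, assuming the variables are meant to drive the config rather than restate it, would build llm_config from the environment:

# Hypothetical sketch (not in this commit): read the settings back from the
# environment, falling back to the same defaults the code hardcodes.
import os

llm_config = {
    "model_path": os.environ.get(
        "LLAMA_CPP_MODEL_PATH",
        os.path.join("models", "mistral-7b-v0.1.Q4_K_M.gguf"),
    ),
    "n_threads": int(os.environ.get("LLAMA_CPP_THREADS", "4")),
    "n_batch": int(os.environ.get("LLAMA_CPP_BATCH_SIZE", "512")),
}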
@@ -27,7 +32,28 @@ logging.basicConfig(
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     handlers=[logging.StreamHandler(sys.stdout)]
 )
+# Add this at the top level of your script, after imports
+@st.cache_resource
+def get_llama_model():
+    model_path = os.path.join("models", "mistral-7b-v0.1.Q4_K_M.gguf")
+    os.makedirs(os.path.dirname(model_path), exist_ok=True)
+
+    if not os.path.exists(model_path):
+        st.info("Downloading model... This may take a while.")
+        direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
+        download_file_with_progress(direct_url, model_path)
 
+    llm_config = {
+        "model_path": model_path,
+        "n_ctx": 2048,
+        "n_threads": 4,
+        "n_batch": 512,
+        "n_gpu_layers": 0,
+        "verbose": False,
+        "use_mlock": True
+    }
+
+    return Llama(**llm_config)
 def download_file_with_progress(url: str, filename: str):
     """Download a file with progress bar using requests"""
     response = requests.get(url, stream=True)
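
Only the signature, docstring, and first line of download_file_with_progress() show up as hunk context. Given that docstring and the tqdm import, the body presumably follows the standard streamed-download pattern; a minimal self-contained sketch of such a helper (assumed, not visible in this diff):

import requests
from tqdm import tqdm

def download_file_with_progress(url: str, filename: str):
    """Download a file with progress bar using requests"""
    response = requests.get(url, stream=True)
    response.raise_for_status()
    total_size = int(response.headers.get('content-length', 0))

    # Stream in chunks so the multi-GB model file never sits in memory,
    # updating the progress bar as each chunk lands on disk.
    with open(filename, 'wb') as file, tqdm(
        desc=filename, total=total_size, unit='iB', unit_scale=True
    ) as progress_bar:
        for chunk in response.iter_content(chunk_size=8192):
            file.write(chunk)
            progress_bar.update(len(chunk))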
@@ -156,7 +182,8 @@ class RAGPipeline:
         self.retriever = SentenceTransformerRetriever()
         self.documents = []
         self.device = torch.device("cpu")
-
+        # Use the cached model directly
+        self.llm = get_llama_model()
 
     def preprocess_query(self, query: str) -> str:
         """Clean and prepare the query"""
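
The point of @st.cache_resource is that Streamlit re-executes the whole script on every interaction: without it, RAGPipeline.__init__ would reconstruct the Llama model (a multi-gigabyte load, plus a possible download) on each rerun. Cached, get_llama_model() runs once per server process, and every rerun and session shares the same instance. A hypothetical sketch of how the pipeline might then call self.llm (the actual call site is outside this diff):

# Hypothetical RAGPipeline method (not part of this commit). llama_cpp.Llama
# instances are callable and return an OpenAI-style completion dict.
def generate_answer(self, prompt: str) -> str:
    response = self.llm(
        prompt,
        max_tokens=512,       # assumed generation settings
        temperature=0.7,
        stop=["Question:"],   # assumed stop sequence
    )
    return response["choices"][0]["text"].strip()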