Spaces:

nishantgaurav23
/

Sport-Chatbot

Runtime error

App Files Community

nishantgaurav23 committed on Nov 1, 2024

Commit

6f7b9d9

verified ·

1 Parent(s): bc26371

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -316

app.py CHANGED Viewed

@@ -7,24 +7,14 @@ import torch
 import torch.nn.functional as F
 import re
 import requests
-#from dotenv import load_dotenv
 from embedding_processor import SentenceTransformerRetriever, process_data
 import pickle
-import os
-import warnings
-import json  # Add this import
-# Add at the top with other imports
-from llama_cpp import Llama
-import requests
-from tqdm import tqdm
 import logging
 import sys
-# Set page config immediately after imports
 st.set_page_config(
     page_title="The Sport Chatbot",
     page_icon="🏆",
@@ -38,16 +28,21 @@ logging.basicConfig(
     handlers=[logging.StreamHandler(sys.stdout)]
 )
-# Create necessary directories at startup
-for directory in ['models', 'ESPN_data', 'embeddings_cache']:
-    os.makedirs(directory, exist_ok=True)
-# Load environment variables
-#load_dotenv()
-# Add the new function here, right after imports and before API configuration
 @st.cache_data
 def load_from_drive(file_id: str):
@@ -72,93 +67,72 @@ def load_from_drive(file_id: str):
         st.error(f"Error loading file from Drive: {str(e)}")
         return None
-# Hugging Face API configuration
-# API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-v0.1"
-# headers = {"Authorization": f"Bearer HF_TOKEN"}
-#model_name = 'mistralai/Mistral-7B-v0.1'
 class RAGPipeline:
     def __init__(self, data_folder: str, k: int = 5):
-        try:
-            self.data_folder = data_folder
-            self.k = k
-            self.retriever = SentenceTransformerRetriever()
-            self.documents = []
-            self.device = torch.device("cpu")
-            # Model path with absolute path
-            current_dir = os.path.dirname(os.path.abspath(__file__))
-            self.model_path = os.path.join(current_dir, "models", "mistral-7b-v0.1.Q4_K_M.gguf")
-            # Initialize model
-            self.llm = self.get_model()
-        except Exception as e:
-            logging.error(f"Error in RAGPipeline initialization: {str(e)}")
-            raise
-    @st.cache_resource(show_spinner=False)
-    def get_model(_self):
-        """Get or initialize the model with caching"""
-        try:
-            if not os.path.exists(_self.model_path):
-                os.makedirs(os.path.dirname(_self.model_path), exist_ok=True)
-                st.info("Downloading model... This may take a while.")
-                direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
-                _self.download_file_with_progress(direct_url, _self.model_path)
-            # Verify file exists and has content
-            if not os.path.exists(_self.model_path):
-                raise FileNotFoundError(f"Model file {_self.model_path} not found after download attempts")
-            if os.path.getsize(_self.model_path) < 1000000:  # Less than 1MB
-                os.remove(_self.model_path)
-                raise ValueError("Downloaded model file is too small, likely corrupted")
-            llm_config = {
-                "model_path": _self.model_path,
-                "n_ctx": 2048,
-                "n_threads": 4,
-                "n_batch": 512,
-                "n_gpu_layers": 0,
-                "verbose": False
-            }
-            model = Llama(**llm_config)
-            st.success("Model loaded successfully!")
-            return model
-        except Exception as e:
-            st.error(f"Error initializing model: {str(e)}")
-            raise
-    def download_file_with_progress(self, url: str, filename: str):
-        """Download a file with progress bar using requests"""
-        response = requests.get(url, stream=True)
-        total_size = int(response.headers.get('content-length', 0))
-        with open(filename, 'wb') as file, tqdm(
-            desc=filename,
-            total=total_size,
-            unit='iB',
-            unit_scale=True,
-            unit_divisor=1024,
-        ) as progress_bar:
-            for data in response.iter_content(chunk_size=1024):
-                size = file.write(data)
-                progress_bar.update(size)
-    # Alternative API call with streaming
     def query_model(self, prompt: str) -> str:
-        """Query the local Llama model instead of API"""
         try:
             if self.llm is None:
                 raise RuntimeError("Model not initialized")
-            # Generate response using Llama model
             response = self.llm(
                 prompt,
                 max_tokens=512,
@@ -167,47 +141,41 @@ class RAGPipeline:
                 echo=False,
                 stop=["Question:", "\n\n"]
             )
-            # Check and extract response
             if response and 'choices' in response and len(response['choices']) > 0:
                 text = response['choices'][0].get('text', '').strip()
                 return text
             else:
                 raise ValueError("No valid response generated")
         except Exception as e:
             logging.error(f"Error in query_model: {str(e)}")
             raise
-    def preprocess_query(self, query: str) -> str:
-        """Clean and prepare the query"""
-        query = query.lower().strip()
-        query = re.sub(r'\s+', ' ', query)
-        return query
     def process_query(self, query: str, placeholder) -> str:
         try:
             # Preprocess query
             query = self.preprocess_query(query)
             # Show retrieval status
             status = placeholder.empty()
             status.write("🔍 Finding relevant information...")
             # Get embeddings and search
             query_embedding = self.retriever.encode([query])
             similarities = F.cosine_similarity(query_embedding, self.retriever.doc_embeddings)
             scores, indices = torch.topk(similarities, k=min(self.k, len(self.documents)))
             relevant_docs = [self.documents[idx] for idx in indices.tolist()]
             # Update status
             status.write("💭 Generating response...")
             # Prepare context and prompt
-            context = "\n".join(relevant_docs[:3])  # Use top 3 most relevant docs
             prompt = f"""Context information is below:
             {context}
             Given the context above, please answer the following question:
             {query}
@@ -217,12 +185,12 @@ class RAGPipeline:
             - Only include sports-related information
             - No dates or timestamps in the response
             - Use clear, natural language
             Answer:"""
             # Generate response
             response_placeholder = placeholder.empty()
             try:
                 response_text = self.query_model(prompt)
                 if response_text:
@@ -233,174 +201,27 @@ class RAGPipeline:
                     message = "No relevant answer found. Please try rephrasing your question."
                     response_placeholder.warning(message)
                     return message
             except Exception as e:
                 logging.error(f"Generation error: {str(e)}")
                 message = "Had some trouble generating the response. Please try again."
                 response_placeholder.warning(message)
                 return message
         except Exception as e:
             logging.error(f"Process error: {str(e)}")
             message = "Something went wrong. Please try again with a different question."
             placeholder.warning(message)
             return message
-    def postprocess_response(self, response: str) -> str:
-        """Clean up the generated response"""
-        response = response.strip()
-        response = re.sub(r'\s+', ' ', response)
-        response = re.sub(r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(?:\+\d{2}:?\d{2})?', '', response)
-        return response
-#     def process_query(self, query: str, placeholder) -> str:
-#         try:
-#             # Preprocess query
-#             query = self.preprocess_query(query)
-#             # Show retrieval status
-#             status = placeholder.empty()
-#             status.write("🔍 Finding relevant information...")
-#             # Get embeddings and search using tensor operations
-#             query_embedding = self.retriever.encode([query])
-#             similarities = F.cosine_similarity(query_embedding, self.retriever.doc_embeddings)
-#             scores, indices = torch.topk(similarities, k=min(self.k, len(self.documents)))
-#             # Print search results for debugging
-#             print("\nSearch Results:")
-#             for idx, score in zip(indices.tolist(), scores.tolist()):
-#                 print(f"Score: {score:.4f} | Document: {self.documents[idx][:100]}...")
-#             relevant_docs = [self.documents[idx] for idx in indices.tolist()]
-#             # Update status
-#             status.write("💭 Generating response...")
-#             # Prepare context and prompt
-#             context = "\n".join(relevant_docs[:3])  # Only use top 3 most relevant docs
-#             prompt = f"""Answer this question using the given context. Be specific and detailed.
-# Context: {context}
-# Question: {query}
-# Answer (provide a complete, detailed response):"""
-#             # Generate response
-#             response_placeholder = placeholder.empty()
-#             try:
-#                 response = requests.post(
-#                     model_name,
-#                     #headers=headers,
-#                     json={
-#                         "inputs": prompt,
-#                         "parameters": {
-#                             "max_new_tokens": 1024,
-#                             "temperature": 0.5,
-#                             "top_p": 0.9,
-#                             "top_k": 50,
-#                             "repetition_penalty": 1.03,
-#                             "do_sample": True
-#                         }
-#                     },
-#                     timeout=30
-#                 ).json()
-#                 if response and isinstance(response, list) and len(response) > 0:
-#                     generated_text = response[0].get('generated_text', '').strip()
-#                     if generated_text:
-#                         # Find and extract only the answer part
-#                         if "Answer:" in generated_text:
-#                             answer_part = generated_text.split("Answer:")[-1].strip()
-#                         elif "Answer (provide a complete, detailed response):" in generated_text:
-#                             answer_part = generated_text.split("Answer (provide a complete, detailed response):")[-1].strip()
-#                         else:
-#                             answer_part = generated_text.strip()
-#                         # Clean up the answer
-#                         answer_part = answer_part.replace("Context:", "").replace("Question:", "")
-#                         final_response = self.postprocess_response(answer_part)
-#                         response_placeholder.markdown(final_response)
-#                         return final_response
-#                 message = "No relevant answer found. Please try rephrasing your question."
-#                 response_placeholder.warning(message)
-#                 return message
-#             except Exception as e:
-#                 print(f"Generation error: {str(e)}")
-#                 message = "Had some trouble generating the response. Please try again."
-#                 response_placeholder.warning(message)
-#                 return message
-#         except Exception as e:
-#             print(f"Process error: {str(e)}")
-#             message = "Something went wrong. Please try again with a different question."
-#             placeholder.warning(message)
-#             return message
-def check_environment():
-    """Check if the environment is properly set up"""
-    # if not headers['Authorization']:
-    #     st.error("HUGGINGFACE_API_KEY environment variable not set!")
-    #     st.stop()
-    #     return False
-    try:
-        import torch
-        import sentence_transformers
-        return True
-    except ImportError as e:
-        st.error(f"Missing required package: {str(e)}")
-        st.stop()
-        return False
-# @st.cache_resource
-# def initialize_rag_pipeline():
-#     """Initialize the RAG pipeline once"""
-#     data_folder = "ESPN_data"
-#     return RAGPipeline(data_folder)
-def check_space_requirements():
-    """Check if we're running on HF Space and have necessary resources"""
-    try:
-        # Check if we're on HF Space
-        is_space = os.environ.get('SPACE_ID') is not None
-        if is_space:
-            # Check disk space
-            disk_space = os.statvfs('/')
-            free_space_gb = (disk_space.f_frsize * disk_space.f_bavail) / (1024**3)
-            if free_space_gb < 10:  # Need at least 10GB free
-                st.warning(f"Low disk space: {free_space_gb:.1f}GB free")
-            # Check if model exists
-            model_path = "mistral-7b-v0.1.Q4_K_M.gguf"
-            if not os.path.exists(model_path):
-                st.info("Model will be downloaded on first run")
-            # Check if embeddings exist
-            if not os.path.exists('embeddings_cache/embeddings.pkl'):
-                st.info("Embeddings will be loaded from Drive")
-        return True
-    except Exception as e:
-        logging.error(f"Space requirements check failed: {str(e)}")
-        return False
 @st.cache_resource(show_spinner=False)
 def initialize_rag_pipeline():
     """Initialize the RAG pipeline once"""
     try:
-        # First check/create necessary directories
-        for directory in ['models', 'ESPN_data', 'embeddings_cache']:
-            os.makedirs(directory, exist_ok=True)
-        # Load embeddings from Drive first
         drive_file_id = "1MuV63AE9o6zR9aBvdSDQOUextp71r2NN"
         with st.spinner("Loading embeddings from Google Drive..."):
             cache_data = load_from_drive(drive_file_id)
@@ -408,7 +229,7 @@ def initialize_rag_pipeline():
                 st.error("Failed to load embeddings from Google Drive")
                 st.stop()
-        # Now initialize pipeline
         data_folder = "ESPN_data"
         rag = RAGPipeline(data_folder)
@@ -426,20 +247,9 @@ def initialize_rag_pipeline():
 def main():
     try:
         # Environment check
-        if not check_environment() or not check_space_requirements():
             return
-        # Session state for initialization status
-        if 'initialized' not in st.session_state:
-            st.session_state.initialized = False
-        # # Page config
-        # st.set_page_config(
-        #     page_title="The Sport Chatbot",
-        #     page_icon="🏆",
-        #     layout="wide"
-        # )
         # Improved CSS styling
         st.markdown("""
             <style>
@@ -510,7 +320,7 @@ def main():
             </style>
         """, unsafe_allow_html=True)
-        # Header section with improved styling
         st.markdown("<h1 class='main-title'>🏆 The Sport Chatbot</h1>", unsafe_allow_html=True)
         st.markdown("<h3 class='sub-title'>Using ESPN API</h3>", unsafe_allow_html=True)
         st.markdown("""
@@ -523,40 +333,22 @@ def main():
             </p>
         """, unsafe_allow_html=True)
-        # Add some spacing
-        st.markdown("<br>", unsafe_allow_html=True)
         # Initialize the pipeline
-        if not st.session_state.initialized:
-            try:
-                with st.spinner("Loading resources..."):
-                    # Create necessary directories
-                    for directory in ['models', 'ESPN_data', 'embeddings_cache']:
-                        os.makedirs(directory, exist_ok=True)
-                    # Initialize RAG pipeline
-                    st.session_state.rag = initialize_rag_pipeline()
-                    st.session_state.initialized = True
-                st.success("System initialized successfully!")
-            except Exception as e:
-                logging.error(f"Initialization error: {str(e)}")
-                st.error("Unable to initialize the system. Please check if all required files are present.")
-                st.stop()
-        # Create columns for layout with golden ratio
         col1, col2, col3 = st.columns([1, 6, 1])
         with col2:
-            # Query input with label styling
             query = st.text_input("What would you like to know about sports?")
-            # Centered button
             if st.button("Get Answer"):
                 if query:
                     response_placeholder = st.empty()
                     try:
-                        # Get response from RAG pipeline
                         response = st.session_state.rag.process_query(query, response_placeholder)
                         logging.info(f"Generated response: {response}")
                     except Exception as e:
@@ -565,13 +357,12 @@ def main():
                 else:
                     st.warning("Please enter a question!")
-        # Footer with improved styling
         st.markdown("<br><br>", unsafe_allow_html=True)
         st.markdown("---")
         st.markdown("""
             <p style='text-align: center; color: #666666; padding: 1rem 0;'>
-                Powered by ESPN Data & Mistral AI 🚀<br>
-                <small>Running on Hugging Face Spaces</small>
             </p>
         """, unsafe_allow_html=True)
@@ -580,8 +371,4 @@ def main():
         st.error("An unexpected error occurred. Please check the logs and try again.")
 if __name__ == "__main__":
-    try:
-        main()
-    except Exception as e:
-        logging.error(f"Application error: {str(e)}")
-        st.error("An unexpected error occurred. Please check the logs and try again.")

 import torch.nn.functional as F
 import re
 import requests
 from embedding_processor import SentenceTransformerRetriever, process_data
 import pickle
 import logging
 import sys
+from llama_cpp import Llama
+from tqdm import tqdm
+# Set page config first
 st.set_page_config(
     page_title="The Sport Chatbot",
     page_icon="🏆",
     handlers=[logging.StreamHandler(sys.stdout)]
 )
+def download_file_with_progress(url: str, filename: str):
+    """Download a file with progress bar using requests"""
+    response = requests.get(url, stream=True)
+    total_size = int(response.headers.get('content-length', 0))
+    with open(filename, 'wb') as file, tqdm(
+        desc=filename,
+        total=total_size,
+        unit='iB',
+        unit_scale=True,
+        unit_divisor=1024,
+    ) as progress_bar:
+        for data in response.iter_content(chunk_size=1024):
+            size = file.write(data)
+            progress_bar.update(size)
 @st.cache_data
 def load_from_drive(file_id: str):
         st.error(f"Error loading file from Drive: {str(e)}")
         return None
+@st.cache_resource(show_spinner=False)
+def load_llama_model():
+    """Load Llama model with caching"""
+    try:
+        model_path = "mistral-7b-v0.1.Q4_K_M.gguf"
+        if not os.path.exists(model_path):
+            st.info("Downloading model... This may take a while.")
+            direct_url = "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_K_M.gguf"
+            download_file_with_progress(direct_url, model_path)
+        llm_config = {
+            "model_path": model_path,
+            "n_ctx": 2048,
+            "n_threads": 4,
+            "n_batch": 512,
+            "n_gpu_layers": 0,
+            "verbose": False
+        }
+        model = Llama(**llm_config)
+        st.success("Model loaded successfully!")
+        return model
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        raise
+def check_environment():
+    """Check if the environment is properly set up"""
+    try:
+        import torch
+        import sentence_transformers
+        return True
+    except ImportError as e:
+        st.error(f"Missing required package: {str(e)}")
+        st.stop()
+        return False
 class RAGPipeline:
     def __init__(self, data_folder: str, k: int = 5):
+        self.data_folder = data_folder
+        self.k = k
+        self.retriever = SentenceTransformerRetriever()
+        self.documents = []
+        self.device = torch.device("cpu")
+        self.llm = load_llama_model()
+    def preprocess_query(self, query: str) -> str:
+        """Clean and prepare the query"""
+        query = query.lower().strip()
+        query = re.sub(r'\s+', ' ', query)
+        return query
+    def postprocess_response(self, response: str) -> str:
+        """Clean up the generated response"""
+        response = response.strip()
+        response = re.sub(r'\s+', ' ', response)
+        response = re.sub(r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}(?:\+\d{2}:?\d{2})?', '', response)
+        return response
     def query_model(self, prompt: str) -> str:
+        """Query the local Llama model"""
         try:
             if self.llm is None:
                 raise RuntimeError("Model not initialized")
             response = self.llm(
                 prompt,
                 max_tokens=512,
                 echo=False,
                 stop=["Question:", "\n\n"]
             )
             if response and 'choices' in response and len(response['choices']) > 0:
                 text = response['choices'][0].get('text', '').strip()
                 return text
             else:
                 raise ValueError("No valid response generated")
         except Exception as e:
             logging.error(f"Error in query_model: {str(e)}")
             raise
     def process_query(self, query: str, placeholder) -> str:
         try:
             # Preprocess query
             query = self.preprocess_query(query)
             # Show retrieval status
             status = placeholder.empty()
             status.write("🔍 Finding relevant information...")
             # Get embeddings and search
             query_embedding = self.retriever.encode([query])
             similarities = F.cosine_similarity(query_embedding, self.retriever.doc_embeddings)
             scores, indices = torch.topk(similarities, k=min(self.k, len(self.documents)))
             relevant_docs = [self.documents[idx] for idx in indices.tolist()]
             # Update status
             status.write("💭 Generating response...")
             # Prepare context and prompt
+            context = "\n".join(relevant_docs[:3])
             prompt = f"""Context information is below:
             {context}
             Given the context above, please answer the following question:
             {query}
             - Only include sports-related information
             - No dates or timestamps in the response
             - Use clear, natural language
             Answer:"""
             # Generate response
             response_placeholder = placeholder.empty()
             try:
                 response_text = self.query_model(prompt)
                 if response_text:
                     message = "No relevant answer found. Please try rephrasing your question."
                     response_placeholder.warning(message)
                     return message
             except Exception as e:
                 logging.error(f"Generation error: {str(e)}")
                 message = "Had some trouble generating the response. Please try again."
                 response_placeholder.warning(message)
                 return message
         except Exception as e:
             logging.error(f"Process error: {str(e)}")
             message = "Something went wrong. Please try again with a different question."
             placeholder.warning(message)
             return message
 @st.cache_resource(show_spinner=False)
 def initialize_rag_pipeline():
     """Initialize the RAG pipeline once"""
     try:
+        # Create necessary directories
+        os.makedirs("ESPN_data", exist_ok=True)
+        # Load embeddings from Drive
         drive_file_id = "1MuV63AE9o6zR9aBvdSDQOUextp71r2NN"
         with st.spinner("Loading embeddings from Google Drive..."):
             cache_data = load_from_drive(drive_file_id)
                 st.error("Failed to load embeddings from Google Drive")
                 st.stop()
+        # Initialize pipeline
         data_folder = "ESPN_data"
         rag = RAGPipeline(data_folder)
 def main():
     try:
         # Environment check
+        if not check_environment():
             return
         # Improved CSS styling
         st.markdown("""
             <style>
             </style>
         """, unsafe_allow_html=True)
+        # Header section
         st.markdown("<h1 class='main-title'>🏆 The Sport Chatbot</h1>", unsafe_allow_html=True)
         st.markdown("<h3 class='sub-title'>Using ESPN API</h3>", unsafe_allow_html=True)
         st.markdown("""
             </p>
         """, unsafe_allow_html=True)
         # Initialize the pipeline
+        if 'rag' not in st.session_state:
+            with st.spinner("Loading resources..."):
+                st.session_state.rag = initialize_rag_pipeline()
+        # Create columns for layout
         col1, col2, col3 = st.columns([1, 6, 1])
         with col2:
+            # Query input
             query = st.text_input("What would you like to know about sports?")
             if st.button("Get Answer"):
                 if query:
                     response_placeholder = st.empty()
                     try:
                         response = st.session_state.rag.process_query(query, response_placeholder)
                         logging.info(f"Generated response: {response}")
                     except Exception as e:
                 else:
                     st.warning("Please enter a question!")
+        # Footer
         st.markdown("<br><br>", unsafe_allow_html=True)
         st.markdown("---")
         st.markdown("""
             <p style='text-align: center; color: #666666; padding: 1rem 0;'>
+                Powered by ESPN Data & Mistral AI 🚀
             </p>
         """, unsafe_allow_html=True)
         st.error("An unexpected error occurred. Please check the logs and try again.")
 if __name__ == "__main__":
+    main()