Spaces:

AhmedEwis
/

CMP_AI_RAG

Sleeping

App Files Community

AhmedEwis committed on Sep 29

Commit

ba6a631

verified ·

1 Parent(s): 02c6c25

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -19

app.py CHANGED Viewed

@@ -3,6 +3,7 @@
 """
 Enhanced CMA RAG Chatbot for Hugging Face Spaces
 Conversational AI with memory for Kuwait Capital Markets Authority documents
 """
 import os
@@ -12,9 +13,8 @@ import gradio as gr
 from typing import List, Tuple, Dict, Any
 import faiss
 import pickle
-from sentence_transformers import SentenceTransformer
 from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.schema import Document
 from langchain_openai import ChatOpenAI
@@ -33,13 +33,12 @@ class EnhancedCMARAGBot:
         logger.info("🚀 Starting Enhanced CMA Conversational RAG Chatbot...")
         logger.info("📚 Loading vector store and initializing conversational AI...")
-        # Initialize embedding model
-        self.embedding_model = HuggingFaceEmbeddings(
-            model_name="sentence-transformers/all-MiniLM-L6-v2",
-            model_kwargs={'device': 'cpu'},
-            encode_kwargs={'normalize_embeddings': True}
         )
-        logger.info("Embedding model initialized")
         # Load vector store
         self.vector_store = self._load_vector_store()
@@ -47,12 +46,12 @@ class EnhancedCMARAGBot:
         # Initialize OpenAI LLM
         self.llm = ChatOpenAI(
-            model="gpt-4o-mini",
             temperature=0.1,
             max_tokens=1000,
             openai_api_key=os.getenv("OPENAI_API_KEY")
         )
-        logger.info("OpenAI LLM initialized with gpt-4o-mini")
         # Setup retrieval chain
         self._setup_retrieval_chain()
@@ -64,21 +63,24 @@ class EnhancedCMARAGBot:
         logger.info("Enhanced CMA RAG Bot initialized successfully")
     def _load_vector_store(self):
-        """Load the pre-built FAISS vector store"""
         try:
             vector_store = FAISS.load_local(
                 "faiss_index",
                 self.embedding_model,
                 allow_dangerous_deserialization=True
             )
             return vector_store
         except Exception as e:
-            logger.error(f"Error loading vector store: {e}")
-            # Fallback: create from processed documents
             return self._create_vector_store_from_documents()
     def _create_vector_store_from_documents(self):
-        """Create vector store from processed documents as fallback"""
         try:
             with open('processed_documents.json', 'r', encoding='utf-8') as f:
                 processed_docs = json.load(f)
@@ -91,12 +93,14 @@ class EnhancedCMARAGBot:
                 )
                 documents.append(doc)
-            # Create vector store
             vector_store = FAISS.from_documents(documents, self.embedding_model)
             # Save for future use
             vector_store.save_local("faiss_index")
-            logger.info("Vector store created and saved from processed documents")
             return vector_store
@@ -323,9 +327,9 @@ def get_stats():
     try:
         with open('processed_documents.json', 'r', encoding='utf-8') as f:
             docs = json.load(f)
-        return len(docs), 19, "GPT-4o Mini", "تفاعلية مع ذاكرة"
     except:
-        return "2,091", "19", "GPT-4o Mini", "تفاعلية مع ذاكرة"
 # Create Gradio interface
 def create_interface():
@@ -359,7 +363,7 @@ def create_interface():
         <div class="main-header rtl">
             <h1>🤖 مستشار هيئة أسواق المال الكويتية</h1>
             <p>نظام RAG تفاعلي مع ذاكرة محادثة وذكاء اصطناعي محسّن</p>
-            <p>مساعد ذكي متفهم يتذكر المحادثة ويجيب على الأسئلة القانونية والمحادثة العامة</p>
         </div>
         """)

 """
 Enhanced CMA RAG Chatbot for Hugging Face Spaces
 Conversational AI with memory for Kuwait Capital Markets Authority documents
+Using OpenAI text-embedding-3-large and gpt-4.1-mini
 """
 import os
 from typing import List, Tuple, Dict, Any
 import faiss
 import pickle
 from langchain_community.vectorstores import FAISS
+from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.schema import Document
 from langchain_openai import ChatOpenAI
         logger.info("🚀 Starting Enhanced CMA Conversational RAG Chatbot...")
         logger.info("📚 Loading vector store and initializing conversational AI...")
+        # Initialize OpenAI embedding model
+        self.embedding_model = OpenAIEmbeddings(
+            model="text-embedding-3-large",
+            openai_api_key=os.getenv("OPENAI_API_KEY")
         )
+        logger.info("OpenAI embedding model initialized (text-embedding-3-large)")
         # Load vector store
         self.vector_store = self._load_vector_store()
         # Initialize OpenAI LLM
         self.llm = ChatOpenAI(
+            model="gpt-4.1-mini",
             temperature=0.1,
             max_tokens=1000,
             openai_api_key=os.getenv("OPENAI_API_KEY")
         )
+        logger.info("OpenAI LLM initialized with gpt-4.1-mini")
         # Setup retrieval chain
         self._setup_retrieval_chain()
         logger.info("Enhanced CMA RAG Bot initialized successfully")
     def _load_vector_store(self):
+        """Load the pre-built FAISS vector store or create new one with OpenAI embeddings"""
         try:
+            # Try to load existing vector store
             vector_store = FAISS.load_local(
                 "faiss_index",
                 self.embedding_model,
                 allow_dangerous_deserialization=True
             )
+            logger.info("Existing vector store loaded")
             return vector_store
         except Exception as e:
+            logger.warning(f"Could not load existing vector store: {e}")
+            logger.info("Creating new vector store with OpenAI embeddings...")
+            # Create from processed documents with OpenAI embeddings
             return self._create_vector_store_from_documents()
     def _create_vector_store_from_documents(self):
+        """Create vector store from processed documents using OpenAI embeddings"""
         try:
             with open('processed_documents.json', 'r', encoding='utf-8') as f:
                 processed_docs = json.load(f)
                 )
                 documents.append(doc)
+            logger.info(f"Creating vector store from {len(documents)} documents...")
+            # Create vector store with OpenAI embeddings
             vector_store = FAISS.from_documents(documents, self.embedding_model)
             # Save for future use
             vector_store.save_local("faiss_index")
+            logger.info("Vector store created and saved with OpenAI embeddings")
             return vector_store
     try:
         with open('processed_documents.json', 'r', encoding='utf-8') as f:
             docs = json.load(f)
+        return len(docs), 19, "GPT-4.1 Mini", "تفاعلية مع ذاكرة"
     except:
+        return "2,091", "19", "GPT-4.1 Mini", "تفاعلية مع ذاكرة"
 # Create Gradio interface
 def create_interface():
         <div class="main-header rtl">
             <h1>🤖 مستشار هيئة أسواق المال الكويتية</h1>
             <p>نظام RAG تفاعلي مع ذاكرة محادثة وذكاء اصطناعي محسّن</p>
+            <p>مدعوم بـ OpenAI text-embedding-3-large و GPT-4.1 Mini</p>
         </div>
         """)