AhmedEwis committed (verified)
Commit ba6a631
1 parent: 02c6c25

Update app.py

Files changed (1): app.py (+23, −19)
app.py CHANGED
@@ -3,6 +3,7 @@
 """
 Enhanced CMA RAG Chatbot for Hugging Face Spaces
 Conversational AI with memory for Kuwait Capital Markets Authority documents
+Using OpenAI text-embedding-3-large and gpt-4.1-mini
 """
 
 import os
@@ -12,9 +13,8 @@ import gradio as gr
 from typing import List, Tuple, Dict, Any
 import faiss
 import pickle
-from sentence_transformers import SentenceTransformer
 from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_openai.embeddings import OpenAIEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.schema import Document
 from langchain_openai import ChatOpenAI
@@ -33,13 +33,12 @@ class EnhancedCMARAGBot:
         logger.info("🚀 Starting Enhanced CMA Conversational RAG Chatbot...")
         logger.info("📚 Loading vector store and initializing conversational AI...")
 
-        # Initialize embedding model
-        self.embedding_model = HuggingFaceEmbeddings(
-            model_name="sentence-transformers/all-MiniLM-L6-v2",
-            model_kwargs={'device': 'cpu'},
-            encode_kwargs={'normalize_embeddings': True}
+        # Initialize OpenAI embedding model
+        self.embedding_model = OpenAIEmbeddings(
+            model="text-embedding-3-large",
+            openai_api_key=os.getenv("OPENAI_API_KEY")
         )
-        logger.info("Embedding model initialized")
+        logger.info("OpenAI embedding model initialized (text-embedding-3-large)")
 
         # Load vector store
         self.vector_store = self._load_vector_store()
@@ -47,12 +46,12 @@ class EnhancedCMARAGBot:
 
         # Initialize OpenAI LLM
         self.llm = ChatOpenAI(
-            model="gpt-4o-mini",
+            model="gpt-4.1-mini",
             temperature=0.1,
             max_tokens=1000,
             openai_api_key=os.getenv("OPENAI_API_KEY")
         )
-        logger.info("OpenAI LLM initialized with gpt-4o-mini")
+        logger.info("OpenAI LLM initialized with gpt-4.1-mini")
 
         # Setup retrieval chain
         self._setup_retrieval_chain()
@@ -64,21 +63,24 @@ class EnhancedCMARAGBot:
         logger.info("Enhanced CMA RAG Bot initialized successfully")
 
     def _load_vector_store(self):
-        """Load the pre-built FAISS vector store"""
+        """Load the pre-built FAISS vector store or create new one with OpenAI embeddings"""
         try:
+            # Try to load existing vector store
             vector_store = FAISS.load_local(
                 "faiss_index",
                 self.embedding_model,
                 allow_dangerous_deserialization=True
             )
+            logger.info("Existing vector store loaded")
             return vector_store
         except Exception as e:
-            logger.error(f"Error loading vector store: {e}")
-            # Fallback: create from processed documents
+            logger.warning(f"Could not load existing vector store: {e}")
+            logger.info("Creating new vector store with OpenAI embeddings...")
+            # Create from processed documents with OpenAI embeddings
             return self._create_vector_store_from_documents()
 
     def _create_vector_store_from_documents(self):
-        """Create vector store from processed documents as fallback"""
+        """Create vector store from processed documents using OpenAI embeddings"""
         try:
            with open('processed_documents.json', 'r', encoding='utf-8') as f:
                processed_docs = json.load(f)
@@ -91,12 +93,14 @@ class EnhancedCMARAGBot:
            )
            documents.append(doc)
 
-        # Create vector store
+        logger.info(f"Creating vector store from {len(documents)} documents...")
+
+        # Create vector store with OpenAI embeddings
         vector_store = FAISS.from_documents(documents, self.embedding_model)
 
         # Save for future use
         vector_store.save_local("faiss_index")
-        logger.info("Vector store created and saved from processed documents")
+        logger.info("Vector store created and saved with OpenAI embeddings")
 
         return vector_store
 
@@ -323,9 +327,9 @@ def get_stats():
    try:
        with open('processed_documents.json', 'r', encoding='utf-8') as f:
            docs = json.load(f)
-        return len(docs), 19, "GPT-4o Mini", "تفاعلية مع ذاكرة"
+        return len(docs), 19, "GPT-4.1 Mini", "تفاعلية مع ذاكرة"
    except:
-        return "2,091", "19", "GPT-4o Mini", "تفاعلية مع ذاكرة"
+        return "2,091", "19", "GPT-4.1 Mini", "تفاعلية مع ذاكرة"
 
 # Create Gradio interface
 def create_interface():
@@ -359,7 +363,7 @@ def create_interface():
     <div class="main-header rtl">
         <h1>🤖 مستشار هيئة أسواق المال الكويتية</h1>
         <p>نظام RAG تفاعلي مع ذاكرة محادثة وذكاء اصطناعي محسّن</p>
-        <p>مساعد ذكي متفهم يتذكر المحادثة ويجيب على الأسئلة القانونية والمحادثة العامة</p>
+        <p>مدعوم بـ OpenAI text-embedding-3-large و GPT-4.1 Mini</p>
     </div>
     """)
 
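
Review note on the embedding swap: text-embedding-3-large returns 3072-dimensional vectors, while the previous all-MiniLM-L6-v2 index was built from 384-dimensional vectors. FAISS.load_local does not compare the stored index dimension against the new embedding model, so the old faiss_index/ folder will likely load without error and only fail at query time, which would bypass the rebuild fallback in _load_vector_store. Below is a minimal sketch of a defensive check, assuming the LangChain FAISS wrapper exposes the raw faiss index as vector_store.index; the load_index_checked helper name and the probe string are hypothetical, not from app.py.

# Sketch of a dimension guard for the saved index (assumptions noted above).
import os

from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-large",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

def load_index_checked(path: str = "faiss_index") -> FAISS:
    """Load the saved index only if its dimension matches the current embedding model."""
    vector_store = FAISS.load_local(
        path,
        embedding_model,
        allow_dangerous_deserialization=True,
    )
    # Embed a short probe to learn the current model's output dimension.
    expected_dim = len(embedding_model.embed_query("dimension probe"))
    if vector_store.index.d != expected_dim:
        # A mismatch means the index predates the embedding swap; raising here lets the
        # existing fallback rebuild it from processed_documents.json.
        raise ValueError(
            f"faiss index dimension {vector_store.index.d} does not match "
            f"embedding dimension {expected_dim}; rebuild required"
        )
    return vector_store

Rebuilding from processed_documents.json (about 2,091 chunks per the get_stats fallback) calls the OpenAI embeddings API at startup, so the first boot after this change will be slower and will incur embedding cost.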