Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
"""
|
| 4 |
Enhanced CMA RAG Chatbot for Hugging Face Spaces
|
| 5 |
Conversational AI with memory for Kuwait Capital Markets Authority documents
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
|
@@ -12,9 +13,8 @@ import gradio as gr
|
|
| 12 |
from typing import List, Tuple, Dict, Any
|
| 13 |
import faiss
|
| 14 |
import pickle
|
| 15 |
-
from sentence_transformers import SentenceTransformer
|
| 16 |
from langchain_community.vectorstores import FAISS
|
| 17 |
-
from
|
| 18 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 19 |
from langchain.schema import Document
|
| 20 |
from langchain_openai import ChatOpenAI
|
|
@@ -33,13 +33,12 @@ class EnhancedCMARAGBot:
|
|
| 33 |
logger.info("🚀 Starting Enhanced CMA Conversational RAG Chatbot...")
|
| 34 |
logger.info("📚 Loading vector store and initializing conversational AI...")
|
| 35 |
|
| 36 |
-
# Initialize embedding model
|
| 37 |
-
self.embedding_model =
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
encode_kwargs={'normalize_embeddings': True}
|
| 41 |
)
|
| 42 |
-
logger.info("
|
| 43 |
|
| 44 |
# Load vector store
|
| 45 |
self.vector_store = self._load_vector_store()
|
|
@@ -47,12 +46,12 @@ class EnhancedCMARAGBot:
|
|
| 47 |
|
| 48 |
# Initialize OpenAI LLM
|
| 49 |
self.llm = ChatOpenAI(
|
| 50 |
-
model="gpt-
|
| 51 |
temperature=0.1,
|
| 52 |
max_tokens=1000,
|
| 53 |
openai_api_key=os.getenv("OPENAI_API_KEY")
|
| 54 |
)
|
| 55 |
-
logger.info("OpenAI LLM initialized with gpt-
|
| 56 |
|
| 57 |
# Setup retrieval chain
|
| 58 |
self._setup_retrieval_chain()
|
|
@@ -64,21 +63,24 @@ class EnhancedCMARAGBot:
|
|
| 64 |
logger.info("Enhanced CMA RAG Bot initialized successfully")
|
| 65 |
|
| 66 |
def _load_vector_store(self):
|
| 67 |
-
"""Load the pre-built FAISS vector store"""
|
| 68 |
try:
|
|
|
|
| 69 |
vector_store = FAISS.load_local(
|
| 70 |
"faiss_index",
|
| 71 |
self.embedding_model,
|
| 72 |
allow_dangerous_deserialization=True
|
| 73 |
)
|
|
|
|
| 74 |
return vector_store
|
| 75 |
except Exception as e:
|
| 76 |
-
logger.
|
| 77 |
-
|
|
|
|
| 78 |
return self._create_vector_store_from_documents()
|
| 79 |
|
| 80 |
def _create_vector_store_from_documents(self):
|
| 81 |
-
"""Create vector store from processed documents"""
|
| 82 |
try:
|
| 83 |
with open('processed_documents.json', 'r', encoding='utf-8') as f:
|
| 84 |
processed_docs = json.load(f)
|
|
@@ -91,12 +93,14 @@ class EnhancedCMARAGBot:
|
|
| 91 |
)
|
| 92 |
documents.append(doc)
|
| 93 |
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
vector_store = FAISS.from_documents(documents, self.embedding_model)
|
| 96 |
|
| 97 |
# Save for future use
|
| 98 |
vector_store.save_local("faiss_index")
|
| 99 |
-
logger.info("Vector store created and saved")
|
| 100 |
|
| 101 |
return vector_store
|
| 102 |
|
|
@@ -323,9 +327,9 @@ def get_stats():
|
|
| 323 |
try:
|
| 324 |
with open('processed_documents.json', 'r', encoding='utf-8') as f:
|
| 325 |
docs = json.load(f)
|
| 326 |
-
return len(docs), 19, "GPT-
|
| 327 |
except:
|
| 328 |
-
return "2,091", "19", "GPT-
|
| 329 |
|
| 330 |
# Create Gradio interface
|
| 331 |
def create_interface():
|
|
@@ -359,7 +363,7 @@ def create_interface():
|
|
| 359 |
<div class="main-header rtl">
|
| 360 |
<h1>🤖 مستشار هيئة أسواق المال الكويتية</h1>
|
| 361 |
<p>نظام RAG تفاعلي مع ذاكرة محادثة وذكاء اصطناعي محسّن</p>
|
| 362 |
-
<p
|
| 363 |
</div>
|
| 364 |
""")
|
| 365 |
|
|
|
|
| 3 |
"""
|
| 4 |
Enhanced CMA RAG Chatbot for Hugging Face Spaces
|
| 5 |
Conversational AI with memory for Kuwait Capital Markets Authority documents
|
| 6 |
+
Using OpenAI text-embedding-3-large and gpt-4.1-mini
|
| 7 |
"""
|
| 8 |
|
| 9 |
import os
|
|
|
|
| 13 |
from typing import List, Tuple, Dict, Any
|
| 14 |
import faiss
|
| 15 |
import pickle
|
|
|
|
| 16 |
from langchain_community.vectorstores import FAISS
|
| 17 |
+
from langchain_openai.embeddings import OpenAIEmbeddings
|
| 18 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 19 |
from langchain.schema import Document
|
| 20 |
from langchain_openai import ChatOpenAI
|
|
|
|
| 33 |
logger.info("🚀 Starting Enhanced CMA Conversational RAG Chatbot...")
|
| 34 |
logger.info("📚 Loading vector store and initializing conversational AI...")
|
| 35 |
|
| 36 |
+
# Initialize OpenAI embedding model
|
| 37 |
+
self.embedding_model = OpenAIEmbeddings(
|
| 38 |
+
model="text-embedding-3-large",
|
| 39 |
+
openai_api_key=os.getenv("OPENAI_API_KEY")
|
|
|
|
| 40 |
)
|
| 41 |
+
logger.info("OpenAI embedding model initialized (text-embedding-3-large)")
|
| 42 |
|
| 43 |
# Load vector store
|
| 44 |
self.vector_store = self._load_vector_store()
|
|
|
|
| 46 |
|
| 47 |
# Initialize OpenAI LLM
|
| 48 |
self.llm = ChatOpenAI(
|
| 49 |
+
model="gpt-4.1-mini",
|
| 50 |
temperature=0.1,
|
| 51 |
max_tokens=1000,
|
| 52 |
openai_api_key=os.getenv("OPENAI_API_KEY")
|
| 53 |
)
|
| 54 |
+
logger.info("OpenAI LLM initialized with gpt-4.1-mini")
|
| 55 |
|
| 56 |
# Setup retrieval chain
|
| 57 |
self._setup_retrieval_chain()
|
|
|
|
| 63 |
logger.info("Enhanced CMA RAG Bot initialized successfully")
|
| 64 |
|
| 65 |
def _load_vector_store(self):
|
| 66 |
+
"""Load the pre-built FAISS vector store or create new one with OpenAI embeddings"""
|
| 67 |
try:
|
| 68 |
+
# Try to load existing vector store
|
| 69 |
vector_store = FAISS.load_local(
|
| 70 |
"faiss_index",
|
| 71 |
self.embedding_model,
|
| 72 |
allow_dangerous_deserialization=True
|
| 73 |
)
|
| 74 |
+
logger.info("Existing vector store loaded")
|
| 75 |
return vector_store
|
| 76 |
except Exception as e:
|
| 77 |
+
logger.warning(f"Could not load existing vector store: {e}")
|
| 78 |
+
logger.info("Creating new vector store with OpenAI embeddings...")
|
| 79 |
+
# Create from processed documents with OpenAI embeddings
|
| 80 |
return self._create_vector_store_from_documents()
|
| 81 |
|
| 82 |
def _create_vector_store_from_documents(self):
|
| 83 |
+
"""Create vector store from processed documents using OpenAI embeddings"""
|
| 84 |
try:
|
| 85 |
with open('processed_documents.json', 'r', encoding='utf-8') as f:
|
| 86 |
processed_docs = json.load(f)
|
|
|
|
| 93 |
)
|
| 94 |
documents.append(doc)
|
| 95 |
|
| 96 |
+
logger.info(f"Creating vector store from {len(documents)} documents...")
|
| 97 |
+
|
| 98 |
+
# Create vector store with OpenAI embeddings
|
| 99 |
vector_store = FAISS.from_documents(documents, self.embedding_model)
|
| 100 |
|
| 101 |
# Save for future use
|
| 102 |
vector_store.save_local("faiss_index")
|
| 103 |
+
logger.info("Vector store created and saved with OpenAI embeddings")
|
| 104 |
|
| 105 |
return vector_store
|
| 106 |
|
|
|
|
| 327 |
try:
|
| 328 |
with open('processed_documents.json', 'r', encoding='utf-8') as f:
|
| 329 |
docs = json.load(f)
|
| 330 |
+
return len(docs), 19, "GPT-4.1 Mini", "تفاعلية مع ذاكرة"
|
| 331 |
except:
|
| 332 |
+
return "2,091", "19", "GPT-4.1 Mini", "تفاعلية مع ذاكرة"
|
| 333 |
|
| 334 |
# Create Gradio interface
|
| 335 |
def create_interface():
|
|
|
|
| 363 |
<div class="main-header rtl">
|
| 364 |
<h1>🤖 مستشار هيئة أسواق المال الكويتية</h1>
|
| 365 |
<p>نظام RAG تفاعلي مع ذاكرة محادثة وذكاء اصطناعي محسّن</p>
|
| 366 |
+
<p>مدعوم بـ OpenAI text-embedding-3-large و GPT-4.1 Mini</p>
|
| 367 |
</div>
|
| 368 |
""")
|
| 369 |
|