Rulga committed on
Commit
db04008
·
1 Parent(s): c4b548a

Refactor the vector store loading logic to return the vector store object instead of a string message, and enhance error handling during download

Browse files
Files changed (2) hide show
  1. app.py +3 -13
  2. src/knowledge_base/dataset.py +27 -6
app.py CHANGED
@@ -60,24 +60,14 @@ def load_vector_store():
60
  print(f"Debug - Download result: success={success}, result_type={type(result)}")
61
 
62
  if success:
63
- if result is None:
64
- print("Debug - Vector store is None despite success=True")
65
- return None
66
-
67
  if isinstance(result, str):
68
- print(f"Debug - Vector store is a string: {result}")
69
- return None
70
-
71
- # Check if the result has a similarity_search method
72
- if hasattr(result, 'similarity_search'):
73
- print("Debug - Vector store loaded successfully with similarity_search method")
74
- return result
75
- else:
76
- print(f"Debug - Vector store object does not have similarity_search method: {type(result)}")
77
  return None
 
78
  else:
79
  print(f"Debug - Failed to load vector store: {result}")
80
  return None
 
81
  except Exception as e:
82
  import traceback
83
  print(f"Exception loading knowledge base: {str(e)}")
 
60
  print(f"Debug - Download result: success={success}, result_type={type(result)}")
61
 
62
  if success:
 
 
 
 
63
  if isinstance(result, str):
64
+ print(f"Debug - Error message received: {result}")
 
 
 
 
 
 
 
 
65
  return None
66
+ return result
67
  else:
68
  print(f"Debug - Failed to load vector store: {result}")
69
  return None
70
+
71
  except Exception as e:
72
  import traceback
73
  print(f"Exception loading knowledge base: {str(e)}")
src/knowledge_base/dataset.py CHANGED
@@ -9,7 +9,8 @@ from typing import Tuple, List, Dict, Any, Optional, Union
9
  from datetime import datetime
10
  from huggingface_hub import HfApi, HfFolder
11
  from langchain_community.vectorstores import FAISS
12
- from config.settings import VECTOR_STORE_PATH, HF_TOKEN
 
13
 
14
  class DatasetManager:
15
  def __init__(self, dataset_name="Rulga/status-law-knowledge-base", token: Optional[str] = None):
@@ -188,7 +189,17 @@ class DatasetManager:
188
  try:
189
  # Check if local files exist and force is False
190
  if not force and os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
191
- return True, "Local files exist"
 
 
 
 
 
 
 
 
 
 
192
 
193
  # Ensure vector store directory exists
194
  os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
@@ -209,10 +220,20 @@ class DatasetManager:
209
  local_dir=VECTOR_STORE_PATH
210
  )
211
 
212
- return True, "Vector store downloaded successfully"
213
-
214
- except Exception as download_error:
215
- return False, f"Failed to download vector store: {str(download_error)}"
 
 
 
 
 
 
 
 
 
 
216
 
217
  except Exception as e:
218
  return False, f"Error in download_vector_store: {str(e)}"
 
9
  from datetime import datetime
10
  from huggingface_hub import HfApi, HfFolder
11
  from langchain_community.vectorstores import FAISS
12
+ from config.settings import VECTOR_STORE_PATH, HF_TOKEN, EMBEDDING_MODEL
13
+ from langchain.embeddings import HuggingFaceEmbeddings
14
 
15
  class DatasetManager:
16
  def __init__(self, dataset_name="Rulga/status-law-knowledge-base", token: Optional[str] = None):
 
189
  try:
190
  # Check if local files exist and force is False
191
  if not force and os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
192
+ # Instead of returning string, load and return the vector store
193
+ embeddings = HuggingFaceEmbeddings(
194
+ model_name=EMBEDDING_MODEL,
195
+ model_kwargs={'device': 'cpu'}
196
+ )
197
+ vector_store = FAISS.load_local(
198
+ VECTOR_STORE_PATH,
199
+ embeddings,
200
+ allow_dangerous_deserialization=True
201
+ )
202
+ return True, vector_store
203
 
204
  # Ensure vector store directory exists
205
  os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
 
220
  local_dir=VECTOR_STORE_PATH
221
  )
222
 
223
+ # After successful download, load and return the vector store
224
+ embeddings = HuggingFaceEmbeddings(
225
+ model_name=EMBEDDING_MODEL,
226
+ model_kwargs={'device': 'cpu'}
227
+ )
228
+ vector_store = FAISS.load_local(
229
+ VECTOR_STORE_PATH,
230
+ embeddings,
231
+ allow_dangerous_deserialization=True
232
+ )
233
+ return True, vector_store
234
+
235
+ except Exception as e:
236
+ return False, f"Failed to download vector store: {str(e)}"
237
 
238
  except Exception as e:
239
  return False, f"Error in download_vector_store: {str(e)}"