Spaces:
Sleeping
Sleeping
Update pipeline.py
Browse files- pipeline.py +24 -17
pipeline.py
CHANGED
|
@@ -131,24 +131,31 @@ def classify_query(query: str) -> str:
|
|
| 131 |
def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
|
| 132 |
try:
|
| 133 |
if os.path.exists(store_dir):
|
| 134 |
-
print(f"
|
| 135 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
except Exception as e:
|
| 154 |
raise RuntimeError(f"Error building/loading vector store: {str(e)}")
|
|
|
|
| 131 |
def _read_qa_documents(csv_path: str) -> list:
    """Read the Q&A CSV at *csv_path* and return one Document per row.

    Each document stores the answer text as ``page_content`` and the
    question text under the ``"question"`` metadata key.

    Raises:
        ValueError: if the 'Question'/'Answers' columns are missing or the
            CSV has no data rows.
    """
    df = pd.read_csv(csv_path)
    # Drop columns whose header starts with "Unnamed".
    df = df.loc[:, ~df.columns.str.contains(r"^Unnamed")]
    # Strip stray whitespace from headers (e.g. "Question " -> "Question"),
    # which makes a separate rename for a trailing-space header unnecessary.
    df.columns = df.columns.str.strip()
    # Accept the singular header, but never create a duplicate "Answers" column.
    if "Answer" in df.columns and "Answers" not in df.columns:
        df = df.rename(columns={"Answer": "Answers"})
    if "Question" not in df.columns or "Answers" not in df.columns:
        raise ValueError("CSV must have 'Question' and 'Answers' columns.")

    # Zip the two columns directly: unlike iterrows(), this does not coerce
    # each row to a common dtype before the values are stringified.
    docs = [
        Document(page_content=str(answer), metadata={"question": str(question)})
        for question, answer in zip(df["Question"], df["Answers"])
    ]
    if not docs:
        # Fail with a clear message instead of relying on how
        # FAISS.from_documents reacts to an empty document list.
        raise ValueError("CSV contains no rows to index.")
    return docs


def build_or_load_vectorstore(csv_path: str, store_dir: str) -> FAISS:
    """Load the FAISS store saved at *store_dir*, or build it from a CSV.

    If *store_dir* exists, the store is loaded from it. Otherwise the CSV at
    *csv_path* is read, every row becomes a document (answer as content,
    question as metadata), the documents are embedded and indexed, and the
    resulting store is saved to *store_dir* before being returned.

    Args:
        csv_path: Path to a CSV with 'Question' and 'Answers' (or 'Answer')
            columns.
        store_dir: Directory holding, or to receive, the saved FAISS store.

    Returns:
        The loaded or newly built FAISS vector store.

    Raises:
        RuntimeError: on any failure while loading or building; the
            underlying exception is attached as ``__cause__``.
    """
    model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
    try:
        if os.path.exists(store_dir):
            print(f"DEBUG: Found existing FAISS store at '{store_dir}'. Loading...")
            embeddings = HuggingFaceEmbeddings(model_name=model_name)
            # NOTE(review): load_local presumably unpickles the stored docstore,
            # and newer LangChain releases appear to require
            # allow_dangerous_deserialization=True here -- confirm against the
            # installed version, and only load stores this app wrote itself.
            return FAISS.load_local(store_dir, embeddings)

        print(f"DEBUG: Building new store from CSV: {csv_path}")
        # Validate the CSV before constructing the embedding model, so input
        # errors surface first.
        docs = _read_qa_documents(csv_path)
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        vectorstore = FAISS.from_documents(docs, embedding=embeddings)
        vectorstore.save_local(store_dir)
        return vectorstore
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"Error building/loading vector store: {e}") from e
|