Spaces:
Sleeping
Sleeping
File size: 2,813 Bytes
0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 0552d2e b945fc7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# vector.py
import os
import glob
import pandas as pd
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
# ==============================
# CONFIGURATION
# ==============================
# Where the FAISS index is persisted on disk (relative to the working dir).
VECTOR_PATH = "faiss_index"
# Shared Hugging Face model cache; defaults to the container path used in deployment.
HF_CACHE_DIR = os.getenv("HF_CACHE_DIR", "/app/huggingface_cache")
# Sentence-embedding model; overridable via env for swapping models without a code change.
EMBEDDING_MODEL = os.getenv("HF_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
# Optional HF Hub token (needed only for gated/private models); may be None.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Ensure cache directory exists
os.makedirs(HF_CACHE_DIR, exist_ok=True)
# ==============================
# EMBEDDING SETUP
# ==============================
# Module-level singleton: downloads the model on first use and is shared by
# both index building and querying below.
embeddings = HuggingFaceEmbeddings(
model_name=EMBEDDING_MODEL,
cache_folder=HF_CACHE_DIR,
model_kwargs={"token": HF_TOKEN}
)
# ==============================
# VECTOR STORE OPERATIONS
# ==============================
def build_vectorstore():
    """Build and persist a FAISS index from all CSV files in datasets/.

    For each CSV: if a "text" column exists it supplies the documents
    (NaNs dropped, values coerced to str); otherwise every column of each
    row is joined into one space-separated string. Unreadable files are
    skipped with a warning rather than aborting the whole build.

    Returns:
        FAISS: the newly built vector store (also saved to VECTOR_PATH).
    """
    texts = []
    for file in glob.glob("datasets/*.csv"):
        try:
            df = pd.read_csv(file)
            if "text" in df.columns:
                # Primary text field
                texts.extend(df["text"].dropna().astype(str).tolist())
            else:
                # Combine all columns if no "text" column found
                texts.extend(" ".join(map(str, row.values)) for _, row in df.iterrows())
            print(f"[OK] Loaded {len(df)} rows from {file}")
        except Exception as e:
            # Best-effort ingestion: one malformed CSV must not kill the build.
            print(f"[WARN] Skipping {file}, error: {e}")
    if not texts:
        # FAISS.from_texts cannot build from an empty list, so seed with a stub.
        texts = ["AgriCopilot initialized knowledge base."]
    print("[INFO] Building FAISS vector index...")
    vectorstore = FAISS.from_texts(texts, embeddings)
    vectorstore.save_local(VECTOR_PATH)
    print(f"[INFO] Vectorstore built successfully with {len(texts)} documents.")
    return vectorstore
def load_vector_store():
    """Load the persisted FAISS index, building a fresh one if absent.

    Returns:
        FAISS: the loaded (or newly built) vector store.
    """
    if os.path.exists(VECTOR_PATH):
        print("[INFO] Loading existing FAISS index...")
        # FAISS indexes are pickled on disk; deserialization must be explicitly
        # allowed. Acceptable here only because we wrote this file ourselves.
        return FAISS.load_local(VECTOR_PATH, embeddings, allow_dangerous_deserialization=True)
    print("[INFO] No existing FAISS index found. Building a new one...")
    return build_vectorstore()
vectorstore = load_vector_store()
# ==============================
# VECTOR QUERY
# ==============================
def query_vector(query: str, k: int = 3):
    """
    Performs a semantic similarity search using FAISS.

    Args:
        query: Natural-language query to embed and search with.
        k: Number of top matches to return (default 3).

    Returns:
        list[str]: Top-k relevant text chunks from the knowledge base, or a
        single fallback message if the search fails for any reason.
    """
    try:
        docs = vectorstore.similarity_search(query, k=k)
        return [d.page_content for d in docs]
    except Exception as e:
        # Deliberate best-effort: callers always get a usable list, never an exception.
        print(f"[WARN] Vector query error: {e}")
        return ["No relevant knowledge found."]
|