"""BM25 keyword search over the product catalogue."""

from pathlib import Path
from typing import Any, Dict, List

import bm25s

from constants.constants import BM25_INDEX, PRODUCTS_DF

# Two directories above the one that contains this module.
# NOTE(review): presumably the project root -- confirm against the repo layout.
_FILE_PATH = Path(__file__).parents[2]


def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """
    Search products using BM25 lexical search (keyword matching).

    This is Stage 1: traditional keyword-based ranking without semantic
    understanding. Fast but misses semantic meaning and intent.

    Args:
        query: Search query string
        top_k: Number of top results to return (default: 5)

    Returns:
        List of dictionaries, one per hit in the order returned by the index,
        with the keys "product_name", "description", "main_category",
        "secondary_category" and "score" (the BM25 score as a Python float).
    """
    # Tokenize with English stopword removal before querying the index.
    query_tokens = bm25s.tokenize(query, stopwords="en")
    results, scores = BM25_INDEX.retrieve(query_tokens, k=top_k)

    # A single query was submitted, so only the first row of each output is
    # relevant.
    # NOTE(review): assumes the retrieved values are positional row offsets
    # into PRODUCTS_DF (i.e. the index was built from the same rows in the
    # same order) -- confirm where BM25_INDEX is constructed.
    hits: List[Dict[str, Any]] = []
    for idx, score in zip(results[0], scores[0]):
        # Look the row up once instead of once per extracted column.
        row = PRODUCTS_DF.iloc[idx]
        hits.append(
            {
                "product_name": row["Product Name"],
                "description": row["Description"],
                "main_category": row["MainCategory"],
                "secondary_category": row["SecondaryCategory"],
                "score": float(score),
            }
        )
    return hits