Spaces:
Running
Running
RobertoBarrosoLuque
committed on
Commit
·
e59c3d6
1
Parent(s):
73fd05b
Cleanup and refactor
Browse files
src/{modules → data_prep}/__init__.py
RENAMED
|
File without changes
|
src/data_prep/constants.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.data_prep.data_prep import (
|
| 2 |
+
load_bm25_index,
|
| 3 |
+
load_clean_amazon_product_data,
|
| 4 |
+
load_faiss_index,
|
| 5 |
+
)
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
BM25_INDEX = load_bm25_index()
|
| 9 |
+
FAISS_INDEX = load_faiss_index()
|
| 10 |
+
PRODUCTS_DF = load_clean_amazon_product_data()
|
src/{modules → data_prep}/data_prep.py
RENAMED
|
File without changes
|
src/fireworks/__init__.py
ADDED
|
File without changes
|
src/{modules → fireworks}/inference.py
RENAMED
|
File without changes
|
src/search/__init__.py
ADDED
|
File without changes
|
src/{modules → search}/bm25_lexical_search.py
RENAMED
|
@@ -1,15 +1,11 @@
|
|
| 1 |
import bm25s
|
| 2 |
from typing import List, Dict
|
| 3 |
from pathlib import Path
|
| 4 |
-
from src.
|
| 5 |
|
| 6 |
_FILE_PATH = Path(__file__).parents[2]
|
| 7 |
|
| 8 |
|
| 9 |
-
_BM25_INDEX = load_bm25_index()
|
| 10 |
-
_PRODUCTS_DF = load_clean_amazon_product_data()
|
| 11 |
-
|
| 12 |
-
|
| 13 |
def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, any]]:
|
| 14 |
"""
|
| 15 |
Search products using BM25 lexical search (keyword matching).
|
|
@@ -25,14 +21,14 @@ def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, any]]:
|
|
| 25 |
List of dictionaries containing product information and scores
|
| 26 |
"""
|
| 27 |
query_tokens = bm25s.tokenize(query, stopwords="en")
|
| 28 |
-
results, scores =
|
| 29 |
|
| 30 |
return [
|
| 31 |
{
|
| 32 |
-
"product_name":
|
| 33 |
-
"description":
|
| 34 |
-
"main_category":
|
| 35 |
-
"secondary_category":
|
| 36 |
"score": float(score),
|
| 37 |
}
|
| 38 |
for idx, score in zip(results[0], scores[0])
|
|
|
|
| 1 |
import bm25s
|
| 2 |
from typing import List, Dict
|
| 3 |
from pathlib import Path
|
| 4 |
+
from src.data_prep.constants import BM25_INDEX, PRODUCTS_DF
|
| 5 |
|
| 6 |
_FILE_PATH = Path(__file__).parents[2]
|
| 7 |
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, any]]:
|
| 10 |
"""
|
| 11 |
Search products using BM25 lexical search (keyword matching).
|
|
|
|
| 21 |
List of dictionaries containing product information and scores
|
| 22 |
"""
|
| 23 |
query_tokens = bm25s.tokenize(query, stopwords="en")
|
| 24 |
+
results, scores = BM25_INDEX.retrieve(query_tokens, k=top_k)
|
| 25 |
|
| 26 |
return [
|
| 27 |
{
|
| 28 |
+
"product_name": PRODUCTS_DF.iloc[idx]["Product Name"],
|
| 29 |
+
"description": PRODUCTS_DF.iloc[idx]["Description"],
|
| 30 |
+
"main_category": PRODUCTS_DF.iloc[idx]["MainCategory"],
|
| 31 |
+
"secondary_category": PRODUCTS_DF.iloc[idx]["SecondaryCategory"],
|
| 32 |
"score": float(score),
|
| 33 |
}
|
| 34 |
for idx, score in zip(results[0], scores[0])
|