RobertoBarrosoLuque committed on
Commit
e59c3d6
·
1 Parent(s): 73fd05b

Cleanup and refactor

Browse files
src/{modules → data_prep}/__init__.py RENAMED
File without changes
src/data_prep/constants.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.data_prep.data_prep import (
2
+ load_bm25_index,
3
+ load_clean_amazon_product_data,
4
+ load_faiss_index,
5
+ )
6
+
7
+
8
+ BM25_INDEX = load_bm25_index()
9
+ FAISS_INDEX = load_faiss_index()
10
+ PRODUCTS_DF = load_clean_amazon_product_data()
src/{modules → data_prep}/data_prep.py RENAMED
File without changes
src/fireworks/__init__.py ADDED
File without changes
src/{modules → fireworks}/inference.py RENAMED
File without changes
src/search/__init__.py ADDED
File without changes
src/{modules → search}/bm25_lexical_search.py RENAMED
@@ -1,15 +1,11 @@
1
  import bm25s
2
  from typing import List, Dict
3
  from pathlib import Path
4
- from src.modules.data_prep import load_bm25_index, load_clean_amazon_product_data
5
 
6
  _FILE_PATH = Path(__file__).parents[2]
7
 
8
 
9
- _BM25_INDEX = load_bm25_index()
10
- _PRODUCTS_DF = load_clean_amazon_product_data()
11
-
12
-
13
  def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, any]]:
14
  """
15
  Search products using BM25 lexical search (keyword matching).
@@ -25,14 +21,14 @@ def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, any]]:
25
  List of dictionaries containing product information and scores
26
  """
27
  query_tokens = bm25s.tokenize(query, stopwords="en")
28
- results, scores = _BM25_INDEX.retrieve(query_tokens, k=top_k)
29
 
30
  return [
31
  {
32
- "product_name": _PRODUCTS_DF.iloc[idx]["Product Name"],
33
- "description": _PRODUCTS_DF.iloc[idx]["Description"],
34
- "main_category": _PRODUCTS_DF.iloc[idx]["MainCategory"],
35
- "secondary_category": _PRODUCTS_DF.iloc[idx]["SecondaryCategory"],
36
  "score": float(score),
37
  }
38
  for idx, score in zip(results[0], scores[0])
 
1
  import bm25s
2
  from typing import List, Dict
3
  from pathlib import Path
4
+ from src.data_prep.constants import BM25_INDEX, PRODUCTS_DF
5
 
6
  _FILE_PATH = Path(__file__).parents[2]
7
 
8
 
 
 
 
 
9
  def search_bm25(query: str, top_k: int = 5) -> List[Dict[str, any]]:
10
  """
11
  Search products using BM25 lexical search (keyword matching).
 
21
  List of dictionaries containing product information and scores
22
  """
23
  query_tokens = bm25s.tokenize(query, stopwords="en")
24
+ results, scores = BM25_INDEX.retrieve(query_tokens, k=top_k)
25
 
26
  return [
27
  {
28
+ "product_name": PRODUCTS_DF.iloc[idx]["Product Name"],
29
+ "description": PRODUCTS_DF.iloc[idx]["Description"],
30
+ "main_category": PRODUCTS_DF.iloc[idx]["MainCategory"],
31
+ "secondary_category": PRODUCTS_DF.iloc[idx]["SecondaryCategory"],
32
  "score": float(score),
33
  }
34
  for idx, score in zip(results[0], scores[0])