bharatverse11's picture
Upload reranker.py
3e9154b verified
raw
history blame
1.42 kB
"""
FILE: 06_reranker.py
PURPOSE:
- Improve ranking accuracy by comparing query + result pairs using a CrossEncoder
- Works on top FAISS candidates and reorders them based on semantic relevance
REQUIREMENTS:
pip install sentence-transformers
"""
from sentence_transformers import CrossEncoder
# Cross-encoder checkpoint used for semantic relevance matching of
# (query, candidate text) pairs; upgraded to the L-12 variant for maximum accuracy.
RERANK_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2"
class Reranker:
    """Reorder retrieval candidates by CrossEncoder relevance to a query.

    The model named by ``RERANK_MODEL`` is loaded once in the constructor and
    reused by every :meth:`rerank` call.
    """

    # Separator tokens that are rewritten to ", " before scoring.  The first
    # entry is the literal the original code matched; it looks like a
    # mis-decoded bullet character, so the real bullet is accepted as well.
    # NOTE(review): confirm which form the indexed texts actually contain.
    _SEPARATORS = ("β€’", "•")

    def __init__(self):
        """Load the CrossEncoder identified by ``RERANK_MODEL``."""
        # NOTE(review): the leading characters of this message look like a
        # mis-decoded emoji; the literal is kept as-is because it is runtime text.
        print(f"πŸ€– Loading reranking model: {RERANK_MODEL}")
        self.model = CrossEncoder(RERANK_MODEL)

    def _clean_text(self, text):
        """Return *text* with separators turned into commas and whitespace collapsed."""
        for separator in self._SEPARATORS:
            text = text.replace(separator, ", ")
        # The original `.replace(" ", " ")` was a no-op (presumably a
        # double-space replacement that lost a character); collapse every run
        # of whitespace instead, which also strips both ends.
        return " ".join(text.split())

    def rerank(self, query, candidates):
        """Score each candidate against *query* and return them best-first.

        Args:
            query: The search query string paired with every candidate text.
            candidates: List of dicts, each with at least a ``"text"`` key, e.g.
                ``{"name": "", "domain": "", "category": "", "region": "",
                "text": "...", "score": number}``.

        Returns:
            A new list holding the same dict objects, sorted by descending
            ``"rerank_score"``.  Each dict is annotated in place with that key
            (a ``float``), so the caller's objects are modified.

        Raises:
            KeyError: If a candidate has no ``"text"`` entry.
        """
        # Nothing to score: skip the model call, which is not guaranteed to
        # accept an empty batch.
        if not candidates:
            return []

        pairs = [(query, self._clean_text(c["text"])) for c in candidates]
        scores = self.model.predict(pairs)

        # Attach the model score to each candidate, then order best-first.
        for candidate, score in zip(candidates, scores):
            candidate["rerank_score"] = float(score)
        return sorted(candidates, key=lambda c: c["rerank_score"], reverse=True)