Spaces:
Sleeping
Sleeping
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from config.rag_config import RAGConfig | |
class Retriever:
    """Rank stored texts against a query embedding by cosine similarity.

    ``embeddings`` and ``texts`` are indexed in parallel: the score computed
    for row ``i`` of ``embeddings`` is reported together with ``texts[i]``.
    """

    def __init__(self, embeddings, texts, config: "RAGConfig"):
        """Store the corpus and read retrieval settings from ``config``.

        Args:
            embeddings: Corpus embeddings, passed unchanged as the second
                argument of ``cosine_similarity``.
                NOTE(review): presumably shaped (n_texts, dim) - confirm
                against the caller.
            texts: Sequence indexed in parallel with ``embeddings``.
            config: Supplies ``top_k`` (maximum number of results) and
                ``similarity_threshold`` (minimum score for a result).
        """
        self.embeddings = embeddings
        self.texts = texts
        self.top_k = config.top_k
        self.threshold = config.similarity_threshold

    def retrieve(self, query_embedding):
        """Return the best-matching texts for ``query_embedding``.

        Args:
            query_embedding: A single query vector; it is wrapped in a
                one-element list before being compared with the corpus.

        Returns:
            A list of ``(text, score)`` tuples in descending score order,
            holding at most ``top_k`` entries whose score is at least the
            configured threshold. When that selection is empty, the single
            highest-scoring text is returned instead. An empty corpus
            yields an empty list.
        """
        # len() instead of truthiness: the truth value of a multi-element
        # NumPy array is ambiguous and raises.
        if len(self.embeddings) == 0:
            # Nothing to rank; avoid calling the similarity/argmax routines
            # on empty input, where they would raise.
            return []

        scores = cosine_similarity([query_embedding], self.embeddings)[0]

        # Threshold filtering: order indices best-first, then keep only the
        # ones whose score reaches the threshold, capped at top_k.
        ranked = np.argsort(scores)[::-1]
        kept = ranked[scores[ranked] >= self.threshold][:self.top_k]
        results = [(self.texts[i], float(scores[i])) for i in kept]

        if not results:
            # Fallback: never answer with nothing when the corpus is
            # non-empty; surface the single closest text even though it is
            # below the threshold.
            best_idx = int(np.argmax(scores))
            results = [(self.texts[best_idx], float(scores[best_idx]))]
        return results