Spaces:
Sleeping
Sleeping
| from typing import List | |
| import numpy as np | |
| import streamlit as st | |
| from src.models import Chapter | |
class Reranker:
    """Base reranker whose ``rerank`` hands the chapters back unchanged."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` exactly as given.

        Subclasses in this module override this method to impose an ordering.
        """
        # TODO: no base ordering is implemented; the input is passed through.
        return chapters
| # Rerankers applicable to SemanticRetriever results | |
def sort_chapters(chapters, scores):
    """Return ``chapters`` ordered by their paired ``scores``, highest first.

    Chapters and scores are paired positionally with ``zip``, so surplus items
    in the longer input are dropped. The sort is stable: chapters with equal
    scores keep their relative input order.
    """
    scored_pairs = list(zip(chapters, scores))
    scored_pairs.sort(key=lambda pair: pair[1], reverse=True)
    return [chapter for chapter, _score in scored_pairs]
class CombinedScoreAndNumberReranker(Reranker):
    """Rerank chapters by a weighted sum of two per-chapter scores.

    The two scores come from ``compute_num_verse_scores`` and
    ``compute_sem_sim_scores``; each is multiplied by its weight and the
    results are added to form the final ranking score.
    """

    def __init__(self, num_verse_weight=0.3, semantic_sim_weight=0.7):
        """Store the weights applied to the two score components.

        Args:
            num_verse_weight: Multiplier for the verse-count score.
            semantic_sim_weight: Multiplier for the semantic-similarity score.
        """
        self.num_verse_weight = num_verse_weight
        self.semantic_sim_weight = semantic_sim_weight

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by combined score, highest first."""
        verse_component = self.num_verse_weight * compute_num_verse_scores(chapters)
        similarity_component = self.semantic_sim_weight * compute_sem_sim_scores(
            chapters
        )
        return sort_chapters(chapters, verse_component + similarity_component)
class SemanticSimScoreReranker(Reranker):
    """Rerank chapters by the highest ``score`` among their highlight verses."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by top highlight-verse score, highest first."""
        # The module-level helper builds the same per-chapter max-score array
        # that was previously computed inline here.
        return sort_chapters(chapters, compute_sem_sim_scores(chapters))
class MaxVerseReranker(Reranker):
    """Rerank chapters by ``get_num_unique_highlight_verse()``, largest first."""

    def rerank(self, chapters: List[Chapter]) -> List[Chapter]:
        """Return ``chapters`` sorted by their unique highlight-verse count."""
        verse_counts = [
            candidate.get_num_unique_highlight_verse() for candidate in chapters
        ]
        return sort_chapters(chapters, verse_counts)
def compute_num_verse_scores(chapters):
    """Return each chapter's unique highlight-verse count scaled by the maximum.

    Args:
        chapters: Iterable of objects exposing
            ``get_num_unique_highlight_verse()``.

    Returns:
        A float ``numpy`` array with one entry per chapter, where the chapter
        with the most verses scores 1.0. An empty input gives an empty array,
        and if the largest count is zero every score is 0.0.
    """
    verse_counts = np.array(
        [chapter.get_num_unique_highlight_verse() for chapter in chapters],
        dtype=float,
    )
    # Nothing to normalise; taking a max of an empty array would raise.
    if verse_counts.size == 0:
        return verse_counts

    largest = verse_counts.max()
    # Avoid 0/0 -> NaN scores, which would make any later sort meaningless.
    if largest == 0:
        return np.zeros_like(verse_counts)

    return verse_counts / largest
def compute_sem_sim_scores(chapters):
    """Return an array holding each chapter's highest highlight-verse score.

    For every chapter this reads ``highlight_verses_df["score"]`` and takes its
    ``.max()``. NOTE(review): ``highlight_verses_df`` is presumably a pandas
    DataFrame with a numeric ``score`` column; confirm against ``Chapter``.
    """

    def _top_score(chapter):
        return chapter.highlight_verses_df["score"].max()

    return np.array(list(map(_top_score, chapters)))