Upload backend/hue_portal/core/hybrid_search.py with huggingface_hub
Browse files
backend/hue_portal/core/hybrid_search.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
Hybrid search combining BM25 and vector similarity.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
from typing import List, Tuple, Optional, Dict, Any
|
| 5 |
import numpy as np
|
|
@@ -15,6 +19,12 @@ from .embeddings import (
|
|
| 15 |
from .embedding_utils import load_embedding
|
| 16 |
from .search_ml import expand_query_with_synonyms
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Default weights for hybrid search
|
| 20 |
DEFAULT_BM25_WEIGHT = 0.4
|
|
@@ -163,6 +173,9 @@ def get_vector_scores(
|
|
| 163 |
"""
|
| 164 |
Get vector similarity scores for queryset.
|
| 165 |
|
|
|
|
|
|
|
|
|
|
| 166 |
Args:
|
| 167 |
queryset: Django QuerySet to search.
|
| 168 |
query: Search query string.
|
|
@@ -171,6 +184,11 @@ def get_vector_scores(
|
|
| 171 |
Returns:
|
| 172 |
List of (object, vector_score) tuples.
|
| 173 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
if not query:
|
| 175 |
return []
|
| 176 |
|
|
|
|
| 1 |
"""
|
| 2 |
Hybrid search combining BM25 and vector similarity.
|
| 3 |
+
|
| 4 |
+
NOTE: This module is being phased out in favor of pure semantic search.
|
| 5 |
+
Pure semantic search (100% vector) is recommended when using Query Rewrite Strategy + BGE-M3.
|
| 6 |
+
See pure_semantic_search.py for the new implementation.
|
| 7 |
"""
|
| 8 |
from typing import List, Tuple, Optional, Dict, Any
|
| 9 |
import numpy as np
|
|
|
|
| 19 |
from .embedding_utils import load_embedding
|
| 20 |
from .search_ml import expand_query_with_synonyms
|
| 21 |
|
| 22 |
+
# Optionally import get_vector_scores from pure_semantic_search; if that module is unavailable, the legacy implementation in this file is used instead
|
| 23 |
+
try:
|
| 24 |
+
from .pure_semantic_search import get_vector_scores as _get_vector_scores_from_pure
|
| 25 |
+
except ImportError:
|
| 26 |
+
_get_vector_scores_from_pure = None
|
| 27 |
+
|
| 28 |
|
| 29 |
# Default weights for hybrid search
|
| 30 |
DEFAULT_BM25_WEIGHT = 0.4
|
|
|
|
| 173 |
"""
|
| 174 |
Get vector similarity scores for queryset.
|
| 175 |
|
| 176 |
+
DEPRECATED: Use pure_semantic_search.get_vector_scores() instead.
|
| 177 |
+
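This function is kept for backward compatibility: it delegates to that implementation when it can be imported and otherwise falls back to the original logic.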
|
| 178 |
+
|
| 179 |
Args:
|
| 180 |
queryset: Django QuerySet to search.
|
| 181 |
query: Search query string.
|
|
|
|
| 184 |
Returns:
|
| 185 |
List of (object, vector_score) tuples.
|
| 186 |
"""
|
| 187 |
+
# Try to use the new implementation from pure_semantic_search
|
| 188 |
+
if _get_vector_scores_from_pure:
|
| 189 |
+
return _get_vector_scores_from_pure(queryset, query, top_k)
|
| 190 |
+
|
| 191 |
+
# Fallback to original implementation
|
| 192 |
if not query:
|
| 193 |
return []
|
| 194 |
|