davidtran999 committed on
Commit
6a63e0f
·
verified ·
1 Parent(s): f45e06e

Upload backend/core/config/hybrid_search_config.py with huggingface_hub

Browse files
backend/core/config/hybrid_search_config.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration for hybrid search weights and thresholds.
3
+ """
4
from dataclasses import dataclass
from typing import Dict, Optional
6
+
7
+
8
@dataclass
class HybridSearchConfig:
    """Weights and score thresholds for one hybrid-search profile.

    Instances are plain mutable dataclasses; every field has a default, so
    ``HybridSearchConfig()`` yields the baseline profile.

    NOTE(review): the code that consumes these values is not in this module,
    so the per-field notes below describe the apparent intent suggested by
    the field names -- confirm against the search implementation.
    """

    # Presumably the relative weight given to the BM25 (keyword) score.
    bm25_weight: float = 0.4
    # Presumably the relative weight given to the vector-similarity score.
    vector_weight: float = 0.6
    # Presumably the minimum combined score a result needs to be kept.
    min_hybrid_score: float = 0.1
    # Presumably the minimum BM25 score; the default of 0.0 is the lowest
    # threshold used in this module.
    min_bm25_score: float = 0.0
    # Presumably the minimum vector-similarity score.
    min_vector_score: float = 0.1
    top_k_multiplier: int = 2  # Get more results before filtering
17
+
18
+
19
# Default configuration: used whenever no content-type-specific profile applies.
DEFAULT_CONFIG = HybridSearchConfig()

# Per-content-type configurations, keyed by content-type name.
# Fields not given here keep the HybridSearchConfig defaults.
CONTENT_TYPE_CONFIGS: Dict[str, HybridSearchConfig] = {
    "procedure": HybridSearchConfig(
        bm25_weight=0.5,
        vector_weight=0.5,
        min_hybrid_score=0.15,
    ),
    "fine": HybridSearchConfig(
        bm25_weight=0.7,
        vector_weight=0.3,
        min_hybrid_score=0.08,
    ),
    "office": HybridSearchConfig(
        bm25_weight=0.3,
        vector_weight=0.7,
        min_hybrid_score=0.12,
    ),
    # Same values as the defaults, listed explicitly.
    "advisory": HybridSearchConfig(
        bm25_weight=0.4,
        vector_weight=0.6,
        min_hybrid_score=0.1,
    ),
    "legal": HybridSearchConfig(
        bm25_weight=0.6,
        vector_weight=0.4,
        min_hybrid_score=0.05,  # Lower threshold to find more legal document matches
    ),
}
50
+
51
+
52
def get_config(content_type: Optional[str] = None) -> HybridSearchConfig:
    """
    Get hybrid search configuration for content type.

    Args:
        content_type: Type of content ('procedure', 'fine', 'office',
            'advisory', 'legal'). ``None``, an empty string, or a name
            with no entry in ``CONTENT_TYPE_CONFIGS`` selects the default.

    Returns:
        HybridSearchConfig instance. This is the shared module-level
        object, not a copy, so mutating it affects every later caller.
    """
    # Falsy values (None, "") short-circuit to the default without a lookup.
    if not content_type:
        return DEFAULT_CONFIG
    return CONTENT_TYPE_CONFIGS.get(content_type, DEFAULT_CONFIG)
65
+