davidtran999 committed on
Commit
8883a13
·
verified ·
1 Parent(s): 385215c

Upload backend/hue_portal/core/tests/test_pure_semantic_search.py with huggingface_hub

Browse files
backend/hue_portal/core/tests/test_pure_semantic_search.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unit tests for Pure Semantic Search.
3
+ """
4
+ import unittest
5
+ from unittest.mock import Mock, patch, MagicMock
6
+ from django.test import TestCase
7
+ from django.db.models import QuerySet
8
+ from hue_portal.core.pure_semantic_search import (
9
+ get_vector_scores,
10
+ parallel_vector_search,
11
+ pure_semantic_search,
12
+ calculate_exact_match_boost
13
+ )
14
+
15
+
16
class TestPureSemanticSearch(unittest.TestCase):
    """Unit tests for the pure semantic search helpers.

    The embedding helpers and the queryset are replaced with mocks in every
    test, so a plain ``unittest.TestCase`` is used.
    """

    # Length of the fake embedding vectors (BGE-M3 dimension).
    EMBEDDING_DIM = 1024

    def setUp(self):
        """Create a QuerySet-shaped mock that starts out empty."""
        self.mock_queryset = Mock(spec=QuerySet)
        self._set_queryset_items([])

    def _set_queryset_items(self, items):
        """Make ``self.mock_queryset`` iterate over *items* and report their count.

        ``side_effect`` builds a fresh iterator on every ``iter()`` call. A
        fixed ``return_value=iter(items)`` is exhausted after the first pass,
        so any later iteration over the mock would silently see no objects.
        ``__len__`` is updated together with ``__iter__`` so the two never
        disagree.
        """
        items = list(items)
        self.mock_queryset.__iter__ = Mock(side_effect=lambda: iter(items))
        self.mock_queryset.__len__ = Mock(return_value=len(items))

    @patch('hue_portal.core.pure_semantic_search.get_embedding_model')
    @patch('hue_portal.core.pure_semantic_search.generate_embedding')
    @patch('hue_portal.core.pure_semantic_search.load_embedding')
    @patch('hue_portal.core.pure_semantic_search.cosine_similarity')
    def test_get_vector_scores(self, mock_cosine, mock_load, mock_gen, mock_model):
        """get_vector_scores returns a list whose entries are 2-tuples."""
        # Patches are applied bottom-up, so the mock arguments arrive in the
        # reverse order of the decorators above.
        mock_model.return_value = Mock()
        mock_gen.return_value = [0.1] * self.EMBEDDING_DIM
        mock_cosine.return_value = 0.8
        # A callable side effect hands out a fresh vector on every call and
        # never runs out, unlike a fixed-length list of return values.
        mock_load.side_effect = lambda *args, **kwargs: [0.1] * self.EMBEDDING_DIM

        obj1 = Mock()
        obj2 = Mock()
        self._set_queryset_items([obj1, obj2])

        results = get_vector_scores(self.mock_queryset, "test query", top_k=10)

        self.assertIsInstance(results, list)
        # Every returned entry (if any) must be a 2-tuple.
        for entry in results:
            self.assertIsInstance(entry, tuple)
            self.assertEqual(len(entry), 2)

    def test_calculate_exact_match_boost(self):
        """A matching phrase earns a boost in (0, 1]; a non-matching one earns less."""
        obj = Mock()
        obj.title = "Quy định điều 12"
        obj.name = "Điều 12"

        # Phrase present in both fields.
        boost = calculate_exact_match_boost(obj, "điều 12", ["title", "name"])
        self.assertGreater(boost, 0.0)
        self.assertLessEqual(boost, 1.0)

        # Phrase absent from both fields.
        boost2 = calculate_exact_match_boost(obj, "điều 99", ["title", "name"])
        self.assertLess(boost2, boost)

    @patch('hue_portal.core.pure_semantic_search.get_vector_scores')
    def test_parallel_vector_search_single_query(self, mock_get_scores):
        """parallel_vector_search returns a list for a single query."""
        obj1 = Mock()
        obj2 = Mock()
        mock_get_scores.return_value = [(obj1, 0.9), (obj2, 0.8)]
        self._set_queryset_items([obj1, obj2])

        results = parallel_vector_search(
            ["test query"],
            self.mock_queryset,
            top_k_per_query=5,
            final_top_k=2,
        )

        # NOTE: this is expected to take the single-query path; only the
        # result type is asserted here.
        self.assertIsInstance(results, list)

    @patch('hue_portal.core.pure_semantic_search.get_vector_scores')
    def test_parallel_vector_search_multiple_queries(self, mock_get_scores):
        """parallel_vector_search returns a list for several queries."""
        obj1 = Mock()
        obj2 = Mock()
        obj3 = Mock()

        # One result list per query; obj2 appears in both with different scores.
        mock_get_scores.side_effect = [
            [(obj1, 0.9), (obj2, 0.8)],    # Query 1
            [(obj2, 0.85), (obj3, 0.75)],  # Query 2
        ]
        self._set_queryset_items([obj1, obj2, obj3])

        results = parallel_vector_search(
            ["query 1", "query 2"],
            self.mock_queryset,
            top_k_per_query=5,
            final_top_k=3,
        )

        # NOTE: the merge is expected to keep obj2 with its best score (0.85);
        # only the result type is asserted here.
        self.assertIsInstance(results, list)

    @patch('hue_portal.core.pure_semantic_search.parallel_vector_search')
    def test_pure_semantic_search_single(self, mock_parallel):
        """pure_semantic_search returns the objects, in order, without scores."""
        obj1 = Mock()
        obj2 = Mock()
        mock_parallel.return_value = [(obj1, 0.9), (obj2, 0.8)]

        results = pure_semantic_search(
            ["test query"],
            self.mock_queryset,
            top_k=2,
        )

        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0], obj1)
        self.assertEqual(results[1], obj2)

    @patch('hue_portal.core.pure_semantic_search.parallel_vector_search')
    def test_pure_semantic_search_multiple(self, mock_parallel):
        """pure_semantic_search calls parallel_vector_search once for many queries."""
        obj1 = Mock()
        obj2 = Mock()
        mock_parallel.return_value = [(obj1, 0.9), (obj2, 0.8)]

        results = pure_semantic_search(
            ["query 1", "query 2", "query 3"],
            self.mock_queryset,
            top_k=2,
        )

        self.assertIsInstance(results, list)
        mock_parallel.assert_called_once()

    def test_pure_semantic_search_empty_queries(self):
        """An empty query list yields an empty result list."""
        results = pure_semantic_search([], self.mock_queryset, top_k=10)
        self.assertEqual(results, [])
152
+
153
+
154
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
156
+