davidtran999 committed on
Commit
45dea2f
·
verified ·
1 Parent(s): f759130

Upload backend/core/tests/test_embeddings.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. backend/core/tests/test_embeddings.py +146 -0
backend/core/tests/test_embeddings.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unit tests for embeddings functionality.
3
+ """
4
+ import unittest
5
+ import numpy as np
6
+ from django.test import TestCase
7
+
8
+ from hue_portal.core.embeddings import (
9
+ get_embedding_model,
10
+ generate_embedding,
11
+ generate_embeddings_batch,
12
+ cosine_similarity,
13
+ get_embedding_dimension
14
+ )
15
+ from hue_portal.core.embedding_utils import (
16
+ save_embedding,
17
+ load_embedding,
18
+ has_embedding
19
+ )
20
+
21
+
22
class EmbeddingsTestCase(TestCase):
    """Test embedding generation and the cosine-similarity helper.

    Tests that depend on a real embedding being produced are reported as
    *skipped* when the helper returns nothing, instead of passing without
    having asserted anything.
    """

    # Reason shown in the test report when a model-dependent test is skipped.
    # NOTE(review): a ``None`` result presumably means no model is installed
    # in the test environment -- confirm against hue_portal.core.embeddings.
    _NO_MODEL_REASON = "embedding model not available in this environment"

    def _embed_or_skip(self, text):
        """Return ``generate_embedding(text)``, skipping the test on ``None``."""
        embedding = generate_embedding(text)
        if embedding is None:
            self.skipTest(self._NO_MODEL_REASON)
        return embedding

    def test_get_embedding_model(self):
        """Smoke test: loading the embedding model must not raise."""
        model = get_embedding_model()
        # The previous ``assertIsNotNone(model or True)`` could never fail.
        # The real check here is that the call above did not raise; a missing
        # model is surfaced as a skip so the report stays honest.
        if model is None:
            self.skipTest(self._NO_MODEL_REASON)

    def test_generate_embedding(self):
        """Test generating an embedding for a single text."""
        embedding = self._embed_or_skip("Thủ tục đăng ký cư trú")

        self.assertIsInstance(embedding, np.ndarray)
        self.assertGreater(len(embedding), 0)

    def test_generate_embeddings_batch(self):
        """Test generating embeddings for multiple texts in batches."""
        texts = [
            "Thủ tục đăng ký cư trú",
            "Mức phạt vượt đèn đỏ",
            "Địa chỉ công an phường",
        ]
        # batch_size is smaller than len(texts), so more than one batch is
        # needed to cover the input.
        embeddings = generate_embeddings_batch(texts, batch_size=2)

        if not embeddings or embeddings[0] is None:
            self.skipTest(self._NO_MODEL_REASON)

        self.assertEqual(len(embeddings), len(texts))
        self.assertIsInstance(embeddings[0], np.ndarray)

    def test_cosine_similarity(self):
        """Test cosine similarity for identical and for orthogonal vectors."""
        vec1 = np.array([1.0, 0.0, 0.0])
        vec2 = np.array([1.0, 0.0, 0.0])

        # Identical vectors -> similarity 1.
        self.assertAlmostEqual(cosine_similarity(vec1, vec2), 1.0, places=5)

        # Orthogonal vectors -> similarity 0.
        vec3 = np.array([0.0, 1.0, 0.0])
        self.assertAlmostEqual(cosine_similarity(vec1, vec3), 0.0, places=5)

    def test_cosine_similarity_orthogonal(self):
        """Test cosine similarity for orthogonal 2-D vectors."""
        vec1 = np.array([1.0, 0.0])
        vec2 = np.array([0.0, 1.0])

        self.assertAlmostEqual(cosine_similarity(vec1, vec2), 0.0, places=5)

    def test_get_embedding_dimension(self):
        """Test that the embedding dimension is a non-negative ``int``."""
        dim = get_embedding_dimension()
        # Zero is accepted: the dimension might be 0 if no model is available.
        self.assertIsInstance(dim, int)
        self.assertGreaterEqual(dim, 0)

    def test_similar_texts_have_similar_embeddings(self):
        """Test that similar texts score higher than unrelated texts."""
        emb1 = self._embed_or_skip("Thủ tục đăng ký cư trú")
        emb2 = self._embed_or_skip("Đăng ký thủ tục cư trú")
        emb3 = self._embed_or_skip("Mức phạt giao thông")

        sim_similar = cosine_similarity(emb1, emb2)
        sim_different = cosine_similarity(emb1, emb3)

        # The reworded sentence must be closer to the original than the
        # sentence about a different topic.
        self.assertGreater(sim_similar, sim_different)
97
+
98
+
99
class EmbeddingUtilsTestCase(TestCase):
    """Test saving, loading and detecting embeddings on a ``Procedure`` row."""

    # Length of the dummy vectors stored by these tests.
    EMBEDDING_DIM = 384
    # Fixed seed so a failing run can be reproduced with the same vector.
    RANDOM_SEED = 0

    @staticmethod
    def _create_procedure():
        """Create and return the ``Procedure`` row used as the embedding owner."""
        # Imported locally, as in the original tests, so the model module is
        # only loaded once a test actually runs.
        from hue_portal.core.models import Procedure

        return Procedure.objects.create(
            title="Test Procedure",
            domain="Test",
        )

    @classmethod
    def _dummy_embedding(cls):
        """Return a deterministic float32 vector of length ``EMBEDDING_DIM``."""
        rng = np.random.default_rng(cls.RANDOM_SEED)
        return rng.random(cls.EMBEDDING_DIM).astype(np.float32)

    def test_save_and_load_embedding(self):
        """Test that a saved embedding is read back with the same values."""
        procedure = self._create_procedure()
        dummy_embedding = self._dummy_embedding()

        # Save embedding
        self.assertTrue(save_embedding(procedure, dummy_embedding))

        # Reload from database so the check uses persisted state.
        procedure.refresh_from_db()

        # Load embedding
        loaded_embedding = load_embedding(procedure)
        self.assertIsNotNone(loaded_embedding)
        # Same tolerances as np.allclose's defaults; on failure this reports
        # the mismatching elements instead of a bare "False is not true".
        np.testing.assert_allclose(
            dummy_embedding, loaded_embedding, rtol=1e-5, atol=1e-8
        )

    def test_has_embedding(self):
        """Test that ``has_embedding`` is false before a save and true after."""
        procedure = self._create_procedure()

        # Initially no embedding
        self.assertFalse(has_embedding(procedure))

        # Add embedding; assert the save so a failed write is reported here
        # rather than as a confusing has_embedding failure below.
        self.assertTrue(save_embedding(procedure, self._dummy_embedding()))

        # Refresh and check
        procedure.refresh_from_db()
        self.assertTrue(has_embedding(procedure))
146
+