|
|
""" |
|
|
Unit tests for embeddings functionality. |
|
|
""" |
|
|
import unittest |
|
|
import numpy as np |
|
|
from django.test import TestCase |
|
|
|
|
|
from hue_portal.core.embeddings import ( |
|
|
get_embedding_model, |
|
|
generate_embedding, |
|
|
generate_embeddings_batch, |
|
|
cosine_similarity, |
|
|
get_embedding_dimension |
|
|
) |
|
|
from hue_portal.core.embedding_utils import ( |
|
|
save_embedding, |
|
|
load_embedding, |
|
|
has_embedding |
|
|
) |
|
|
|
|
|
|
|
|
class EmbeddingsTestCase(TestCase):
    """Test embedding generation and utilities.

    Tests that need a real embedding are reported as *skipped* when the
    helper under test returns ``None`` (presumably because no embedding
    model is installed in the environment -- confirm against
    ``hue_portal.core.embeddings``). Previously they passed without
    executing a single assertion, which hid the fact that nothing had
    been verified.
    """

    def _embedding_or_skip(self, text):
        """Return ``generate_embedding(text)``, skipping the test on ``None``."""
        embedding = generate_embedding(text)
        if embedding is None:
            self.skipTest("generate_embedding() returned None")
        return embedding

    def test_get_embedding_model(self):
        """Test loading embedding model."""
        model = get_embedding_model()

        # The former ``assertIsNotNone(model or True)`` could never fail
        # because ``model or True`` is never None. Report a missing model
        # as a skip; reaching the end of the test means a model object
        # was returned without raising.
        if model is None:
            self.skipTest("get_embedding_model() returned None")

    def test_generate_embedding(self):
        """Test generating embedding for a single text."""
        text = "Thủ tục đăng ký cư trú"
        embedding = self._embedding_or_skip(text)

        self.assertIsInstance(embedding, np.ndarray)
        self.assertGreater(len(embedding), 0)

    def test_generate_embeddings_batch(self):
        """Test generating embeddings for multiple texts."""
        texts = [
            "Thủ tục đăng ký cư trú",
            "Mức phạt vượt đèn đỏ",
            "Địa chỉ công an phường",
        ]
        # batch_size is smaller than len(texts), so the call has to span
        # more than one batch.
        embeddings = generate_embeddings_batch(texts, batch_size=2)

        # Explicit None/length checks instead of truthiness: ``bool()`` on
        # a multi-row ndarray raises ValueError, so this stays safe
        # whether the helper returns a list or an array.
        if embeddings is None or len(embeddings) == 0 or embeddings[0] is None:
            self.skipTest("generate_embeddings_batch() returned no embeddings")

        self.assertEqual(len(embeddings), len(texts))
        self.assertIsInstance(embeddings[0], np.ndarray)

    def test_cosine_similarity(self):
        """Test cosine similarity calculation."""
        vec1 = np.array([1.0, 0.0, 0.0])
        vec2 = np.array([1.0, 0.0, 0.0])

        # Identical vectors -> similarity of 1.
        similarity = cosine_similarity(vec1, vec2)
        self.assertAlmostEqual(similarity, 1.0, places=5)

        # Orthogonal vectors -> similarity of 0.
        vec3 = np.array([0.0, 1.0, 0.0])
        similarity2 = cosine_similarity(vec1, vec3)
        self.assertAlmostEqual(similarity2, 0.0, places=5)

    def test_cosine_similarity_orthogonal(self):
        """Test cosine similarity for orthogonal vectors."""
        vec1 = np.array([1.0, 0.0])
        vec2 = np.array([0.0, 1.0])

        similarity = cosine_similarity(vec1, vec2)
        self.assertAlmostEqual(similarity, 0.0, places=5)

    def test_get_embedding_dimension(self):
        """Test getting embedding dimension."""
        dim = get_embedding_dimension()

        # Zero is accepted, so this only pins the type and the sign.
        self.assertIsInstance(dim, int)
        self.assertGreaterEqual(dim, 0)

    def test_similar_texts_have_similar_embeddings(self):
        """Test that similar texts produce similar embeddings."""
        text1 = "Thủ tục đăng ký cư trú"
        text2 = "Đăng ký thủ tục cư trú"
        text3 = "Mức phạt giao thông"

        emb1 = self._embedding_or_skip(text1)
        emb2 = self._embedding_or_skip(text2)
        emb3 = self._embedding_or_skip(text3)

        sim_similar = cosine_similarity(emb1, emb2)
        sim_different = cosine_similarity(emb1, emb3)

        # text1 and text2 share the same words in a different order, so
        # they should score closer to each other than to text3.
        self.assertGreater(sim_similar, sim_different)
|
|
|
|
|
|
|
|
class EmbeddingUtilsTestCase(TestCase):
    """Exercise save_embedding / load_embedding / has_embedding on a model row."""

    @staticmethod
    def _new_procedure():
        """Create and return a minimal ``Procedure`` row for one test."""
        # Imported at call time, mirroring the function-scope import each
        # test used originally.
        from hue_portal.core.models import Procedure

        return Procedure.objects.create(title="Test Procedure", domain="Test")

    @staticmethod
    def _random_vector():
        """Return a random 384-element float32 vector to store."""
        # NOTE(review): 384 presumably matches the embedding model's
        # output size -- confirm against get_embedding_dimension().
        raw = np.random.rand(384)
        return raw.astype(np.float32)

    def test_save_and_load_embedding(self):
        """Round-trip a vector through save_embedding and load_embedding."""
        instance = self._new_procedure()
        vector = self._random_vector()

        saved = save_embedding(instance, vector)
        self.assertTrue(saved)

        # Re-read the row so the load below sees what was persisted.
        instance.refresh_from_db()

        restored = load_embedding(instance)
        self.assertIsNotNone(restored)
        values_match = np.allclose(vector, restored)
        self.assertTrue(values_match)

    def test_has_embedding(self):
        """has_embedding is false for a new row and true after a save."""
        instance = self._new_procedure()

        before_save = has_embedding(instance)
        self.assertFalse(before_save)

        save_embedding(instance, self._random_vector())

        instance.refresh_from_db()
        after_save = has_embedding(instance)
        self.assertTrue(after_save)
|
|
|
|
|
|