| | """
|
| | HAIM Test Suite — Binary HDV Tests
|
| | ===================================
|
| | Tests for the core BinaryHDV operations (Phase 3.0).
|
| | Validates mathematical properties of VSA operations.
|
| | """
|
| |
|
| | import numpy as np
|
| | import pytest
|
| |
|
| | from mnemocore.core.binary_hdv import (
|
| | BinaryHDV,
|
| | TextEncoder,
|
| | batch_hamming_distance,
|
| | majority_bundle,
|
| | top_k_nearest,
|
| | )
|
| |
|
| |
|
| |
|
# Dimension passed to the BinaryHDV constructors and TextEncoder in this
# module's tests; the TestFullDimension cases use 16384 explicitly instead.
D = 1024
|
| |
|
| |
|
class TestBinaryHDVConstruction:
    """Construction, seeding, validation and byte serialization of BinaryHDV."""

    def test_random_creates_valid_vector(self):
        """random(D) reports dimension D and holds D // 8 uint8 values."""
        vec = BinaryHDV.random(D)
        expected_shape = (D // 8,)
        assert vec.dimension == D
        assert vec.data.shape == expected_shape
        assert vec.data.dtype == np.uint8

    def test_zeros(self):
        """Every element of zeros(D).data equals 0."""
        zero_vec = BinaryHDV.zeros(D)
        all_clear = np.all(zero_vec.data == 0)
        assert all_clear

    def test_ones(self):
        """Every element of ones(D).data equals 0xFF."""
        ones_vec = BinaryHDV.ones(D)
        all_set = np.all(ones_vec.data == 0xFF)
        assert all_set

    def test_from_seed_deterministic(self):
        """The same seed string yields equal vectors."""
        first = BinaryHDV.from_seed("hello", D)
        second = BinaryHDV.from_seed("hello", D)
        assert first == second

    def test_different_seeds_different_vectors(self):
        """Distinct seed strings yield unequal vectors."""
        from_hello, from_world = (
            BinaryHDV.from_seed(seed, D) for seed in ("hello", "world")
        )
        assert from_hello != from_world

    def test_dimension_must_be_multiple_of_8(self):
        """A dimension that is not divisible by 8 is rejected with AssertionError."""
        bad_dimension = 100  # 100 % 8 == 4
        with pytest.raises(AssertionError):
            BinaryHDV.random(bad_dimension)

    def test_serialization_roundtrip(self):
        """to_bytes() emits D // 8 bytes and from_bytes() restores an equal vector."""
        original = BinaryHDV.random(D)
        payload = original.to_bytes()
        assert len(payload) == D // 8
        restored = BinaryHDV.from_bytes(payload, D)
        assert original == restored
|
| |
|
| |
|
class TestXORBinding:
    """Algebraic properties expected of BinaryHDV.xor_bind."""

    def test_self_inverse(self):
        """a ⊕ a = 0 (zero vector)."""
        vec = BinaryHDV.random(D)
        bound_with_self = vec.xor_bind(vec)
        assert bound_with_self == BinaryHDV.zeros(D)

    def test_commutative(self):
        """a ⊕ b = b ⊕ a."""
        left, right = (BinaryHDV.random(D) for _ in range(2))
        forward = left.xor_bind(right)
        backward = right.xor_bind(left)
        assert forward == backward

    def test_associative(self):
        """(a ⊕ b) ⊕ c = a ⊕ (b ⊕ c)."""
        x, y, z = (BinaryHDV.random(D) for _ in range(3))
        left_grouped = x.xor_bind(y).xor_bind(z)
        right_grouped = x.xor_bind(y.xor_bind(z))
        assert left_grouped == right_grouped

    def test_xor_with_zeros_is_identity(self):
        """a ⊕ 0 = a."""
        vec = BinaryHDV.random(D)
        zero_vec = BinaryHDV.zeros(D)
        unchanged = vec.xor_bind(zero_vec)
        assert unchanged == vec

    def test_unbinding(self):
        """If c = a ⊕ b, then a = c ⊕ b (self-inverse property enables unbinding)."""
        payload = BinaryHDV.random(D)
        key = BinaryHDV.random(D)
        bound = payload.xor_bind(key)
        # Binding with the same key a second time should cancel it out.
        recovered = bound.xor_bind(key)
        assert recovered == payload

    def test_binding_preserves_distance(self):
        """hamming(a⊕c, b⊕c) = hamming(a, b)."""
        first, second, key = (BinaryHDV.random(D) for _ in range(3))
        plain_distance = first.hamming_distance(second)
        first_bound = first.xor_bind(key)
        bound_distance = first_bound.hamming_distance(second.xor_bind(key))
        assert plain_distance == bound_distance
|
| |
|
| |
|
class TestHammingDistance:
    """Metric properties of hamming_distance and the derived similarity scores."""

    def test_self_distance_is_zero(self):
        """hamming(a, a) = 0."""
        vec = BinaryHDV.random(D)
        distance_to_self = vec.hamming_distance(vec)
        assert distance_to_self == 0

    def test_inverse_is_max_distance(self):
        """hamming(a, ~a) = dimension."""
        vec = BinaryHDV.random(D)
        flipped = vec.invert()
        assert vec.hamming_distance(flipped) == D

    def test_symmetry(self):
        """hamming(a, b) = hamming(b, a)."""
        left, right = (BinaryHDV.random(D) for _ in range(2))
        forward = left.hamming_distance(right)
        backward = right.hamming_distance(left)
        assert forward == backward

    def test_triangle_inequality(self):
        """hamming(a, c) <= hamming(a, b) + hamming(b, c)."""
        start, middle, end = (BinaryHDV.random(D) for _ in range(3))
        direct = start.hamming_distance(end)
        detour = start.hamming_distance(middle) + middle.hamming_distance(end)
        assert direct <= detour

    def test_random_vectors_near_half_dimension(self):
        """Random vectors should have Hamming distance ≈ D/2."""
        # Seeds NumPy's global RNG; presumably BinaryHDV.random draws from
        # it -- TODO confirm against the implementation.
        np.random.seed(42)
        samples = [
            BinaryHDV.random(D).hamming_distance(BinaryHDV.random(D))
            for _ in range(50)
        ]
        mean_dist = np.mean(samples)

        # The mean of the 50 samples must land within 5% of D from D / 2.
        tolerance = D * 0.05
        assert abs(mean_dist - D / 2) < tolerance

    def test_similarity_score_range(self):
        """similarity() stays within the closed interval [0, 1]."""
        left, right = (BinaryHDV.random(D) for _ in range(2))
        score = left.similarity(right)
        assert 0.0 <= score <= 1.0

    def test_normalized_distance_range(self):
        """normalized_distance() stays within the closed interval [0, 1]."""
        left, right = (BinaryHDV.random(D) for _ in range(2))
        fraction = left.normalized_distance(right)
        assert 0.0 <= fraction <= 1.0
|
| |
|
| |
|
class TestPermutation:
    """Behaviour of BinaryHDV.permute for zero, full-cycle and opposite shifts."""

    def test_permute_zero_is_identity(self):
        """Permuting by 0 returns an equal vector."""
        vec = BinaryHDV.random(D)
        unshifted = vec.permute(0)
        assert unshifted == vec

    def test_permute_full_cycle(self):
        """Permuting by D should return the original vector."""
        vec = BinaryHDV.random(D)
        wrapped = vec.permute(D)
        assert wrapped == vec

    def test_permute_produces_different_vector(self):
        """Non-zero permutation should produce a (very likely) different vector."""
        vec = BinaryHDV.random(D)
        shifted = vec.permute(1)
        assert vec != shifted

    def test_permute_is_invertible(self):
        """permute(k) followed by permute(-k) recovers original."""
        shift = 7
        vec = BinaryHDV.random(D)
        round_trip = vec.permute(shift).permute(-shift)
        assert vec == round_trip
|
| |
|
| |
|
class TestMajorityBundle:
    """Behaviour of majority_bundle for one, two, three and zero inputs."""

    def test_single_vector_bundle(self):
        """Bundling a single vector returns that vector."""
        only = BinaryHDV.random(D)
        bundled = majority_bundle([only])
        assert bundled == only

    def test_bundled_vector_similar_to_inputs(self):
        """Bundle of {a, b, c} should be more similar to each input than random."""
        np.random.seed(42)
        members = tuple(BinaryHDV.random(D) for _ in range(3))
        bundled = majority_bundle(list(members))

        # An unrelated vector, drawn after bundling, serves as the baseline.
        random_v = BinaryHDV.random(D)
        for member in members:
            sim_to_bundle = bundled.similarity(member)
            sim_to_random = bundled.similarity(random_v)
            assert sim_to_bundle > sim_to_random, (
                f"Bundle should be more similar to its inputs than to random vectors. "
                f"sim_to_bundle={sim_to_bundle:.3f}, sim_to_random={sim_to_random:.3f}"
            )

    def test_bundle_is_approximate(self):
        """Bundle is not exact — it's a lossy superposition."""
        first = BinaryHDV.random(D)
        second = BinaryHDV.random(D)
        bundled = majority_bundle([first, second])

        # The bundle must equal neither input...
        assert bundled != first
        assert bundled != second
        # ...yet score above 0.5 similarity against each of them.
        assert bundled.similarity(first) > 0.5
        assert bundled.similarity(second) > 0.5

    def test_empty_bundle_raises(self):
        """An empty input list is rejected with AssertionError."""
        no_vectors = []
        with pytest.raises(AssertionError):
            majority_bundle(no_vectors)
|
| |
|
| |
|
class TestBatchOperations:
    """Checks batch_hamming_distance and top_k_nearest against a stacked database."""

    def test_batch_hamming_distance(self):
        """Batch Hamming should match individual computations."""
        np.random.seed(42)
        query = BinaryHDV.random(D)
        n = 100
        rows = [BinaryHDV.random(D).data for _ in range(n)]
        db = np.stack(rows, axis=0)

        batch_distances = batch_hamming_distance(query, db)
        assert batch_distances.shape == (n,)

        # Cross-check each batch entry against the one-to-one method, rebuilding
        # a BinaryHDV from the corresponding database row.
        for idx, packed_row in enumerate(db):
            expected = query.hamming_distance(
                BinaryHDV(data=packed_row, dimension=D)
            )
            assert batch_distances[idx] == expected

    def test_top_k_nearest(self):
        """Top-K should return the K closest vectors."""
        np.random.seed(42)
        query = BinaryHDV.random(D)
        n = 50
        db = np.stack([BinaryHDV.random(D).data for _ in range(n)], axis=0)

        # Plant a near-duplicate of the query in row 0: XOR with 0x03 toggles
        # the two lowest bits of the first element and leaves the rest as-is.
        near_duplicate = query.data.copy()
        near_duplicate[0] ^= 0x03
        db[0] = near_duplicate

        results = top_k_nearest(query, db, k=5)
        assert len(results) == 5

        # The planted row must be reported first.
        best_index = results[0][0]
        assert best_index == 0

        # The second field of each result must be non-decreasing.
        ranked = [hit[1] for hit in results]
        for nearer, farther in zip(ranked, ranked[1:]):
            assert nearer <= farther
|
| |
|
| |
|
class TestTextEncoder:
    """Behaviour of TextEncoder.encode and encode_with_context."""

    def test_encode_deterministic(self):
        """Encoding the same text twice yields equal vectors."""
        encoder = TextEncoder(dimension=D)
        text = "hello world"
        first = encoder.encode(text)
        second = encoder.encode(text)
        assert first == second

    def test_different_texts_different_vectors(self):
        """Unrelated texts yield unequal vectors."""
        encoder = TextEncoder(dimension=D)
        greeting = encoder.encode("hello world")
        farewell = encoder.encode("goodbye moon")
        assert greeting != farewell

    def test_similar_texts_more_similar(self):
        """Texts sharing words should be more similar than completely different texts."""
        np.random.seed(42)
        encoder = TextEncoder(dimension=D)
        base = encoder.encode("the quick brown fox")
        near = encoder.encode("the quick brown dog")
        far = encoder.encode("quantum computing research paper")

        sim_similar = base.similarity(near)
        sim_different = base.similarity(far)
        assert sim_similar > sim_different, (
            f"Similar text should have higher similarity. "
            f"sim_similar={sim_similar:.3f}, sim_different={sim_different:.3f}"
        )

    def test_encode_with_context(self):
        """Context changes the encoding, and XOR-binding with it again undoes that."""
        encoder = TextEncoder(dimension=D)
        context = BinaryHDV.random(D)
        with_context = encoder.encode_with_context("hello world", context)

        # The context-bound result must differ from the plain encoding.
        plain = encoder.encode("hello world")
        assert with_context != plain

        # Binding with the same context once more must give the plain encoding.
        recovered = with_context.xor_bind(context)
        assert recovered == plain

    def test_empty_text(self):
        """Empty text should still produce a valid vector."""
        encoder = TextEncoder(dimension=D)
        encoded = encoder.encode("")
        expected_shape = (D // 8,)
        assert encoded.dimension == D
        assert encoded.data.shape == expected_shape

    def test_token_caching(self):
        """After encoding, each word of the text is a key in _token_cache."""
        encoder = TextEncoder(dimension=D)
        encoder.encode("hello world")
        for token in ("hello", "world"):
            assert token in encoder._token_cache
|
| |
|
| |
|
class TestFullDimension:
    """Runs roundtrip, distance and search checks at 16,384 dimensions."""

    def test_full_dim_roundtrip(self):
        """A 16,384-dimension vector packs into 2048 bytes and survives a roundtrip."""
        full_dim = 16384
        original = BinaryHDV.random(full_dim)
        assert original.data.shape == (2048,)
        payload = original.to_bytes()
        assert len(payload) == 2048
        restored = BinaryHDV.from_bytes(payload, full_dim)
        assert original == restored

    def test_full_dim_hamming(self):
        """Two random full-size vectors differ in roughly half of their positions."""
        full_dim = 16384
        left = BinaryHDV.random(full_dim)
        right = BinaryHDV.random(full_dim)
        dist = left.hamming_distance(right)

        # Loose band around 16384 / 2 = 8192.
        assert 6000 < dist < 10000

    def test_full_dim_batch_search(self):
        """top_k_nearest over 1000 full-size rows returns 10 results in sorted order."""
        np.random.seed(42)
        full_dim = 16384
        query = BinaryHDV.random(full_dim)
        n = 1000
        rows = [BinaryHDV.random(full_dim).data for _ in range(n)]
        db = np.stack(rows, axis=0)
        results = top_k_nearest(query, db, k=10)
        assert len(results) == 10

        # The second field of each result must be non-decreasing.
        ranked = [hit[1] for hit in results]
        for nearer, farther in zip(ranked, ranked[1:]):
            assert nearer <= farther
|
| |
|