"""Unit tests for Pinecone Pydantic models.""" from __future__ import annotations import json import pytest from tools.pinecone_models import PineconeRecord, ProcessedStandardSet class TestEducationLevelsProcessing: """Test education_levels field validator.""" def test_simple_array(self): """Test simple array without comma-separated values.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01", "02"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="test-id", ancestor_ids=[], child_ids=[], sibling_count=0, ) assert record.education_levels == ["01", "02"] def test_comma_separated_strings(self): """Test array with comma-separated values.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01,02", "02", "03"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="test-id", ancestor_ids=[], child_ids=[], sibling_count=0, ) assert record.education_levels == ["01", "02", "03"] def test_high_school_range(self): """Test high school grade levels.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="High School", subject="Math", education_levels=["09,10,11,12"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="test-id", ancestor_ids=[], child_ids=[], sibling_count=0, ) assert record.education_levels == ["09", "10", "11", "12"] def test_empty_array(self): """Test empty array.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=[], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="test-id", ancestor_ids=[], child_ids=[], sibling_count=0, ) assert record.education_levels == [] def test_whitespace_handling(self): """Test that whitespace is stripped.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01 , 02", " 03 "], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="test-id", ancestor_ids=[], child_ids=[], sibling_count=0, ) assert record.education_levels == ["01", "02", "03"] class TestParentIdNullHandling: """Test that parent_id null is properly serialized.""" def test_root_node_parent_id_null(self): """Test root node has parent_id as null.""" record = PineconeRecord( **{"_id": "root-id"}, content="Root content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=False, is_root=True, parent_id=None, root_id="root-id", ancestor_ids=[], child_ids=["child-1"], sibling_count=0, ) assert record.parent_id is None # Test JSON serialization preserves null json_str = record.model_dump_json() data = json.loads(json_str) assert data["parent_id"] is None def test_child_node_parent_id_set(self): """Test child node has parent_id set.""" record = PineconeRecord( **{"_id": "child-id"}, content="Child content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=1, is_leaf=True, is_root=False, parent_id="parent-id", root_id="root-id", ancestor_ids=["root-id"], child_ids=[], sibling_count=0, ) assert record.parent_id == "parent-id" # Test JSON serialization json_str = record.model_dump_json() data = json.loads(json_str) assert data["parent_id"] == "parent-id" class TestOptionalFields: """Test optional fields can be omitted.""" def test_all_optional_fields_omitted(self): """Test record with all optional fields omitted.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="test-id", ancestor_ids=[], child_ids=[], sibling_count=0, ) assert record.normalized_subject is None assert record.asn_identifier is None assert record.statement_notation is None assert record.statement_label is None assert record.publication_status is None def test_optional_fields_set(self): """Test record with optional fields set.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", normalized_subject="Math", education_levels=["01"], document_id="doc-1", document_valid="2021", publication_status="Published", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", asn_identifier="S12345", statement_notation="1.G.K", statement_label="Standard", depth=1, is_leaf=True, is_root=False, parent_id="parent-id", root_id="root-id", ancestor_ids=["root-id"], child_ids=[], sibling_count=1, ) assert record.normalized_subject == "Math" assert record.asn_identifier == "S12345" assert record.statement_notation == "1.G.K" assert record.statement_label == "Standard" assert record.publication_status == "Published" class TestProcessedStandardSet: """Test ProcessedStandardSet container model.""" def test_empty_records(self): """Test ProcessedStandardSet with empty records.""" processed = ProcessedStandardSet(records=[]) assert processed.records == [] def test_multiple_records(self): """Test ProcessedStandardSet with multiple records.""" record1 = PineconeRecord( **{"_id": "id-1"}, content="Content 1", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="id-1", ancestor_ids=[], child_ids=[], sibling_count=0, ) record2 = PineconeRecord( **{"_id": "id-2"}, content="Content 2", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=1, is_leaf=True, is_root=False, parent_id="id-1", root_id="id-1", ancestor_ids=["id-1"], child_ids=[], sibling_count=0, ) processed = ProcessedStandardSet(records=[record1, record2]) assert len(processed.records) == 2 assert processed.records[0].id == "id-1" assert processed.records[1].id == "id-2" def test_json_serialization(self): """Test JSON serialization of ProcessedStandardSet.""" record = PineconeRecord( **{"_id": "test-id"}, content="Test content", standard_set_id="set-1", standard_set_title="Grade 1", subject="Math", education_levels=["01"], document_id="doc-1", document_valid="2021", jurisdiction_id="jur-1", jurisdiction_title="Wyoming", depth=0, is_leaf=True, is_root=True, root_id="test-id", ancestor_ids=[], child_ids=[], sibling_count=0, ) processed = ProcessedStandardSet(records=[record]) json_str = processed.model_dump_json(by_alias=True) data = json.loads(json_str) assert "records" in data assert len(data["records"]) == 1 assert data["records"][0]["_id"] == "test-id" assert data["records"][0]["parent_id"] is None # Verify null handling