"""
End-to-End RAG Integration Test

Validates the complete RAG system: embeddings, retrieval, semantic search,
and FractalStat hybrid scoring.
"""
import sys
import time
from pathlib import Path

import pytest

# Make the package importable when this file is run directly from the tests dir.
sys.path.insert(0, str(Path(__file__).parent.parent))

from warbler_cda.embeddings import EmbeddingProviderFactory
from warbler_cda.retrieval_api import RetrievalAPI, RetrievalMode, RetrievalQuery
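
# ---------------------------------------------------------------------------
# Hedged reference: the suite treats EmbeddingProvider.calculate_similarity
# as a relative relevance signal. Below is a minimal sketch of what such a
# score typically computes (cosine similarity), assuming plain Python lists
# of floats. This helper is illustrative only and is not used by warbler_cda
# or by the tests themselves.
# ---------------------------------------------------------------------------
import math


def _reference_cosine_similarity(a, b):
    """Cosine similarity of two equal-length vectors, in [-1.0, 1.0]."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0  # zero vectors carry no directional information
    return dot / (norm_a * norm_b)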


class TestEndToEndRAG:
    """End-to-end RAG system validation."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Build a fresh RAG system for each test, then report metrics."""
        self.embedding_provider = EmbeddingProviderFactory.get_default_provider()
        self.api = RetrievalAPI(
            embedding_provider=self.embedding_provider,
            config={
                "enable_fractalstat_hybrid": True,
                "cache_ttl_seconds": 300,
            },
        )
        yield
        self._report_metrics()
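
    # Note: with autouse=True the fixture wraps every test in this class, and
    # the code after `yield` runs as teardown, so metrics are reported once
    # per test rather than once per session.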

    def _report_metrics(self):
        """Report RAG system metrics."""
        metrics = self.api.get_retrieval_metrics()
        print("\n" + "=" * 60)
        print("RAG SYSTEM METRICS")
        print("=" * 60)
        print(f"Embedding Provider: {self.embedding_provider.provider_id}")
        print(f"Embedding Dimension: {self.embedding_provider.get_dimension()}")
        print(f"Documents in Store: {metrics['context_store_size']}")
        print(f"Total Queries: {metrics['retrieval_metrics']['total_queries']}")
        print("=" * 60)

    def test_01_embedding_generation(self):
        """Test 01: Verify embeddings are generated correctly."""
        print("\n[TEST 01] Embedding Generation")
        test_text = "Semantic embeddings enable efficient document retrieval"
        embedding = self.embedding_provider.embed_text(test_text)

        assert isinstance(embedding, list)
        assert len(embedding) > 0
        assert all(isinstance(x, float) for x in embedding)

        print(f"[PASS] Generated {len(embedding)}-dimensional embedding")
        print(f"       Sample values: {embedding[:5]}")

    def test_02_embedding_similarity(self):
        """Test 02: Verify similarity scoring works."""
        print("\n[TEST 02] Embedding Similarity Scoring")
        text1 = "performance optimization techniques"
        text2 = "optimization for better performance"
        text3 = "completely unrelated weather report"

        emb1 = self.embedding_provider.embed_text(text1)
        emb2 = self.embedding_provider.embed_text(text2)
        emb3 = self.embedding_provider.embed_text(text3)

        sim_12 = self.embedding_provider.calculate_similarity(emb1, emb2)
        sim_13 = self.embedding_provider.calculate_similarity(emb1, emb3)

        print(f"[PASS] Similarity '{text1}' vs '{text2}': {sim_12:.4f}")
        print(f"[PASS] Similarity '{text1}' vs '{text3}': {sim_13:.4f}")
        assert sim_12 > sim_13, "Similar texts should score higher"

    def test_03_document_ingestion(self):
        """Test 03: Verify documents can be ingested and stored."""
        print("\n[TEST 03] Document Ingestion")
        documents = [
            ("doc_1", "Performance optimization requires careful profiling"),
            ("doc_2", "Memory management is critical for scalability"),
            ("doc_3", "Semantic embeddings improve search relevance"),
            ("doc_4", "Caching strategies reduce database load"),
            ("doc_5", "Compression algorithms optimize storage"),
        ]
        for doc_id, content in documents:
            result = self.api.add_document(doc_id, content)
            assert result is True
            print(f"[PASS] Ingested: {doc_id}")

        assert self.api.get_context_store_size() == len(documents)
        print(f"[PASS] Total documents: {self.api.get_context_store_size()}")

    def test_04_semantic_search(self):
        """Test 04: Verify semantic search retrieval works."""
        print("\n[TEST 04] Semantic Search Retrieval")
        documents = [
            ("doc_1", "How to optimize database queries for performance"),
            ("doc_2", "Memory leaks and profiling techniques"),
            ("doc_3", "Network optimization for distributed systems"),
            ("doc_4", "Caching patterns and implementation"),
        ]
        for doc_id, content in documents:
            self.api.add_document(doc_id, content)

        query = RetrievalQuery(
            query_id="test_search_1",
            mode=RetrievalMode.SEMANTIC_SIMILARITY,
            semantic_query="how to optimize performance",
            max_results=3,
            confidence_threshold=0.3,
        )
        assembly = self.api.retrieve_context(query)

        assert assembly is not None
        assert len(assembly.results) > 0
        print(f"[PASS] Retrieved {len(assembly.results)} relevant documents")
        for i, result in enumerate(assembly.results, 1):
            print(f"  {i}. [{result.relevance_score:.4f}] {result.content[:50]}...")

    def test_05_max_results_respected(self):
        """Test 05: Verify max_results parameter is respected."""
        print("\n[TEST 05] Max Results Parameter")
        for i in range(10):
            self.api.add_document(f"doc_{i}", f"Document content {i}")

        query = RetrievalQuery(
            query_id="test_max_results",
            mode=RetrievalMode.SEMANTIC_SIMILARITY,
            semantic_query="document",
            max_results=3,
            confidence_threshold=0.0,
        )
        assembly = self.api.retrieve_context(query)

        assert len(assembly.results) <= 3
        print(f"[PASS] Query returned {len(assembly.results)} results (max 3 requested)")

    def test_06_confidence_threshold(self):
        """Test 06: Verify confidence threshold filtering."""
        print("\n[TEST 06] Confidence Threshold Filtering")
        documents = [
            ("doc_1", "Python programming language basics"),
            ("doc_2", "Advanced Python techniques and patterns"),
            ("doc_3", "JavaScript for web development"),
        ]
        for doc_id, content in documents:
            self.api.add_document(doc_id, content)

        query_strict = RetrievalQuery(
            query_id="test_strict",
            mode=RetrievalMode.SEMANTIC_SIMILARITY,
            semantic_query="python programming",
            max_results=10,
            confidence_threshold=0.8,
        )
        query_loose = RetrievalQuery(
            query_id="test_loose",
            mode=RetrievalMode.SEMANTIC_SIMILARITY,
            semantic_query="python programming",
            max_results=10,
            confidence_threshold=0.2,
        )
        strict_results = self.api.retrieve_context(query_strict)
        loose_results = self.api.retrieve_context(query_loose)

        print(f"[PASS] Strict threshold (0.8): {len(strict_results.results)} results")
        print(f"[PASS] Loose threshold (0.2): {len(loose_results.results)} results")
        assert len(strict_results.results) <= len(loose_results.results)
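
    # Hedged rationale: absolute similarity scores depend on which embedding
    # provider the factory resolves at setup time, so the test asserts only
    # the monotone relationship between thresholds, not fixed result counts.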

    def test_07_fractalstat_hybrid_scoring(self):
        """Test 07: Verify FractalStat hybrid scoring works."""
        print("\n[TEST 07] FractalStat Hybrid Scoring")
        try:
            from warbler_cda.embeddings.sentence_transformer_provider import (
                SentenceTransformerEmbeddingProvider,
            )

            provider = SentenceTransformerEmbeddingProvider()
            hybrid_api = RetrievalAPI(
                embedding_provider=provider,
                config={"enable_fractalstat_hybrid": True},
            )
        except ImportError:
            pytest.skip("SentenceTransformer not installed for FractalStat testing")

        documents = [
            ("doc_1", "Semantic embeddings with FractalStat coordinates"),
            ("doc_2", "Hybrid scoring combines multiple metrics"),
            ("doc_3", "Multi-dimensional retrieval approach"),
        ]
        for doc_id, content in documents:
            hybrid_api.add_document(doc_id, content)

        query = RetrievalQuery(
            query_id="test_hybrid",
            mode=RetrievalMode.SEMANTIC_SIMILARITY,
            semantic_query="semantic embeddings and scoring",
            max_results=3,
            fractalstat_hybrid=True,
            weight_semantic=0.6,
            weight_fractalstat=0.4,
        )
        assembly = hybrid_api.retrieve_context(query)

        assert assembly is not None
        if assembly.results:
            for result in assembly.results:
                assert hasattr(result, "semantic_similarity")
                assert hasattr(result, "fractalstat_resonance")
                print(
                    f"[PASS] Result: semantic={result.semantic_similarity:.4f}, "
                    f"FractalStat={result.fractalstat_resonance:.4f}"
                )
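
    # Hedged note: the exact hybrid formula is internal to RetrievalAPI and is
    # not asserted here. A plausible sketch, assuming a convex combination of
    # the two per-result signals exposed above:
    #
    #     hybrid = weight_semantic * semantic_similarity \
    #            + weight_fractalstat * fractalstat_resonance
    #
    # with weight_semantic=0.6 and weight_fractalstat=0.4 as set on the query.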

    def test_08_temporal_retrieval(self):
        """Test 08: Verify temporal retrieval works."""
        print("\n[TEST 08] Temporal Retrieval")
        current_time = time.time()
        documents = [
            ("recent_doc", "Recently added document"),
            ("old_doc", "Older document"),
        ]
        for doc_id, content in documents:
            self.api.add_document(doc_id, content)

        query = RetrievalQuery(
            query_id="test_temporal",
            mode=RetrievalMode.TEMPORAL_SEQUENCE,
            temporal_range=(current_time - 3600, current_time + 3600),
            max_results=10,
        )
        assembly = self.api.retrieve_context(query)

        assert assembly is not None
        print(f"[PASS] Temporal query retrieved {len(assembly.results)} results")
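
    # Hedged note: TEMPORAL_SEQUENCE mode is assumed to filter documents by
    # ingestion timestamp; the +/- one hour window above is deliberately wide
    # enough to cover everything added during this test run.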

    def test_09_retrieval_metrics(self):
        """Test 09: Verify retrieval metrics are tracked."""
        print("\n[TEST 09] Retrieval Metrics Tracking")
        for i in range(3):
            self.api.add_document(f"doc_{i}", f"Content {i}")

        for i in range(2):
            query = RetrievalQuery(
                query_id=f"metric_query_{i}",
                mode=RetrievalMode.SEMANTIC_SIMILARITY,
                semantic_query="content",
                max_results=5,
            )
            self.api.retrieve_context(query)

        metrics = self.api.get_retrieval_metrics()
        assert metrics["context_store_size"] == 3
        assert metrics["retrieval_metrics"]["total_queries"] >= 2
        print(f"[PASS] Metrics tracked: {metrics['retrieval_metrics']['total_queries']} queries")

    def test_10_full_rag_pipeline(self):
        """Test 10: Complete RAG pipeline end-to-end."""
        print("\n[TEST 10] Full RAG Pipeline")
        knowledge_base = [
            "Python is a popular programming language",
            "Machine learning models learn from data",
            "Embeddings represent text as vectors",
            "Semantic search finds relevant documents",
            "RAG systems combine retrieval and generation",
        ]

        print("Step 1: Ingesting knowledge base...")
        for i, content in enumerate(knowledge_base):
            self.api.add_document(f"kb_{i}", content)
        print(f"[PASS] Ingested {len(knowledge_base)} documents")

        print("Step 2: Creating query...")
        query = RetrievalQuery(
            query_id="rag_pipeline_query",
            mode=RetrievalMode.SEMANTIC_SIMILARITY,
            semantic_query="How do embeddings work in machine learning?",
            max_results=3,
            confidence_threshold=0.3,
        )
        print(f"[PASS] Query created: '{query.semantic_query}'")

        print("Step 3: Retrieving context...")
        assembly = self.api.retrieve_context(query)
        print(f"[PASS] Retrieved {len(assembly.results)} relevant results")

        print("Step 4: Analyzing results...")
        for i, result in enumerate(assembly.results, 1):
            print(f"  {i}. Score: {result.relevance_score:.4f}")
            print(f"     Content: {result.content[:60]}...")

        assert len(assembly.results) > 0
        print("[PASS] RAG pipeline executed successfully")


if __name__ == "__main__":
    pytest.main([__file__, "-v", "-s"])