Spaces:
Running
on
Zero
Running
on
Zero
| #!/usr/bin/env python3 | |
| """ | |
| Gradio interface for Warbler CDA on HuggingFace Spaces. | |
| Provides a web UI for the FractalStat RAG system with GPU acceleration. | |
| """ | |
| import gradio as gr | |
| import time | |
| # Import Warbler CDA components | |
| from warbler_cda.retrieval_api import RetrievalAPI, RetrievalQuery, RetrievalMode | |
| from warbler_cda.embeddings import EmbeddingProviderFactory | |
| from warbler_cda.fractalstat_rag_bridge import FractalStatRAGBridge | |
| from warbler_cda.semantic_anchors import SemanticAnchorGraph | |
| from warbler_cda.pack_loader import PackLoader | |
# ---------------------------------------------------------------------------
# System initialization.
#
# Everything below runs once, at import time, before the UI is built: it wires
# the retrieval stack together and fills the context store with documents.
#
# NOTE(review): the status glyphs at the start of the print() messages look
# mis-encoded (probably emoji that went through a wrong codec). They are kept
# verbatim here; restore the intended characters from the canonical source.
# ---------------------------------------------------------------------------
print("π Initializing Warbler CDA...")

# Create embedding provider (will use sentence-transformers with GPU if available)
embedding_provider = EmbeddingProviderFactory.get_default_provider()
print(f"β Embedding provider: {embedding_provider.get_provider_info()['provider_id']}")

# Create semantic anchors (required by RetrievalAPI)
semantic_anchors = SemanticAnchorGraph(embedding_provider=embedding_provider)
print("β Semantic anchors initialized")

# Create FractalStat bridge
fractalstat_bridge = FractalStatRAGBridge()
print("β FractalStat bridge initialized")

# Create RetrievalAPI with proper components
api = RetrievalAPI(
    semantic_anchors=semantic_anchors,
    embedding_provider=embedding_provider,
    fractalstat_bridge=fractalstat_bridge,
    config={"enable_fractalstat_hybrid": True},
)
print("β RetrievalAPI initialized")

# Load packs
print("π Loading Warbler packs...")
pack_loader = PackLoader()
documents = pack_loader.discover_documents()

# If no packs found, try to download them
if not documents:
    print("β οΈ No packs found locally. Attempting to download from HuggingFace...")
    try:
        # Imported lazily: the ingestor is only needed on this fallback path.
        from warbler_cda.utils.hf_warbler_ingest import HFWarblerIngestor

        ingestor = HFWarblerIngestor(packs_dir=pack_loader.packs_dir, verbose=True)
        # Download a small demo dataset for deployment
        print("π¦ Downloading warbler-pack-hf-prompt-report...")
        success = ingestor.ingest_dataset("prompt-report")
        if success:
            # Reload after download
            documents = pack_loader.discover_documents()
            print(f"β Downloaded {len(documents)} documents")
        else:
            # `documents` is still empty here, so the sample fallback below kicks in.
            print("β Failed to download dataset, using sample documents...")
    except Exception as e:
        # Deliberately broad: a failed download must not prevent the app from
        # starting; the sample documents below act as the fallback.
        print(f"β οΈ Could not download packs: {e}")
        print("Using sample documents instead...")
        documents = []

if not documents:
    # Fallback to sample documents
    sample_docs = [
        {"id": "sample1", "content": "FractalStat is an 8-dimensional addressing system for intelligent retrieval.", "metadata": {}},
        {"id": "sample2", "content": "Semantic search finds documents by meaning, not just keywords.", "metadata": {}},
        {"id": "sample3", "content": "Bob the Skeptic validates results to prevent bias and hallucinations.", "metadata": {}},
    ]
    for doc in sample_docs:
        # Keyword arguments, matching the ingestion call used for real packs.
        api.add_document(
            doc_id=doc["id"],
            content=doc["content"],
            metadata=doc["metadata"],
        )
    print(f"β Loaded {len(sample_docs)} sample documents")
else:
    print(f"β Found {len(documents)} documents")
    # Ingest documents
    for doc in documents:
        api.add_document(
            doc_id=doc["id"],
            content=doc["content"],
            metadata=doc.get("metadata", {}),
        )

print(f"π Warbler CDA ready with {api.get_context_store_size()} documents!")
def query_warbler(query_text: str, max_results: int = 5, use_hybrid: bool = True) -> str:
    """Query the Warbler CDA system and render the outcome as Markdown.

    Args:
        query_text: Free-text query. ``None``, empty, or whitespace-only input
            short-circuits with a prompt message and no retrieval is run.
        max_results: Maximum number of results to request and to render.
            Coerced to an ``int`` of at least 1, since a UI slider may deliver
            a float and the value is used as a slice bound.
        use_hybrid: Forwarded to the query as ``fractalstat_hybrid``; when
            true, each result also lists its semantic and FractalStat scores.

    Returns:
        A Markdown string with the query, result count, elapsed time, quality
        score, and one entry per result (or a "no results" line).
    """
    # Guard against None as well as blank text so an empty textbox never
    # surfaces an AttributeError to the user.
    if not query_text or not query_text.strip():
        return "Please enter a query."

    max_results = max(1, int(max_results))
    preview_chars = 300  # maximum characters of content shown per result

    # perf_counter is monotonic, so the reported latency cannot go negative
    # if the wall clock is adjusted mid-request.
    started = time.perf_counter()

    # Create query
    query = RetrievalQuery(
        # Millisecond resolution keeps IDs distinct for searches issued
        # within the same second.
        query_id=f"gradio_{int(time.time() * 1000)}",
        mode=RetrievalMode.SEMANTIC_SIMILARITY,
        semantic_query=query_text,
        max_results=max_results,
        fractalstat_hybrid=use_hybrid,
    )

    # Execute query
    assembly = api.retrieve_context(query)
    elapsed_ms = (time.perf_counter() - started) * 1000

    # Format results
    parts = [
        "## Query Results\n\n",
        f"**Query:** {query_text}\n\n",
        f"**Found:** {len(assembly.results)} results in {elapsed_ms:.0f}ms\n\n",
        f"**Quality Score:** {assembly.assembly_quality:.3f}\n\n",
    ]

    if not assembly.results:
        parts.append("No results found.\n")
        return "".join(parts)

    parts.append("### Top Results\n\n")
    for rank, result in enumerate(assembly.results[:max_results], 1):
        content = result.content
        snippet = content[:preview_chars]
        # Only mark the snippet as truncated when text was actually cut off.
        if len(content) > preview_chars:
            snippet += "..."

        parts.append(f"**{rank}. Score: {result.relevance_score:.3f}**\n\n")
        parts.append(f"{snippet}\n\n")
        if use_hybrid:
            parts.append(f"- Semantic: {result.semantic_similarity:.3f}\n")
            parts.append(f"- FractalStat: {result.fractalstat_resonance:.3f}\n\n")
        parts.append("---\n\n")

    return "".join(parts)
def get_system_stats() -> str:
    """Build a Markdown summary of store size and retrieval metrics.

    Reads the metrics dictionary and the context-store size from the
    module-level ``api`` object and formats four headline figures.

    Returns:
        A Markdown string with document count, total queries, cache hit rate,
        and average quality.
    """
    stats = api.get_retrieval_metrics()
    doc_count = api.get_context_store_size()

    total_queries = stats['retrieval_metrics']['total_queries']
    hit_rate = stats['cache_performance']['hit_rate']
    avg_quality = stats['system_health']['average_quality']

    sections = [
        "## System Statistics\n\n",
        f"**Total Documents:** {doc_count:,}\n\n",
        f"**Total Queries:** {total_queries}\n\n",
        f"**Cache Hit Rate:** {hit_rate:.1%}\n\n",
        f"**Avg Quality:** {avg_quality:.3f}\n\n",
    ]
    return "".join(sections)
# Create Gradio interface
#
# Three tabs are declared inside a single Blocks app:
#   * "Query"        - search form wired to query_warbler()
#   * "System Stats" - metrics panel wired to get_system_stats()
#   * "About"        - static project description
# Components appear in the order they are created, so statement order below
# is part of the layout.
with gr.Blocks(title="Warbler CDA - FractalStat RAG") as demo:
    # Page header shown above the tabs.
    gr.Markdown("""
    # π¦ Warbler CDA - FractalStat RAG System
    Semantic retrieval with 8D FractalStat multi-dimensional addressing.
    **Features:**
    - 2.6M+ documents from arXiv, education, fiction, and more
    - Hybrid semantic + FractalStat scoring
    - Bob the Skeptic bias detection
    - Narrative coherence analysis
    """)
    with gr.Tab("Query"):
        with gr.Row():
            # Left column: query inputs and the search button.
            with gr.Column():
                query_input = gr.Textbox(
                    label="Query",
                    placeholder="Enter your search query...",
                    lines=2
                )
                max_results = gr.Slider(
                    minimum=1,
                    maximum=20,
                    value=5,
                    step=1,
                    label="Max Results"
                )
                use_hybrid = gr.Checkbox(
                    label="Enable FractalStat Hybrid Scoring",
                    value=True
                )
                query_btn = gr.Button("Search", variant="primary")
            # Right column: rendered Markdown returned by query_warbler().
            with gr.Column():
                results_output = gr.Markdown(label="Results")
        # The input order here must match query_warbler's positional
        # parameters: (query_text, max_results, use_hybrid).
        query_btn.click(  # pylint: disable=E1101
            fn=query_warbler,
            inputs=[query_input, max_results, use_hybrid],
            outputs=results_output
        )
        # Each example row supplies values for the three inputs, in order.
        gr.Examples(
            examples=[
                ["hello world", 5, True],
                ["rotation dynamics of Saturn's moons", 5, True],
                ["anything about machine learning", 10, False],
            ],
            inputs=[query_input, max_results, use_hybrid]
        )
    with gr.Tab("System Stats"):
        stats_output = gr.Markdown()
        stats_btn = gr.Button("Refresh Stats")
        # Stats are refreshed on demand via the button...
        stats_btn.click(fn=get_system_stats, outputs=stats_output)  # pylint: disable=E1101
        # ...and also through the app's load event, so the panel is not left
        # empty until the button is pressed.
        demo.load(fn=get_system_stats, outputs=stats_output)  # pylint: disable=E1101
    with gr.Tab("About"):
        gr.Markdown("""
        ## About Warbler CDA
        Warbler CDA is a production-ready RAG system featuring:
        - **8D FractalStat Addressing**: Multi-dimensional intelligence for superior retrieval
        - **Semantic Anchors**: Persistent memory with provenance tracking
        - **Bob the Skeptic**: Automatic bias detection and validation
        - **Narrative Coherence**: Quality analysis beyond simple similarity
        ### Performance
        - 84% test coverage with 587 passing tests
        - 9-28s query response time
        - 0.88 average relevance score
        - 75-83% narrative coherence
        ### Links
        - [Source Code](https://gitlab.com/tiny-walnut-games/the-seed)
        - [Documentation](https://gitlab.com/tiny-walnut-games/the-seed/-/tree/main/warbler-cda-package)
        - [Performance Report](https://gitlab.com/tiny-walnut-games/the-seed/-/blob/main/warbler-cda-package/WARBLER_CDA_PERFORMANCE_REPORT.md)
        """)

# Start the web server only when executed as a script; binds on all
# interfaces at port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)