Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Demo: Hypothesis Generation (Phase 7). | |
| This script demonstrates the REAL hypothesis generation pipeline: | |
| 1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls) | |
| 2. REAL embeddings: Semantic deduplication | |
| 3. REAL LLM: Mechanistic hypothesis generation | |
| Usage: | |
| # Requires OPENAI_API_KEY or ANTHROPIC_API_KEY | |
| uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's" | |
| uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure" | |
| """ | |
| import argparse | |
| import asyncio | |
| import os | |
| import sys | |
| from typing import Any | |
| from src.agents.hypothesis_agent import HypothesisAgent | |
| from src.services.embeddings import EmbeddingService | |
| from src.tools.clinicaltrials import ClinicalTrialsTool | |
| from src.tools.europepmc import EuropePMCTool | |
| from src.tools.pubmed import PubMedTool | |
| from src.tools.search_handler import SearchHandler | |
| async def run_hypothesis_demo(query: str) -> None: | |
| """Run the REAL hypothesis generation pipeline.""" | |
| try: | |
| print(f"\n{'=' * 60}") | |
| print("DeepCritical Hypothesis Agent Demo (Phase 7)") | |
| print(f"Query: {query}") | |
| print("Mode: REAL (Live API calls)") | |
| print(f"{'=' * 60}\n") | |
| # Step 1: REAL Search | |
| print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...") | |
| search_handler = SearchHandler( | |
| tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0 | |
| ) | |
| result = await search_handler.execute(query, max_results_per_tool=5) | |
| print(f" Found {result.total_found} results from {result.sources_searched}") | |
| if result.errors: | |
| print(f" Warnings: {result.errors}") | |
| if not result.evidence: | |
| print("\nNo evidence found. Try a different query.") | |
| return | |
| # Step 2: REAL Embeddings - Deduplicate | |
| print("\n[Step 2] Semantic deduplication...") | |
| embedding_service = EmbeddingService() | |
| unique_evidence = await embedding_service.deduplicate(result.evidence, threshold=0.85) | |
| print(f" {len(result.evidence)} -> {len(unique_evidence)} unique papers") | |
| # Show what we found | |
| print("\n[Evidence collected]") | |
| max_title_len = 50 | |
| for i, e in enumerate(unique_evidence[:5], 1): | |
| raw_title = e.citation.title | |
| if len(raw_title) > max_title_len: | |
| title = raw_title[:max_title_len] + "..." | |
| else: | |
| title = raw_title | |
| print(f" {i}. [{e.citation.source.upper()}] {title}") | |
| # Step 3: REAL LLM - Generate hypotheses | |
| print("\n[Step 3] Generating mechanistic hypotheses (LLM)...") | |
| evidence_store: dict[str, Any] = {"current": unique_evidence, "hypotheses": []} | |
| agent = HypothesisAgent(evidence_store, embedding_service) | |
| print("-" * 60) | |
| response = await agent.run(query) | |
| print(response.messages[0].text) | |
| print("-" * 60) | |
| # Show stored hypotheses | |
| hypotheses = evidence_store.get("hypotheses", []) | |
| print(f"\n{len(hypotheses)} hypotheses stored") | |
| if hypotheses: | |
| print("\nGenerated search queries for further investigation:") | |
| for h in hypotheses: | |
| queries = h.to_search_queries() | |
| print(f" {h.drug} -> {h.target}:") | |
| for q in queries[:3]: | |
| print(f" - {q}") | |
| except Exception as e: | |
| print(f"\n❌ Error during hypothesis generation: {e}") | |
| raise | |
| async def main() -> None: | |
| """Entry point.""" | |
| parser = argparse.ArgumentParser( | |
| description="Hypothesis Generation Demo (REAL - No Mocks)", | |
| formatter_class=argparse.RawDescriptionHelpFormatter, | |
| epilog=""" | |
| Examples: | |
| uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's" | |
| uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure" | |
| uv run python examples/hypothesis_demo/run_hypothesis.py "aspirin cancer prevention" | |
| """, | |
| ) | |
| parser.add_argument( | |
| "query", | |
| nargs="?", | |
| default="metformin Alzheimer's disease", | |
| help="Research query", | |
| ) | |
| args = parser.parse_args() | |
| # Fail fast: require API key | |
| if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")): | |
| print("=" * 60) | |
| print("ERROR: This demo requires a real LLM.") | |
| print() | |
| print("Set one of the following in your .env file:") | |
| print(" OPENAI_API_KEY=sk-...") | |
| print(" ANTHROPIC_API_KEY=sk-ant-...") | |
| print() | |
| print("This is a REAL demo, not a mock. No fake data.") | |
| print("=" * 60) | |
| sys.exit(1) | |
| await run_hypothesis_demo(args.query) | |
| print("\n" + "=" * 60) | |
| print("Demo complete! This was a REAL pipeline:") | |
| print(" 1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs") | |
| print(" 2. REAL embeddings: Actual sentence-transformers") | |
| print(" 3. REAL LLM: Actual hypothesis generation") | |
| print("=" * 60 + "\n") | |
| if __name__ == "__main__": | |
| asyncio.run(main()) | |