#!/usr/bin/env python3
"""
Demo: Hypothesis Generation (Phase 7).
This script demonstrates the REAL hypothesis generation pipeline:
1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls)
2. REAL embeddings: Semantic deduplication
3. REAL LLM: Mechanistic hypothesis generation
Usage:
# Requires OPENAI_API_KEY or ANTHROPIC_API_KEY
uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
"""
import argparse
import asyncio
import os
import sys
from typing import Any

from src.agents.hypothesis_agent import HypothesisAgent
from src.services.embeddings import EmbeddingService
from src.tools.clinicaltrials import ClinicalTrialsTool
from src.tools.europepmc import EuropePMCTool
from src.tools.pubmed import PubMedTool
from src.tools.search_handler import SearchHandler


async def run_hypothesis_demo(query: str) -> None:
    """Run the REAL hypothesis generation pipeline."""
    try:
        print(f"\n{'=' * 60}")
        print("DeepCritical Hypothesis Agent Demo (Phase 7)")
        print(f"Query: {query}")
        print("Mode: REAL (Live API calls)")
        print(f"{'=' * 60}\n")

        # Step 1: REAL Search
        print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...")
        search_handler = SearchHandler(
            tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
        )
        result = await search_handler.execute(query, max_results_per_tool=5)
        print(f" Found {result.total_found} results from {result.sources_searched}")
        if result.errors:
            print(f" Warnings: {result.errors}")

        if not result.evidence:
            print("\nNo evidence found. Try a different query.")
            return

        # Step 2: REAL Embeddings - Deduplicate
        print("\n[Step 2] Semantic deduplication...")
        embedding_service = EmbeddingService()
        unique_evidence = await embedding_service.deduplicate(result.evidence, threshold=0.85)
        print(f" {len(result.evidence)} -> {len(unique_evidence)} unique papers")

        # Show what we found
        print("\n[Evidence collected]")
        max_title_len = 50
        for i, e in enumerate(unique_evidence[:5], 1):
            raw_title = e.citation.title
            if len(raw_title) > max_title_len:
                title = raw_title[:max_title_len] + "..."
            else:
                title = raw_title
            print(f" {i}. [{e.citation.source.upper()}] {title}")
        # Step 3: REAL LLM - Generate hypotheses
        print("\n[Step 3] Generating mechanistic hypotheses (LLM)...")
        evidence_store: dict[str, Any] = {"current": unique_evidence, "hypotheses": []}
        agent = HypothesisAgent(evidence_store, embedding_service)

        print("-" * 60)
        response = await agent.run(query)
        print(response.messages[0].text)
        print("-" * 60)

        # Show stored hypotheses
        hypotheses = evidence_store.get("hypotheses", [])
        print(f"\n{len(hypotheses)} hypotheses stored")
        if hypotheses:
            print("\nGenerated search queries for further investigation:")
            for h in hypotheses:
                queries = h.to_search_queries()
                print(f" {h.drug} -> {h.target}:")
                for q in queries[:3]:
                    print(f" - {q}")

    except Exception as e:
        print(f"\n❌ Error during hypothesis generation: {e}")
        raise


async def main() -> None:
    """Entry point."""
    parser = argparse.ArgumentParser(
        description="Hypothesis Generation Demo (REAL - No Mocks)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
    uv run python examples/hypothesis_demo/run_hypothesis.py "aspirin cancer prevention"
""",
    )
    parser.add_argument(
        "query",
        nargs="?",
        default="metformin Alzheimer's disease",
        help="Research query",
    )
    args = parser.parse_args()

    # Fail fast: require API key
    if not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY")):
        print("=" * 60)
        print("ERROR: This demo requires a real LLM.")
        print()
        print("Set one of the following in your .env file:")
        print(" OPENAI_API_KEY=sk-...")
        print(" ANTHROPIC_API_KEY=sk-ant-...")
        print()
        print("This is a REAL demo, not a mock. No fake data.")
        print("=" * 60)
        sys.exit(1)

    await run_hypothesis_demo(args.query)

    print("\n" + "=" * 60)
    print("Demo complete! This was a REAL pipeline:")
    print(" 1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs")
    print(" 2. REAL embeddings: Actual sentence-transformers")
    print(" 3. REAL LLM: Actual hypothesis generation")
    print("=" * 60 + "\n")


if __name__ == "__main__":
    asyncio.run(main())