File size: 5,131 Bytes
016b413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python3
"""
Demo: Hypothesis Generation (Phase 7).

This script demonstrates the REAL hypothesis generation pipeline:
1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls)
2. REAL embeddings: Semantic deduplication
3. REAL LLM: Mechanistic hypothesis generation

Usage:
    # Requires OPENAI_API_KEY or ANTHROPIC_API_KEY
    uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
"""

import argparse
import asyncio
import os
import sys
from typing import Any

from src.agents.hypothesis_agent import HypothesisAgent
from src.services.embeddings import EmbeddingService
from src.tools.clinicaltrials import ClinicalTrialsTool
from src.tools.europepmc import EuropePMCTool
from src.tools.pubmed import PubMedTool
from src.tools.search_handler import SearchHandler


async def run_hypothesis_demo(query: str) -> None:
    """Execute the live three-stage hypothesis-generation pipeline for *query*.

    Stage 1 searches PubMed, ClinicalTrials.gov, and Europe PMC; stage 2
    collapses near-duplicate papers with embeddings; stage 3 asks the LLM
    agent for mechanistic hypotheses. Any failure is reported and re-raised.
    """
    banner = "=" * 60
    try:
        print(f"\n{banner}")
        print("DeepCritical Hypothesis Agent Demo (Phase 7)")
        print(f"Query: {query}")
        print("Mode: REAL (Live API calls)")
        print(f"{banner}\n")

        # Stage 1: fan the query out to all three literature sources.
        print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...")
        handler = SearchHandler(
            tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
        )
        search_result = await handler.execute(query, max_results_per_tool=5)

        print(f"  Found {search_result.total_found} results from {search_result.sources_searched}")
        if search_result.errors:
            print(f"  Warnings: {search_result.errors}")

        # Nothing to reason over — bail out early.
        if not search_result.evidence:
            print("\nNo evidence found. Try a different query.")
            return

        # Stage 2: embedding-based semantic deduplication of the hits.
        print("\n[Step 2] Semantic deduplication...")
        embedder = EmbeddingService()
        deduped = await embedder.deduplicate(search_result.evidence, threshold=0.85)
        print(f"  {len(search_result.evidence)} -> {len(deduped)} unique papers")

        # Preview up to five retained papers, truncating long titles.
        print("\n[Evidence collected]")
        title_limit = 50
        for idx, item in enumerate(deduped[:5], 1):
            full_title = item.citation.title
            shown = (
                full_title[:title_limit] + "..."
                if len(full_title) > title_limit
                else full_title
            )
            print(f"  {idx}. [{item.citation.source.upper()}] {shown}")

        # Stage 3: the agent reads the shared store and writes hypotheses back.
        print("\n[Step 3] Generating mechanistic hypotheses (LLM)...")
        store: dict[str, Any] = {"current": deduped, "hypotheses": []}
        agent = HypothesisAgent(store, embedder)

        print("-" * 60)
        response = await agent.run(query)
        print(response.messages[0].text)
        print("-" * 60)

        # Report whatever the agent persisted into the shared store.
        stored = store.get("hypotheses", [])
        print(f"\n{len(stored)} hypotheses stored")

        if stored:
            print("\nGenerated search queries for further investigation:")
            for hypothesis in stored:
                follow_ups = hypothesis.to_search_queries()
                print(f"  {hypothesis.drug} -> {hypothesis.target}:")
                for follow_up in follow_ups[:3]:
                    print(f"    - {follow_up}")

    except Exception as exc:
        # Surface the failure for the demo user, then propagate to the caller.
        print(f"\n❌ Error during hypothesis generation: {exc}")
        raise


async def main() -> None:
    """Parse CLI arguments, require a real LLM API key, and run the demo."""
    rule = "=" * 60
    parser = argparse.ArgumentParser(
        description="Hypothesis Generation Demo (REAL - No Mocks)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
    uv run python examples/hypothesis_demo/run_hypothesis.py "aspirin cancer prevention"
        """,
    )
    parser.add_argument(
        "query",
        nargs="?",
        default="metformin Alzheimer's disease",
        help="Research query",
    )
    args = parser.parse_args()

    # Fail fast: the demo is useless without a real LLM provider key.
    if not any(os.getenv(var) for var in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY")):
        error_lines = [
            rule,
            "ERROR: This demo requires a real LLM.",
            "",
            "Set one of the following in your .env file:",
            "  OPENAI_API_KEY=sk-...",
            "  ANTHROPIC_API_KEY=sk-ant-...",
            "",
            "This is a REAL demo, not a mock. No fake data.",
            rule,
        ]
        print("\n".join(error_lines))
        sys.exit(1)

    await run_hypothesis_demo(args.query)

    # Closing banner summarizing what actually ran.
    print("\n" + rule)
    print("Demo complete! This was a REAL pipeline:")
    print("  1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs")
    print("  2. REAL embeddings: Actual sentence-transformers")
    print("  3. REAL LLM: Actual hypothesis generation")
    print(rule + "\n")


if __name__ == "__main__":
    asyncio.run(main())