Spaces:

DataQuests
/

DeepCritical

Running

DeepCritical / examples / hypothesis_demo / run_hypothesis.py

Joseph Pollack

Initial commit - Independent repository - Breaking fork relationship

016b413 13 days ago

5.13 kB

	#!/usr/bin/env python3
	"""
	Demo: Hypothesis Generation (Phase 7).

	This script demonstrates the REAL hypothesis generation pipeline:
	1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls)
	2. REAL embeddings: Semantic deduplication
	3. REAL LLM: Mechanistic hypothesis generation

	Usage:
	# Requires OPENAI_API_KEY or ANTHROPIC_API_KEY
	uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
	uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
	"""

	import argparse
	import asyncio
	import os
	import sys
	from typing import Any

	from src.agents.hypothesis_agent import HypothesisAgent
	from src.services.embeddings import EmbeddingService
	from src.tools.clinicaltrials import ClinicalTrialsTool
	from src.tools.europepmc import EuropePMCTool
	from src.tools.pubmed import PubMedTool
	from src.tools.search_handler import SearchHandler


	async def run_hypothesis_demo(query: str) -> None:
	    """Execute the live hypothesis-generation demo for ``query``.

	    The coroutine narrates three stages on stdout:

	    1. Search: a ``SearchHandler`` built over the PubMed, ClinicalTrials and
	       Europe PMC tools (``timeout=30.0``) is executed with
	       ``max_results_per_tool=5``.
	    2. Deduplicate: ``EmbeddingService.deduplicate`` is applied to the
	       returned evidence with ``threshold=0.85``; up to five titles are
	       previewed, each cut to 50 characters plus ``"..."`` when longer.
	    3. Hypothesize: a ``HypothesisAgent`` is run on the query and the text of
	       its first response message is printed, followed by up to three
	       follow-up search queries per stored hypothesis.

	    Returns early, after printing a notice, when the search yields no
	    evidence.

	    Args:
	        query: Free-text research question, e.g. ``"metformin Alzheimer's"``.

	    Raises:
	        Exception: Any failure inside the pipeline is reported on stdout and
	            then re-raised unchanged.
	    """
	    heavy_rule = "=" * 60
	    light_rule = "-" * 60
	    title_limit = 50
	    preview_count = 5

	    try:
	        print(f"\n{heavy_rule}")
	        print("DeepCritical Hypothesis Agent Demo (Phase 7)")
	        print(f"Query: {query}")
	        print("Mode: REAL (Live API calls)")
	        print(f"{heavy_rule}\n")

	        # Stage 1: query all three literature sources through one handler.
	        print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...")
	        sources = [PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()]
	        handler = SearchHandler(tools=sources, timeout=30.0)
	        found = await handler.execute(query, max_results_per_tool=5)

	        print(f" Found {found.total_found} results from {found.sources_searched}")
	        if found.errors:
	            # Errors are surfaced as warnings; the run continues regardless.
	            print(f" Warnings: {found.errors}")

	        if not found.evidence:
	            print("\nNo evidence found. Try a different query.")
	            return

	        # Stage 2: collapse near-duplicate evidence before prompting the LLM.
	        print("\n[Step 2] Semantic deduplication...")
	        embedder = EmbeddingService()
	        papers = await embedder.deduplicate(found.evidence, threshold=0.85)
	        print(f" {len(found.evidence)} -> {len(papers)} unique papers")

	        # Preview the first few items with shortened titles.
	        print("\n[Evidence collected]")
	        for rank, paper in enumerate(papers[:preview_count], start=1):
	            full_title = paper.citation.title
	            shown = (
	                full_title
	                if len(full_title) <= title_limit
	                else full_title[:title_limit] + "..."
	            )
	            print(f" {rank}. [{paper.citation.source.upper()}] {shown}")

	        # Stage 3: hand the evidence store to the agent and print its reply.
	        print("\n[Step 3] Generating mechanistic hypotheses (LLM)...")
	        store: dict[str, Any] = {"current": papers, "hypotheses": []}
	        agent = HypothesisAgent(store, embedder)

	        print(light_rule)
	        reply = await agent.run(query)
	        print(reply.messages[0].text)
	        print(light_rule)

	        # Read back whatever now sits under "hypotheses" in the shared store
	        # (presumably filled in by the agent during run() - verify in
	        # HypothesisAgent).
	        stored = store.get("hypotheses", [])
	        print(f"\n{len(stored)} hypotheses stored")
	        if not stored:
	            return

	        print("\nGenerated search queries for further investigation:")
	        for hypothesis in stored:
	            follow_ups = hypothesis.to_search_queries()
	            print(f" {hypothesis.drug} -> {hypothesis.target}:")
	            for follow_up in follow_ups[:3]:
	                print(f" - {follow_up}")

	    except Exception as exc:
	        # Report for the demo user, then let the caller see the real failure.
	        print(f"\n❌ Error during hypothesis generation: {exc}")
	        raise


	async def main() -> None:
	    """Parse the command line, require an LLM API key, and run the demo.

	    The single optional positional argument ``query`` defaults to
	    ``"metformin Alzheimer's disease"``.

	    When neither ``OPENAI_API_KEY`` nor ``ANTHROPIC_API_KEY`` is set to a
	    non-empty value, an explanatory banner is printed and the process exits
	    with status 1 before any pipeline work starts. Otherwise
	    ``run_hypothesis_demo`` is awaited and a closing summary is printed.

	    Raises:
	        SystemExit: With code 1 when no API key is configured (argparse may
	            also raise it for invalid arguments or ``--help``).
	    """
	    rule = "=" * 60

	    # Kept as a raw block so RawDescriptionHelpFormatter shows it verbatim.
	    usage_examples = """
	Examples:
	uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
	uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
	uv run python examples/hypothesis_demo/run_hypothesis.py "aspirin cancer prevention"
	"""
	    parser = argparse.ArgumentParser(
	        description="Hypothesis Generation Demo (REAL - No Mocks)",
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog=usage_examples,
	    )
	    parser.add_argument(
	        "query",
	        nargs="?",
	        default="metformin Alzheimer's disease",
	        help="Research query",
	    )
	    cli = parser.parse_args()

	    # Fail fast: the demo is pointless without a real LLM credential.
	    key_names = ("OPENAI_API_KEY", "ANTHROPIC_API_KEY")
	    if not any(os.getenv(name) for name in key_names):
	        notice = (
	            rule,
	            "ERROR: This demo requires a real LLM.",
	            "",
	            "Set one of the following in your .env file:",
	            " OPENAI_API_KEY=sk-...",
	            " ANTHROPIC_API_KEY=sk-ant-...",
	            "",
	            "This is a REAL demo, not a mock. No fake data.",
	            rule,
	        )
	        for line in notice:
	            print(line)
	        sys.exit(1)

	    await run_hypothesis_demo(cli.query)

	    print(f"\n{rule}")
	    print("Demo complete! This was a REAL pipeline:")
	    print(" 1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs")
	    print(" 2. REAL embeddings: Actual sentence-transformers")
	    print(" 3. REAL LLM: Actual hypothesis generation")
	    print(f"{rule}\n")


	if __name__ == "__main__":
	    # Run only when executed as a script: create the entry coroutine and
	    # drive it to completion with asyncio.run.
	    entry_point = main()
	    asyncio.run(entry_point)