Joseph Pollack committed
Commit 026ee5d · 1 Parent(s): 016b413

Restore recent changes

docs/api/agents.md ADDED
@@ -0,0 +1,270 @@
1
+ # Agents API Reference
2
+
3
+ This page documents the API for DeepCritical agents.
4
+
5
+ ## KnowledgeGapAgent
6
+
7
+ **Module**: `src.agents.knowledge_gap`
8
+
9
+ **Purpose**: Evaluates research state and identifies knowledge gaps.
10
+
11
+ ### Methods
12
+
13
+ #### `evaluate`
14
+
15
+ ```python
16
+ async def evaluate(
17
+ self,
18
+ query: str,
19
+ background_context: str,
20
+ conversation_history: Conversation,
21
+ iteration: int,
22
+ time_elapsed_minutes: float,
23
+ max_time_minutes: float
24
+ ) -> KnowledgeGapOutput
25
+ ```
26
+
27
+ Evaluates research completeness and identifies outstanding knowledge gaps.
28
+
29
+ **Parameters**:
30
+ - `query`: Research query string
31
+ - `background_context`: Background context for the query
32
+ - `conversation_history`: Conversation history with previous iterations
33
+ - `iteration`: Current iteration number
34
+ - `time_elapsed_minutes`: Elapsed time in minutes
35
+ - `max_time_minutes`: Maximum time limit in minutes
36
+
37
+ **Returns**: `KnowledgeGapOutput` with:
38
+ - `research_complete`: Boolean indicating if research is complete
39
+ - `outstanding_gaps`: List of remaining knowledge gaps
40
+
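
A minimal usage sketch based on the signature above (the keyword-argument call and the empty `Conversation` for a first iteration are assumptions, not taken from the source):

```python
import asyncio

from src.agent_factory.agents import create_knowledge_gap_agent
from src.utils.models import Conversation


async def main() -> None:
    # The factory falls back to get_model() from settings when no model is passed.
    agent = create_knowledge_gap_agent()
    result = await agent.evaluate(
        query="Does metformin reduce cancer incidence in diabetic patients?",
        background_context="",
        conversation_history=Conversation(),  # no prior iterations yet
        iteration=1,
        time_elapsed_minutes=0.0,
        max_time_minutes=10.0,
    )
    if not result.research_complete:
        print("Outstanding gaps:", result.outstanding_gaps)


asyncio.run(main())
```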
41
+ ## ToolSelectorAgent
42
+
43
+ **Module**: `src.agents.tool_selector`
44
+
45
+ **Purpose**: Selects appropriate tools for addressing knowledge gaps.
46
+
47
+ ### Methods
48
+
49
+ #### `select_tools`
50
+
51
+ ```python
52
+ async def select_tools(
53
+ self,
54
+ query: str,
55
+ knowledge_gaps: list[str],
56
+ available_tools: list[str]
57
+ ) -> AgentSelectionPlan
58
+ ```
59
+
60
+ Selects tools for addressing knowledge gaps.
61
+
62
+ **Parameters**:
63
+ - `query`: Research query string
64
+ - `knowledge_gaps`: List of knowledge gaps to address
65
+ - `available_tools`: List of available tool names
66
+
67
+ **Returns**: `AgentSelectionPlan` with list of `AgentTask` objects.
68
+
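
A sketch of a `select_tools` call (the values passed in `available_tools` mirror the tool names documented in the Tools API; treat the exact strings as illustrative):

```python
from src.agent_factory.agents import create_tool_selector_agent


async def plan_next_step() -> None:
    selector = create_tool_selector_agent()
    plan = await selector.select_tools(
        query="Does metformin reduce cancer incidence?",
        knowledge_gaps=["No clinical trial evidence collected yet"],
        available_tools=["pubmed", "clinicaltrials", "europepmc", "rag"],
    )
    # Each entry in plan.tasks is an AgentTask (agent_name, query, context).
    for task in plan.tasks:
        print(task.agent_name, "->", task.query)
```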
69
+ ## WriterAgent
70
+
71
+ **Module**: `src.agents.writer`
72
+
73
+ **Purpose**: Generates final reports from research findings.
74
+
75
+ ### Methods
76
+
77
+ #### `write_report`
78
+
79
+ ```python
80
+ async def write_report(
81
+ self,
82
+ query: str,
83
+ findings: str,
84
+ output_length: str = "medium",
85
+ output_instructions: str | None = None
86
+ ) -> str
87
+ ```
88
+
89
+ Generates a markdown report from research findings.
90
+
91
+ **Parameters**:
92
+ - `query`: Research query string
93
+ - `findings`: Research findings to include in report
94
+ - `output_length`: Desired output length ("short", "medium", "long")
95
+ - `output_instructions`: Additional instructions for report generation
96
+
97
+ **Returns**: Markdown string with numbered citations.
98
+
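
A usage sketch (the findings string and instructions are illustrative; the keyword-argument call is an assumption):

```python
import asyncio

from src.agent_factory.agents import create_writer_agent


async def main() -> None:
    writer = create_writer_agent()
    report = await writer.write_report(
        query="Does metformin reduce cancer incidence?",
        findings="[1] Cohort study A reported reduced incidence. [2] Trial B was inconclusive.",
        output_length="short",  # "short", "medium", or "long"
        output_instructions="Include a limitations paragraph.",
    )
    print(report)  # markdown with numbered citations


asyncio.run(main())
```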
99
+ ## LongWriterAgent
100
+
101
+ **Module**: `src.agents.long_writer`
102
+
103
+ **Purpose**: Long-form report generation with section-by-section writing.
104
+
105
+ ### Methods
106
+
107
+ #### `write_next_section`
108
+
109
+ ```python
110
+ async def write_next_section(
111
+ self,
112
+ query: str,
113
+ draft: ReportDraft,
114
+ section_title: str,
115
+ section_content: str
116
+ ) -> LongWriterOutput
117
+ ```
118
+
119
+ Writes the next section of a long-form report.
120
+
121
+ **Parameters**:
122
+ - `query`: Research query string
123
+ - `draft`: Current report draft
124
+ - `section_title`: Title of the section to write
125
+ - `section_content`: Content/guidance for the section
126
+
127
+ **Returns**: `LongWriterOutput` with updated draft.
128
+
129
+ #### `write_report`
130
+
131
+ ```python
132
+ async def write_report(
133
+ self,
134
+ query: str,
135
+ report_title: str,
136
+ report_draft: ReportDraft
137
+ ) -> str
138
+ ```
139
+
140
+ Generates final report from draft.
141
+
142
+ **Parameters**:
143
+ - `query`: Research query string
144
+ - `report_title`: Title of the report
145
+ - `report_draft`: Complete report draft
146
+
147
+ **Returns**: Final markdown report string.
148
+
149
+ ## ProofreaderAgent
150
+
151
+ **Module**: `src.agents.proofreader`
152
+
153
+ **Purpose**: Proofreads and polishes report drafts.
154
+
155
+ ### Methods
156
+
157
+ #### `proofread`
158
+
159
+ ```python
160
+ async def proofread(
161
+ self,
162
+ query: str,
163
+ report_title: str,
164
+ report_draft: ReportDraft
165
+ ) -> str
166
+ ```
167
+
168
+ Proofreads and polishes a report draft.
169
+
170
+ **Parameters**:
171
+ - `query`: Research query string
172
+ - `report_title`: Title of the report
173
+ - `report_draft`: Report draft to proofread
174
+
175
+ **Returns**: Polished markdown string.
176
+
177
+ ## ThinkingAgent
178
+
179
+ **Module**: `src.agents.thinking`
180
+
181
+ **Purpose**: Generates observations from conversation history.
182
+
183
+ ### Methods
184
+
185
+ #### `generate_observations`
186
+
187
+ ```python
188
+ async def generate_observations(
189
+ self,
190
+ query: str,
191
+ background_context: str,
192
+ conversation_history: Conversation
193
+ ) -> str
194
+ ```
195
+
196
+ Generates observations from conversation history.
197
+
198
+ **Parameters**:
199
+ - `query`: Research query string
200
+ - `background_context`: Background context
201
+ - `conversation_history`: Conversation history
202
+
203
+ **Returns**: Observation string.
204
+
205
+ ## InputParserAgent
206
+
207
+ **Module**: `src.agents.input_parser`
208
+
209
+ **Purpose**: Parses and improves user queries, detects research mode.
210
+
211
+ ### Methods
212
+
213
+ #### `parse_query`
214
+
215
+ ```python
216
+ async def parse_query(
217
+ self,
218
+ query: str
219
+ ) -> ParsedQuery
220
+ ```
221
+
222
+ Parses and improves a user query.
223
+
224
+ **Parameters**:
225
+ - `query`: Original query string
226
+
227
+ **Returns**: `ParsedQuery` with:
228
+ - `original_query`: Original query string
229
+ - `improved_query`: Refined query string
230
+ - `research_mode`: "iterative" or "deep"
231
+ - `key_entities`: List of key entities
232
+ - `research_questions`: List of research questions
233
+
234
+ ## Factory Functions
235
+
236
+ All agents have factory functions in `src.agent_factory.agents`:
237
+
238
+ ```python
239
+ def create_knowledge_gap_agent(model: Any | None = None) -> KnowledgeGapAgent
240
+ def create_tool_selector_agent(model: Any | None = None) -> ToolSelectorAgent
241
+ def create_writer_agent(model: Any | None = None) -> WriterAgent
242
+ def create_long_writer_agent(model: Any | None = None) -> LongWriterAgent
243
+ def create_proofreader_agent(model: Any | None = None) -> ProofreaderAgent
244
+ def create_thinking_agent(model: Any | None = None) -> ThinkingAgent
245
+ def create_input_parser_agent(model: Any | None = None) -> InputParserAgent
246
+ ```
247
+
248
+ **Parameters**:
249
+ - `model`: Optional Pydantic AI model. If None, uses `get_model()` from settings.
250
+
251
+ **Returns**: Agent instance.
252
+
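
For example, the default settings-driven model versus an explicitly passed model (the `get_model` import path follows the architecture docs; otherwise treat it as an assumption):

```python
from src.agent_factory.agents import create_writer_agent
from src.agent_factory.judges import get_model

# Default: the factory resolves the model from settings via get_model().
writer = create_writer_agent()

# Explicit: pass any Pydantic AI-compatible model object instead.
writer_with_model = create_writer_agent(model=get_model())
```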
253
+ ## See Also
254
+
255
+ - [Architecture - Agents](../architecture/agents.md) - Architecture overview
256
+ - [Models API](models.md) - Data models used by agents
257
+
docs/api/models.md ADDED
@@ -0,0 +1,248 @@
1
+ # Models API Reference
2
+
3
+ This page documents the Pydantic models used throughout DeepCritical.
4
+
5
+ ## Evidence
6
+
7
+ **Module**: `src.utils.models`
8
+
9
+ **Purpose**: Represents evidence from search results.
10
+
11
+ ```python
12
+ class Evidence(BaseModel):
13
+ citation: Citation
14
+ content: str
15
+ relevance_score: float = Field(ge=0.0, le=1.0)
16
+ metadata: dict[str, Any] = Field(default_factory=dict)
17
+ ```
18
+
19
+ **Fields**:
20
+ - `citation`: Citation information (title, URL, date, authors)
21
+ - `content`: Evidence text content
22
+ - `relevance_score`: Relevance score (0.0-1.0)
23
+ - `metadata`: Additional metadata dictionary
24
+
25
+ ## Citation
26
+
27
+ **Module**: `src.utils.models`
28
+
29
+ **Purpose**: Citation information for evidence.
30
+
31
+ ```python
32
+ class Citation(BaseModel):
33
+ title: str
34
+ url: str
35
+ date: str | None = None
36
+ authors: list[str] = Field(default_factory=list)
37
+ ```
38
+
39
+ **Fields**:
40
+ - `title`: Article/trial title
41
+ - `url`: Source URL
42
+ - `date`: Publication date (optional)
43
+ - `authors`: List of authors (optional)
44
+
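
Constructing the two models together (all field values below are illustrative):

```python
from src.utils.models import Citation, Evidence

citation = Citation(
    title="Metformin and cancer incidence: a cohort study",
    url="https://pubmed.ncbi.nlm.nih.gov/12345678/",
    date="2021-03-01",
    authors=["Smith J", "Doe A"],
)

evidence = Evidence(
    citation=citation,
    content="The cohort showed a modest reduction in cancer incidence ...",
    relevance_score=0.82,  # must fall within 0.0-1.0
    metadata={"source": "pubmed"},
)
```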
45
+ ## KnowledgeGapOutput
46
+
47
+ **Module**: `src.utils.models`
48
+
49
+ **Purpose**: Output from knowledge gap evaluation.
50
+
51
+ ```python
52
+ class KnowledgeGapOutput(BaseModel):
53
+ research_complete: bool
54
+ outstanding_gaps: list[str] = Field(default_factory=list)
55
+ ```
56
+
57
+ **Fields**:
58
+ - `research_complete`: Boolean indicating if research is complete
59
+ - `outstanding_gaps`: List of remaining knowledge gaps
60
+
61
+ ## AgentSelectionPlan
62
+
63
+ **Module**: `src.utils.models`
64
+
65
+ **Purpose**: Plan for tool/agent selection.
66
+
67
+ ```python
68
+ class AgentSelectionPlan(BaseModel):
69
+ tasks: list[AgentTask] = Field(default_factory=list)
70
+ ```
71
+
72
+ **Fields**:
73
+ - `tasks`: List of agent tasks to execute
74
+
75
+ ## AgentTask
76
+
77
+ **Module**: `src.utils.models`
78
+
79
+ **Purpose**: Individual agent task.
80
+
81
+ ```python
82
+ class AgentTask(BaseModel):
83
+ agent_name: str
84
+ query: str
85
+ context: dict[str, Any] = Field(default_factory=dict)
86
+ ```
87
+
88
+ **Fields**:
89
+ - `agent_name`: Name of agent to use
90
+ - `query`: Task query
91
+ - `context`: Additional context dictionary
92
+
93
+ ## ReportDraft
94
+
95
+ **Module**: `src.utils.models`
96
+
97
+ **Purpose**: Draft structure for long-form reports.
98
+
99
+ ```python
100
+ class ReportDraft(BaseModel):
101
+ title: str
102
+ sections: list[ReportSection] = Field(default_factory=list)
103
+ references: list[Citation] = Field(default_factory=list)
104
+ ```
105
+
106
+ **Fields**:
107
+ - `title`: Report title
108
+ - `sections`: List of report sections
109
+ - `references`: List of citations
110
+
111
+ ## ReportSection
112
+
113
+ **Module**: `src.utils.models`
114
+
115
+ **Purpose**: Individual section in a report draft.
116
+
117
+ ```python
118
+ class ReportSection(BaseModel):
119
+ title: str
120
+ content: str
121
+ order: int
122
+ ```
123
+
124
+ **Fields**:
125
+ - `title`: Section title
126
+ - `content`: Section content
127
+ - `order`: Section order number
128
+
129
+ ## ParsedQuery
130
+
131
+ **Module**: `src.utils.models`
132
+
133
+ **Purpose**: Parsed and improved query.
134
+
135
+ ```python
136
+ class ParsedQuery(BaseModel):
137
+ original_query: str
138
+ improved_query: str
139
+ research_mode: Literal["iterative", "deep"]
140
+ key_entities: list[str] = Field(default_factory=list)
141
+ research_questions: list[str] = Field(default_factory=list)
142
+ ```
143
+
144
+ **Fields**:
145
+ - `original_query`: Original query string
146
+ - `improved_query`: Refined query string
147
+ - `research_mode`: Research mode ("iterative" or "deep")
148
+ - `key_entities`: List of key entities
149
+ - `research_questions`: List of research questions
150
+
151
+ ## Conversation
152
+
153
+ **Module**: `src.utils.models`
154
+
155
+ **Purpose**: Conversation history with iterations.
156
+
157
+ ```python
158
+ class Conversation(BaseModel):
159
+ iterations: list[IterationData] = Field(default_factory=list)
160
+ ```
161
+
162
+ **Fields**:
163
+ - `iterations`: List of iteration data
164
+
165
+ ## IterationData
166
+
167
+ **Module**: `src.utils.models`
168
+
169
+ **Purpose**: Data for a single iteration.
170
+
171
+ ```python
172
+ class IterationData(BaseModel):
173
+ iteration: int
174
+ observations: str | None = None
175
+ knowledge_gaps: list[str] = Field(default_factory=list)
176
+ tool_calls: list[dict[str, Any]] = Field(default_factory=list)
177
+ findings: str | None = None
178
+ thoughts: str | None = None
179
+ ```
180
+
181
+ **Fields**:
182
+ - `iteration`: Iteration number
183
+ - `observations`: Generated observations
184
+ - `knowledge_gaps`: Identified knowledge gaps
185
+ - `tool_calls`: Tool calls made
186
+ - `findings`: Findings from tools
187
+ - `thoughts`: Agent thoughts
188
+
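
A sketch of building a one-iteration history (values are illustrative; both models are constructed in a single pass rather than mutated, since project models are typically frozen):

```python
from src.utils.models import Conversation, IterationData

iteration = IterationData(
    iteration=1,
    observations="Initial search surfaced mostly observational studies.",
    knowledge_gaps=["No randomized trial evidence collected yet"],
    tool_calls=[{"tool": "pubmed", "query": "metformin cancer incidence"}],
    findings="Three cohort studies report reduced incidence.",
    thoughts="Prioritize trial registries in the next iteration.",
)

conversation = Conversation(iterations=[iteration])
```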
189
+ ## AgentEvent
190
+
191
+ **Module**: `src.utils.models`
192
+
193
+ **Purpose**: Event emitted during research execution.
194
+
195
+ ```python
196
+ class AgentEvent(BaseModel):
197
+ type: str
198
+ iteration: int | None = None
199
+ data: dict[str, Any] = Field(default_factory=dict)
200
+ ```
201
+
202
+ **Fields**:
203
+ - `type`: Event type (e.g., "started", "search_complete", "complete")
204
+ - `iteration`: Iteration number (optional)
205
+ - `data`: Event data dictionary
206
+
207
+ ## BudgetStatus
208
+
209
+ **Module**: `src.utils.models`
210
+
211
+ **Purpose**: Current budget status.
212
+
213
+ ```python
214
+ class BudgetStatus(BaseModel):
215
+ tokens_used: int
216
+ tokens_limit: int
217
+ time_elapsed_seconds: float
218
+ time_limit_seconds: float
219
+ iterations: int
220
+ iterations_limit: int
221
+ ```
222
+
223
+ **Fields**:
224
+ - `tokens_used`: Tokens used so far
225
+ - `tokens_limit`: Token limit
226
+ - `time_elapsed_seconds`: Elapsed time in seconds
227
+ - `time_limit_seconds`: Time limit in seconds
228
+ - `iterations`: Current iteration count
229
+ - `iterations_limit`: Iteration limit
230
+
231
+ ## See Also
232
+
233
+ - [Architecture - Agents](../architecture/agents.md) - How models are used
234
+ - [Configuration](../configuration/index.md) - Model configuration
235
+
docs/api/orchestrators.md ADDED
@@ -0,0 +1,195 @@
1
+ # Orchestrators API Reference
2
+
3
+ This page documents the API for DeepCritical orchestrators.
4
+
5
+ ## IterativeResearchFlow
6
+
7
+ **Module**: `src.orchestrator.research_flow`
8
+
9
+ **Purpose**: Single-loop research with search-judge-synthesize cycles.
10
+
11
+ ### Methods
12
+
13
+ #### `run`
14
+
15
+ ```python
16
+ async def run(
17
+ self,
18
+ query: str,
19
+ background_context: str = "",
20
+ max_iterations: int | None = None,
21
+ max_time_minutes: float | None = None,
22
+ token_budget: int | None = None
23
+ ) -> AsyncGenerator[AgentEvent, None]
24
+ ```
25
+
26
+ Runs iterative research flow.
27
+
28
+ **Parameters**:
29
+ - `query`: Research query string
30
+ - `background_context`: Background context (default: "")
31
+ - `max_iterations`: Maximum iterations (default: from settings)
32
+ - `max_time_minutes`: Maximum time in minutes (default: from settings)
33
+ - `token_budget`: Token budget (default: from settings)
34
+
35
+ **Yields**: `AgentEvent` objects for:
36
+ - `started`: Research started
37
+ - `search_complete`: Search completed
38
+ - `judge_complete`: Evidence evaluation completed
39
+ - `synthesizing`: Generating report
40
+ - `complete`: Research completed
41
+ - `error`: Error occurred
42
+
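
Because `run` is an async generator, callers consume it with `async for`; a sketch (the flow's constructor is not documented on this page, so an already-built instance is assumed):

```python
from src.orchestrator.research_flow import IterativeResearchFlow
from src.utils.models import AgentEvent


async def consume(flow: IterativeResearchFlow) -> AgentEvent | None:
    last_event: AgentEvent | None = None
    async for event in flow.run(
        query="Does metformin reduce cancer incidence?",
        max_iterations=5,
        max_time_minutes=10.0,
    ):
        print(event.type, event.iteration, event.data)
        last_event = event
    return last_event  # typically a "complete" or "error" event
```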
43
+ ## DeepResearchFlow
44
+
45
+ **Module**: `src.orchestrator.research_flow`
46
+
47
+ **Purpose**: Multi-section parallel research with planning and synthesis.
48
+
49
+ ### Methods
50
+
51
+ #### `run`
52
+
53
+ ```python
54
+ async def run(
55
+ self,
56
+ query: str,
57
+ background_context: str = "",
58
+ max_iterations_per_section: int | None = None,
59
+ max_time_minutes: float | None = None,
60
+ token_budget: int | None = None
61
+ ) -> AsyncGenerator[AgentEvent, None]
62
+ ```
63
+
64
+ Runs deep research flow.
65
+
66
+ **Parameters**:
67
+ - `query`: Research query string
68
+ - `background_context`: Background context (default: "")
69
+ - `max_iterations_per_section`: Maximum iterations per section (default: from settings)
70
+ - `max_time_minutes`: Maximum time in minutes (default: from settings)
71
+ - `token_budget`: Token budget (default: from settings)
72
+
73
+ **Yields**: `AgentEvent` objects for:
74
+ - `started`: Research started
75
+ - `planning`: Creating research plan
76
+ - `looping`: Running parallel research loops
77
+ - `synthesizing`: Synthesizing results
78
+ - `complete`: Research completed
79
+ - `error`: Error occurred
80
+
81
+ ## GraphOrchestrator
82
+
83
+ **Module**: `src.orchestrator.graph_orchestrator`
84
+
85
+ **Purpose**: Graph-based execution using Pydantic AI agents as nodes.
86
+
87
+ ### Methods
88
+
89
+ #### `run`
90
+
91
+ ```python
92
+ async def run(
93
+ self,
94
+ query: str,
95
+ research_mode: str = "auto",
96
+ use_graph: bool = True
97
+ ) -> AsyncGenerator[AgentEvent, None]
98
+ ```
99
+
100
+ Runs graph-based research orchestration.
101
+
102
+ **Parameters**:
103
+ - `query`: Research query string
104
+ - `research_mode`: Research mode ("iterative", "deep", or "auto")
105
+ - `use_graph`: Whether to use graph execution (default: True)
106
+
107
+ **Yields**: `AgentEvent` objects during graph execution.
108
+
109
+ ## Orchestrator Factory
110
+
111
+ **Module**: `src.orchestrator_factory`
112
+
113
+ **Purpose**: Factory for creating orchestrators.
114
+
115
+ ### Functions
116
+
117
+ #### `create_orchestrator`
118
+
119
+ ```python
120
+ def create_orchestrator(
121
+ search_handler: SearchHandlerProtocol,
122
+ judge_handler: JudgeHandlerProtocol,
123
+ config: dict[str, Any],
124
+ mode: str | None = None
125
+ ) -> Any
126
+ ```
127
+
128
+ Creates an orchestrator instance.
129
+
130
+ **Parameters**:
131
+ - `search_handler`: Search handler protocol implementation
132
+ - `judge_handler`: Judge handler protocol implementation
133
+ - `config`: Configuration dictionary
134
+ - `mode`: Orchestrator mode ("simple", "advanced", "magentic", or None for auto-detect)
135
+
136
+ **Returns**: Orchestrator instance.
137
+
138
+ **Raises**:
139
+ - `ValueError`: If requirements not met
140
+
141
+ **Modes**:
142
+ - `"simple"`: Legacy orchestrator
143
+ - `"advanced"` or `"magentic"`: Magentic orchestrator (requires OpenAI API key)
144
+ - `None`: Auto-detect based on API key availability
145
+
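
A wiring sketch; constructing the search and judge handlers is out of scope here, so they are passed in as already-built protocol implementations (the config keys shown are illustrative):

```python
from typing import Any

from src.orchestrator_factory import create_orchestrator


def build_orchestrator(search_handler: Any, judge_handler: Any) -> Any:
    """Create an orchestrator; the handlers must satisfy the protocols above."""
    return create_orchestrator(
        search_handler=search_handler,
        judge_handler=judge_handler,
        config={"max_iterations": 5},  # illustrative config keys
        mode=None,  # auto-detect: magentic when an OpenAI API key is available
    )
```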
146
+ ## MagenticOrchestrator
147
+
148
+ **Module**: `src.orchestrator_magentic`
149
+
150
+ **Purpose**: Multi-agent coordination using Microsoft Agent Framework.
151
+
152
+ ### Methods
153
+
154
+ #### `run`
155
+
156
+ ```python
157
+ async def run(
158
+ self,
159
+ query: str,
160
+ max_rounds: int = 15,
161
+ max_stalls: int = 3
162
+ ) -> AsyncGenerator[AgentEvent, None]
163
+ ```
164
+
165
+ Runs Magentic orchestration.
166
+
167
+ **Parameters**:
168
+ - `query`: Research query string
169
+ - `max_rounds`: Maximum rounds (default: 15)
170
+ - `max_stalls`: Maximum stalls before reset (default: 3)
171
+
172
+ **Yields**: `AgentEvent` objects converted from Magentic events.
173
+
174
+ **Requirements**:
175
+ - `agent-framework-core` package
176
+ - OpenAI API key
177
+
178
+ ## See Also
179
+
180
+ - [Architecture - Orchestrators](../architecture/orchestrators.md) - Architecture overview
181
+ - [Graph Orchestration](../architecture/graph-orchestration.md) - Graph execution details
182
+
docs/api/services.md ADDED
@@ -0,0 +1,201 @@
1
+ # Services API Reference
2
+
3
+ This page documents the API for DeepCritical services.
4
+
5
+ ## EmbeddingService
6
+
7
+ **Module**: `src.services.embeddings`
8
+
9
+ **Purpose**: Local sentence-transformers for semantic search and deduplication.
10
+
11
+ ### Methods
12
+
13
+ #### `embed`
14
+
15
+ ```python
16
+ async def embed(self, text: str) -> list[float]
17
+ ```
18
+
19
+ Generates embedding for a text string.
20
+
21
+ **Parameters**:
22
+ - `text`: Text to embed
23
+
24
+ **Returns**: Embedding vector as list of floats.
25
+
26
+ #### `embed_batch`
27
+
28
+ ```python
29
+ async def embed_batch(self, texts: list[str]) -> list[list[float]]
30
+ ```
31
+
32
+ Generates embeddings for multiple texts.
33
+
34
+ **Parameters**:
35
+ - `texts`: List of texts to embed
36
+
37
+ **Returns**: List of embedding vectors.
38
+
39
+ #### `similarity`
40
+
41
+ ```python
42
+ async def similarity(self, text1: str, text2: str) -> float
43
+ ```
44
+
45
+ Calculates similarity between two texts.
46
+
47
+ **Parameters**:
48
+ - `text1`: First text
49
+ - `text2`: Second text
50
+
51
+ **Returns**: Similarity score (0.0-1.0).
52
+
53
+ #### `find_duplicates`
54
+
55
+ ```python
56
+ async def find_duplicates(
57
+ self,
58
+ texts: list[str],
59
+ threshold: float = 0.85
60
+ ) -> list[tuple[int, int]]
61
+ ```
62
+
63
+ Finds duplicate texts based on similarity threshold.
64
+
65
+ **Parameters**:
66
+ - `texts`: List of texts to check
67
+ - `threshold`: Similarity threshold (default: 0.85)
68
+
69
+ **Returns**: List of (index1, index2) tuples for duplicate pairs.
70
+
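
A usage sketch mirroring the architecture docs; `get_embedding_service()` returns the cached singleton:

```python
import asyncio

from src.services.embeddings import get_embedding_service


async def main() -> None:
    service = get_embedding_service()  # cached singleton, no API key required
    score = await service.similarity(
        "Metformin lowers cancer risk.",
        "Cancer incidence is reduced by metformin.",
    )
    pairs = await service.find_duplicates(
        ["text a", "text a (near copy)", "unrelated text"],
        threshold=0.85,
    )
    print(score, pairs)


asyncio.run(main())
```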
71
+ ### Factory Function
72
+
73
+ #### `get_embedding_service`
74
+
75
+ ```python
76
+ @lru_cache(maxsize=1)
77
+ def get_embedding_service() -> EmbeddingService
78
+ ```
79
+
80
+ Returns the singleton `EmbeddingService` instance.
81
+
82
+ ## LlamaIndexRAGService
83
+
84
+ **Module**: `src.services.rag`
85
+
86
+ **Purpose**: Retrieval-Augmented Generation using LlamaIndex.
87
+
88
+ ### Methods
89
+
90
+ #### `ingest_evidence`
91
+
92
+ ```python
93
+ async def ingest_evidence(self, evidence: list[Evidence]) -> None
94
+ ```
95
+
96
+ Ingests evidence into RAG service.
97
+
98
+ **Parameters**:
99
+ - `evidence`: List of Evidence objects to ingest
100
+
101
+ **Note**: Requires OpenAI API key for embeddings.
102
+
103
+ #### `retrieve`
104
+
105
+ ```python
106
+ async def retrieve(
107
+ self,
108
+ query: str,
109
+ top_k: int = 5
110
+ ) -> list[Document]
111
+ ```
112
+
113
+ Retrieves relevant documents for a query.
114
+
115
+ **Parameters**:
116
+ - `query`: Search query string
117
+ - `top_k`: Number of top results to return (default: 5)
118
+
119
+ **Returns**: List of Document objects with metadata.
120
+
121
+ #### `query`
122
+
123
+ ```python
124
+ async def query(
125
+ self,
126
+ query: str,
127
+ top_k: int = 5
128
+ ) -> str
129
+ ```
130
+
131
+ Queries RAG service and returns formatted results.
132
+
133
+ **Parameters**:
134
+ - `query`: Search query string
135
+ - `top_k`: Number of top results to return (default: 5)
136
+
137
+ **Returns**: Formatted query results as string.
138
+
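
An ingest-then-query sketch that also handles the missing-OpenAI-key case covered by the factory function below:

```python
from src.services.rag import get_rag_service
from src.utils.models import Evidence


async def rag_roundtrip(evidence: list[Evidence]) -> str | None:
    service = get_rag_service()  # None when no OpenAI API key is configured
    if service is None:
        return None
    await service.ingest_evidence(evidence)
    return await service.query("metformin and cancer incidence", top_k=5)
```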
139
+ ### Factory Function
140
+
141
+ #### `get_rag_service`
142
+
143
+ ```python
144
+ @lru_cache(maxsize=1)
145
+ def get_rag_service() -> LlamaIndexRAGService | None
146
+ ```
147
+
148
+ Returns the singleton `LlamaIndexRAGService` instance, or `None` if no OpenAI API key is available.
149
+
150
+ ## StatisticalAnalyzer
151
+
152
+ **Module**: `src.services.statistical_analyzer`
153
+
154
+ **Purpose**: Secure execution of AI-generated statistical code.
155
+
156
+ ### Methods
157
+
158
+ #### `analyze`
159
+
160
+ ```python
161
+ async def analyze(
162
+ self,
163
+ hypothesis: str,
164
+ evidence: list[Evidence],
165
+ data_description: str | None = None
166
+ ) -> AnalysisResult
167
+ ```
168
+
169
+ Analyzes a hypothesis using statistical methods.
170
+
171
+ **Parameters**:
172
+ - `hypothesis`: Hypothesis to analyze
173
+ - `evidence`: List of Evidence objects
174
+ - `data_description`: Optional data description
175
+
176
+ **Returns**: `AnalysisResult` with:
177
+ - `verdict`: SUPPORTED, REFUTED, or INCONCLUSIVE
178
+ - `code`: Generated analysis code
179
+ - `output`: Execution output
180
+ - `error`: Error message if execution failed
181
+
182
+ **Note**: Requires Modal credentials for sandbox execution.
183
+
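
A usage sketch, following the example in the architecture docs (the hypothesis string is illustrative):

```python
from src.services.statistical_analyzer import StatisticalAnalyzer
from src.utils.models import Evidence


async def run_analysis(evidence: list[Evidence]) -> None:
    analyzer = StatisticalAnalyzer()
    result = await analyzer.analyze(
        hypothesis="Metformin reduces cancer risk",
        evidence=evidence,
    )
    print(result.verdict)  # SUPPORTED, REFUTED, or INCONCLUSIVE
    if result.error:
        print("Sandbox execution failed:", result.error)
```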
184
+ ## See Also
185
+
186
+ - [Architecture - Services](../architecture/services.md) - Architecture overview
187
+ - [Configuration](../configuration/index.md) - Service configuration
188
+
docs/api/tools.md ADDED
@@ -0,0 +1,235 @@
1
+ # Tools API Reference
2
+
3
+ This page documents the API for DeepCritical search tools.
4
+
5
+ ## SearchTool Protocol
6
+
7
+ All tools implement the `SearchTool` protocol:
8
+
9
+ ```python
10
+ class SearchTool(Protocol):
11
+ @property
12
+ def name(self) -> str: ...
13
+
14
+ async def search(
15
+ self,
16
+ query: str,
17
+ max_results: int = 10
18
+ ) -> list[Evidence]: ...
19
+ ```
20
+
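
Because the protocol is structural, any class exposing a `name` property and a matching `search` coroutine satisfies it; a minimal non-network sketch:

```python
from src.utils.models import Citation, Evidence


class StaticSearchTool:
    """Toy tool that satisfies the SearchTool protocol without making network calls."""

    @property
    def name(self) -> str:
        return "static"

    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        citation = Citation(title=f"Stub result for {query}", url="https://example.org/stub")
        stub = Evidence(citation=citation, content="No real content.", relevance_score=0.1)
        return [stub][:max_results]
```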
21
+ ## PubMedTool
22
+
23
+ **Module**: `src.tools.pubmed`
24
+
25
+ **Purpose**: Search peer-reviewed biomedical literature from PubMed.
26
+
27
+ ### Properties
28
+
29
+ #### `name`
30
+
31
+ ```python
32
+ @property
33
+ def name(self) -> str
34
+ ```
35
+
36
+ Returns tool name: `"pubmed"`
37
+
38
+ ### Methods
39
+
40
+ #### `search`
41
+
42
+ ```python
43
+ async def search(
44
+ self,
45
+ query: str,
46
+ max_results: int = 10
47
+ ) -> list[Evidence]
48
+ ```
49
+
50
+ Searches PubMed for articles.
51
+
52
+ **Parameters**:
53
+ - `query`: Search query string
54
+ - `max_results`: Maximum number of results to return (default: 10)
55
+
56
+ **Returns**: List of `Evidence` objects with PubMed articles.
57
+
58
+ **Raises**:
59
+ - `SearchError`: If search fails
60
+ - `RateLimitError`: If rate limit is exceeded
61
+
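
A usage sketch (the no-argument constructor mirrors the tool-registration example in the architecture docs; the exception imports follow `src/utils/exceptions.py`):

```python
import asyncio

from src.tools.pubmed import PubMedTool
from src.utils.exceptions import RateLimitError, SearchError


async def main() -> None:
    tool = PubMedTool()
    try:
        results = await tool.search("metformin cancer incidence", max_results=5)
    except (RateLimitError, SearchError) as exc:
        print("PubMed search failed:", exc)
        return
    for item in results:
        print(item.citation.title, item.citation.url)


asyncio.run(main())
```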
62
+ ## ClinicalTrialsTool
63
+
64
+ **Module**: `src.tools.clinicaltrials`
65
+
66
+ **Purpose**: Search ClinicalTrials.gov for interventional studies.
67
+
68
+ ### Properties
69
+
70
+ #### `name`
71
+
72
+ ```python
73
+ @property
74
+ def name(self) -> str
75
+ ```
76
+
77
+ Returns tool name: `"clinicaltrials"`
78
+
79
+ ### Methods
80
+
81
+ #### `search`
82
+
83
+ ```python
84
+ async def search(
85
+ self,
86
+ query: str,
87
+ max_results: int = 10
88
+ ) -> list[Evidence]
89
+ ```
90
+
91
+ Searches ClinicalTrials.gov for trials.
92
+
93
+ **Parameters**:
94
+ - `query`: Search query string
95
+ - `max_results`: Maximum number of results to return (default: 10)
96
+
97
+ **Returns**: List of `Evidence` objects with clinical trials.
98
+
99
+ **Note**: Only interventional studies are returned, limited to the statuses COMPLETED, ACTIVE_NOT_RECRUITING, RECRUITING, and ENROLLING_BY_INVITATION.
100
+
101
+ **Raises**:
102
+ - `SearchError`: If search fails
103
+
104
+ ## EuropePMCTool
105
+
106
+ **Module**: `src.tools.europepmc`
107
+
108
+ **Purpose**: Search Europe PMC for preprints and peer-reviewed articles.
109
+
110
+ ### Properties
111
+
112
+ #### `name`
113
+
114
+ ```python
115
+ @property
116
+ def name(self) -> str
117
+ ```
118
+
119
+ Returns tool name: `"europepmc"`
120
+
121
+ ### Methods
122
+
123
+ #### `search`
124
+
125
+ ```python
126
+ async def search(
127
+ self,
128
+ query: str,
129
+ max_results: int = 10
130
+ ) -> list[Evidence]
131
+ ```
132
+
133
+ Searches Europe PMC for articles and preprints.
134
+
135
+ **Parameters**:
136
+ - `query`: Search query string
137
+ - `max_results`: Maximum number of results to return (default: 10)
138
+
139
+ **Returns**: List of `Evidence` objects with articles/preprints.
140
+
141
+ **Note**: Includes both preprints (marked with `[PREPRINT - Not peer-reviewed]`) and peer-reviewed articles.
142
+
143
+ **Raises**:
144
+ - `SearchError`: If search fails
145
+
146
+ ## RAGTool
147
+
148
+ **Module**: `src.tools.rag_tool`
149
+
150
+ **Purpose**: Semantic search within collected evidence.
151
+
152
+ ### Properties
153
+
154
+ #### `name`
155
+
156
+ ```python
157
+ @property
158
+ def name(self) -> str
159
+ ```
160
+
161
+ Returns tool name: `"rag"`
162
+
163
+ ### Methods
164
+
165
+ #### `search`
166
+
167
+ ```python
168
+ async def search(
169
+ self,
170
+ query: str,
171
+ max_results: int = 10
172
+ ) -> list[Evidence]
173
+ ```
174
+
175
+ Searches collected evidence using semantic similarity.
176
+
177
+ **Parameters**:
178
+ - `query`: Search query string
179
+ - `max_results`: Maximum number of results to return (default: 10)
180
+
181
+ **Returns**: List of `Evidence` objects from collected evidence.
182
+
183
+ **Note**: Requires evidence to be ingested into RAG service first.
184
+
185
+ ## SearchHandler
186
+
187
+ **Module**: `src.tools.search_handler`
188
+
189
+ **Purpose**: Orchestrates parallel searches across multiple tools.
190
+
191
+ ### Methods
192
+
193
+ #### `search`
194
+
195
+ ```python
196
+ async def search(
197
+ self,
198
+ query: str,
199
+ tools: list[SearchTool] | None = None,
200
+ max_results_per_tool: int = 10
201
+ ) -> SearchResult
202
+ ```
203
+
204
+ Searches multiple tools in parallel.
205
+
206
+ **Parameters**:
207
+ - `query`: Search query string
208
+ - `tools`: List of tools to use (default: all available tools)
209
+ - `max_results_per_tool`: Maximum results per tool (default: 10)
210
+
211
+ **Returns**: `SearchResult` with:
212
+ - `evidence`: Aggregated list of evidence
213
+ - `tool_results`: Results per tool
214
+ - `total_count`: Total number of results
215
+
216
+ **Note**: Uses `asyncio.gather()` for parallel execution. Handles tool failures gracefully.
217
+
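
A sketch combining the registration example from the architecture docs with a single parallel search:

```python
from src.tools.clinicaltrials import ClinicalTrialsTool
from src.tools.europepmc import EuropePMCTool
from src.tools.pubmed import PubMedTool
from src.tools.search_handler import SearchHandler
from src.utils.models import Evidence


async def gather_evidence(query: str) -> list[Evidence]:
    handler = SearchHandler(
        tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()],
    )
    result = await handler.search(query, max_results_per_tool=10)
    print("Total results:", result.total_count)
    return result.evidence
```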
218
+ ## See Also
219
+
220
+ - [Architecture - Tools](../architecture/tools.md) - Architecture overview
221
+ - [Models API](models.md) - Data models used by tools
222
+
docs/architecture/agents.md ADDED
@@ -0,0 +1,192 @@
1
+ # Agents Architecture
2
+
3
+ DeepCritical uses Pydantic AI agents for all AI-powered operations. All agents follow a consistent pattern and use structured output types.
4
+
5
+ ## Agent Pattern
6
+
7
+ All agents use the Pydantic AI `Agent` class with the following structure:
8
+
9
+ - **System Prompt**: Module-level constant with date injection
10
+ - **Agent Class**: `__init__(model: Any | None = None)`
11
+ - **Main Method**: Async method (e.g., `async def evaluate()`, `async def write_report()`)
12
+ - **Factory Function**: `def create_agent_name(model: Any | None = None) -> AgentName`
13
+
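
Putting these conventions together, a skeleton agent might look like the following. This is a sketch only: the class, prompt, and factory names are invented for illustration, and the Pydantic AI calls follow the `output_type` convention referenced in the contributing docs (attribute names can differ between library versions).

```python
from datetime import date
from typing import Any

from pydantic_ai import Agent

from src.agent_factory.judges import get_model
from src.utils.models import KnowledgeGapOutput

# System prompt is a module-level constant with the current date injected.
SYSTEM_PROMPT = f"You evaluate research completeness. Today is {date.today().isoformat()}."


class ExampleGapAgent:
    def __init__(self, model: Any | None = None) -> None:
        self._agent = Agent(
            model or get_model(),
            output_type=KnowledgeGapOutput,
            system_prompt=SYSTEM_PROMPT,
        )

    async def evaluate(self, query: str) -> KnowledgeGapOutput:
        result = await self._agent.run(query)
        return result.output


def create_example_gap_agent(model: Any | None = None) -> ExampleGapAgent:
    return ExampleGapAgent(model)
```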
14
+ ## Model Initialization
15
+
16
+ Agents use `get_model()` from `src/agent_factory/judges.py` if no model is provided. This supports:
17
+
18
+ - OpenAI models
19
+ - Anthropic models
20
+ - HuggingFace Inference API models
21
+
22
+ The model selection is based on the configured `LLM_PROVIDER` in settings.
23
+
24
+ ## Error Handling
25
+
26
+ Agents return fallback values on failure rather than raising exceptions:
27
+
28
+ - `KnowledgeGapOutput(research_complete=False, outstanding_gaps=[...])`
29
+ - Empty strings for text outputs
30
+ - Default structured outputs
31
+
32
+ All errors are logged with context using structlog.
33
+
34
+ ## Input Validation
35
+
36
+ All agents validate inputs:
37
+
38
+ - Check that queries/inputs are not empty
39
+ - Truncate very long inputs with warnings
40
+ - Handle None values gracefully
41
+
42
+ ## Output Types
43
+
44
+ Agents use structured output types from `src/utils/models.py`:
45
+
46
+ - `KnowledgeGapOutput`: Research completeness evaluation
47
+ - `AgentSelectionPlan`: Tool selection plan
48
+ - `ReportDraft`: Long-form report structure
49
+ - `ParsedQuery`: Query parsing and mode detection
50
+
51
+ For text output (writer agents), agents return `str` directly.
52
+
53
+ ## Agent Types
54
+
55
+ ### Knowledge Gap Agent
56
+
57
+ **File**: `src/agents/knowledge_gap.py`
58
+
59
+ **Purpose**: Evaluates research state and identifies knowledge gaps.
60
+
61
+ **Output**: `KnowledgeGapOutput` with:
62
+ - `research_complete`: Boolean indicating if research is complete
63
+ - `outstanding_gaps`: List of remaining knowledge gaps
64
+
65
+ **Methods**:
66
+ - `async def evaluate(query, background_context, conversation_history, iteration, time_elapsed_minutes, max_time_minutes) -> KnowledgeGapOutput`
67
+
68
+ ### Tool Selector Agent
69
+
70
+ **File**: `src/agents/tool_selector.py`
71
+
72
+ **Purpose**: Selects appropriate tools for addressing knowledge gaps.
73
+
74
+ **Output**: `AgentSelectionPlan` with list of `AgentTask` objects.
75
+
76
+ **Available Agents**:
77
+ - `WebSearchAgent`: General web search for fresh information
78
+ - `SiteCrawlerAgent`: Research specific entities/companies
79
+ - `RAGAgent`: Semantic search within collected evidence
80
+
81
+ ### Writer Agent
82
+
83
+ **File**: `src/agents/writer.py`
84
+
85
+ **Purpose**: Generates final reports from research findings.
86
+
87
+ **Output**: Markdown string with numbered citations.
88
+
89
+ **Methods**:
90
+ - `async def write_report(query, findings, output_length, output_instructions) -> str`
91
+
92
+ **Features**:
93
+ - Validates inputs
94
+ - Truncates very long findings (max 50000 chars) with warning
95
+ - Retry logic for transient failures (3 retries)
96
+ - Citation validation before returning
97
+
98
+ ### Long Writer Agent
99
+
100
+ **File**: `src/agents/long_writer.py`
101
+
102
+ **Purpose**: Long-form report generation with section-by-section writing.
103
+
104
+ **Input/Output**: Uses `ReportDraft` models.
105
+
106
+ **Methods**:
107
+ - `async def write_next_section(query, draft, section_title, section_content) -> LongWriterOutput`
108
+ - `async def write_report(query, report_title, report_draft) -> str`
109
+
110
+ **Features**:
111
+ - Writes sections iteratively
112
+ - Aggregates references across sections
113
+ - Reformats section headings and references
114
+ - Deduplicates and renumbers references
115
+
116
+ ### Proofreader Agent
117
+
118
+ **File**: `src/agents/proofreader.py`
119
+
120
+ **Purpose**: Proofreads and polishes report drafts.
121
+
122
+ **Input**: `ReportDraft`
123
+ **Output**: Polished markdown string
124
+
125
+ **Methods**:
126
+ - `async def proofread(query, report_title, report_draft) -> str`
127
+
128
+ **Features**:
129
+ - Removes duplicate content across sections
130
+ - Adds executive summary if multiple sections
131
+ - Preserves all references and citations
132
+ - Improves flow and readability
133
+
134
+ ### Thinking Agent
135
+
136
+ **File**: `src/agents/thinking.py`
137
+
138
+ **Purpose**: Generates observations from conversation history.
139
+
140
+ **Output**: Observation string
141
+
142
+ **Methods**:
143
+ - `async def generate_observations(query, background_context, conversation_history) -> str`
144
+
145
+ ### Input Parser Agent
146
+
147
+ **File**: `src/agents/input_parser.py`
148
+
149
+ **Purpose**: Parses and improves user queries, detects research mode.
150
+
151
+ **Output**: `ParsedQuery` with:
152
+ - `original_query`: Original query string
153
+ - `improved_query`: Refined query string
154
+ - `research_mode`: "iterative" or "deep"
155
+ - `key_entities`: List of key entities
156
+ - `research_questions`: List of research questions
157
+
158
+ ## Factory Functions
159
+
160
+ All agents have factory functions in `src/agent_factory/agents.py`:
161
+
162
+ ```python
163
+ def create_knowledge_gap_agent(model: Any | None = None) -> KnowledgeGapAgent
164
+ def create_tool_selector_agent(model: Any | None = None) -> ToolSelectorAgent
165
+ def create_writer_agent(model: Any | None = None) -> WriterAgent
166
+ # ... etc
167
+ ```
168
+
169
+ Factory functions:
170
+ - Use `get_model()` if no model provided
171
+ - Raise `ConfigurationError` if creation fails
172
+ - Log agent creation
173
+
174
+ ## See Also
175
+
176
+ - [Orchestrators](orchestrators.md) - How agents are orchestrated
177
+ - [API Reference - Agents](../api/agents.md) - API documentation
178
+ - [Contributing - Code Style](../contributing/code-style.md) - Development guidelines
179
+
docs/architecture/middleware.md ADDED
@@ -0,0 +1,142 @@
1
+ # Middleware Architecture
2
+
3
+ DeepCritical uses middleware for state management, budget tracking, and workflow coordination.
4
+
5
+ ## State Management
6
+
7
+ ### WorkflowState
8
+
9
+ **File**: `src/middleware/state_machine.py`
10
+
11
+ **Purpose**: Thread-safe state management for research workflows
12
+
13
+ **Implementation**: Uses `ContextVar` for thread-safe isolation
14
+
15
+ **State Components**:
16
+ - `evidence: list[Evidence]`: Collected evidence from searches
17
+ - `conversation: Conversation`: Iteration history (gaps, tool calls, findings, thoughts)
18
+ - `embedding_service: Any`: Embedding service for semantic search
19
+
20
+ **Methods**:
21
+ - `add_evidence(evidence: Evidence)`: Adds evidence with URL-based deduplication
22
+ - `async search_related(query: str, top_k: int = 5) -> list[Evidence]`: Semantic search
23
+
24
+ **Initialization**:
25
+ ```python
26
+ from src.middleware.state_machine import init_workflow_state
27
+
28
+ init_workflow_state(embedding_service)
29
+ ```
30
+
31
+ **Access**:
32
+ ```python
33
+ from src.middleware.state_machine import get_workflow_state
34
+
35
+ state = get_workflow_state() # Auto-initializes if missing
36
+ ```
37
+
38
+ ## Workflow Manager
39
+
40
+ **File**: `src/middleware/workflow_manager.py`
41
+
42
+ **Purpose**: Coordinates parallel research loops
43
+
44
+ **Methods**:
45
+ - `add_loop(loop: ResearchLoop)`: Add a research loop to manage
46
+ - `async run_loops_parallel() -> list[ResearchLoop]`: Run all loops in parallel
47
+ - `update_loop_status(loop_id: str, status: str)`: Update loop status
48
+ - `sync_loop_evidence_to_state()`: Synchronize evidence from loops to global state
49
+
50
+ **Features**:
51
+ - Uses `asyncio.gather()` for parallel execution
52
+ - Handles errors per loop (doesn't fail all if one fails)
53
+ - Tracks loop status: `pending`, `running`, `completed`, `failed`, `cancelled`
54
+ - Evidence deduplication across parallel loops
55
+
56
+ **Usage**:
57
+ ```python
58
+ from src.middleware.workflow_manager import WorkflowManager
59
+
60
+ manager = WorkflowManager()
61
+ manager.add_loop(loop1)
62
+ manager.add_loop(loop2)
63
+ completed_loops = await manager.run_loops_parallel()
64
+ ```
65
+
66
+ ## Budget Tracker
67
+
68
+ **File**: `src/middleware/budget_tracker.py`
69
+
70
+ **Purpose**: Tracks and enforces resource limits
71
+
72
+ **Budget Components**:
73
+ - **Tokens**: LLM token usage
74
+ - **Time**: Elapsed time in seconds
75
+ - **Iterations**: Number of iterations
76
+
77
+ **Methods**:
78
+ - `create_budget(token_limit, time_limit_seconds, iterations_limit) -> BudgetStatus`
79
+ - `add_tokens(tokens: int)`: Add token usage
80
+ - `start_timer()`: Start time tracking
81
+ - `update_timer()`: Update elapsed time
82
+ - `increment_iteration()`: Increment iteration count
83
+ - `check_budget() -> BudgetStatus`: Check current budget status
84
+ - `can_continue() -> bool`: Check if research can continue
85
+
86
+ **Token Estimation**:
87
+ - `estimate_tokens(text: str) -> int`: ~4 chars per token
88
+ - `estimate_llm_call_tokens(prompt: str, response: str) -> int`: Estimate LLM call tokens
89
+
90
+ **Usage**:
91
+ ```python
92
+ from src.middleware.budget_tracker import BudgetTracker
93
+
94
+ tracker = BudgetTracker()
95
+ budget = tracker.create_budget(
96
+ token_limit=100000,
97
+ time_limit_seconds=600,
98
+ iterations_limit=10
99
+ )
100
+ tracker.start_timer()
101
+ # ... research operations ...
102
+ if not tracker.can_continue():
103
+ # Budget exceeded, stop research
104
+ pass
105
+ ```
106
+
107
+ ## Models
108
+
109
+ All middleware models are defined in `src/utils/models.py`:
110
+
111
+ - `IterationData`: Data for a single iteration
112
+ - `Conversation`: Conversation history with iterations
113
+ - `ResearchLoop`: Research loop state and configuration
114
+ - `BudgetStatus`: Current budget status
115
+
116
+ ## Thread Safety
117
+
118
+ All middleware components use `ContextVar` for thread-safe isolation:
119
+
120
+ - Each request/thread has its own workflow state
121
+ - No global mutable state
122
+ - Safe for concurrent requests
123
+
124
+ ## See Also
125
+
126
+ - [Orchestrators](orchestrators.md) - How middleware is used in orchestration
127
+ - [API Reference - Orchestrators](../api/orchestrators.md) - API documentation
128
+ - [Contributing - Code Style](../contributing/code-style.md) - Development guidelines
129
+
docs/architecture/services.md ADDED
@@ -0,0 +1,142 @@
1
+ # Services Architecture
2
+
3
+ DeepCritical provides several services for embeddings, RAG, and statistical analysis.
4
+
5
+ ## Embedding Service
6
+
7
+ **File**: `src/services/embeddings.py`
8
+
9
+ **Purpose**: Local sentence-transformers for semantic search and deduplication
10
+
11
+ **Features**:
12
+ - **No API Key Required**: Uses local sentence-transformers models
13
+ - **Async-Safe**: All operations use `run_in_executor()` to avoid blocking
14
+ - **ChromaDB Storage**: Vector storage for embeddings
15
+ - **Deduplication**: 0.85 similarity threshold (pairs at or above 85% similarity are treated as duplicates)
16
+
17
+ **Model**: Configurable via `settings.local_embedding_model` (default: `all-MiniLM-L6-v2`)
18
+
19
+ **Methods**:
20
+ - `async def embed(text: str) -> list[float]`: Generate embeddings
21
+ - `async def embed_batch(texts: list[str]) -> list[list[float]]`: Batch embedding
22
+ - `async def similarity(text1: str, text2: str) -> float`: Calculate similarity
23
+ - `async def find_duplicates(texts: list[str], threshold: float = 0.85) -> list[tuple[int, int]]`: Find duplicates
24
+
25
+ **Usage**:
26
+ ```python
27
+ from src.services.embeddings import get_embedding_service
28
+
29
+ service = get_embedding_service()
30
+ embedding = await service.embed("text to embed")
31
+ ```
32
+
33
+ ## LlamaIndex RAG Service
34
+
35
+ **File**: `src/services/rag.py`
36
+
37
+ **Purpose**: Retrieval-Augmented Generation using LlamaIndex
38
+
39
+ **Features**:
40
+ - **OpenAI Embeddings**: Requires `OPENAI_API_KEY`
41
+ - **ChromaDB Storage**: Vector database for document storage
42
+ - **Metadata Preservation**: Preserves source, title, URL, date, authors
43
+ - **Lazy Initialization**: Graceful fallback if OpenAI key not available
44
+
45
+ **Methods**:
46
+ - `async def ingest_evidence(evidence: list[Evidence]) -> None`: Ingest evidence into RAG
47
+ - `async def retrieve(query: str, top_k: int = 5) -> list[Document]`: Retrieve relevant documents
48
+ - `async def query(query: str, top_k: int = 5) -> str`: Query with RAG
49
+
50
+ **Usage**:
51
+ ```python
52
+ from src.services.rag import get_rag_service
53
+
54
+ service = get_rag_service()
55
+ if service:
56
+ documents = await service.retrieve("query", top_k=5)
57
+ ```
58
+
59
+ ## Statistical Analyzer
60
+
61
+ **File**: `src/services/statistical_analyzer.py`
62
+
63
+ **Purpose**: Secure execution of AI-generated statistical code
64
+
65
+ **Features**:
66
+ - **Modal Sandbox**: Secure, isolated execution environment
67
+ - **Code Generation**: Generates Python code via LLM
68
+ - **Library Pinning**: Version-pinned libraries in `SANDBOX_LIBRARIES`
69
+ - **Network Isolation**: `block_network=True` by default
70
+
71
+ **Libraries Available**:
72
+ - pandas, numpy, scipy
73
+ - matplotlib, scikit-learn
74
+ - statsmodels
75
+
76
+ **Output**: `AnalysisResult` with:
77
+ - `verdict`: SUPPORTED, REFUTED, or INCONCLUSIVE
78
+ - `code`: Generated analysis code
79
+ - `output`: Execution output
80
+ - `error`: Error message if execution failed
81
+
82
+ **Usage**:
83
+ ```python
84
+ from src.services.statistical_analyzer import StatisticalAnalyzer
85
+
86
+ analyzer = StatisticalAnalyzer()
87
+ result = await analyzer.analyze(
88
+ hypothesis="Metformin reduces cancer risk",
89
+ evidence=evidence_list
90
+ )
91
+ ```
92
+
93
+ ## Singleton Pattern
94
+
95
+ All services use the singleton pattern with `@lru_cache(maxsize=1)`:
96
+
97
+ ```python
98
+ @lru_cache(maxsize=1)
99
+ def get_embedding_service() -> EmbeddingService:
100
+ return EmbeddingService()
101
+ ```
102
+
103
+ This ensures:
104
+ - Single instance per process
105
+ - Lazy initialization
106
+ - No dependencies required at import time
107
+
108
+ ## Service Availability
109
+
110
+ Services check availability before use:
111
+
112
+ ```python
113
+ from src.utils.config import settings
114
+
115
+ if settings.modal_available:
116
+ # Use Modal sandbox
117
+ pass
118
+
119
+ if settings.has_openai_key:
120
+ # Use OpenAI embeddings for RAG
121
+ pass
122
+ ```
123
+
124
+ ## See Also
125
+
126
+ - [Tools](tools.md) - How services are used by search tools
127
+ - [API Reference - Services](../api/services.md) - API documentation
128
+ - [Configuration](../configuration/index.md) - Service configuration
129
+
docs/architecture/tools.md ADDED
@@ -0,0 +1,175 @@
1
+ # Tools Architecture
2
+
3
+ DeepCritical implements a protocol-based search tool system for retrieving evidence from multiple sources.
4
+
5
+ ## SearchTool Protocol
6
+
7
+ All tools implement the `SearchTool` protocol from `src/tools/base.py`:
8
+
9
+ ```python
10
+ class SearchTool(Protocol):
11
+ @property
12
+ def name(self) -> str: ...
13
+
14
+ async def search(
15
+ self,
16
+ query: str,
17
+ max_results: int = 10
18
+ ) -> list[Evidence]: ...
19
+ ```
20
+
21
+ ## Rate Limiting
22
+
23
+ All tools use the `@retry` decorator from tenacity:
24
+
25
+ ```python
26
+ @retry(
27
+ stop=stop_after_attempt(3),
28
+ wait=wait_exponential(...)
29
+ )
30
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
31
+ # Implementation
32
+ ```
33
+
34
+ Tools with API rate limits implement `_rate_limit()` method and use shared rate limiters from `src/tools/rate_limiter.py`.
35
+
36
+ ## Error Handling
37
+
38
+ Tools raise custom exceptions:
39
+
40
+ - `SearchError`: General search failures
41
+ - `RateLimitError`: Rate limit exceeded
42
+
43
+ Tools handle HTTP errors (429, 500, timeout) and return empty lists on non-critical errors (with warning logs).
44
+
45
+ ## Query Preprocessing
46
+
47
+ Tools use `preprocess_query()` from `src/tools/query_utils.py` to:
48
+
49
+ - Remove noise from queries
50
+ - Expand synonyms
51
+ - Normalize query format
52
+
53
+ ## Evidence Conversion
54
+
55
+ All tools convert API responses to `Evidence` objects with:
56
+
57
+ - `Citation`: Title, URL, date, authors
58
+ - `content`: Evidence text
59
+ - `relevance_score`: 0.0-1.0 relevance score
60
+ - `metadata`: Additional metadata
61
+
62
+ Missing fields are handled gracefully with defaults.
63
+
64
+ ## Tool Implementations
65
+
66
+ ### PubMed Tool
67
+
68
+ **File**: `src/tools/pubmed.py`
69
+
70
+ **API**: NCBI E-utilities (ESearch → EFetch)
71
+
72
+ **Rate Limiting**:
73
+ - 0.34s between requests (3 req/sec without API key)
74
+ - 0.1s between requests (10 req/sec with NCBI API key)
75
+
76
+ **Features**:
77
+ - XML parsing with `xmltodict`
78
+ - Handles single vs. multiple articles
79
+ - Query preprocessing
80
+ - Evidence conversion with metadata extraction
81
+
82
+ ### ClinicalTrials Tool
83
+
84
+ **File**: `src/tools/clinicaltrials.py`
85
+
86
+ **API**: ClinicalTrials.gov API v2
87
+
88
+ **Important**: Uses the `requests` library (NOT httpx) because the ClinicalTrials.gov WAF blocks the httpx TLS fingerprint.
89
+
90
+ **Execution**: Runs in thread pool: `await asyncio.to_thread(requests.get, ...)`
91
+
92
+ **Filtering**:
93
+ - Only interventional studies
94
+ - Status: `COMPLETED`, `ACTIVE_NOT_RECRUITING`, `RECRUITING`, `ENROLLING_BY_INVITATION`
95
+
96
+ **Features**:
97
+ - Parses nested JSON structure
98
+ - Extracts trial metadata
99
+ - Evidence conversion
100
+
101
+ ### Europe PMC Tool
102
+
103
+ **File**: `src/tools/europepmc.py`
104
+
105
+ **API**: Europe PMC REST API
106
+
107
+ **Features**:
108
+ - Handles preprint markers: `[PREPRINT - Not peer-reviewed]`
109
+ - Builds URLs from DOI or PMID
110
+ - Checks `pubTypeList` for preprint detection
111
+ - Includes both preprints and peer-reviewed articles
112
+
113
+ ### RAG Tool
114
+
115
+ **File**: `src/tools/rag_tool.py`
116
+
117
+ **Purpose**: Semantic search within collected evidence
118
+
119
+ **Implementation**: Wraps `LlamaIndexRAGService`
120
+
121
+ **Features**:
122
+ - Returns Evidence from RAG results
123
+ - Handles evidence ingestion
124
+ - Semantic similarity search
125
+ - Metadata preservation
126
+
127
+ ### Search Handler
128
+
129
+ **File**: `src/tools/search_handler.py`
130
+
131
+ **Purpose**: Orchestrates parallel searches across multiple tools
132
+
133
+ **Features**:
134
+ - Uses `asyncio.gather()` with `return_exceptions=True`
135
+ - Aggregates results into `SearchResult`
136
+ - Handles tool failures gracefully
137
+ - Deduplicates results by URL
138
+
139
+ ## Tool Registration
140
+
141
+ Tools are registered in the search handler:
142
+
143
+ ```python
144
+ from src.tools.pubmed import PubMedTool
145
+ from src.tools.clinicaltrials import ClinicalTrialsTool
146
+ from src.tools.europepmc import EuropePMCTool
147
+
148
+ search_handler = SearchHandler(
149
+ tools=[
150
+ PubMedTool(),
151
+ ClinicalTrialsTool(),
152
+ EuropePMCTool(),
153
+ ]
154
+ )
155
+ ```
156
+
157
+ ## See Also
158
+
159
+ - [Services](services.md) - RAG and embedding services
160
+ - [API Reference - Tools](../api/tools.md) - API documentation
161
+ - [Contributing - Implementation Patterns](../contributing/implementation-patterns.md) - Development guidelines
162
+
docs/contributing/code-quality.md ADDED
@@ -0,0 +1,81 @@
1
+ # Code Quality & Documentation
2
+
3
+ This document outlines code quality standards and documentation requirements.
4
+
5
+ ## Linting
6
+
7
+ - Ruff with 100-char line length
8
+ - Ignore rules documented in `pyproject.toml`:
9
+ - `PLR0913`: Too many arguments (agents need many params)
10
+ - `PLR0912`: Too many branches (complex orchestrator logic)
11
+ - `PLR0911`: Too many return statements (complex agent logic)
12
+ - `PLR2004`: Magic values (statistical constants)
13
+ - `PLW0603`: Global statement (singleton pattern)
14
+ - `PLC0415`: Lazy imports for optional dependencies
15
+
16
+ ## Type Checking
17
+
18
+ - `mypy --strict` compliance
19
+ - `ignore_missing_imports = true` (for optional dependencies)
20
+ - Exclude: `reference_repos/`, `examples/`
21
+ - All functions must have complete type annotations
22
+
23
+ ## Pre-commit
24
+
25
+ - Run `make check` before committing
26
+ - Must pass: lint + typecheck + test-cov
27
+ - Pre-commit hooks installed via `make install`
28
+
29
+ ## Documentation
30
+
31
+ ### Docstrings
32
+
33
+ - Google-style docstrings for all public functions
34
+ - Include Args, Returns, Raises sections
35
+ - Use type hints in docstrings only if needed for clarity
36
+
37
+ Example:
38
+
39
+ ```python
40
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
41
+ """Search PubMed and return evidence.
42
+
43
+ Args:
44
+ query: The search query string
45
+ max_results: Maximum number of results to return
46
+
47
+ Returns:
48
+ List of Evidence objects
49
+
50
+ Raises:
51
+ SearchError: If the search fails
52
+ RateLimitError: If we hit rate limits
53
+ """
54
+ ```
55
+
56
+ ### Code Comments
57
+
58
+ - Explain WHY, not WHAT
59
+ - Document non-obvious patterns (e.g., why `requests` not `httpx` for ClinicalTrials)
60
+ - Mark critical sections: `# CRITICAL: ...`
61
+ - Document rate limiting rationale
62
+ - Explain async patterns when non-obvious
63
+
64
+ ## See Also
65
+
66
+ - [Code Style](code-style.md) - Code style guidelines
67
+ - [Testing](testing.md) - Testing guidelines
68
+
docs/contributing/code-style.md ADDED
@@ -0,0 +1,61 @@
1
+ # Code Style & Conventions
2
+
3
+ This document outlines the code style and conventions for DeepCritical.
4
+
5
+ ## Type Safety
6
+
7
+ - **ALWAYS** use type hints for all function parameters and return types
8
+ - Use `mypy --strict` compliance (no `Any` unless absolutely necessary)
9
+ - Use `TYPE_CHECKING` imports for circular dependencies:
10
+
11
+ ```python
12
+ from typing import TYPE_CHECKING
13
+ if TYPE_CHECKING:
14
+ from src.services.embeddings import EmbeddingService
15
+ ```
16
+
17
+ ## Pydantic Models
18
+
19
+ - All data exchange uses Pydantic models (`src/utils/models.py`)
20
+ - Models are frozen (`model_config = {"frozen": True}`) for immutability
21
+ - Use `Field()` with descriptions for all model fields
22
+ - Validate with `ge=`, `le=`, `min_length=`, `max_length=` constraints
23
+
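
A minimal sketch of a model that follows these conventions (field names are illustrative):

```python
from pydantic import BaseModel, Field


class ExampleRecord(BaseModel):
    """Illustrative model following the conventions above."""

    model_config = {"frozen": True}  # immutable after construction

    title: str = Field(description="Human-readable title", min_length=1)
    score: float = Field(description="Normalized relevance score", ge=0.0, le=1.0)
    tags: list[str] = Field(default_factory=list, description="Optional labels")
```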
24
+ ## Async Patterns
25
+
26
+ - **ALL** I/O operations must be async (`async def`, `await`)
27
+ - Use `asyncio.gather()` for parallel operations
28
+ - CPU-bound work (embeddings, parsing) must use `run_in_executor()`:
29
+
30
+ ```python
31
+ loop = asyncio.get_running_loop()
32
+ result = await loop.run_in_executor(None, cpu_bound_function, args)
33
+ ```
34
+
35
+ - Never block the event loop with synchronous I/O
36
+
37
+ ## Common Pitfalls
38
+
39
+ 1. **Blocking the event loop**: Never use sync I/O in async functions
40
+ 2. **Missing type hints**: All functions must have complete type annotations
41
+ 3. **Global mutable state**: Use ContextVar or pass via parameters
42
+ 4. **Import errors**: Lazy-load optional dependencies (magentic, modal, embeddings)
43
+
44
+ ## See Also
45
+
46
+ - [Error Handling](error-handling.md) - Error handling guidelines
47
+ - [Implementation Patterns](implementation-patterns.md) - Common patterns
48
+
49
+
50
+
51
+
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
docs/contributing/error-handling.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Error Handling & Logging
2
+
3
+ This document outlines error handling and logging conventions for DeepCritical.
4
+
5
+ ## Exception Hierarchy
6
+
7
+ Use custom exception hierarchy (`src/utils/exceptions.py`):
8
+
9
+ - `DeepCriticalError` (base)
10
+ - `SearchError` → `RateLimitError`
11
+ - `JudgeError`
12
+ - `ConfigurationError`
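+
+ A minimal sketch of that hierarchy (the authoritative definitions live in `src/utils/exceptions.py`; docstrings here are illustrative):
+
+ ```python
+ class DeepCriticalError(Exception):
+     """Base class for all DeepCritical errors."""
+
+
+ class SearchError(DeepCriticalError):
+     """A search tool failed to return usable results."""
+
+
+ class RateLimitError(SearchError):
+     """An upstream API rejected the request due to rate limiting."""
+
+
+ class JudgeError(DeepCriticalError):
+     """Evidence assessment failed."""
+
+
+ class ConfigurationError(DeepCriticalError):
+     """Required configuration is missing or invalid."""
+ ```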
13
+
14
+ ## Error Handling Rules
15
+
16
+ - Always chain exceptions: `raise SearchError(...) from e`
17
+ - Log errors with context using `structlog`:
18
+
19
+ ```python
20
+ logger.error("Operation failed", error=str(e), context=value)
21
+ ```
22
+
23
+ - Never silently swallow exceptions
24
+ - Provide actionable error messages
25
+
26
+ ## Logging
27
+
28
+ - Use `structlog` for all logging (NOT `print` or `logging`)
29
+ - Import: `import structlog; logger = structlog.get_logger()`
30
+ - Log with structured data: `logger.info("event", key=value)`
31
+ - Use appropriate levels: DEBUG, INFO, WARNING, ERROR
32
+
33
+ ## Logging Examples
34
+
35
+ ```python
36
+ logger.info("Starting search", query=query, tools=[t.name for t in tools])
37
+ logger.warning("Search tool failed", tool=tool.name, error=str(result))
38
+ logger.error("Assessment failed", error=str(e))
39
+ ```
40
+
41
+ ## Error Chaining
42
+
43
+ Always preserve exception context:
44
+
45
+ ```python
46
+ try:
47
+     result = await api_call()
48
+ except httpx.HTTPError as e:
49
+     raise SearchError(f"API call failed: {e}") from e
50
+ ```
51
+
52
+ ## See Also
53
+
54
+ - [Code Style](code-style.md) - Code style guidelines
55
+ - [Testing](testing.md) - Testing guidelines
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
docs/contributing/implementation-patterns.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Implementation Patterns
2
+
3
+ This document outlines common implementation patterns used in DeepCritical.
4
+
5
+ ## Search Tools
6
+
7
+ All tools implement `SearchTool` protocol (`src/tools/base.py`):
8
+
9
+ - Must have `name` property
10
+ - Must implement `async def search(query, max_results) -> list[Evidence]`
11
+ - Use `@retry` decorator from tenacity for resilience
12
+ - Rate limiting: Implement `_rate_limit()` for APIs with limits (e.g., PubMed)
13
+ - Error handling: Raise `SearchError` or `RateLimitError` on failures
14
+
15
+ Example pattern:
16
+
17
+ ```python
18
+ class MySearchTool:
19
+     @property
20
+     def name(self) -> str:
21
+         return "mytool"
22
+
23
+     @retry(stop=stop_after_attempt(3), wait=wait_exponential(...))
24
+     async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
25
+         # Implementation
26
+         return evidence_list
27
+ ```
28
+
29
+ ## Judge Handlers
30
+
31
+ - Implement `JudgeHandlerProtocol` (`async def assess(question, evidence) -> JudgeAssessment`)
32
+ - Use pydantic-ai `Agent` with `output_type=JudgeAssessment`
33
+ - System prompts in `src/prompts/judge.py`
34
+ - Support fallback handlers: `MockJudgeHandler`, `HFInferenceJudgeHandler`
35
+ - Always return valid `JudgeAssessment` (never raise exceptions)
36
+
37
+ ## Agent Factory Pattern
38
+
39
+ - Use factory functions for creating agents (`src/agent_factory/`)
40
+ - Lazy initialization for optional dependencies (e.g., embeddings, Modal)
41
+ - Check requirements before initialization:
42
+
43
+ ```python
44
+ def check_magentic_requirements() -> None:
45
+     if not settings.has_openai_key:
46
+         raise ConfigurationError("Magentic requires OpenAI")
47
+ ```
48
+
49
+ ## State Management
50
+
51
+ - **Magentic Mode**: Use `ContextVar` for thread-safe state (`src/agents/state.py`)
52
+ - **Simple Mode**: Pass state via function parameters
53
+ - Never use global mutable state (except singletons via `@lru_cache`)
54
+
55
+ ## Singleton Pattern
56
+
57
+ Use `@lru_cache(maxsize=1)` for singletons:
58
+
59
+ ```python
60
+ @lru_cache(maxsize=1)
61
+ def get_embedding_service() -> EmbeddingService:
62
+     return EmbeddingService()
63
+ ```
64
+
65
+ - Lazy initialization to avoid requiring dependencies at import time
66
+
67
+ ## See Also
68
+
69
+ - [Code Style](code-style.md) - Code style guidelines
70
+ - [Error Handling](error-handling.md) - Error handling guidelines
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
docs/contributing/index.md ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to DeepCritical
2
+
3
+ Thank you for your interest in contributing to DeepCritical! This guide will help you get started.
4
+
5
+ ## Git Workflow
6
+
7
+ - `main`: Production-ready (GitHub)
8
+ - `dev`: Development integration (GitHub)
9
+ - Use feature branches: `yourname-dev`
10
+ - **NEVER** push directly to `main` or `dev` on HuggingFace
11
+ - GitHub is source of truth; HuggingFace is for deployment
12
+
13
+ ## Development Commands
14
+
15
+ ```bash
16
+ make install # Install dependencies + pre-commit
17
+ make check # Lint + typecheck + test (MUST PASS)
18
+ make test # Run unit tests
19
+ make lint # Run ruff
20
+ make format # Format with ruff
21
+ make typecheck # Run mypy
22
+ make test-cov # Test with coverage
23
+ ```
24
+
25
+ ## Getting Started
26
+
27
+ 1. **Fork the repository** on GitHub
28
+ 2. **Clone your fork**:
29
+ ```bash
30
+ git clone https://github.com/yourusername/GradioDemo.git
31
+ cd GradioDemo
32
+ ```
33
+ 3. **Install dependencies**:
34
+ ```bash
35
+ make install
36
+ ```
37
+ 4. **Create a feature branch**:
38
+ ```bash
39
+ git checkout -b yourname-feature-name
40
+ ```
41
+ 5. **Make your changes** following the guidelines below
42
+ 6. **Run checks**:
43
+ ```bash
44
+ make check
45
+ ```
46
+ 7. **Commit and push**:
47
+ ```bash
48
+ git commit -m "Description of changes"
49
+ git push origin yourname-feature-name
50
+ ```
51
+ 8. **Create a pull request** on GitHub
52
+
53
+ ## Development Guidelines
54
+
55
+ ### Code Style
56
+
57
+ - Follow [Code Style Guidelines](code-style.md)
58
+ - All code must pass `mypy --strict`
59
+ - Use `ruff` for linting and formatting
60
+ - Line length: 100 characters
61
+
62
+ ### Error Handling
63
+
64
+ - Follow [Error Handling Guidelines](error-handling.md)
65
+ - Always chain exceptions: `raise SearchError(...) from e`
66
+ - Use structured logging with `structlog`
67
+ - Never silently swallow exceptions
68
+
69
+ ### Testing
70
+
71
+ - Follow [Testing Guidelines](testing.md)
72
+ - Write tests before implementation (TDD)
73
+ - Aim for >80% coverage on critical paths
74
+ - Use markers: `unit`, `integration`, `slow`
75
+
76
+ ### Implementation Patterns
77
+
78
+ - Follow [Implementation Patterns](implementation-patterns.md)
79
+ - Use factory functions for agent/tool creation
80
+ - Implement protocols for extensibility
81
+ - Use singleton pattern with `@lru_cache(maxsize=1)`
82
+
83
+ ### Prompt Engineering
84
+
85
+ - Follow [Prompt Engineering Guidelines](prompt-engineering.md)
86
+ - Always validate citations
87
+ - Use diverse evidence selection
88
+ - Never trust LLM-generated citations without validation
89
+
90
+ ### Code Quality
91
+
92
+ - Follow [Code Quality Guidelines](code-quality.md)
93
+ - Google-style docstrings for all public functions
94
+ - Explain WHY, not WHAT in comments
95
+ - Mark critical sections: `# CRITICAL: ...`
96
+
97
+ ## MCP Integration
98
+
99
+ ### MCP Tools
100
+
101
+ - Functions in `src/mcp_tools.py` for Claude Desktop
102
+ - Full type hints required
103
+ - Google-style docstrings with Args/Returns sections
104
+ - Formatted string returns (markdown)
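+
+ A sketch of the expected shape (the body and the `Evidence` attributes used for formatting are assumptions, not the actual implementation):
+
+ ```python
+ async def search_pubmed(query: str, max_results: int = 10) -> str:
+     """Search PubMed and return results as markdown.
+
+     Args:
+         query: Search query string
+         max_results: Maximum number of results to return
+
+     Returns:
+         Markdown-formatted list of matching records
+     """
+     results = await PubMedTool().search(query, max_results=max_results)
+     return "\n".join(f"- [{item.title}]({item.url})" for item in results)
+ ```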
105
+
106
+ ### Gradio MCP Server
107
+
108
+ - Enable with `mcp_server=True` in `demo.launch()`
109
+ - Endpoint: `/gradio_api/mcp/`
110
+ - Use `ssr_mode=False` to fix hydration issues in HF Spaces
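+
+ Putting those flags together (a sketch; other launch arguments omitted):
+
+ ```python
+ from src.app import create_demo
+
+ demo = create_demo()
+ demo.launch(
+     mcp_server=True,  # exposes the /gradio_api/mcp/ endpoint
+     ssr_mode=False,   # avoids SSR hydration issues on HF Spaces
+ )
+ ```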
111
+
112
+ ## Common Pitfalls
113
+
114
+ 1. **Blocking the event loop**: Never use sync I/O in async functions
115
+ 2. **Missing type hints**: All functions must have complete type annotations
116
+ 3. **Hallucinated citations**: Always validate references
117
+ 4. **Global mutable state**: Use ContextVar or pass via parameters
118
+ 5. **Import errors**: Lazy-load optional dependencies (magentic, modal, embeddings)
119
+ 6. **Rate limiting**: Always implement for external APIs
120
+ 7. **Error chaining**: Always use `from e` when raising exceptions
121
+
122
+ ## Key Principles
123
+
124
+ 1. **Type Safety First**: All code must pass `mypy --strict`
125
+ 2. **Async Everything**: All I/O must be async
126
+ 3. **Test-Driven**: Write tests before implementation
127
+ 4. **No Hallucinations**: Validate all citations
128
+ 5. **Graceful Degradation**: Support free tier (HF Inference) when no API keys
129
+ 6. **Lazy Loading**: Don't require optional dependencies at import time
130
+ 7. **Structured Logging**: Use structlog, never print()
131
+ 8. **Error Chaining**: Always preserve exception context
132
+
133
+ ## Pull Request Process
134
+
135
+ 1. Ensure all checks pass: `make check`
136
+ 2. Update documentation if needed
137
+ 3. Add tests for new features
138
+ 4. Update CHANGELOG if applicable
139
+ 5. Request review from maintainers
140
+ 6. Address review feedback
141
+ 7. Wait for approval before merging
142
+
143
+ ## Questions?
144
+
145
+ - Open an issue on GitHub
146
+ - Check existing documentation
147
+ - Review code examples in the codebase
148
+
149
+ Thank you for contributing to DeepCritical!
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
docs/contributing/prompt-engineering.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Prompt Engineering & Citation Validation
2
+
3
+ This document outlines prompt engineering guidelines and citation validation rules.
4
+
5
+ ## Judge Prompts
6
+
7
+ - System prompt in `src/prompts/judge.py`
8
+ - Format evidence with truncation (1500 chars per item)
9
+ - Handle empty evidence case separately
10
+ - Always request structured JSON output
11
+ - Use `format_user_prompt()` and `format_empty_evidence_prompt()` helpers
12
+
13
+ ## Hypothesis Prompts
14
+
15
+ - Use diverse evidence selection (MMR algorithm)
16
+ - Sentence-aware truncation (`truncate_at_sentence()`)
17
+ - Format: Drug → Target → Pathway → Effect
18
+ - System prompt emphasizes mechanistic reasoning
19
+ - Use `format_hypothesis_prompt()` with embeddings for diversity
20
+
21
+ ## Report Prompts
22
+
23
+ - Include full citation details for validation
24
+ - Use diverse evidence selection (n=20)
25
+ - **CRITICAL**: Emphasize citation validation rules
26
+ - Format hypotheses with support/contradiction counts
27
+ - System prompt includes explicit JSON structure requirements
28
+
29
+ ## Citation Validation
30
+
31
+ - **ALWAYS** validate references before returning reports
32
+ - Use `validate_references()` from `src/utils/citation_validator.py`
33
+ - Remove hallucinated citations (URLs not in evidence)
34
+ - Log warnings for removed citations
35
+ - Never trust LLM-generated citations without validation
36
+
37
+ ## Citation Validation Rules
38
+
39
+ 1. Every reference URL must EXACTLY match a provided evidence URL
40
+ 2. Do NOT invent, fabricate, or hallucinate any references
41
+ 3. Do NOT modify paper titles, authors, dates, or URLs
42
+ 4. If unsure about a citation, OMIT it rather than guess
43
+ 5. Copy URLs exactly as provided - do not create similar-looking URLs
44
+
45
+ ## Evidence Selection
46
+
47
+ - Use `select_diverse_evidence()` for MMR-based selection
48
+ - Balance relevance vs diversity (lambda=0.7 default)
49
+ - Sentence-aware truncation preserves meaning
50
+ - Limit evidence per prompt to avoid context overflow
51
+
52
+ ## See Also
53
+
54
+ - [Code Quality](code-quality.md) - Code quality guidelines
55
+ - [Error Handling](error-handling.md) - Error handling guidelines
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
docs/contributing/testing.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Testing Requirements
2
+
3
+ This document outlines testing requirements and guidelines for DeepCritical.
4
+
5
+ ## Test Structure
6
+
7
+ - Unit tests in `tests/unit/` (mocked, fast)
8
+ - Integration tests in `tests/integration/` (real APIs, marked `@pytest.mark.integration`)
9
+ - Use markers: `unit`, `integration`, `slow`
10
+
11
+ ## Mocking
12
+
13
+ - Use `respx` for httpx mocking
14
+ - Use `pytest-mock` for general mocking
15
+ - Mock LLM calls in unit tests (use `MockJudgeHandler`)
16
+ - Fixtures in `tests/conftest.py`: `mock_httpx_client`, `mock_llm_response`
17
+
18
+ ## TDD Workflow
19
+
20
+ 1. Write failing test in `tests/unit/`
21
+ 2. Implement in `src/`
22
+ 3. Ensure test passes
23
+ 4. Run `make check` (lint + typecheck + test)
24
+
25
+ ## Test Examples
26
+
27
+ ```python
28
+ @pytest.mark.unit
29
+ async def test_pubmed_search(mock_httpx_client):
30
+     tool = PubMedTool()
31
+     results = await tool.search("metformin", max_results=5)
32
+     assert len(results) > 0
33
+     assert all(isinstance(r, Evidence) for r in results)
34
+
35
+ @pytest.mark.integration
36
+ async def test_real_pubmed_search():
37
+     tool = PubMedTool()
38
+     results = await tool.search("metformin", max_results=3)
39
+     assert len(results) <= 3
40
+ ```
41
+
42
+ ## Test Coverage
43
+
44
+ - Run `make test-cov` for coverage report
45
+ - Aim for >80% coverage on critical paths
46
+ - Exclude: `__init__.py`, `TYPE_CHECKING` blocks
47
+
48
+ ## See Also
49
+
50
+ - [Code Style](code-style.md) - Code style guidelines
51
+ - [Implementation Patterns](implementation-patterns.md) - Common patterns
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+
65
+
docs/getting-started/examples.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Examples
2
+
3
+ This page provides examples of using DeepCritical for various research tasks.
4
+
5
+ ## Basic Research Query
6
+
7
+ ### Example 1: Drug Information
8
+
9
+ **Query**:
10
+ ```
11
+ What are the latest treatments for Alzheimer's disease?
12
+ ```
13
+
14
+ **What DeepCritical Does**:
15
+ 1. Searches PubMed for recent papers
16
+ 2. Searches ClinicalTrials.gov for active trials
17
+ 3. Evaluates evidence quality
18
+ 4. Synthesizes findings into a comprehensive report
19
+
20
+ ### Example 2: Clinical Trial Search
21
+
22
+ **Query**:
23
+ ```
24
+ What clinical trials are investigating metformin for cancer prevention?
25
+ ```
26
+
27
+ **What DeepCritical Does**:
28
+ 1. Searches ClinicalTrials.gov for relevant trials
29
+ 2. Searches PubMed for supporting literature
30
+ 3. Provides trial details and status
31
+ 4. Summarizes findings
32
+
33
+ ## Advanced Research Queries
34
+
35
+ ### Example 3: Comprehensive Review
36
+
37
+ **Query**:
38
+ ```
39
+ Review the evidence for using metformin as an anti-aging intervention,
40
+ including clinical trials, mechanisms of action, and safety profile.
41
+ ```
42
+
43
+ **What DeepCritical Does**:
44
+ 1. Uses deep research mode (multi-section)
45
+ 2. Searches multiple sources in parallel
46
+ 3. Generates sections on:
47
+ - Clinical trials
48
+ - Mechanisms of action
49
+ - Safety profile
50
+ 4. Synthesizes comprehensive report
51
+
52
+ ### Example 4: Hypothesis Testing
53
+
54
+ **Query**:
55
+ ```
56
+ Test the hypothesis that regular exercise reduces Alzheimer's disease risk.
57
+ ```
58
+
59
+ **What DeepCritical Does**:
60
+ 1. Generates testable hypotheses
61
+ 2. Searches for supporting/contradicting evidence
62
+ 3. Performs statistical analysis (if Modal configured)
63
+ 4. Provides verdict: SUPPORTED, REFUTED, or INCONCLUSIVE
64
+
65
+ ## MCP Tool Examples
66
+
67
+ ### Using search_pubmed
68
+
69
+ ```
70
+ Search PubMed for "CRISPR gene editing cancer therapy"
71
+ ```
72
+
73
+ ### Using search_clinical_trials
74
+
75
+ ```
76
+ Find active clinical trials for "diabetes type 2 treatment"
77
+ ```
78
+
79
+ ### Using search_all
80
+
81
+ ```
82
+ Search all sources for "COVID-19 vaccine side effects"
83
+ ```
84
+
85
+ ### Using analyze_hypothesis
86
+
87
+ ```
88
+ Analyze whether vitamin D supplementation reduces COVID-19 severity
89
+ ```
90
+
91
+ ## Code Examples
92
+
93
+ ### Python API Usage
94
+
95
+ ```python
96
+ from src.orchestrator_factory import create_orchestrator
97
+ from src.tools.search_handler import SearchHandler
98
+ from src.agent_factory.judges import create_judge_handler
99
+
100
+ # Create orchestrator
101
+ search_handler = SearchHandler()
102
+ judge_handler = create_judge_handler()
103
+ orchestrator = create_orchestrator(
104
+     search_handler=search_handler,
105
+     judge_handler=judge_handler,
106
+     config={},
107
+     mode="advanced"
108
+ )
109
+
110
+ # Run research query
111
+ query = "What are the latest treatments for Alzheimer's disease?"
112
+ async for event in orchestrator.run(query):
113
+     print(f"Event: {event.type} - {event.data}")
114
+ ```
115
+
116
+ ### Gradio UI Integration
117
+
118
+ ```python
119
+ import gradio as gr
120
+ from src.app import create_research_interface
121
+
122
+ # Create interface
123
+ interface = create_research_interface()
124
+
125
+ # Launch
126
+ interface.launch(server_name="0.0.0.0", server_port=7860)
127
+ ```
128
+
129
+ ## Research Patterns
130
+
131
+ ### Iterative Research
132
+
133
+ Single-loop research with search-judge-synthesize cycles:
134
+
135
+ ```python
136
+ from src.orchestrator.research_flow import IterativeResearchFlow
137
+
138
+ flow = IterativeResearchFlow(
139
+     search_handler=search_handler,
140
+     judge_handler=judge_handler,
141
+     use_graph=False
142
+ )
143
+
144
+ async for event in flow.run(query):
145
+     # Handle events
146
+     pass
147
+ ```
148
+
149
+ ### Deep Research
150
+
151
+ Multi-section parallel research:
152
+
153
+ ```python
154
+ from src.orchestrator.research_flow import DeepResearchFlow
155
+
156
+ flow = DeepResearchFlow(
157
+     search_handler=search_handler,
158
+     judge_handler=judge_handler,
159
+     use_graph=True
160
+ )
161
+
162
+ async for event in flow.run(query):
163
+     # Handle events
164
+     pass
165
+ ```
166
+
167
+ ## Configuration Examples
168
+
169
+ ### Basic Configuration
170
+
171
+ ```bash
172
+ # .env file
173
+ LLM_PROVIDER=openai
174
+ OPENAI_API_KEY=your_key_here
175
+ MAX_ITERATIONS=10
176
+ ```
177
+
178
+ ### Advanced Configuration
179
+
180
+ ```bash
181
+ # .env file
182
+ LLM_PROVIDER=anthropic
183
+ ANTHROPIC_API_KEY=your_key_here
184
+ EMBEDDING_PROVIDER=local
185
+ WEB_SEARCH_PROVIDER=duckduckgo
186
+ MAX_ITERATIONS=20
187
+ DEFAULT_TOKEN_LIMIT=200000
188
+ USE_GRAPH_EXECUTION=true
189
+ ```
190
+
191
+ ## Next Steps
192
+
193
+ - Read the [Configuration Guide](../configuration/index.md) for all options
194
+ - Explore the [Architecture Documentation](../architecture/graph-orchestration.md)
195
+ - Check out the [API Reference](../api/agents.md) for programmatic usage
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
docs/getting-started/installation.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Installation
2
+
3
+ This guide will help you install and set up DeepCritical on your system.
4
+
5
+ ## Prerequisites
6
+
7
+ - Python 3.11 or higher
8
+ - `uv` package manager (recommended) or `pip`
9
+ - At least one LLM API key (OpenAI, Anthropic, or HuggingFace)
10
+
11
+ ## Installation Steps
12
+
13
+ ### 1. Install uv (Recommended)
14
+
15
+ `uv` is a fast Python package installer and resolver. Install it with:
16
+
17
+ ```bash
18
+ pip install uv
19
+ ```
20
+
21
+ ### 2. Clone the Repository
22
+
23
+ ```bash
24
+ git clone https://github.com/DeepCritical/GradioDemo.git
25
+ cd GradioDemo
26
+ ```
27
+
28
+ ### 3. Install Dependencies
29
+
30
+ Using `uv` (recommended):
31
+
32
+ ```bash
33
+ uv sync
34
+ ```
35
+
36
+ Using `pip`:
37
+
38
+ ```bash
39
+ pip install -e .
40
+ ```
41
+
42
+ ### 4. Install Optional Dependencies
43
+
44
+ For embeddings support (local sentence-transformers):
45
+
46
+ ```bash
47
+ uv sync --extra embeddings
48
+ ```
49
+
50
+ For Modal sandbox execution:
51
+
52
+ ```bash
53
+ uv sync --extra modal
54
+ ```
55
+
56
+ For Magentic orchestration:
57
+
58
+ ```bash
59
+ uv sync --extra magentic
60
+ ```
61
+
62
+ Install all extras:
63
+
64
+ ```bash
65
+ uv sync --all-extras
66
+ ```
67
+
68
+ ### 5. Configure Environment Variables
69
+
70
+ Create a `.env` file in the project root:
71
+
72
+ ```bash
73
+ # Required: At least one LLM provider
74
+ LLM_PROVIDER=openai # or "anthropic" or "huggingface"
75
+ OPENAI_API_KEY=your_openai_api_key_here
76
+
77
+ # Optional: Other services
78
+ NCBI_API_KEY=your_ncbi_api_key_here # For higher PubMed rate limits
79
+ MODAL_TOKEN_ID=your_modal_token_id
80
+ MODAL_TOKEN_SECRET=your_modal_token_secret
81
+ ```
82
+
83
+ See the [Configuration Guide](../configuration/index.md) for all available options.
84
+
85
+ ### 6. Verify Installation
86
+
87
+ Run the application:
88
+
89
+ ```bash
90
+ uv run gradio run src/app.py
91
+ ```
92
+
93
+ Open your browser to `http://localhost:7860` to verify the installation.
94
+
95
+ ## Development Setup
96
+
97
+ For development, install dev dependencies:
98
+
99
+ ```bash
100
+ uv sync --all-extras --dev
101
+ ```
102
+
103
+ Install pre-commit hooks:
104
+
105
+ ```bash
106
+ uv run pre-commit install
107
+ ```
108
+
109
+ ## Troubleshooting
110
+
111
+ ### Common Issues
112
+
113
+ **Import Errors**:
114
+ - Ensure you've installed all required dependencies
115
+ - Check that Python 3.11+ is being used
116
+
117
+ **API Key Errors**:
118
+ - Verify your `.env` file is in the project root
119
+ - Check that API keys are correctly formatted
120
+ - Ensure at least one LLM provider is configured
121
+
122
+ **Module Not Found**:
123
+ - Run `uv sync` or `pip install -e .` again
124
+ - Check that you're in the correct virtual environment
125
+
126
+ **Port Already in Use**:
127
+ - Change the port in `src/app.py` or set it via an environment variable
128
+ - Kill the process using port 7860
129
+
130
+ ## Next Steps
131
+
132
+ - Read the [Quick Start Guide](quick-start.md)
133
+ - Learn about [MCP Integration](mcp-integration.md)
134
+ - Explore [Examples](examples.md)
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
docs/getting-started/mcp-integration.md ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MCP Integration
2
+
3
+ DeepCritical exposes a Model Context Protocol (MCP) server, allowing you to use its search tools directly from Claude Desktop or other MCP clients.
4
+
5
+ ## What is MCP?
6
+
7
+ The Model Context Protocol (MCP) is a standard for connecting AI assistants to external tools and data sources. DeepCritical implements an MCP server that exposes its search capabilities as MCP tools.
8
+
9
+ ## MCP Server URL
10
+
11
+ When running locally:
12
+
13
+ ```
14
+ http://localhost:7860/gradio_api/mcp/
15
+ ```
16
+
17
+ ## Claude Desktop Configuration
18
+
19
+ ### 1. Locate Configuration File
20
+
21
+ **macOS**:
22
+ ```
23
+ ~/Library/Application Support/Claude/claude_desktop_config.json
24
+ ```
25
+
26
+ **Windows**:
27
+ ```
28
+ %APPDATA%\Claude\claude_desktop_config.json
29
+ ```
30
+
31
+ **Linux**:
32
+ ```
33
+ ~/.config/Claude/claude_desktop_config.json
34
+ ```
35
+
36
+ ### 2. Add DeepCritical Server
37
+
38
+ Edit `claude_desktop_config.json` and add:
39
+
40
+ ```json
41
+ {
42
+   "mcpServers": {
43
+     "deepcritical": {
44
+       "url": "http://localhost:7860/gradio_api/mcp/"
45
+     }
46
+   }
47
+ }
48
+ ```
49
+
50
+ ### 3. Restart Claude Desktop
51
+
52
+ Close and restart Claude Desktop for changes to take effect.
53
+
54
+ ### 4. Verify Connection
55
+
56
+ In Claude Desktop, you should see DeepCritical tools available:
57
+ - `search_pubmed`
58
+ - `search_clinical_trials`
59
+ - `search_biorxiv`
60
+ - `search_all`
61
+ - `analyze_hypothesis`
62
+
63
+ ## Available Tools
64
+
65
+ ### search_pubmed
66
+
67
+ Search peer-reviewed biomedical literature from PubMed.
68
+
69
+ **Parameters**:
70
+ - `query` (string): Search query
71
+ - `max_results` (integer, optional): Maximum number of results (default: 10)
72
+
73
+ **Example**:
74
+ ```
75
+ Search PubMed for "metformin diabetes"
76
+ ```
77
+
78
+ ### search_clinical_trials
79
+
80
+ Search ClinicalTrials.gov for interventional studies.
81
+
82
+ **Parameters**:
83
+ - `query` (string): Search query
84
+ - `max_results` (integer, optional): Maximum number of results (default: 10)
85
+
86
+ **Example**:
87
+ ```
88
+ Search clinical trials for "Alzheimer's disease treatment"
89
+ ```
90
+
91
+ ### search_biorxiv
92
+
93
+ Search bioRxiv/medRxiv preprints via Europe PMC.
94
+
95
+ **Parameters**:
96
+ - `query` (string): Search query
97
+ - `max_results` (integer, optional): Maximum number of results (default: 10)
98
+
99
+ **Example**:
100
+ ```
101
+ Search bioRxiv for "CRISPR gene editing"
102
+ ```
103
+
104
+ ### search_all
105
+
106
+ Search all sources simultaneously (PubMed, ClinicalTrials.gov, Europe PMC).
107
+
108
+ **Parameters**:
109
+ - `query` (string): Search query
110
+ - `max_results` (integer, optional): Maximum number of results per source (default: 10)
111
+
112
+ **Example**:
113
+ ```
114
+ Search all sources for "COVID-19 vaccine efficacy"
115
+ ```
116
+
117
+ ### analyze_hypothesis
118
+
119
+ Perform secure statistical analysis using Modal sandboxes.
120
+
121
+ **Parameters**:
122
+ - `hypothesis` (string): Hypothesis to analyze
123
+ - `data` (string, optional): Data description or code
124
+
125
+ **Example**:
126
+ ```
127
+ Analyze the hypothesis that metformin reduces cancer risk
128
+ ```
129
+
130
+ ## Using Tools in Claude Desktop
131
+
132
+ Once configured, you can ask Claude to use DeepCritical tools:
133
+
134
+ ```
135
+ Use DeepCritical to search PubMed for recent papers on Alzheimer's disease treatments.
136
+ ```
137
+
138
+ Claude will automatically:
139
+ 1. Call the appropriate DeepCritical tool
140
+ 2. Retrieve results
141
+ 3. Use the results in its response
142
+
143
+ ## Troubleshooting
144
+
145
+ ### Connection Issues
146
+
147
+ **Server Not Found**:
148
+ - Ensure DeepCritical is running (`uv run gradio run src/app.py`)
149
+ - Verify the URL in `claude_desktop_config.json` is correct
150
+ - Check that port 7860 is not blocked by firewall
151
+
152
+ **Tools Not Appearing**:
153
+ - Restart Claude Desktop after configuration changes
154
+ - Check Claude Desktop logs for errors
155
+ - Verify MCP server is accessible at the configured URL
156
+
157
+ ### Authentication
158
+
159
+ If DeepCritical requires authentication:
160
+ - Configure API keys in DeepCritical settings
161
+ - Use HuggingFace OAuth login
162
+ - Ensure API keys are valid
163
+
164
+ ## Advanced Configuration
165
+
166
+ ### Custom Port
167
+
168
+ If running on a different port, update the URL:
169
+
170
+ ```json
171
+ {
172
+   "mcpServers": {
173
+     "deepcritical": {
174
+       "url": "http://localhost:8080/gradio_api/mcp/"
175
+     }
176
+   }
177
+ }
178
+ ```
179
+
180
+ ### Multiple Instances
181
+
182
+ You can configure multiple DeepCritical instances:
183
+
184
+ ```json
185
+ {
186
+ "mcpServers": {
187
+ "deepcritical-local": {
188
+ "url": "http://localhost:7860/gradio_api/mcp/"
189
+ },
190
+ "deepcritical-remote": {
191
+ "url": "https://your-server.com/gradio_api/mcp/"
192
+ }
193
+ }
194
+ }
195
+ ```
196
+
197
+ ## Next Steps
198
+
199
+ - Learn about [Configuration](../configuration/index.md) for advanced settings
200
+ - Explore [Examples](examples.md) for use cases
201
+ - Read the [Architecture Documentation](../architecture/graph-orchestration.md)
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
docs/getting-started/quick-start.md ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Quick Start Guide
2
+
3
+ Get up and running with DeepCritical in minutes.
4
+
5
+ ## Start the Application
6
+
7
+ ```bash
8
+ uv run gradio run src/app.py
9
+ ```
10
+
11
+ Open your browser to `http://localhost:7860`.
12
+
13
+ ## First Research Query
14
+
15
+ 1. **Enter a Research Question**
16
+
17
+ Type your research question in the chat interface, for example:
18
+ - "What are the latest treatments for Alzheimer's disease?"
19
+ - "Review the evidence for metformin in cancer prevention"
20
+ - "What clinical trials are investigating COVID-19 vaccines?"
21
+
22
+ 2. **Submit the Query**
23
+
24
+ Click "Submit" or press Enter. The system will:
25
+ - Generate observations about your query
26
+ - Identify knowledge gaps
27
+ - Search multiple sources (PubMed, ClinicalTrials.gov, Europe PMC)
28
+ - Evaluate evidence quality
29
+ - Synthesize findings into a report
30
+
31
+ 3. **Review Results**
32
+
33
+ Watch the real-time progress in the chat interface:
34
+ - Search operations and results
35
+ - Evidence evaluation
36
+ - Report generation
37
+ - Final research report with citations
38
+
39
+ ## Authentication
40
+
41
+ ### HuggingFace OAuth (Recommended)
42
+
43
+ 1. Click "Sign in with HuggingFace" at the top of the app
44
+ 2. Authorize the application
45
+ 3. Your HuggingFace API token will be automatically used
46
+ 4. No need to manually enter API keys
47
+
48
+ ### Manual API Key
49
+
50
+ 1. Open the Settings accordion
51
+ 2. Enter your API key:
52
+ - OpenAI API key
53
+ - Anthropic API key
54
+ - HuggingFace API key
55
+ 3. Click "Save Settings"
56
+ 4. Manual keys take priority over OAuth tokens
57
+
58
+ ## Understanding the Interface
59
+
60
+ ### Chat Interface
61
+
62
+ - **Input**: Enter your research questions here
63
+ - **Messages**: View conversation history and research progress
64
+ - **Streaming**: Real-time updates as research progresses
65
+
66
+ ### Status Indicators
67
+
68
+ - **Searching**: Active search operations
69
+ - **Evaluating**: Evidence quality assessment
70
+ - **Synthesizing**: Report generation
71
+ - **Complete**: Research finished
72
+
73
+ ### Settings
74
+
75
+ - **API Keys**: Configure LLM providers
76
+ - **Research Mode**: Choose iterative or deep research
77
+ - **Budget Limits**: Set token, time, and iteration limits
78
+
79
+ ## Example Queries
80
+
81
+ ### Simple Query
82
+
83
+ ```
84
+ What are the side effects of metformin?
85
+ ```
86
+
87
+ ### Complex Query
88
+
89
+ ```
90
+ Review the evidence for using metformin as an anti-aging intervention,
91
+ including clinical trials, mechanisms of action, and safety profile.
92
+ ```
93
+
94
+ ### Clinical Trial Query
95
+
96
+ ```
97
+ What are the active clinical trials investigating Alzheimer's disease treatments?
98
+ ```
99
+
100
+ ## Next Steps
101
+
102
+ - Learn about [MCP Integration](mcp-integration.md) to use DeepCritical from Claude Desktop
103
+ - Explore [Examples](examples.md) for more use cases
104
+ - Read the [Configuration Guide](../configuration/index.md) for advanced settings
105
+ - Check out the [Architecture Documentation](../architecture/graph-orchestration.md) to understand how it works
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
docs/license.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # License
2
+
3
+ DeepCritical is licensed under the MIT License.
4
+
5
+ ## MIT License
6
+
7
+ Copyright (c) 2024 DeepCritical Team
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
docs/overview/architecture.md ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Architecture Overview
2
+
3
+ DeepCritical is a deep research agent system that uses iterative search-and-judge loops to comprehensively answer research questions. The system supports multiple orchestration patterns, graph-based execution, parallel research workflows, and long-running task management with real-time streaming.
4
+
5
+ ## Core Architecture
6
+
7
+ ### Orchestration Patterns
8
+
9
+ 1. **Graph Orchestrator** (`src/orchestrator/graph_orchestrator.py`):
10
+ - Graph-based execution using Pydantic AI agents as nodes
11
+ - Supports both iterative and deep research patterns
12
+ - Node types: Agent, State, Decision, Parallel
13
+ - Edge types: Sequential, Conditional, Parallel
14
+ - Conditional routing based on knowledge gaps, budget, and iterations
15
+ - Parallel execution for concurrent research loops
16
+ - Event streaming via `AsyncGenerator[AgentEvent]` for real-time UI updates
17
+ - Fallback to agent chains when graph execution is disabled
18
+
19
+ 2. **Deep Research Flow** (`src/orchestrator/research_flow.py`):
20
+ - **Pattern**: Planner → Parallel Iterative Loops (one per section) → Synthesis
21
+ - Uses `PlannerAgent` to break query into report sections
22
+ - Runs `IterativeResearchFlow` instances in parallel per section via `WorkflowManager`
23
+ - Synthesizes results using `LongWriterAgent` or `ProofreaderAgent`
24
+ - Supports both graph execution (`use_graph=True`) and agent chains (`use_graph=False`)
25
+ - Budget tracking per section and globally
26
+ - State synchronization across parallel loops
27
+
28
+ 3. **Iterative Research Flow** (`src/orchestrator/research_flow.py`):
29
+ - **Pattern**: Generate observations → Evaluate gaps → Select tools → Execute → Judge → Continue/Complete
30
+ - Uses `KnowledgeGapAgent`, `ToolSelectorAgent`, `ThinkingAgent`, `WriterAgent`
31
+ - `JudgeHandler` assesses evidence sufficiency
32
+ - Iterates until research complete or constraints met (iterations, time, tokens)
33
+ - Supports graph execution and agent chains
34
+
35
+ 4. **Magentic Orchestrator** (`src/orchestrator_magentic.py`):
36
+ - Multi-agent coordination using `agent-framework-core`
37
+ - ChatAgent pattern with internal LLMs per agent
38
+ - Uses `MagenticBuilder` with participants: searcher, hypothesizer, judge, reporter
39
+ - Manager orchestrates agents via `OpenAIChatClient`
40
+ - Requires OpenAI API key (function calling support)
41
+ - Event-driven: converts Magentic events to `AgentEvent` for UI streaming
42
+ - Supports long-running workflows with max rounds and stall/reset handling
43
+
44
+ 5. **Hierarchical Orchestrator** (`src/orchestrator_hierarchical.py`):
45
+ - Uses `SubIterationMiddleware` with `ResearchTeam` and `LLMSubIterationJudge`
46
+ - Adapts Magentic ChatAgent to `SubIterationTeam` protocol
47
+ - Event-driven via `asyncio.Queue` for coordination
48
+ - Supports sub-iteration patterns for complex research tasks
49
+
50
+ 6. **Legacy Simple Mode** (`src/legacy_orchestrator.py`):
51
+ - Linear search-judge-synthesize loop
52
+ - Uses `SearchHandlerProtocol` and `JudgeHandlerProtocol`
53
+ - Generator-based design yielding `AgentEvent` objects
54
+ - Backward compatibility for simple use cases
55
+
56
+ ## Long-Running Task Support
57
+
58
+ The system is designed for long-running research tasks with comprehensive state management and streaming:
59
+
60
+ 1. **Event Streaming**:
61
+ - All orchestrators yield `AgentEvent` objects via `AsyncGenerator`
62
+ - Real-time UI updates through Gradio chat interface
63
+ - Event types: `started`, `searching`, `search_complete`, `judging`, `judge_complete`, `looping`, `synthesizing`, `hypothesizing`, `complete`, `error`
64
+ - Metadata includes iteration numbers, tool names, result counts, durations
65
+
66
+ 2. **Budget Tracking** (`src/middleware/budget_tracker.py`):
67
+ - Per-loop and global budget management
68
+ - Tracks: tokens, time (seconds), iterations
69
+ - Budget enforcement at decision nodes
70
+ - Token estimation (~4 chars per token)
71
+ - Early termination when budgets exceeded
72
+ - Budget summaries for monitoring
73
+
74
+ 3. **Workflow Manager** (`src/middleware/workflow_manager.py`):
75
+ - Coordinates parallel research loops
76
+ - Tracks loop status: `pending`, `running`, `completed`, `failed`, `cancelled`
77
+ - Synchronizes evidence between loops and global state
78
+ - Handles errors per loop (doesn't fail all if one fails)
79
+ - Supports loop cancellation and timeout handling
80
+ - Evidence deduplication across parallel loops
81
+
82
+ 4. **State Management** (`src/middleware/state_machine.py`):
83
+ - Thread-safe isolation using `ContextVar` for concurrent requests
84
+ - `WorkflowState` tracks: evidence, conversation history, embedding service
85
+ - Evidence deduplication by URL
86
+ - Semantic search via embedding service
87
+ - State persistence across long-running workflows
88
+ - Supports both iterative and deep research patterns
89
+
90
+ 5. **Gradio UI** (`src/app.py`):
91
+ - Real-time streaming of research progress
92
+ - Accordion-based UI for pending/done operations
93
+ - OAuth integration (HuggingFace)
94
+ - Multiple backend support (API keys, free tier)
95
+ - Handles long-running tasks with progress indicators
96
+ - Event accumulation for pending operations
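+
+ Consumers drive all of this by simply iterating the event stream. A sketch of the pattern (the function is illustrative; the Gradio handler does the equivalent with markdown accumulation):
+
+ ```python
+ from typing import Any
+
+
+ async def stream_to_console(orchestrator: Any, query: str) -> None:
+     async for event in orchestrator.run(query):
+         if event.type == "complete":
+             print(event.message)          # final report
+         else:
+             print(event.to_markdown())    # intermediate progress update
+ ```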
97
+
98
+ ## Graph Architecture
99
+
100
+ The graph orchestrator (`src/orchestrator/graph_orchestrator.py`) implements a flexible graph-based execution model:
101
+
102
+ **Node Types**:
103
+
104
+ - **Agent Nodes**: Execute Pydantic AI agents (e.g., `KnowledgeGapAgent`, `ToolSelectorAgent`)
105
+ - **State Nodes**: Update or read workflow state (evidence, conversation)
106
+ - **Decision Nodes**: Make routing decisions (research complete?, budget exceeded?)
107
+ - **Parallel Nodes**: Execute multiple nodes concurrently (parallel research loops)
108
+
109
+ **Edge Types**:
110
+
111
+ - **Sequential Edges**: Always traversed (no condition)
112
+ - **Conditional Edges**: Traversed based on condition (e.g., if research complete → writer, else → tool selector)
113
+ - **Parallel Edges**: Used for parallel execution branches
114
+
115
+ **Graph Patterns**:
116
+
117
+ - **Iterative Graph**: `[Input] → [Thinking] → [Knowledge Gap] → [Decision: Complete?] → [Tool Selector] or [Writer]`
118
+ - **Deep Research Graph**: `[Input] → [Planner] → [Parallel Iterative Loops] → [Synthesizer]`
119
+
120
+ **Execution Flow**:
121
+
122
+ 1. Graph construction from nodes and edges
123
+ 2. Graph validation (no cycles, all nodes reachable)
124
+ 3. Graph execution from entry node
125
+ 4. Node execution based on type
126
+ 5. Edge evaluation for next node(s)
127
+ 6. Parallel execution via `asyncio.gather()`
128
+ 7. State updates at state nodes
129
+ 8. Event streaming for UI
130
+
131
+ ## Key Components
132
+
133
+ - **Orchestrators**: Multiple orchestration patterns (`src/orchestrator/`, `src/orchestrator_*.py`)
134
+ - **Research Flows**: Iterative and deep research patterns (`src/orchestrator/research_flow.py`)
135
+ - **Graph Builder**: Graph construction utilities (`src/agent_factory/graph_builder.py`)
136
+ - **Agents**: Pydantic AI agents (`src/agents/`, `src/agent_factory/agents.py`)
137
+ - **Search Tools**: PubMed, ClinicalTrials.gov, Europe PMC, RAG (`src/tools/`)
138
+ - **Judge Handler**: LLM-based evidence assessment (`src/agent_factory/judges.py`)
139
+ - **Embeddings**: Semantic search & deduplication (`src/services/embeddings.py`)
140
+ - **Statistical Analyzer**: Modal sandbox execution (`src/services/statistical_analyzer.py`)
141
+ - **Middleware**: State management, budget tracking, workflow coordination (`src/middleware/`)
142
+ - **MCP Tools**: Claude Desktop integration (`src/mcp_tools.py`)
143
+ - **Gradio UI**: Web interface with MCP server and streaming (`src/app.py`)
144
+
145
+ ## Research Team & Parallel Execution
146
+
147
+ The system supports complex research workflows through:
148
+
149
+ 1. **WorkflowManager**: Coordinates multiple parallel research loops
150
+ - Creates and tracks `ResearchLoop` instances
151
+ - Runs loops in parallel via `asyncio.gather()`
152
+ - Synchronizes evidence to global state
153
+ - Handles loop failures gracefully
154
+
155
+ 2. **Deep Research Pattern**: Breaks complex queries into sections
156
+ - Planner creates report outline with sections
157
+ - Each section runs as independent iterative research loop
158
+ - Loops execute in parallel
159
+ - Evidence shared across loops via global state
160
+ - Final synthesis combines all section results
161
+
162
+ 3. **State Synchronization**: Thread-safe evidence sharing
163
+ - Evidence deduplication by URL
164
+ - Global state accessible to all loops
165
+ - Semantic search across all collected evidence
166
+ - Conversation history tracking per iteration
167
+
168
+ ## Configuration & Modes
169
+
170
+ - **Orchestrator Factory** (`src/orchestrator_factory.py`):
171
+ - Auto-detects mode: "advanced" if OpenAI key available, else "simple"
172
+ - Supports explicit mode selection: "simple", "magentic", "advanced"
173
+ - Lazy imports for optional dependencies
174
+
175
+ - **Research Modes**:
176
+ - `iterative`: Single research loop
177
+ - `deep`: Multi-section parallel research
178
+ - `auto`: Auto-detect based on query complexity
179
+
180
+ - **Execution Modes**:
181
+ - `use_graph=True`: Graph-based execution (parallel, conditional routing)
182
+ - `use_graph=False`: Agent chains (sequential, backward compatible)
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
docs/overview/features.md ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Features
2
+
3
+ DeepCritical provides a comprehensive set of features for AI-assisted research:
4
+
5
+ ## Core Features
6
+
7
+ ### Multi-Source Search
8
+
9
+ - **PubMed**: Search peer-reviewed biomedical literature via NCBI E-utilities
10
+ - **ClinicalTrials.gov**: Search interventional clinical trials
11
+ - **Europe PMC**: Search preprints and peer-reviewed articles (includes bioRxiv/medRxiv)
12
+ - **RAG**: Semantic search within collected evidence using LlamaIndex
13
+
14
+ ### MCP Integration
15
+
16
+ - **Model Context Protocol**: Expose search tools via MCP server
17
+ - **Claude Desktop**: Use DeepCritical tools directly from Claude Desktop
18
+ - **MCP Clients**: Compatible with any MCP-compatible client
19
+
20
+ ### Authentication
21
+
22
+ - **HuggingFace OAuth**: Sign in with HuggingFace account for automatic API token usage
23
+ - **Manual API Keys**: Support for OpenAI, Anthropic, and HuggingFace API keys
24
+ - **Free Tier Support**: Automatic fallback to HuggingFace Inference API
25
+
26
+ ### Secure Code Execution
27
+
28
+ - **Modal Sandbox**: Secure execution of AI-generated statistical code
29
+ - **Isolated Environment**: Network isolation and package version pinning
30
+ - **Safe Execution**: Prevents malicious code execution
31
+
32
+ ### Semantic Search & RAG
33
+
34
+ - **LlamaIndex Integration**: Advanced RAG capabilities
35
+ - **Vector Storage**: ChromaDB for embedding storage
36
+ - **Semantic Deduplication**: Automatic detection of similar evidence
37
+ - **Embedding Service**: Local sentence-transformers (no API key required)
38
+
39
+ ### Orchestration Patterns
40
+
41
+ - **Graph-Based Execution**: Flexible graph orchestration with conditional routing
42
+ - **Parallel Research Loops**: Run multiple research tasks concurrently
43
+ - **Iterative Research**: Single-loop research with search-judge-synthesize cycles
44
+ - **Deep Research**: Multi-section parallel research with planning and synthesis
45
+ - **Magentic Orchestration**: Multi-agent coordination using Microsoft Agent Framework
46
+
47
+ ### Real-Time Streaming
48
+
49
+ - **Event Streaming**: Real-time updates via `AsyncGenerator[AgentEvent]`
50
+ - **Progress Tracking**: Monitor research progress with detailed event metadata
51
+ - **UI Integration**: Seamless integration with Gradio chat interface
52
+
53
+ ### Budget Management
54
+
55
+ - **Token Budget**: Track and limit LLM token usage
56
+ - **Time Budget**: Enforce time limits per research loop
57
+ - **Iteration Budget**: Limit maximum iterations
58
+ - **Per-Loop Budgets**: Independent budgets for parallel research loops
59
+
60
+ ### State Management
61
+
62
+ - **Thread-Safe Isolation**: ContextVar-based state management
63
+ - **Evidence Deduplication**: Automatic URL-based deduplication
64
+ - **Conversation History**: Track iteration history and agent interactions
65
+ - **State Synchronization**: Share evidence across parallel loops
66
+
67
+ ## Advanced Features
68
+
69
+ ### Agent System
70
+
71
+ - **Pydantic AI Agents**: Type-safe agent implementation
72
+ - **Structured Output**: Pydantic models for agent responses
73
+ - **Agent Factory**: Centralized agent creation with fallback support
74
+ - **Specialized Agents**: Knowledge gap, tool selector, writer, proofreader, and more
75
+
76
+ ### Search Tools
77
+
78
+ - **Rate Limiting**: Built-in rate limiting for external APIs
79
+ - **Retry Logic**: Automatic retry with exponential backoff
80
+ - **Query Preprocessing**: Automatic query enhancement and synonym expansion
81
+ - **Evidence Conversion**: Automatic conversion to structured Evidence objects
82
+
83
+ ### Error Handling
84
+
85
+ - **Custom Exceptions**: Hierarchical exception system
86
+ - **Error Chaining**: Preserve exception context
87
+ - **Structured Logging**: Comprehensive logging with structlog
88
+ - **Graceful Degradation**: Fallback handlers for missing dependencies
89
+
90
+ ### Configuration
91
+
92
+ - **Pydantic Settings**: Type-safe configuration management
93
+ - **Environment Variables**: Support for `.env` files
94
+ - **Validation**: Automatic configuration validation
95
+ - **Flexible Providers**: Support for multiple LLM and embedding providers
96
+
97
+ ### Testing
98
+
99
+ - **Unit Tests**: Comprehensive unit test coverage
100
+ - **Integration Tests**: Real API integration tests
101
+ - **Mock Support**: Extensive mocking utilities
102
+ - **Coverage Reports**: Code coverage tracking
103
+
104
+ ## UI Features
105
+
106
+ ### Gradio Interface
107
+
108
+ - **Real-Time Chat**: Interactive chat interface
109
+ - **Streaming Updates**: Live progress updates
110
+ - **Accordion UI**: Organized display of pending/done operations
111
+ - **OAuth Integration**: Seamless HuggingFace authentication
112
+
113
+ ### MCP Server
114
+
115
+ - **RESTful API**: HTTP-based MCP server
116
+ - **Tool Discovery**: Automatic tool registration
117
+ - **Request Handling**: Async request processing
118
+ - **Error Responses**: Structured error responses
119
+
120
+ ## Development Features
121
+
122
+ ### Code Quality
123
+
124
+ - **Type Safety**: Full type hints with mypy strict mode
125
+ - **Linting**: Ruff for code quality
126
+ - **Formatting**: Automatic code formatting
127
+ - **Pre-commit Hooks**: Automated quality checks
128
+
129
+ ### Documentation
130
+
131
+ - **Comprehensive Docs**: Detailed documentation for all components
132
+ - **Code Examples**: Extensive code examples
133
+ - **Architecture Diagrams**: Visual architecture documentation
134
+ - **API Reference**: Complete API documentation
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
docs/team.md ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Team
2
+
3
+ DeepCritical is developed by a team of researchers and developers working on AI-assisted research.
4
+
5
+ ## Team Members
6
+
7
+ ### The-Obstacle-Is-The-Way
8
+
9
+ - GitHub: [The-Obstacle-Is-The-Way](https://github.com/The-Obstacle-Is-The-Way)
10
+
11
+ ### MarioAderman
12
+
13
+ - GitHub: [MarioAderman](https://github.com/MarioAderman)
14
+
15
+ ### Josephrp
16
+
17
+ - GitHub: [Josephrp](https://github.com/Josephrp)
18
+
19
+ ## About
20
+
21
+ The DeepCritical team met online in the Alzheimer's Critical Literature Review Group of the Hugging Science initiative. We're building the agent framework we want to use for AI-assisted research, helping turn vast amounts of clinical data into cures.
22
+
23
+ ## Contributing
24
+
25
+ We welcome contributions! See the [Contributing Guide](contributing/index.md) for details.
26
+
27
+ ## Links
28
+
29
+ - [GitHub Repository](https://github.com/DeepCritical/GradioDemo)
30
+ - [HuggingFace Space](https://huggingface.co/spaces/DataQuests/DeepCritical)
31
+
32
+
33
+
34
+
35
+
36
+
37
+
38
+
39
+
40
+
41
+
42
+
43
+
44
+
src/app.py CHANGED
@@ -5,12 +5,24 @@ from collections.abc import AsyncGenerator
5
  from typing import Any
6
 
7
  import gradio as gr
8
- from pydantic_ai.models.anthropic import AnthropicModel
9
- from pydantic_ai.models.huggingface import HuggingFaceModel
10
- from pydantic_ai.models.openai import OpenAIChatModel as OpenAIModel
11
- from pydantic_ai.providers.anthropic import AnthropicProvider
12
- from pydantic_ai.providers.huggingface import HuggingFaceProvider
13
- from pydantic_ai.providers.openai import OpenAIProvider
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler
16
  from src.orchestrator_factory import create_orchestrator
@@ -19,14 +31,15 @@ from src.tools.europepmc import EuropePMCTool
19
  from src.tools.pubmed import PubMedTool
20
  from src.tools.search_handler import SearchHandler
21
  from src.utils.config import settings
22
- from src.utils.models import OrchestratorConfig
23
 
24
 
25
  def configure_orchestrator(
26
  use_mock: bool = False,
27
  mode: str = "simple",
28
- user_api_key: str | None = None,
29
- api_provider: str = "huggingface",
 
30
  ) -> tuple[Any, str]:
31
  """
32
  Create an orchestrator instance.
@@ -34,8 +47,9 @@ def configure_orchestrator(
34
  Args:
35
  use_mock: If True, use MockJudgeHandler (no API key needed)
36
  mode: Orchestrator mode ("simple" or "advanced")
37
- user_api_key: Optional user-provided API key (BYOK)
38
- api_provider: API provider ("huggingface", "openai", or "anthropic")
 
39
 
40
  Returns:
41
  Tuple of (Orchestrator instance, backend_name)
@@ -61,46 +75,52 @@ def configure_orchestrator(
61
  judge_handler = MockJudgeHandler()
62
  backend_info = "Mock (Testing)"
63
 
64
- # 2. API Key (User provided or Env) - HuggingFace, OpenAI, or Anthropic
65
- elif (
66
- user_api_key
67
- or (
68
- api_provider == "huggingface"
69
- and (os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY"))
 
 
 
 
 
 
 
 
 
70
  )
71
- or (api_provider == "openai" and os.getenv("OPENAI_API_KEY"))
72
- or (api_provider == "anthropic" and os.getenv("ANTHROPIC_API_KEY"))
73
- ):
74
- model: AnthropicModel | HuggingFaceModel | OpenAIModel | None = None
75
- if user_api_key:
76
- # Validate key/provider match to prevent silent auth failures
77
- if api_provider == "openai" and user_api_key.startswith("sk-ant-"):
78
- raise ValueError("Anthropic key provided but OpenAI provider selected")
79
- is_openai_key = user_api_key.startswith("sk-") and not user_api_key.startswith(
80
- "sk-ant-"
81
  )
82
- if api_provider == "anthropic" and is_openai_key:
83
- raise ValueError("OpenAI key provided but Anthropic provider selected")
84
- if api_provider == "huggingface":
85
- model_name = settings.huggingface_model or "meta-llama/Llama-3.1-8B-Instruct"
86
- hf_provider = HuggingFaceProvider(api_key=user_api_key)
87
- model = HuggingFaceModel(model_name, provider=hf_provider)
88
- elif api_provider == "anthropic":
89
- anthropic_provider = AnthropicProvider(api_key=user_api_key)
90
- model = AnthropicModel(settings.anthropic_model, provider=anthropic_provider)
91
- elif api_provider == "openai":
92
- openai_provider = OpenAIProvider(api_key=user_api_key)
93
- model = OpenAIModel(settings.openai_model, provider=openai_provider)
94
- backend_info = f"API ({api_provider.upper()})"
95
- else:
96
- backend_info = "API (Env Config)"
97
 
98
  judge_handler = JudgeHandler(model=model)
99
 
100
- # 3. Free Tier (HuggingFace Inference)
101
  else:
102
- judge_handler = HFInferenceJudgeHandler()
103
- backend_info = "Free Tier (Llama 3.1 / Mistral)"
 
 
 
 
 
 
 
 
 
104
 
105
  orchestrator = create_orchestrator(
106
  search_handler=search_handler,
@@ -112,13 +132,289 @@ def configure_orchestrator(
112
  return orchestrator, backend_info
113
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 async def research_agent(
     message: str,
     history: list[dict[str, Any]],
     mode: str = "simple",
-     api_key: str = "",
-     api_provider: str = "huggingface",
- ) -> AsyncGenerator[str, None]:
     """
     Gradio chat function that runs the research agent.

@@ -126,142 +422,205 @@ async def research_agent(
         message: User's research question
         history: Chat history (Gradio format)
         mode: Orchestrator mode ("simple" or "advanced")
-         api_key: Optional user-provided API key (BYOK - Bring Your Own Key)
-         api_provider: API provider ("huggingface", "openai", or "anthropic")

     Yields:
-         Markdown-formatted responses for streaming
     """
     if not message.strip():
-         yield "Please enter a research question."
         return

-     # Clean user-provided API key
-     user_api_key = api_key.strip() if api_key else None
-
-     # Check available keys
-     has_huggingface = bool(os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY"))
-     has_openai = bool(os.getenv("OPENAI_API_KEY"))
-     has_anthropic = bool(os.getenv("ANTHROPIC_API_KEY"))
-     has_user_key = bool(user_api_key)
-     has_paid_key = has_openai or has_anthropic or has_user_key
-
-     # Advanced mode requires OpenAI specifically (due to agent-framework binding)
-     if mode == "advanced" and not (has_openai or (has_user_key and api_provider == "openai")):
-         yield (
-             "⚠️ **Warning**: Advanced mode currently requires OpenAI API key. "
-             "Falling back to simple mode.\n\n"
-         )
-         mode = "simple"

-     # Inform user about their key being used
-     if has_user_key:
-         yield (
-             f"🔑 **Using your {api_provider.upper()} API key** - "
-             "Your key is used only for this session and is never stored.\n\n"
-         )
-     elif not has_paid_key and not has_huggingface:
-         # No keys at all - will use FREE HuggingFace Inference (public models)
-         yield (
-             "🤗 **Free Tier**: Using HuggingFace Inference (Llama 3.1 / Mistral) for AI analysis.\n"
-             "For premium models or higher rate limits, enter a HuggingFace, OpenAI, or Anthropic API key below.\n\n"
-         )

-     # Run the agent and stream events
-     response_parts: list[str] = []

     try:
         # use_mock=False - let configure_orchestrator decide based on available keys
-         # It will use: Paid API > HF Inference (free tier)
         orchestrator, backend_name = configure_orchestrator(
             use_mock=False,  # Never use mock in production - HF Inference is the free fallback
-             mode=mode,
-             user_api_key=user_api_key,
-             api_provider=api_provider,
         )

-         yield f"🧠 **Backend**: {backend_name}\n\n"
-
-         async for event in orchestrator.run(message):
-             # Format event as markdown
-             event_md = event.to_markdown()
-             response_parts.append(event_md)

-             # If complete, show full response
-             if event.type == "complete":
-                 yield event.message
-             else:
-                 # Show progress
-                 yield "\n\n".join(response_parts)

     except Exception as e:
-         yield f"❌ **Error**: {e!s}"
-
-
- def create_demo() -> gr.ChatInterface:
     """
-     Create the Gradio demo interface with MCP support.

     Returns:
-         Configured Gradio Blocks interface with MCP server enabled
     """
-     # 1. Unwrapped ChatInterface (Fixes Accordion Bug)
-     demo = gr.ChatInterface(
-         fn=research_agent,
-         title="🧬 DeepCritical",
-         description=(
-             "*AI-Powered Drug Repurposing Agent — searches PubMed, "
-             "ClinicalTrials.gov & Europe PMC*\n\n"
-             "---\n"
-             "*Research tool only — not for medical advice.* \n"
-             "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`"
-         ),
-         examples=[
-             [
-                 "What drugs could be repurposed for Alzheimer's disease?",
-                 "simple",
-                 "",
-                 "openai",
-             ],
-             [
-                 "Is metformin effective for treating cancer?",
-                 "simple",
-                 "",
-                 "openai",
-             ],
-             [
-                 "What medications show promise for Long COVID treatment?",
-                 "simple",
-                 "",
-                 "openai",
-             ],
-         ],
-         additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False),
-         additional_inputs=[
-             gr.Radio(
                 choices=["simple", "advanced"],
                 value="simple",
                 label="Orchestrator Mode",
-                 info=(
-                     "Simple: Linear (Free Tier Friendly) | Advanced: Multi-Agent (Requires OpenAI)"
-                 ),
-             ),
-             gr.Textbox(
-                 label="🔑 API Key (Optional - BYOK)",
-                 placeholder="sk-... or sk-ant-...",
-                 type="password",
-                 info="Enter your own API key. Never stored.",
-             ),
-             gr.Radio(
-                 choices=["huggingface", "openai", "anthropic"],
-                 value="huggingface",
-                 label="API Provider",
-                 info="Select the provider for your API key (HuggingFace is default and free)",
             ),
-         ],
-     )

-     return demo


 def main() -> None:
 from typing import Any

 import gradio as gr
+
+ # Try to import HuggingFace support (may not be available in all pydantic-ai versions)
+ # According to https://ai.pydantic.dev/models/huggingface/, HuggingFace support requires
+ # pydantic-ai with the huggingface extra or pydantic-ai-slim[huggingface].
+ # There are two ways to use HuggingFace:
+ # 1. Inference API: HuggingFaceModel with HuggingFaceProvider (uses AsyncInferenceClient internally)
+ # 2. Local models: would use transformers directly (not via pydantic-ai)
+ try:
+     from huggingface_hub import AsyncInferenceClient
+     from pydantic_ai.models.huggingface import HuggingFaceModel
+     from pydantic_ai.providers.huggingface import HuggingFaceProvider
+
+     _HUGGINGFACE_AVAILABLE = True
+ except ImportError:
+     HuggingFaceModel = None  # type: ignore[assignment, misc]
+     HuggingFaceProvider = None  # type: ignore[assignment, misc]
+     AsyncInferenceClient = None  # type: ignore[assignment, misc]
+     _HUGGINGFACE_AVAILABLE = False

 from src.agent_factory.judges import HFInferenceJudgeHandler, JudgeHandler, MockJudgeHandler
 from src.orchestrator_factory import create_orchestrator

 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
 from src.utils.config import settings
+ from src.utils.models import AgentEvent, OrchestratorConfig

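The Inference API route described in the comments above follows the pattern documented at ai.pydantic.dev. Here is a minimal standalone sketch (not part of the commit), assuming the `huggingface` extra is installed and `HF_TOKEN` is set; the model ID and prompt are placeholders, not values the app relies on:

```python
import os

from pydantic_ai import Agent
from pydantic_ai.models.huggingface import HuggingFaceModel
from pydantic_ai.providers.huggingface import HuggingFaceProvider

# Wire a pydantic-ai Agent to the HuggingFace Inference API (illustrative only).
provider = HuggingFaceProvider(api_key=os.environ["HF_TOKEN"])
model = HuggingFaceModel("meta-llama/Llama-3.1-8B-Instruct", provider=provider)
agent = Agent(model)

result = agent.run_sync("Summarize what drug repurposing means in one sentence.")
print(result.output)  # older pydantic-ai releases expose this as result.data
```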
 def configure_orchestrator(
     use_mock: bool = False,
     mode: str = "simple",
+     oauth_token: str | None = None,
+     hf_model: str | None = None,
+     hf_provider: str | None = None,
 ) -> tuple[Any, str]:
     """
     Create an orchestrator instance.

     Args:
         use_mock: If True, use MockJudgeHandler (no API key needed)
         mode: Orchestrator mode ("simple" or "advanced")
+         oauth_token: Optional OAuth token from HuggingFace login
+         hf_model: Selected HuggingFace model ID
+         hf_provider: Selected inference provider

     Returns:
         Tuple of (Orchestrator instance, backend_name)

         judge_handler = MockJudgeHandler()
         backend_info = "Mock (Testing)"

+     # 2. API Key (OAuth or Env) - HuggingFace only (OAuth provides an HF token)
+     # Priority: oauth_token > env vars
+     # On HuggingFace Spaces, the OAuth token is available via request.oauth_token
+     effective_api_key = oauth_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY")
+
+     if effective_api_key:
+         # We have an API key (OAuth or env) - use pydantic-ai with JudgeHandler.
+         # This uses HuggingFace's own inference API, not third-party providers.
+         model: Any | None = None
+         # Use the selected model or fall back to env var/settings
+         model_name = (
+             hf_model
+             or os.getenv("HF_MODEL")
+             or settings.huggingface_model
+             or "Qwen/Qwen3-Next-80B-A3B-Thinking"
         )
+         if not _HUGGINGFACE_AVAILABLE:
+             raise ImportError(
+                 "HuggingFace models are not available in this version of pydantic-ai. "
+                 "Please install with: uv add 'pydantic-ai[huggingface]' or use 'openai'/'anthropic' as the LLM provider."
             )
+         # Inference API - uses the HuggingFace Inference API via AsyncInferenceClient.
+         # Per https://ai.pydantic.dev/models/huggingface/#configure-the-provider,
+         # AsyncInferenceClient accepts a 'token' parameter for the API key.
+         hf_client = AsyncInferenceClient(token=effective_api_key)  # type: ignore[misc]
+         # Pass the client to HuggingFaceProvider for inference API usage
+         provider = HuggingFaceProvider(hf_client=hf_client)  # type: ignore[misc]
+         model = HuggingFaceModel(model_name, provider=provider)  # type: ignore[misc]
+         backend_info = "API (HuggingFace OAuth)" if oauth_token else "API (Env Config)"

         judge_handler = JudgeHandler(model=model)

+     # 3. Free Tier (HuggingFace Inference) - NO API KEY AVAILABLE
     else:
+         # No API key available - use HFInferenceJudgeHandler with public models.
+         # Don't use third-party providers (novita, groq, etc.) as they require their own API keys;
+         # use HuggingFace's own inference API with public/ungated models.
+         # Pass an empty provider to use HuggingFace's default (not third-party providers).
+         judge_handler = HFInferenceJudgeHandler(
+             model_id=hf_model,
+             api_key=None,  # No API key - will use public models only
+             provider=None,  # Don't specify a provider - use HuggingFace's default
+         )
+         model_display = hf_model.split("/")[-1] if hf_model else "Default (Public Models)"
+         backend_info = f"Free Tier ({model_display} - Public Models Only)"

     orchestrator = create_orchestrator(
         search_handler=search_handler,

     return orchestrator, backend_info

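As a rough orientation to the branch logic above, a minimal sketch (not part of the commit) of how the three configuration paths could be exercised; the token and model ID are placeholders, and the asserts simply mirror the `backend_info` strings set above:

```python
# 1. Mock branch: no key needed, used for testing.
orchestrator, backend = configure_orchestrator(use_mock=True)
assert backend == "Mock (Testing)"

# 2. OAuth/env-key branch: requires the pydantic-ai huggingface extra,
#    otherwise this path raises ImportError as coded above.
orchestrator, backend = configure_orchestrator(oauth_token="hf_xxx")  # placeholder token
assert backend == "API (HuggingFace OAuth)"

# 3. Free tier: with no oauth_token and no HF_TOKEN/HUGGINGFACE_API_KEY in the
#    environment, the public-model fallback is selected.
orchestrator, backend = configure_orchestrator(hf_model="meta-llama/Llama-3.1-8B-Instruct")
assert backend.startswith("Free Tier")
```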
+ def event_to_chat_message(event: AgentEvent) -> dict[str, Any]:
+     """
+     Convert an AgentEvent into a gr.ChatMessage-style dict with metadata for accordion display.
+
+     Args:
+         event: The AgentEvent to convert
+
+     Returns:
+         ChatMessage dict with metadata for a collapsible accordion
+     """
+     # Map event types to accordion titles and determine whether they are pending
+     event_configs: dict[str, dict[str, Any]] = {
+         "started": {"title": "🚀 Starting Research", "status": "done", "icon": "🚀"},
+         "searching": {"title": "🔍 Searching Literature", "status": "pending", "icon": "🔍"},
+         "search_complete": {"title": "📚 Search Results", "status": "done", "icon": "📚"},
+         "judging": {"title": "🧠 Evaluating Evidence", "status": "pending", "icon": "🧠"},
+         "judge_complete": {"title": "✅ Evidence Assessment", "status": "done", "icon": "✅"},
+         "looping": {"title": "🔄 Research Iteration", "status": "pending", "icon": "🔄"},
+         "synthesizing": {"title": "📝 Synthesizing Report", "status": "pending", "icon": "📝"},
+         "hypothesizing": {"title": "🔬 Generating Hypothesis", "status": "pending", "icon": "🔬"},
+         "analyzing": {"title": "📊 Statistical Analysis", "status": "pending", "icon": "📊"},
+         "analysis_complete": {"title": "📈 Analysis Results", "status": "done", "icon": "📈"},
+         "streaming": {"title": "📡 Processing", "status": "pending", "icon": "📡"},
+         "complete": {"title": None, "status": "done", "icon": "🎉"},  # Main response, no accordion
+         "error": {"title": "❌ Error", "status": "done", "icon": "❌"},
+     }
+
+     config = event_configs.get(
+         event.type, {"title": f"• {event.type}", "status": "done", "icon": "•"}
+     )
+
+     # For complete events, return the main response without an accordion
+     if event.type == "complete":
+         # Return as a dict for Gradio Chatbot compatibility
+         return {
+             "role": "assistant",
+             "content": event.message,
+         }
+
+     # Build metadata for the accordion according to the Gradio ChatMessage spec.
+     # Valid metadata keys: title (str), status ("pending"|"done"), log (str), duration (float).
+     # See: https://www.gradio.app/guides/agents-and-tool-usage
+     metadata: dict[str, Any] = {}
+
+     # Title is required for accordion display - must be a string
+     if config["title"]:
+         metadata["title"] = str(config["title"])
+
+     # Set status (pending shows a spinner, done is collapsed).
+     # Must be exactly "pending" or "done" per the Gradio spec.
+     if config["status"] == "pending":
+         metadata["status"] = "pending"
+     elif config["status"] == "done":
+         metadata["status"] = "done"
+
+     # Add duration if available in data (must be a float)
+     if event.data and isinstance(event.data, dict) and "duration" in event.data:
+         duration = event.data["duration"]
+         if isinstance(duration, int | float):
+             metadata["duration"] = float(duration)
+
+     # Add log info (iteration number, tool, result count) - must be a string
+     log_parts: list[str] = []
+     if event.iteration > 0:
+         log_parts.append(f"Iteration {event.iteration}")
+     if event.data and isinstance(event.data, dict):
+         if "tool" in event.data:
+             log_parts.append(f"Tool: {event.data['tool']}")
+         if "results_count" in event.data:
+             log_parts.append(f"Results: {event.data['results_count']}")
+     if log_parts:
+         metadata["log"] = " | ".join(log_parts)
+
+     # Per https://www.gradio.app/guides/agents-and-tool-usage, the ChatMessage format is
+     # {"role": "assistant", "content": "...", "metadata": {...}}, and metadata must have a
+     # "title" key for the accordion to render.
+     result: dict[str, Any] = {
+         "role": "assistant",
+         "content": event.message,
+     }
+     # Only attach metadata if it has a title, and make sure values match Gradio's expected types
+     if metadata and metadata.get("title"):
+         # Ensure status is valid if present
+         if "status" in metadata:
+             status = metadata["status"]
+             if status not in ("pending", "done"):
+                 metadata["status"] = "done"  # Default to "done" if invalid
+         result["metadata"] = metadata
+     return result
+
+
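For reference, the dict this helper produces for a pending search event would look roughly like the following (values are illustrative, not taken from a real run):

```python
# Shape of event_to_chat_message() output for a "searching" event on iteration 2
# whose data carries a tool name (all values made up for illustration).
msg = {
    "role": "assistant",
    "content": "Searching PubMed for 'metformin cancer'...",
    "metadata": {
        "title": "🔍 Searching Literature",
        "status": "pending",              # renders a spinner in the Gradio Chatbot
        "log": "Iteration 2 | Tool: pubmed",
    },
}
```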
+ def extract_oauth_info(request: gr.Request | None) -> tuple[str | None, str | None]:
+     """
+     Extract the OAuth token and username from a Gradio request.
+
+     Args:
+         request: Gradio request object containing OAuth information
+
+     Returns:
+         Tuple of (oauth_token, oauth_username)
+     """
+     oauth_token: str | None = None
+     oauth_username: str | None = None
+
+     if request is None:
+         return oauth_token, oauth_username
+
+     # Try multiple ways to access the OAuth token (the Gradio API may vary)
+     # Pattern 1: request.oauth_token.token
+     if hasattr(request, "oauth_token") and request.oauth_token is not None:
+         if hasattr(request.oauth_token, "token"):
+             oauth_token = request.oauth_token.token
+         elif isinstance(request.oauth_token, str):
+             oauth_token = request.oauth_token
+     # Pattern 2: request.headers (fallback)
+     elif hasattr(request, "headers"):
+         # The OAuth token might be in the headers
+         auth_header = request.headers.get("authorization") or request.headers.get("Authorization")
+         if auth_header and auth_header.startswith("Bearer "):
+             oauth_token = auth_header.replace("Bearer ", "")
+
+     # Access the username from the request
+     if hasattr(request, "username") and request.username:
+         oauth_username = request.username
+     # Also try accessing it via oauth_profile if available
+     elif hasattr(request, "oauth_profile") and request.oauth_profile is not None:
+         if hasattr(request.oauth_profile, "username"):
+             oauth_username = request.oauth_profile.username
+         elif hasattr(request.oauth_profile, "name"):
+             oauth_username = request.oauth_profile.name
+
+     return oauth_token, oauth_username
+
+
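A hypothetical usage sketch: Gradio injects a `gr.Request` into any event handler that declares a parameter of that type, which is where `extract_oauth_info()` would be called. The `whoami` handler and component names below are made up for illustration:

```python
import gradio as gr

def whoami(request: gr.Request) -> str:
    # Pull OAuth details (if any) out of the incoming request.
    token, username = extract_oauth_info(request)
    if token is None:
        return "Not signed in."
    return f"Signed in as {username or 'unknown user'}."

with gr.Blocks() as login_probe:
    status = gr.Textbox(label="Login status")
    gr.Button("Check login").click(whoami, inputs=None, outputs=status)
```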
+ async def yield_auth_messages(
+     oauth_username: str | None,
+     oauth_token: str | None,
+     has_huggingface: bool,
+     mode: str,
+ ) -> AsyncGenerator[dict[str, Any], None]:
+     """
+     Yield authentication and mode status messages.
+
+     Args:
+         oauth_username: OAuth username if available
+         oauth_token: OAuth token if available
+         has_huggingface: Whether HuggingFace credentials are available
+         mode: Orchestrator mode
+
+     Yields:
+         ChatMessage dicts with authentication status
+     """
+     # Show a user greeting if logged in via OAuth
+     if oauth_username:
+         yield {
+             "role": "assistant",
+             "content": f"👋 **Welcome, {oauth_username}!** Using your HuggingFace account.\n\n",
+         }
+
+     # Advanced mode is not supported without OpenAI (which requires manual setup);
+     # for now, only simple mode with HuggingFace is supported.
+     if mode == "advanced":
+         yield {
+             "role": "assistant",
+             "content": (
+                 "⚠️ **Warning**: Advanced mode requires OpenAI API key configuration. "
+                 "Falling back to simple mode.\n\n"
+             ),
+         }
+
+     # Inform the user about their authentication status
+     if oauth_token:
+         yield {
+             "role": "assistant",
+             "content": (
+                 "🔐 **Using HuggingFace OAuth token** - "
+                 "Authenticated via your HuggingFace account.\n\n"
+             ),
+         }
+     elif not has_huggingface:
+         # No keys at all - will use the FREE HuggingFace Inference tier (public models)
+         yield {
+             "role": "assistant",
+             "content": (
+                 "🤗 **Free Tier**: Using HuggingFace Inference (Llama 3.1 / Mistral) for AI analysis.\n"
+                 "For premium models or higher rate limits, sign in with HuggingFace above.\n\n"
+             ),
+         }
+
+
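Illustratively, a logged-in OAuth user who selected advanced mode would see three status messages before the run starts; a small sketch (username and token are placeholders) with the expected first lines:

```python
import asyncio

async def show_status() -> None:
    async for msg in yield_auth_messages("jane-doe", "hf_xxx", has_huggingface=True, mode="advanced"):
        print(msg["content"].splitlines()[0])

asyncio.run(show_status())
# 👋 **Welcome, jane-doe!** Using your HuggingFace account.
# ⚠️ **Warning**: Advanced mode requires OpenAI API key configuration. Falling back to simple mode.
# 🔐 **Using HuggingFace OAuth token** - Authenticated via your HuggingFace account.
```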
+ async def handle_orchestrator_events(
+     orchestrator: Any,
+     message: str,
+ ) -> AsyncGenerator[dict[str, Any], None]:
+     """
+     Handle orchestrator events and yield ChatMessages.
+
+     Args:
+         orchestrator: The orchestrator instance
+         message: The research question
+
+     Yields:
+         ChatMessage dicts built from orchestrator events
+     """
+     # Track pending accordions for real-time updates
+     pending_accordions: dict[str, str] = {}  # title -> accumulated content
+
+     async for event in orchestrator.run(message):
+         # Convert the event to a ChatMessage dict with metadata
+         # (event_to_chat_message always returns a dict)
+         chat_msg = event_to_chat_message(event)
+
+         # Handle complete events (main response)
+         if event.type == "complete":
+             # Close any pending accordions first
+             if pending_accordions:
+                 for title, content in pending_accordions.items():
+                     yield {
+                         "role": "assistant",
+                         "content": content.strip(),
+                         "metadata": {"title": title, "status": "done"},
+                     }
+                 pending_accordions.clear()
+
+             # Yield the final response (no accordion for the main response)
+             yield chat_msg
+             continue
+
+         # Handle events with metadata (accordions)
+         metadata: dict[str, Any] = chat_msg.get("metadata", {})
+         if metadata:
+             msg_title: str | None = metadata.get("title")
+             msg_status: str | None = metadata.get("status")
+
+             if msg_title:
+                 # For pending operations, accumulate content and show a spinner
+                 if msg_status == "pending":
+                     if msg_title not in pending_accordions:
+                         pending_accordions[msg_title] = ""
+                     content = chat_msg.get("content", "")
+                     pending_accordions[msg_title] += content + "\n"
+                     # Yield the updated accordion with accumulated content
+                     yield {
+                         "role": "assistant",
+                         "content": pending_accordions[msg_title].strip(),
+                         "metadata": chat_msg.get("metadata", {}),
+                     }
+                 elif msg_title in pending_accordions:
+                     # Combine the pending content with the final content
+                     content = chat_msg.get("content", "")
+                     final_content = pending_accordions[msg_title] + content
+                     del pending_accordions[msg_title]
+                     yield {
+                         "role": "assistant",
+                         "content": final_content.strip(),
+                         "metadata": {"title": msg_title, "status": "done"},
+                     }
+                 else:
+                     # New done accordion (no pending state)
+                     yield chat_msg
+             else:
+                 # No title, yield as-is
+                 yield chat_msg
+         else:
+             # No metadata, yield as a plain message
+             yield chat_msg
+
+
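To see the accordion bookkeeping in action without a real orchestrator, here is a hedged sketch using a stand-in event class that carries only the attributes the helpers above read (`type`, `message`, `iteration`, `data`); the real `AgentEvent` lives in `src.utils.models` and may carry more fields:

```python
import asyncio
from dataclasses import dataclass, field
from typing import Any

@dataclass
class FakeEvent:
    # Minimal stand-in for AgentEvent, for demonstration only.
    type: str
    message: str
    iteration: int = 0
    data: dict[str, Any] = field(default_factory=dict)

class FakeOrchestrator:
    async def run(self, message: str):
        yield FakeEvent("searching", "Querying PubMed...", iteration=1, data={"tool": "pubmed"})
        yield FakeEvent("search_complete", "Found 12 records.", iteration=1)
        yield FakeEvent("complete", "## Report\nFinal synthesized answer goes here.")

async def demo_events() -> None:
    async for msg in handle_orchestrator_events(FakeOrchestrator(), "metformin and cancer"):
        title = msg.get("metadata", {}).get("title")
        print(msg["role"], title, "->", msg["content"][:40])

asyncio.run(demo_events())
```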
 async def research_agent(
     message: str,
     history: list[dict[str, Any]],
     mode: str = "simple",
+     hf_model: str | None = None,
+     hf_provider: str | None = None,
+     oauth_token: gr.OAuthToken | None = None,
+     oauth_profile: gr.OAuthProfile | None = None,
+ ) -> AsyncGenerator[dict[str, Any] | list[dict[str, Any]], None]:
     """
     Gradio chat function that runs the research agent.

         message: User's research question
         history: Chat history (Gradio format)
         mode: Orchestrator mode ("simple" or "advanced")
+         hf_model: Selected HuggingFace model ID (from dropdown)
+         hf_provider: Selected inference provider (from dropdown)
+         oauth_token: Gradio OAuth token (None if user not logged in)
+         oauth_profile: Gradio OAuth profile (None if user not logged in)

     Yields:
+         ChatMessage dicts with metadata for accordion display
     """
+     # REQUIRE LOGIN BEFORE USE
+     # Extract the OAuth token and username using Gradio's OAuth types.
+     # Per the Gradio docs, OAuthToken and OAuthProfile are None if the user is not logged in.
+     token_value: str | None = None
+     username: str | None = None
+
+     if oauth_token is not None:
+         # OAuthToken has a .token attribute containing the access token
+         token_value = oauth_token.token if hasattr(oauth_token, "token") else None
+
+     if oauth_profile is not None:
+         # OAuthProfile has .username, .name, .profile_image attributes
+         username = (
+             oauth_profile.username
+             if hasattr(oauth_profile, "username") and oauth_profile.username
+             else (oauth_profile.name if hasattr(oauth_profile, "name") and oauth_profile.name else None)
+         )
+
+     # Check whether the user is logged in (OAuth token or env var);
+     # fall back to env vars for local development or Spaces with an HF_TOKEN secret.
+     has_authentication = bool(
+         token_value
+         or os.getenv("HF_TOKEN")
+         or os.getenv("HUGGINGFACE_API_KEY")
+     )
+
+     if not has_authentication:
+         yield {
+             "role": "assistant",
+             "content": (
+                 "🔐 **Authentication Required**\n\n"
+                 "Please **sign in with HuggingFace** using the login button at the top of the page "
+                 "before using this application.\n\n"
+                 "The login button is required to access the AI models and research tools."
+             ),
+         }
+         return
+
     if not message.strip():
+         yield {
+             "role": "assistant",
+             "content": "Please enter a research question.",
+         }
         return

+     # Check available keys (use token_value instead of oauth_token)
+     has_huggingface = bool(os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_API_KEY") or token_value)

+     # Adjust the mode if needed
+     effective_mode = mode
+     if mode == "advanced":
+         effective_mode = "simple"

+     # Yield authentication and mode status messages
+     async for msg in yield_auth_messages(username, token_value, has_huggingface, mode):
+         yield msg

+     # Run the agent and stream events
     try:
         # use_mock=False - let configure_orchestrator decide based on available keys
+         # It will use: OAuth token > env vars > HF Inference (free tier).
+         # Convert empty strings from the Textbox components to None so defaults apply.
+         model_id = hf_model if hf_model and hf_model.strip() else None
+         provider_name = hf_provider if hf_provider and hf_provider.strip() else None
+
         orchestrator, backend_name = configure_orchestrator(
             use_mock=False,  # Never use mock in production - HF Inference is the free fallback
+             mode=effective_mode,
+             oauth_token=token_value,  # Use the extracted token value
+             hf_model=model_id,  # None will use defaults in configure_orchestrator
+             hf_provider=provider_name,  # None will use defaults in configure_orchestrator
         )

+         yield {
+             "role": "assistant",
+             "content": f"🧠 **Backend**: {backend_name}\n\n",
+         }

+         # Handle orchestrator events
+         async for msg in handle_orchestrator_events(orchestrator, message):
+             yield msg

     except Exception as e:
+         # Return the error message without metadata to avoid issues during example caching
+         # (metadata can cause validation errors when Gradio caches examples).
+         # Gradio Chatbot requires plain text here - strip markdown and special characters.
+         error_msg = str(e).replace("**", "").replace("*", "").replace("`", "")
+         # Ensure the content is a simple string without any special formatting
+         yield {
+             "role": "assistant",
+             "content": f"Error: {error_msg}. Please check your configuration and try again.",
+         }
+
+
+ def create_demo() -> gr.Blocks:
528
  """
529
+ Create the Gradio demo interface with MCP support and OAuth login.
530
 
531
  Returns:
532
+ Configured Gradio Blocks interface with MCP server and OAuth enabled
533
  """
534
+ with gr.Blocks(title="🧬 DeepCritical") as demo:
535
+ # Add login button at the top in a visible Row container
536
+ # LoginButton must be visible and properly configured for OAuth to work
537
+ # Using a Row with scale ensures the button is displayed prominently at the top
538
+ with gr.Row(equal_height=False):
539
+ with gr.Column(scale=1, min_width=200):
540
+ login_btn = gr.LoginButton(
541
+ value="Sign in with Hugging Face",
542
+ variant="huggingface",
543
+ size="lg",
544
+ )
545
+
546
+ # Create settings components (hidden - used only for additional_inputs)
547
+ # Model/provider selection removed to avoid dropdown value mismatch errors
548
+ # Settings will use defaults from configure_orchestrator
549
+ with gr.Row(visible=False):
550
+ mode_radio = gr.Radio(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  choices=["simple", "advanced"],
552
  value="simple",
553
  label="Orchestrator Mode",
554
+ info="Simple: Linear | Advanced: Multi-Agent (Requires OpenAI)",
555
+ )
556
+
557
+ # Hidden text components for model/provider (not dropdowns to avoid value mismatch)
558
+ # These will be empty by default and use defaults in configure_orchestrator
559
+ hf_model_dropdown = gr.Textbox(
560
+ value="", # Empty string - will be converted to None in research_agent
561
+ label="🤖 Reasoning Model",
562
+ visible=False, # Hidden from UI
563
+ )
564
+
565
+ hf_provider_dropdown = gr.Textbox(
566
+ value="", # Empty string - will be converted to None in research_agent
567
+ label=" Inference Provider",
568
+ visible=False, # Hidden from UI
569
+ )
570
+
571
+ # Chat interface with model/provider selection
572
+ # Examples are provided but will NOT run at startup (cache_examples=False)
573
+ # Users must log in first before using examples or submitting queries
574
+ gr.ChatInterface(
575
+ fn=research_agent,
576
+ title="🧬 DeepCritical",
577
+ description=(
578
+ "*AI-Powered Drug Repurposing Agent — searches PubMed, "
579
+ "ClinicalTrials.gov & Europe PMC*\n\n"
580
+ "---\n"
581
+ "*Research tool only — not for medical advice.* \n"
582
+ "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`\n\n"
583
+ "**⚠️ Authentication Required**: Please **sign in with HuggingFace** above before using this application."
584
  ),
585
+ examples=[
586
+ # When additional_inputs are provided, examples must be lists of lists
587
+ # Each inner list: [message, mode, hf_model, hf_provider]
588
+ # Using actual model IDs and provider names from inference_models.py
589
+ # Note: Provider is optional - if empty, HF will auto-select
590
+ # These examples will NOT run at startup - users must click them after logging in
591
+ [
592
+ "What drugs could be repurposed for Alzheimer's disease?",
593
+ "simple",
594
+ "Qwen/Qwen3-Next-80B-A3B-Thinking",
595
+ "",
596
+ ],
597
+ [
598
+ "Is metformin effective for treating cancer?",
599
+ "simple",
600
+ "Qwen/Qwen3-235B-A22B-Instruct-2507",
601
+ "",
602
+ ],
603
+ [
604
+ "What medications show promise for Long COVID treatment?",
605
+ "simple",
606
+ "zai-org/GLM-4.5-Air",
607
+ "nebius",
608
+ ],
609
+
610
+ ],
611
+ cache_examples=False, # CRITICAL: Disable example caching to prevent examples from running at startup
612
+ # Examples will only run when user explicitly clicks them (after login)
613
+ additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=True, visible=True),
614
+ additional_inputs=[
615
+ mode_radio,
616
+ hf_model_dropdown,
617
+ hf_provider_dropdown,
618
+ # Note: gr.OAuthToken and gr.OAuthProfile are automatically passed as function parameters
619
+ # when user is logged in - they should NOT be added to additional_inputs
620
+ ],
621
+ )
622
 
623
+ return demo # type: ignore[no-any-return]
624
 
625
 
626
  def main() -> None:
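A sketch of how the demo would typically be served; this mirrors what `main()` presumably does, and the launch arguments (including `mcp_server=True`) assume a Gradio version with built-in MCP support, matching the `/gradio_api/mcp/` endpoint advertised in the description:

```python
if __name__ == "__main__":
    # Build the Blocks app and serve it with the MCP server enabled
    # so Claude Desktop can connect to /gradio_api/mcp/ (illustrative values).
    demo = create_demo()
    demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)
```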
src/middleware/state_machine.py CHANGED
@@ -127,3 +127,7 @@ def get_workflow_state() -> WorkflowState:
         logger.debug("Workflow state not found, auto-initializing")
         return init_workflow_state()
     return state
+
+
+
+
src/tools/crawl_adapter.py CHANGED
@@ -56,3 +56,7 @@ async def crawl_website(starting_url: str) -> str:
     except Exception as e:
         logger.error("Crawl failed", error=str(e), url=starting_url)
         return f"Error crawling website: {e!s}"
+
+
+
+
src/tools/web_search_adapter.py CHANGED
@@ -61,3 +61,7 @@ async def web_search(query: str) -> str:
     except Exception as e:
         logger.error("Web search failed", error=str(e), query=query)
         return f"Error performing web search: {e!s}"
+
+
+
+
tests/unit/middleware/__init__.py CHANGED
@@ -1 +1,15 @@
 """Unit tests for middleware components."""
+
+
+
+
+
+
+
+
+
+
+
+
+
+
tests/unit/middleware/test_budget_tracker_phase7.py CHANGED
@@ -157,3 +157,17 @@ class TestIterationTokenTracking:
     assert budget2 is not None
     assert budget1.iteration_tokens[1] == 100
     assert budget2.iteration_tokens[1] == 200
+
+
+
+
+
+
+
+
+
+
+
+
+
+
tests/unit/middleware/test_state_machine.py CHANGED
@@ -354,3 +354,17 @@ class TestContextVarIsolation:
     assert len(state2.evidence) == 1
     assert state1.evidence[0].citation.url == "https://example.com/1"
     assert state2.evidence[0].citation.url == "https://example.com/2"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
tests/unit/middleware/test_workflow_manager.py CHANGED
@@ -284,3 +284,17 @@ class TestWorkflowManager:

     assert len(shared) == 1
     assert shared[0].content == "Shared"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
tests/unit/orchestrator/__init__.py CHANGED
@@ -1 +1,15 @@
 """Unit tests for orchestrator module."""
+
+
+
+
+
+
+
+
+
+
+
+
+
+