Spaces:
Running
Running
Commit
·
15459e9
1
Parent(s):
7057335
fix(phase5): align configs and add agent tests
Browse files- Add PLR0912, PLC0415 to ruff ignore list for complex orchestrator logic
- Disable warn_unused_ignores in mypy (vendored lib needs type: ignore)
- Fix single ChatMessage input handling in SearchAgent/JudgeAgent
- Add comprehensive unit tests for SearchAgent and JudgeAgent wrappers
- All 38 tests pass, lint and mypy clean, pre-commit aligned
- pyproject.toml +3 -1
- src/agents/judge_agent.py +2 -0
- src/agents/search_agent.py +2 -0
- src/orchestrator_magentic.py +1 -1
- tests/unit/agents/__init__.py +0 -0
- tests/unit/agents/test_judge_agent.py +124 -0
- tests/unit/agents/test_search_agent.py +81 -0
pyproject.toml
CHANGED
|
@@ -76,6 +76,8 @@ select = [
|
|
| 76 |
]
|
| 77 |
ignore = [
|
| 78 |
"PLR0913", # Too many arguments (agents need many params)
|
|
|
|
|
|
|
| 79 |
]
|
| 80 |
|
| 81 |
[tool.ruff.lint.isort]
|
|
@@ -88,7 +90,7 @@ strict = true
|
|
| 88 |
ignore_missing_imports = true
|
| 89 |
disallow_untyped_defs = true
|
| 90 |
warn_return_any = true
|
| 91 |
-
warn_unused_ignores =
|
| 92 |
|
| 93 |
# ============== PYTEST CONFIG ==============
|
| 94 |
[tool.pytest.ini_options]
|
|
|
|
| 76 |
]
|
| 77 |
ignore = [
|
| 78 |
"PLR0913", # Too many arguments (agents need many params)
|
| 79 |
+
"PLR0912", # Too many branches (complex orchestrator logic)
|
| 80 |
+
"PLC0415", # Lazy imports for optional dependencies
|
| 81 |
]
|
| 82 |
|
| 83 |
[tool.ruff.lint.isort]
|
|
|
|
| 90 |
ignore_missing_imports = true
|
| 91 |
disallow_untyped_defs = true
|
| 92 |
warn_return_any = true
|
| 93 |
+
warn_unused_ignores = false
|
| 94 |
|
| 95 |
# ============== PYTEST CONFIG ==============
|
| 96 |
[tool.pytest.ini_options]
|
src/agents/judge_agent.py
CHANGED
|
@@ -51,6 +51,8 @@ class JudgeAgent(BaseAgent): # type: ignore[misc]
|
|
| 51 |
break
|
| 52 |
elif isinstance(messages, str):
|
| 53 |
question = messages
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# Get evidence from shared store
|
| 56 |
evidence = self._evidence_store.get("current", [])
|
|
|
|
| 51 |
break
|
| 52 |
elif isinstance(messages, str):
|
| 53 |
question = messages
|
| 54 |
+
elif isinstance(messages, ChatMessage) and messages.text:
|
| 55 |
+
question = messages.text
|
| 56 |
|
| 57 |
# Get evidence from shared store
|
| 58 |
evidence = self._evidence_store.get("current", [])
|
src/agents/search_agent.py
CHANGED
|
@@ -49,6 +49,8 @@ class SearchAgent(BaseAgent): # type: ignore[misc]
|
|
| 49 |
break
|
| 50 |
elif isinstance(messages, str):
|
| 51 |
query = messages
|
|
|
|
|
|
|
| 52 |
|
| 53 |
if not query:
|
| 54 |
return AgentRunResponse(
|
|
|
|
| 49 |
break
|
| 50 |
elif isinstance(messages, str):
|
| 51 |
query = messages
|
| 52 |
+
elif isinstance(messages, ChatMessage) and messages.text:
|
| 53 |
+
query = messages.text
|
| 54 |
|
| 55 |
if not query:
|
| 56 |
return AgentRunResponse(
|
src/orchestrator_magentic.py
CHANGED
|
@@ -40,7 +40,7 @@ class MagenticOrchestrator:
|
|
| 40 |
self._max_rounds = max_rounds
|
| 41 |
self._evidence_store: dict[str, list[Evidence]] = {"current": []}
|
| 42 |
|
| 43 |
-
async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
|
| 44 |
"""
|
| 45 |
Run the Magentic workflow - same API as simple Orchestrator.
|
| 46 |
|
|
|
|
| 40 |
self._max_rounds = max_rounds
|
| 41 |
self._evidence_store: dict[str, list[Evidence]] = {"current": []}
|
| 42 |
|
| 43 |
+
async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
|
| 44 |
"""
|
| 45 |
Run the Magentic workflow - same API as simple Orchestrator.
|
| 46 |
|
tests/unit/agents/__init__.py
ADDED
|
File without changes
|
tests/unit/agents/test_judge_agent.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests for JudgeAgent."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import AsyncMock
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
from agent_framework import ChatMessage, Role
|
| 7 |
+
|
| 8 |
+
from src.agents.judge_agent import JudgeAgent
|
| 9 |
+
from src.utils.models import AssessmentDetails, Citation, Evidence, JudgeAssessment
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@pytest.fixture
|
| 13 |
+
def mock_assessment() -> JudgeAssessment:
|
| 14 |
+
"""Create a mock JudgeAssessment."""
|
| 15 |
+
return JudgeAssessment(
|
| 16 |
+
details=AssessmentDetails(
|
| 17 |
+
mechanism_score=8,
|
| 18 |
+
mechanism_reasoning="Strong mechanism evidence",
|
| 19 |
+
clinical_evidence_score=7,
|
| 20 |
+
clinical_reasoning="Good clinical data",
|
| 21 |
+
drug_candidates=["Metformin"],
|
| 22 |
+
key_findings=["Key finding 1"],
|
| 23 |
+
),
|
| 24 |
+
sufficient=True,
|
| 25 |
+
confidence=0.85,
|
| 26 |
+
recommendation="synthesize",
|
| 27 |
+
next_search_queries=[],
|
| 28 |
+
reasoning="Evidence is sufficient for synthesis",
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@pytest.fixture
|
| 33 |
+
def mock_handler(mock_assessment: JudgeAssessment) -> AsyncMock:
|
| 34 |
+
"""Mock judge handler."""
|
| 35 |
+
handler = AsyncMock()
|
| 36 |
+
handler.assess.return_value = mock_assessment
|
| 37 |
+
return handler
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@pytest.fixture
|
| 41 |
+
def sample_evidence() -> list[Evidence]:
|
| 42 |
+
"""Sample evidence for tests."""
|
| 43 |
+
return [
|
| 44 |
+
Evidence(
|
| 45 |
+
content="Test content",
|
| 46 |
+
citation=Citation(
|
| 47 |
+
source="pubmed",
|
| 48 |
+
title="Test Title",
|
| 49 |
+
url="http://test.com",
|
| 50 |
+
date="2023",
|
| 51 |
+
),
|
| 52 |
+
)
|
| 53 |
+
]
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@pytest.mark.asyncio
|
| 57 |
+
async def test_run_assesses_evidence(
|
| 58 |
+
mock_handler: AsyncMock,
|
| 59 |
+
sample_evidence: list[Evidence],
|
| 60 |
+
) -> None:
|
| 61 |
+
"""Test that run assesses evidence from store."""
|
| 62 |
+
store: dict = {"current": sample_evidence}
|
| 63 |
+
agent = JudgeAgent(mock_handler, store)
|
| 64 |
+
|
| 65 |
+
response = await agent.run("test question")
|
| 66 |
+
|
| 67 |
+
# Check handler called with evidence from store
|
| 68 |
+
mock_handler.assess.assert_awaited_once()
|
| 69 |
+
call_args = mock_handler.assess.call_args
|
| 70 |
+
assert call_args[0][0] == "test question"
|
| 71 |
+
assert call_args[0][1] == sample_evidence
|
| 72 |
+
|
| 73 |
+
# Check response
|
| 74 |
+
assert response.messages[0].role == Role.ASSISTANT
|
| 75 |
+
assert "synthesize" in response.messages[0].text
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
@pytest.mark.asyncio
|
| 79 |
+
async def test_run_handles_chat_message_input(
|
| 80 |
+
mock_handler: AsyncMock,
|
| 81 |
+
sample_evidence: list[Evidence],
|
| 82 |
+
) -> None:
|
| 83 |
+
"""Test that run handles ChatMessage input."""
|
| 84 |
+
store: dict = {"current": sample_evidence}
|
| 85 |
+
agent = JudgeAgent(mock_handler, store)
|
| 86 |
+
|
| 87 |
+
message = ChatMessage(role=Role.USER, text="test question")
|
| 88 |
+
await agent.run(message)
|
| 89 |
+
|
| 90 |
+
mock_handler.assess.assert_awaited_once()
|
| 91 |
+
assert mock_handler.assess.call_args[0][0] == "test question"
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@pytest.mark.asyncio
|
| 95 |
+
async def test_run_handles_list_input(
|
| 96 |
+
mock_handler: AsyncMock,
|
| 97 |
+
sample_evidence: list[Evidence],
|
| 98 |
+
) -> None:
|
| 99 |
+
"""Test that run handles list of messages."""
|
| 100 |
+
store: dict = {"current": sample_evidence}
|
| 101 |
+
agent = JudgeAgent(mock_handler, store)
|
| 102 |
+
|
| 103 |
+
messages = [
|
| 104 |
+
ChatMessage(role=Role.SYSTEM, text="sys"),
|
| 105 |
+
ChatMessage(role=Role.USER, text="test question"),
|
| 106 |
+
]
|
| 107 |
+
await agent.run(messages)
|
| 108 |
+
|
| 109 |
+
mock_handler.assess.assert_awaited_once()
|
| 110 |
+
assert mock_handler.assess.call_args[0][0] == "test question"
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
@pytest.mark.asyncio
|
| 114 |
+
async def test_run_uses_empty_evidence_when_store_empty(
|
| 115 |
+
mock_handler: AsyncMock,
|
| 116 |
+
) -> None:
|
| 117 |
+
"""Test that run works with empty evidence store."""
|
| 118 |
+
store: dict = {"current": []}
|
| 119 |
+
agent = JudgeAgent(mock_handler, store)
|
| 120 |
+
|
| 121 |
+
await agent.run("test")
|
| 122 |
+
|
| 123 |
+
mock_handler.assess.assert_awaited_once()
|
| 124 |
+
assert mock_handler.assess.call_args[0][1] == []
|
tests/unit/agents/test_search_agent.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Unit tests for SearchAgent."""
|
| 2 |
+
|
| 3 |
+
from unittest.mock import AsyncMock
|
| 4 |
+
|
| 5 |
+
import pytest
|
| 6 |
+
from agent_framework import ChatMessage, Role
|
| 7 |
+
|
| 8 |
+
from src.agents.search_agent import SearchAgent
|
| 9 |
+
from src.utils.models import Citation, Evidence, SearchResult
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@pytest.fixture
|
| 13 |
+
def mock_handler() -> AsyncMock:
|
| 14 |
+
"""Mock search handler."""
|
| 15 |
+
handler = AsyncMock()
|
| 16 |
+
handler.execute.return_value = SearchResult(
|
| 17 |
+
query="test query",
|
| 18 |
+
evidence=[
|
| 19 |
+
Evidence(
|
| 20 |
+
content="test content",
|
| 21 |
+
citation=Citation(
|
| 22 |
+
source="pubmed",
|
| 23 |
+
title="Test Title",
|
| 24 |
+
url="http://test.com",
|
| 25 |
+
date="2023",
|
| 26 |
+
authors=["Author A"],
|
| 27 |
+
),
|
| 28 |
+
relevance=1.0,
|
| 29 |
+
)
|
| 30 |
+
],
|
| 31 |
+
sources_searched=["pubmed"],
|
| 32 |
+
total_found=1,
|
| 33 |
+
)
|
| 34 |
+
return handler
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@pytest.mark.asyncio
|
| 38 |
+
async def test_run_executes_search(mock_handler: AsyncMock) -> None:
|
| 39 |
+
"""Test that run executes search and updates evidence store."""
|
| 40 |
+
store: dict = {"current": []}
|
| 41 |
+
agent = SearchAgent(mock_handler, store)
|
| 42 |
+
|
| 43 |
+
response = await agent.run("test query")
|
| 44 |
+
|
| 45 |
+
# Check handler called
|
| 46 |
+
mock_handler.execute.assert_awaited_once_with("test query", max_results_per_tool=10)
|
| 47 |
+
|
| 48 |
+
# Check store updated
|
| 49 |
+
assert len(store["current"]) == 1
|
| 50 |
+
assert store["current"][0].content == "test content"
|
| 51 |
+
|
| 52 |
+
# Check response
|
| 53 |
+
assert response.messages[0].role == Role.ASSISTANT
|
| 54 |
+
assert "Found 1 sources" in response.messages[0].text
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
@pytest.mark.asyncio
|
| 58 |
+
async def test_run_handles_chat_message_input(mock_handler: AsyncMock) -> None:
|
| 59 |
+
"""Test that run handles ChatMessage input."""
|
| 60 |
+
store: dict = {"current": []}
|
| 61 |
+
agent = SearchAgent(mock_handler, store)
|
| 62 |
+
|
| 63 |
+
message = ChatMessage(role=Role.USER, text="test query")
|
| 64 |
+
await agent.run(message)
|
| 65 |
+
|
| 66 |
+
mock_handler.execute.assert_awaited_once_with("test query", max_results_per_tool=10)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
@pytest.mark.asyncio
|
| 70 |
+
async def test_run_handles_list_input(mock_handler: AsyncMock) -> None:
|
| 71 |
+
"""Test that run handles list of messages."""
|
| 72 |
+
store: dict = {"current": []}
|
| 73 |
+
agent = SearchAgent(mock_handler, store)
|
| 74 |
+
|
| 75 |
+
messages = [
|
| 76 |
+
ChatMessage(role=Role.SYSTEM, text="sys"),
|
| 77 |
+
ChatMessage(role=Role.USER, text="test query"),
|
| 78 |
+
]
|
| 79 |
+
await agent.run(messages)
|
| 80 |
+
|
| 81 |
+
mock_handler.execute.assert_awaited_once_with("test query", max_results_per_tool=10)
|