VibecoderMcSwaggins committed on
Commit
d55440b
·
1 Parent(s): e502f0d

feat(search): implement ClinicalTrials.gov filtering (Phase 03)

Browse files

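Added two restrictions to the ClinicalTrials tool: an interventional-only restriction (appended to the query term as AREA[StudyType]INTERVENTIONAL) and an overall-status filter (completed, active-not-recruiting, recruiting, and enrolling-by-invitation). This removes noise from observational or terminated studies.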

src/tools/clinicaltrials.py CHANGED
@@ -19,6 +19,8 @@ class ClinicalTrialsTool:
19
  """
20
 
21
  BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
 
 
22
  FIELDS: ClassVar[list[str]] = [
23
  "NCTId",
24
  "BriefTitle",
@@ -30,6 +32,12 @@ class ClinicalTrialsTool:
30
  "BriefSummary",
31
  ]
32
 
 
 
 
 
 
 
33
  @property
34
  def name(self) -> str:
35
  return "clinicaltrials"
@@ -40,7 +48,7 @@ class ClinicalTrialsTool:
40
  reraise=True,
41
  )
42
  async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
43
- """Search ClinicalTrials.gov for studies.
44
 
45
  Args:
46
  query: Search query (e.g., "metformin alzheimer")
@@ -49,10 +57,16 @@ class ClinicalTrialsTool:
49
  Returns:
50
  List of Evidence objects from clinical trials
51
  """
52
- params: dict[str, str | int] = {
53
- "query.term": query,
 
 
 
 
54
  "pageSize": min(max_results, 100),
55
- "fields": "|".join(self.FIELDS),
 
 
56
  }
57
 
58
  try:
 
19
  """
20
 
21
  BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
22
+
23
+ # Fields to retrieve
24
  FIELDS: ClassVar[list[str]] = [
25
  "NCTId",
26
  "BriefTitle",
 
32
  "BriefSummary",
33
  ]
34
 
35
+ # Status filter: Only active/completed studies with potential data
36
+ STATUS_FILTER = "COMPLETED,ACTIVE_NOT_RECRUITING,RECRUITING,ENROLLING_BY_INVITATION"
37
+
38
+ # Study type filter: Only interventional (drug/treatment studies)
39
+ STUDY_TYPE_FILTER = "INTERVENTIONAL"
40
+
41
  @property
42
  def name(self) -> str:
43
  return "clinicaltrials"
 
48
  reraise=True,
49
  )
50
  async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
51
+ """Search ClinicalTrials.gov for interventional studies.
52
 
53
  Args:
54
  query: Search query (e.g., "metformin alzheimer")
 
57
  Returns:
58
  List of Evidence objects from clinical trials
59
  """
60
+ # Add study type filter to query string (parameter is not supported)
61
+ # AREA[StudyType]INTERVENTIONAL restricts to interventional studies
62
+ final_query = f"{query} AND AREA[StudyType]INTERVENTIONAL"
63
+
64
+ params: dict[str, Any] = {
65
+ "query.term": final_query,
66
  "pageSize": min(max_results, 100),
67
+ "fields": ",".join(self.FIELDS),
68
+ # FILTERS - Only active/completed studies
69
+ "filter.overallStatus": self.STATUS_FILTER,
70
  }
71
 
72
  try:
tests/unit/tools/test_clinicaltrials.py CHANGED
@@ -1,146 +1,152 @@
1
  """Unit tests for ClinicalTrials.gov tool."""
2
 
3
- from collections.abc import Generator
4
- from typing import Any
5
  from unittest.mock import MagicMock, patch
6
 
7
  import pytest
8
- import requests
9
 
10
  from src.tools.clinicaltrials import ClinicalTrialsTool
11
- from src.utils.exceptions import SearchError
12
  from src.utils.models import Evidence
13
 
14
 
15
- @pytest.fixture
16
- def mock_clinicaltrials_response() -> dict[str, Any]:
17
- """Mock ClinicalTrials.gov API response."""
18
- return {
19
- "studies": [
20
- {
21
- "protocolSection": {
22
- "identificationModule": {
23
- "nctId": "NCT04098666",
24
- "briefTitle": "Metformin in Alzheimer's Dementia Prevention",
25
- },
26
- "statusModule": {
27
- "overallStatus": "Recruiting",
28
- "startDateStruct": {"date": "2020-01-15"},
29
- },
30
- "descriptionModule": {
31
- "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
32
- },
33
- "designModule": {"phases": ["PHASE2"]},
34
- "conditionsModule": {"conditions": ["Alzheimer Disease", "Dementia"]},
35
- "armsInterventionsModule": {
36
- "interventions": [{"name": "Metformin", "type": "Drug"}]
37
- },
38
- }
39
- }
40
- ]
41
- }
42
 
 
 
43
 
44
- @pytest.fixture
45
- def mock_requests_get(
46
- mock_clinicaltrials_response: dict[str, Any],
47
- ) -> Generator[MagicMock, None, None]:
48
- """Fixture to mock requests.get with a successful response."""
49
- with patch("src.tools.clinicaltrials.requests.get") as mock_get:
50
  mock_response = MagicMock()
51
- mock_response.json.return_value = mock_clinicaltrials_response
52
  mock_response.raise_for_status = MagicMock()
53
- mock_get.return_value = mock_response
54
- yield mock_get
55
 
 
 
56
 
57
- class TestClinicalTrialsTool:
58
- """Tests for ClinicalTrialsTool."""
59
-
60
- def test_tool_name(self) -> None:
61
- """Tool should have correct name."""
62
- tool = ClinicalTrialsTool()
63
- assert tool.name == "clinicaltrials"
64
 
65
- @pytest.mark.asyncio
66
- async def test_search_returns_evidence(self, mock_requests_get: MagicMock) -> None:
67
- """Search should return Evidence objects."""
68
- tool = ClinicalTrialsTool()
69
- results = await tool.search("metformin alzheimer", max_results=5)
70
 
71
- assert len(results) == 1
72
- assert isinstance(results[0], Evidence)
73
- assert results[0].citation.source == "clinicaltrials"
74
- assert "NCT04098666" in results[0].citation.url
75
- assert "Metformin" in results[0].citation.title
76
 
77
  @pytest.mark.asyncio
78
- async def test_search_extracts_phase(self, mock_requests_get: MagicMock) -> None:
79
- """Search should extract trial phase."""
80
- tool = ClinicalTrialsTool()
81
- results = await tool.search("metformin alzheimer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- assert "PHASE2" in results[0].content
 
 
84
 
85
- @pytest.mark.asyncio
86
- async def test_search_extracts_status(self, mock_requests_get: MagicMock) -> None:
87
- """Search should extract trial status."""
88
- tool = ClinicalTrialsTool()
89
- results = await tool.search("metformin alzheimer")
90
 
91
- assert "Recruiting" in results[0].content
 
 
 
92
 
93
  @pytest.mark.asyncio
94
- async def test_search_empty_results(self) -> None:
95
- """Search should handle empty results gracefully."""
96
- with patch("src.tools.clinicaltrials.requests.get") as mock_get:
97
- mock_response = MagicMock()
98
- mock_response.json.return_value = {"studies": []}
99
- mock_response.raise_for_status = MagicMock()
100
- mock_get.return_value = mock_response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
- tool = ClinicalTrialsTool()
103
- results = await tool.search("nonexistent query xyz")
 
104
 
105
- assert results == []
 
106
 
107
- @pytest.mark.asyncio
108
- async def test_search_api_error(self) -> None:
109
- """Search should raise SearchError on API failure.
110
 
111
- Note: We patch the retry decorator to avoid 3x backoff delay in tests.
112
- """
113
- with patch("src.tools.clinicaltrials.requests.get") as mock_get:
114
- mock_response = MagicMock()
115
- mock_response.raise_for_status.side_effect = requests.HTTPError("500 Server Error")
116
- mock_get.return_value = mock_response
117
-
118
- tool = ClinicalTrialsTool()
119
- # Patch the retry decorator's stop condition to fail immediately
120
- tool.search.retry.stop = lambda _: True # type: ignore[attr-defined]
121
 
122
- with pytest.raises(SearchError):
123
- await tool.search("metformin alzheimer")
 
124
 
125
 
 
126
  class TestClinicalTrialsIntegration:
127
- """Integration tests (marked for separate run)."""
128
 
129
- @pytest.mark.integration
130
  @pytest.mark.asyncio
131
- async def test_real_api_call(self) -> None:
132
- """Test actual API call (requires network)."""
133
- # Skip at runtime if API unreachable (avoids network call at collection time)
134
- try:
135
- resp = requests.get("https://clinicaltrials.gov/api/v2/studies", timeout=5)
136
- if resp.status_code >= 500:
137
- pytest.skip("ClinicalTrials.gov API not reachable (server error)")
138
- except (requests.RequestException, OSError):
139
- pytest.skip("ClinicalTrials.gov API not reachable (network/SSL issue)")
140
-
141
  tool = ClinicalTrialsTool()
142
- results = await tool.search("metformin diabetes", max_results=3)
143
 
 
144
  assert len(results) > 0
145
- assert all(isinstance(r, Evidence) for r in results)
146
- assert all(r.citation.source == "clinicaltrials" for r in results)
 
 
 
 
 
 
 
 
 
1
  """Unit tests for ClinicalTrials.gov tool."""
2
 
 
 
3
  from unittest.mock import MagicMock, patch
4
 
5
  import pytest
 
6
 
7
  from src.tools.clinicaltrials import ClinicalTrialsTool
 
8
  from src.utils.models import Evidence
9
 
10
 
11
+ @pytest.mark.unit
12
+ class TestClinicalTrialsTool:
13
+ """Tests for ClinicalTrialsTool."""
14
+
15
+ @pytest.fixture
16
+ def tool(self):
17
+ return ClinicalTrialsTool()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ def test_tool_name(self, tool):
20
+ assert tool.name == "clinicaltrials"
21
 
22
+ @pytest.mark.asyncio
23
+ async def test_search_uses_filters(self, tool):
24
+ """Test that search applies status and type filters."""
 
 
 
25
  mock_response = MagicMock()
26
+ mock_response.json.return_value = {"studies": []}
27
  mock_response.raise_for_status = MagicMock()
 
 
28
 
29
+ with patch("requests.get", return_value=mock_response) as mock_get:
30
+ await tool.search("test query", max_results=5)
31
 
32
+ # Verify filters were applied
33
+ call_args = mock_get.call_args
34
+ params = call_args.kwargs.get("params", call_args[1].get("params", {}))
 
 
 
 
35
 
36
+ # Should filter for active/completed studies
37
+ assert "filter.overallStatus" in params
38
+ assert "COMPLETED" in params["filter.overallStatus"]
39
+ assert "RECRUITING" in params["filter.overallStatus"]
 
40
 
41
+ # Should filter for interventional studies via query term
42
+ assert "AREA[StudyType]INTERVENTIONAL" in params["query.term"]
43
+ assert "filter.studyType" not in params
 
 
44
 
45
  @pytest.mark.asyncio
46
+ async def test_search_returns_evidence(self, tool):
47
+ """Test that search returns Evidence objects."""
48
+ mock_study = {
49
+ "protocolSection": {
50
+ "identificationModule": {
51
+ "nctId": "NCT12345678",
52
+ "briefTitle": "Metformin for Long COVID Treatment",
53
+ },
54
+ "statusModule": {
55
+ "overallStatus": "COMPLETED",
56
+ "startDateStruct": {"date": "2023-01-01"},
57
+ },
58
+ "descriptionModule": {
59
+ "briefSummary": "A study examining metformin for Long COVID symptoms.",
60
+ },
61
+ "designModule": {
62
+ "phases": ["PHASE2", "PHASE3"],
63
+ },
64
+ "conditionsModule": {
65
+ "conditions": ["Long COVID", "PASC"],
66
+ },
67
+ "armsInterventionsModule": {
68
+ "interventions": [{"name": "Metformin"}],
69
+ },
70
+ }
71
+ }
72
 
73
+ mock_response = MagicMock()
74
+ mock_response.json.return_value = {"studies": [mock_study]}
75
+ mock_response.raise_for_status = MagicMock()
76
 
77
+ with patch("requests.get", return_value=mock_response):
78
+ results = await tool.search("long covid metformin", max_results=5)
 
 
 
79
 
80
+ assert len(results) == 1
81
+ assert isinstance(results[0], Evidence)
82
+ assert "Metformin" in results[0].citation.title
83
+ assert "PHASE2" in results[0].content or "Phase" in results[0].content
84
 
85
  @pytest.mark.asyncio
86
+ async def test_search_includes_phase_info(self, tool):
87
+ """Test that phase information is included in content."""
88
+ mock_study = {
89
+ "protocolSection": {
90
+ "identificationModule": {
91
+ "nctId": "NCT12345678",
92
+ "briefTitle": "Test Study",
93
+ },
94
+ "statusModule": {
95
+ "overallStatus": "RECRUITING",
96
+ "startDateStruct": {"date": "2024-01-01"},
97
+ },
98
+ "descriptionModule": {
99
+ "briefSummary": "Test summary.",
100
+ },
101
+ "designModule": {
102
+ "phases": ["PHASE3"],
103
+ },
104
+ "conditionsModule": {"conditions": ["Test"]},
105
+ "armsInterventionsModule": {"interventions": []},
106
+ }
107
+ }
108
 
109
+ mock_response = MagicMock()
110
+ mock_response.json.return_value = {"studies": [mock_study]}
111
+ mock_response.raise_for_status = MagicMock()
112
 
113
+ with patch("requests.get", return_value=mock_response):
114
+ results = await tool.search("test", max_results=5)
115
 
116
+ # Phase should be in content
117
+ assert "PHASE3" in results[0].content or "Phase 3" in results[0].content
 
118
 
119
+ @pytest.mark.asyncio
120
+ async def test_search_empty_results(self, tool):
121
+ """Test handling of empty results."""
122
+ mock_response = MagicMock()
123
+ mock_response.json.return_value = {"studies": []}
124
+ mock_response.raise_for_status = MagicMock()
 
 
 
 
125
 
126
+ with patch("requests.get", return_value=mock_response):
127
+ results = await tool.search("nonexistent xyz 12345", max_results=5)
128
+ assert results == []
129
 
130
 
131
+ @pytest.mark.integration
132
  class TestClinicalTrialsIntegration:
133
+ """Integration tests with real API."""
134
 
 
135
  @pytest.mark.asyncio
136
+ async def test_real_api_returns_interventional(self):
137
+ """Test that real API returns interventional studies."""
 
 
 
 
 
 
 
 
138
  tool = ClinicalTrialsTool()
139
+ results = await tool.search("long covid treatment", max_results=3)
140
 
141
+ # Should get results
142
  assert len(results) > 0
143
+
144
+ # Results should mention interventions or treatments
145
+ all_content = " ".join([r.content.lower() for r in results])
146
+ has_intervention = (
147
+ "intervention" in all_content
148
+ or "treatment" in all_content
149
+ or "drug" in all_content
150
+ or "phase" in all_content
151
+ )
152
+ assert has_intervention