Spaces:
Running
Running
Your Name
Claude
committed on
Commit
·
b2b0c37
1
Parent(s):
8df8093
Fix entity extraction parsing to handle brackets and quotes
Browse files
CRITICAL FIX: Entity extraction was returning empty results
Issue: New comprehensive prompt returns lists with brackets like:
DRUGS: [Ianalumab, VAY736, ...]
But old parser expected: DRUGS: Ianalumab, VAY736
Fix:
- Strip brackets: drugs.strip('[]')
- Strip quotes: .strip('"\'')
- Filter empty values: if d.strip()
- Handle an empty SEARCH_TERMS value by falling back to the original query
This fixes the bug where "Ianalumab for Sjogren's" returned 0 entities
and searched irrelevant trials (vaping, HIV, etc.)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- foundation_engine.py +16 -9
foundation_engine.py
CHANGED
|
@@ -1118,22 +1118,29 @@ Be expansive - more synonyms mean better trial matching."""
|
|
| 1118 |
line = line.strip()
|
| 1119 |
if line.startswith('DRUGS:'):
|
| 1120 |
drugs = line.replace('DRUGS:', '').strip()
|
| 1121 |
-
if
|
| 1122 |
-
|
|
|
|
|
|
|
| 1123 |
elif line.startswith('DISEASES:'):
|
| 1124 |
diseases = line.replace('DISEASES:', '').strip()
|
| 1125 |
-
|
| 1126 |
-
|
|
|
|
| 1127 |
elif line.startswith('COMPANIES:'):
|
| 1128 |
companies = line.replace('COMPANIES:', '').strip()
|
| 1129 |
-
|
| 1130 |
-
|
|
|
|
| 1131 |
elif line.startswith('ENDPOINTS:'):
|
| 1132 |
endpoints = line.replace('ENDPOINTS:', '').strip()
|
| 1133 |
-
|
| 1134 |
-
|
|
|
|
| 1135 |
elif line.startswith('SEARCH_TERMS:'):
|
| 1136 |
-
|
|
|
|
|
|
|
| 1137 |
|
| 1138 |
logger.info(f"[QUERY PARSER] ✓ Drugs: {result['drugs']}, Diseases: {result['diseases']}, Companies: {result['companies']}")
|
| 1139 |
return result
|
|
|
|
| 1118 |
line = line.strip()
|
| 1119 |
if line.startswith('DRUGS:'):
|
| 1120 |
drugs = line.replace('DRUGS:', '').strip()
|
| 1121 |
+
# Remove brackets if present: [item1, item2] → item1, item2
|
| 1122 |
+
drugs = drugs.strip('[]')
|
| 1123 |
+
if drugs and drugs.lower() != 'none':
|
| 1124 |
+
result['drugs'] = [d.strip().strip('"\'') for d in drugs.split(',') if d.strip()]
|
| 1125 |
elif line.startswith('DISEASES:'):
|
| 1126 |
diseases = line.replace('DISEASES:', '').strip()
|
| 1127 |
+
diseases = diseases.strip('[]')
|
| 1128 |
+
if diseases and diseases.lower() != 'none':
|
| 1129 |
+
result['diseases'] = [d.strip().strip('"\'') for d in diseases.split(',') if d.strip()]
|
| 1130 |
elif line.startswith('COMPANIES:'):
|
| 1131 |
companies = line.replace('COMPANIES:', '').strip()
|
| 1132 |
+
companies = companies.strip('[]')
|
| 1133 |
+
if companies and companies.lower() != 'none':
|
| 1134 |
+
result['companies'] = [c.strip().strip('"\'') for c in companies.split(',') if c.strip()]
|
| 1135 |
elif line.startswith('ENDPOINTS:'):
|
| 1136 |
endpoints = line.replace('ENDPOINTS:', '').strip()
|
| 1137 |
+
endpoints = endpoints.strip('[]')
|
| 1138 |
+
if endpoints and endpoints.lower() != 'none':
|
| 1139 |
+
result['endpoints'] = [e.strip().strip('"\'') for e in endpoints.split(',') if e.strip()]
|
| 1140 |
elif line.startswith('SEARCH_TERMS:'):
|
| 1141 |
+
terms = line.replace('SEARCH_TERMS:', '').strip()
|
| 1142 |
+
terms = terms.strip('[]')
|
| 1143 |
+
result['search_terms'] = terms if terms else query
|
| 1144 |
|
| 1145 |
logger.info(f"[QUERY PARSER] ✓ Drugs: {result['drugs']}, Diseases: {result['diseases']}, Companies: {result['companies']}")
|
| 1146 |
return result
|