Spaces:
Sleeping
Sleeping
meirk-brd
committed on
Commit
·
859566a
1
Parent(s):
173b4ae
Fix: Read file content when smolagents sends URL as file upload
Browse files
tool.py
CHANGED
|
@@ -231,7 +231,17 @@ class BrightDataDatasetTool(Tool):
|
|
| 231 |
return self._extract_url_from_text(raw)
|
| 232 |
|
| 233 |
if isinstance(raw, dict):
|
| 234 |
-
# Check
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
url_value = raw.get("url")
|
| 236 |
if isinstance(url_value, str):
|
| 237 |
if url_value.startswith(("http://", "https://")):
|
|
|
|
| 231 |
return self._extract_url_from_text(raw)
|
| 232 |
|
| 233 |
if isinstance(raw, dict):
|
| 234 |
+
# Check if this is a Gradio FileData with a path to read
|
| 235 |
+
path_value = raw.get("path")
|
| 236 |
+
if isinstance(path_value, str) and os.path.isfile(path_value):
|
| 237 |
+
# Read the file content (smolagents writes URL as file content)
|
| 238 |
+
file_content = self._read_text_file(path_value)
|
| 239 |
+
if file_content:
|
| 240 |
+
extracted = self._extract_url_from_text(file_content)
|
| 241 |
+
if extracted:
|
| 242 |
+
return extracted
|
| 243 |
+
|
| 244 |
+
# Check for direct url field (common in Gradio FileData from smolagents)
|
| 245 |
url_value = raw.get("url")
|
| 246 |
if isinstance(url_value, str):
|
| 247 |
if url_value.startswith(("http://", "https://")):
|