Joseph Pollack committed · Commit 60212b7 · Parent(s): f73c49f
Commit message: adds improvements

Files changed:
- README.md +11 -4
- dev/__init__.py +0 -1
- src/app.py +0 -1
- src/middleware/state_machine.py +0 -6
- src/services/report_file_service.py +11 -7
- src/tools/searchxng_web_search.py +0 -8
- src/tools/serper_web_search.py +0 -8
- src/tools/vendored/crawl_website.py +0 -1
- src/tools/vendored/searchxng_client.py +2 -8
- src/tools/vendored/serper_client.py +2 -8
- src/tools/vendored/web_search_core.py +4 -12
- src/tools/web_search_factory.py +2 -11
README.md CHANGED

```diff
@@ -79,9 +79,12 @@ For this hackathon we're proposing a simple yet powerful Deep Research Agent tha
 ## Deep Critical In the Medial
 
 - Social Medial Posts about Deep Critical :
-- []
-- []
--
+- 𝕏 []
+- 💼 []
+- 𝕏 []
+x profile: https://x.com/viratzzs/
+linkedin: https://www.linkedin.com/in/viratchauhan/
+hf: https://huggingface.co/ViratChauhan
 -
 -
 -
@@ -123,10 +126,14 @@ For this hackathon we're proposing a simple yet powerful Deep Research Agent tha
 - 🤗 [HuggingFace](https://huggingface.co/SeasonalFall84)
 - 💼 [LinkedIn](https://www.linkedin.com/in/mario-aderman/)
 - 𝕏 [X](https://x.com/marioaderman)
-- **Joseph Pollack
+- **Joseph Pollack**
 - 🤗 [HuggingFace](https://huggingface.co/Tonic)
 - 💼 [LinkedIn](https://www.linkedin.com/in/josephpollack/)
 - 𝕏 [X](https://x.com/josephpollack)
+- **Virat Chauran**
+- 𝕏 [X](https://x.com/viratzzs/)
+- 💼 [LinkedIn](https://www.linkedin.com/in/viratchauhan/)
+- 🤗 [HuggingFace](https://huggingface.co/ViratChauhan)
 
 
 ## Acknowledgements
```
dev/__init__.py CHANGED

```diff
@@ -1,2 +1 @@
 """Development utilities and plugins."""
-
```
src/app.py CHANGED

```diff
@@ -925,7 +925,6 @@ def create_demo() -> gr.Blocks:
     gr.ChatInterface(
         fn=research_agent,
         multimodal=True,  # Enable multimodal input (text + images + audio)
-        file_types=["image", "audio", "video"],  # Explicitly enable image, audio, and video file types
         title="🔬 The DETERMINATOR",
         description=(
             "*Generalist Deep Research Agent — stops at nothing until finding precise answers to complex questions*\n\n"
```
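Note: the dropped `file_types` kwarg appears not to be part of `gr.ChatInterface`'s signature in recent Gradio releases, which would explain its removal (an assumption, not stated in the commit). If per-type upload filtering is still wanted, a minimal sketch of one way to get it back, assuming a Gradio version where `textbox=` accepts a `gr.MultimodalTextbox`:

```python
# Hypothetical sketch, not the app's actual code: recent Gradio versions take
# file_types on the textbox component rather than on gr.ChatInterface itself.
import gradio as gr

def research_agent(message, history):
    # Placeholder handler standing in for the app's real research_agent.
    return f"received: {message['text']}"

demo = gr.ChatInterface(
    fn=research_agent,
    multimodal=True,  # text + file uploads via gr.MultimodalTextbox
    textbox=gr.MultimodalTextbox(file_types=["image", "audio", "video"]),
    title="🔬 The DETERMINATOR",
)

if __name__ == "__main__":
    demo.launch()
```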
src/middleware/state_machine.py CHANGED

```diff
@@ -127,9 +127,3 @@ def get_workflow_state() -> WorkflowState:
         logger.debug("Workflow state not found, auto-initializing")
         return init_workflow_state()
     return state
-
-
-
-
-
-
```
src/services/report_file_service.py CHANGED

```diff
@@ -59,7 +59,11 @@ class ReportFileService:
                 enabled=self.enabled,
             )
         except Exception as e:
-            logger.error("Failed to create report output directory", error=str(e), path=str(self.output_directory))
+            logger.error(
+                "Failed to create report output directory",
+                error=str(e),
+                path=str(self.output_directory),
+            )
             raise ConfigurationError(f"Failed to create report output directory: {e}") from e
 
     def _generate_filename(self, query: str | None = None, extension: str = ".md") -> str:
@@ -238,10 +242,14 @@ class ReportFileService:
                     file_path.unlink()
                     deleted_count += 1
                 except Exception as e:
-                    logger.warning("Failed to delete old file", path=str(file_path), error=str(e))
+                    logger.warning(
+                        "Failed to delete old file", path=str(file_path), error=str(e)
+                    )
 
             if deleted_count > 0:
-                logger.info("Cleaned up old report files", deleted=deleted_count, max_age_days=max_age_days)
+                logger.info(
+                    "Cleaned up old report files", deleted=deleted_count, max_age_days=max_age_days
+                )
 
         except Exception as e:
             logger.error("Failed to cleanup old files", error=str(e))
@@ -264,7 +272,3 @@ def get_report_file_service() -> ReportFileService:
         return ReportFileService()
 
     return _get_service()
-
-
-
-
```
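These hunks only re-wrap over-long structlog calls onto multiple lines; the behavior is unchanged. The event-plus-keyword pattern they rely on is sketched below (the path value is illustrative, not the service's real setting):

```python
# Minimal structlog sketch mirroring the calls above: the first positional
# argument is the event name, and context travels as keyword arguments that
# structlog renders as structured key-value pairs.
import structlog

logger = structlog.get_logger()

try:
    raise OSError("disk full")  # stand-in failure for demonstration
except Exception as e:
    logger.error(
        "Failed to create report output directory",
        error=str(e),
        path="/tmp/reports",  # illustrative path only
    )
```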
src/tools/searchxng_web_search.py CHANGED

```diff
@@ -1,11 +1,8 @@
 """SearchXNG web search tool using SearchXNG API for Google searches."""
 
-from typing import Any
-
 import structlog
 from tenacity import retry, stop_after_attempt, wait_exponential
 
-from src.tools.base import SearchTool
 from src.tools.query_utils import preprocess_query
 from src.tools.rate_limiter import get_searchxng_limiter
 from src.tools.vendored.searchxng_client import SearchXNGClient
@@ -116,8 +113,3 @@ class SearchXNGWebSearchTool:
         except Exception as e:
             logger.error("Unexpected error in SearchXNG search", error=str(e), query=final_query)
             raise SearchError(f"SearchXNG search failed: {e}") from e
-
-
-
-
-
```
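Both search tools drop unused imports (`typing.Any`, `SearchTool`) and trailing blank lines while keeping their tenacity imports. A sketch of the retry pattern those imports support; the stop/wait values here are made up for illustration, not taken from the tool:

```python
# Illustrative tenacity sketch: retries a flaky async call with exponential
# backoff, matching the imports retained in this module.
import asyncio

from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=10))
async def flaky_search(query: str) -> list[str]:
    # Stand-in for a real SearchXNG/Serper call that may raise transiently.
    return [f"result for {query}"]

asyncio.run(flaky_search("deep research agents"))
```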
src/tools/serper_web_search.py CHANGED

```diff
@@ -1,11 +1,8 @@
 """Serper web search tool using Serper API for Google searches."""
 
-from typing import Any
-
 import structlog
 from tenacity import retry, stop_after_attempt, wait_exponential
 
-from src.tools.base import SearchTool
 from src.tools.query_utils import preprocess_query
 from src.tools.rate_limiter import get_serper_limiter
 from src.tools.vendored.serper_client import SerperClient
@@ -116,8 +113,3 @@ class SerperWebSearchTool:
         except Exception as e:
             logger.error("Unexpected error in Serper search", error=str(e), query=final_query)
             raise SearchError(f"Serper search failed: {e}") from e
-
-
-
-
-
```
src/tools/vendored/crawl_website.py CHANGED

```diff
@@ -125,4 +125,3 @@ async def crawl_website(starting_url: str) -> list[ScrapeResult] | str:
     # Use scrape_urls to get the content for all discovered pages
     result = await scrape_urls(pages_to_scrape_snippets)
     return result
-
```
src/tools/vendored/searchxng_client.py CHANGED

```diff
@@ -4,7 +4,6 @@ Vendored and adapted from folder/tools/web_search.py.
 """
 
 import os
-from typing import List, Optional
 
 import aiohttp
 import structlog
@@ -18,7 +17,7 @@ logger = structlog.get_logger()
 class SearchXNGClient:
     """A client for the SearchXNG API to perform Google searches."""
 
-    def __init__(self, host: Optional[str] = None):
+    def __init__(self, host: str | None = None) -> None:
         """Initialize SearchXNG client.
 
         Args:
@@ -41,7 +40,7 @@ class SearchXNGClient:
 
     async def search(
         self, query: str, filter_for_relevance: bool = False, max_results: int = 5
-    ) -> List[WebpageSnippet]:
+    ) -> list[WebpageSnippet]:
         """Perform a search using SearchXNG API.
 
         Args:
@@ -95,8 +94,3 @@ class SearchXNGClient:
         except Exception as e:
             logger.error("Unexpected error in SearchXNG search", error=str(e), query=query)
             raise SearchError(f"SearchXNG search failed: {e}") from e
-
-
-
-
-
```
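Both vendored clients (this one and serper_client.py below) apply the same typing modernization: built-in generics and PEP 604 unions replace `typing.List`/`typing.Optional`, which requires Python 3.10+ for `X | None` in annotations without `from __future__ import annotations`. A condensed before/after sketch with stand-in classes:

```python
# Before:
#   from typing import List, Optional
#   def __init__(self, host: Optional[str] = None): ...
#   async def search(...) -> List[WebpageSnippet]: ...

class WebpageSnippet:  # stand-in for the real pydantic model
    ...

class Client:
    def __init__(self, host: str | None = None) -> None:
        self.host = host

    async def search(self, query: str, max_results: int = 5) -> list[WebpageSnippet]:
        return []  # the real client calls the SearchXNG/Serper HTTP API here
```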
src/tools/vendored/serper_client.py CHANGED

```diff
@@ -4,7 +4,6 @@ Vendored and adapted from folder/tools/web_search.py.
 """
 
 import os
-from typing import List, Optional
 
 import aiohttp
 import structlog
@@ -18,7 +17,7 @@ logger = structlog.get_logger()
 class SerperClient:
     """A client for the Serper API to perform Google searches."""
 
-    def __init__(self, api_key: Optional[str] = None):
+    def __init__(self, api_key: str | None = None) -> None:
         """Initialize Serper client.
 
         Args:
@@ -40,7 +39,7 @@ class SerperClient:
 
     async def search(
         self, query: str, filter_for_relevance: bool = False, max_results: int = 5
-    ) -> List[WebpageSnippet]:
+    ) -> list[WebpageSnippet]:
         """Perform a Google search using Serper API.
 
         Args:
@@ -91,8 +90,3 @@ class SerperClient:
         except Exception as e:
             logger.error("Unexpected error in Serper search", error=str(e), query=query)
             raise SearchError(f"Serper search failed: {e}") from e
-
-
-
-
-
```
src/tools/vendored/web_search_core.py CHANGED

```diff
@@ -6,7 +6,6 @@ and HTML text extraction used by web search tools.
 
 import asyncio
 import ssl
-from typing import List, Optional
 
 import aiohttp
 import structlog
@@ -39,12 +38,10 @@ class WebpageSnippet(BaseModel):
 
     url: str = Field(description="The URL of the webpage")
     title: str = Field(description="The title of the webpage")
-    description: Optional[str] = Field(
-        default=None, description="A short description of the webpage"
-    )
+    description: str | None = Field(default=None, description="A short description of the webpage")
 
 
-async def scrape_urls(items: List[WebpageSnippet]) -> List[ScrapeResult]:
+async def scrape_urls(items: list[WebpageSnippet]) -> list[ScrapeResult]:
     """Fetch text content from provided URLs.
 
     Args:
@@ -65,7 +62,7 @@ async def scrape_urls(items: list[WebpageSnippet]) -> list[ScrapeResult]:
     results = await asyncio.gather(*tasks, return_exceptions=True)
 
     # Filter out errors and return successful results
-    successful_results: List[ScrapeResult] = []
+    successful_results: list[ScrapeResult] = []
     for result in results:
         if isinstance(result, ScrapeResult):
             successful_results.append(result)
@@ -127,7 +124,7 @@ async def fetch_and_process_url(
             url=item.url,
             title=item.title,
             description=item.description or "",
-            text=f"Error fetching content: {str(e)}",
+            text=f"Error fetching content: {e!s}",
         )
 
 
@@ -202,8 +199,3 @@ def is_valid_url(url: str) -> bool:
     if any(ext in url for ext in restricted_extensions):
         return False
     return True
-
-
-
-
-
```
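Besides the typing changes, this file keeps the gather-and-filter pattern in `scrape_urls`: exceptions come back as values via `return_exceptions=True`, and an `isinstance` check keeps only the successes. The `{e!s}` conversion flag is simply the f-string equivalent of `str(e)`. A self-contained sketch of that pattern with simplified stand-in types:

```python
# Sketch of the gather-and-filter pattern used by scrape_urls; ScrapeResult
# and fetch are simplified stand-ins for the module's real definitions.
import asyncio
from dataclasses import dataclass

@dataclass
class ScrapeResult:
    url: str
    text: str

async def fetch(url: str) -> ScrapeResult:
    if "bad" in url:
        raise ValueError(f"cannot fetch {url}")
    return ScrapeResult(url=url, text="page text")

async def scrape_all(urls: list[str]) -> list[ScrapeResult]:
    # Exceptions are returned as values instead of propagating.
    results = await asyncio.gather(*(fetch(u) for u in urls), return_exceptions=True)
    successful: list[ScrapeResult] = []
    for r in results:
        if isinstance(r, ScrapeResult):
            successful.append(r)
        else:
            # The !s conversion flag is equivalent to str(r).
            print(f"Error fetching content: {r!s}")
    return successful

asyncio.run(scrape_all(["https://ok.example", "https://bad.example"]))
```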
src/tools/web_search_factory.py CHANGED

```diff
@@ -57,21 +57,12 @@ def create_web_search_tool() -> SearchTool | None:
             return None
 
         else:
-            logger.warning(
-                f"Unknown web search provider '{provider}', falling back to DuckDuckGo"
-            )
+            logger.warning(f"Unknown web search provider '{provider}', falling back to DuckDuckGo")
             return WebSearchTool()
 
     except ConfigurationError as e:
         logger.error("Failed to create web search tool", error=str(e), provider=provider)
         return None
     except Exception as e:
-        logger.error(
-            "Unexpected error creating web search tool", error=str(e), provider=provider
-        )
+        logger.error("Unexpected error creating web search tool", error=str(e), provider=provider)
         return None
-
-
-
-
-
```
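The factory's control flow is unchanged by this commit: it returns a provider-specific tool and falls back to the DuckDuckGo-backed default when the configured provider is unrecognized. A condensed sketch of that dispatch pattern; the tool classes and provider handling here are simplified stand-ins for the module's real imports and config:

```python
# Condensed sketch of the provider-dispatch pattern in create_web_search_tool().
import structlog

logger = structlog.get_logger()

class WebSearchTool:          # DuckDuckGo-backed default (stand-in)
    ...

class SerperWebSearchTool:    # stand-in
    ...

def create_web_search_tool(provider: str = "duckduckgo") -> object | None:
    try:
        if provider == "serper":
            return SerperWebSearchTool()
        elif provider == "duckduckgo":
            return WebSearchTool()
        else:
            logger.warning(f"Unknown web search provider '{provider}', falling back to DuckDuckGo")
            return WebSearchTool()
    except Exception as e:
        logger.error("Unexpected error creating web search tool", error=str(e), provider=provider)
        return None
```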