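# NOTE: The sibling module `.tool` is not part of this listing. A minimal,
# hypothetical sketch of the interfaces WebFetchTool relies on, inferred only
# from how they are used below (the real definitions may differ):
#
#     from abc import ABC, abstractmethod
#     from dataclasses import dataclass, field
#     from typing import Any, Dict
#
#     @dataclass
#     class ToolResult:
#         title: str
#         output: str
#         metadata: Dict[str, Any] = field(default_factory=dict)
#
#     class ToolContext:
#         """Opaque per-invocation context; real fields unknown."""
#
#     class BaseTool(ABC):
#         @abstractmethod
#         async def execute(self, args: Dict[str, Any], ctx: ToolContext) -> ToolResult: ...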
from typing import Dict, Any

import httpx

from .tool import BaseTool, ToolContext, ToolResult


class WebFetchTool(BaseTool):
    """Fetch a URL and return its content as readable text, markdown, or raw HTML."""

    def id(self) -> str:
        return "webfetch"

    def description(self) -> str:
        return (
            "Fetch content from a URL and convert it to readable text or markdown. "
            "Use this when you need to read the content of a specific web page."
        )

    def parameters(self) -> Dict[str, Any]:
        # JSON Schema for the arguments the caller may pass.
        return {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to fetch",
                },
                "format": {
                    "type": "string",
                    "enum": ["text", "markdown", "html"],
                    "description": "Output format (default: markdown)",
                    "default": "markdown",
                },
            },
            "required": ["url"],
        }
    async def execute(self, args: Dict[str, Any], ctx: ToolContext) -> ToolResult:
        url = args["url"]
        output_format = args.get("format", "markdown")

        # Default to HTTPS when the scheme is omitted.
        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        try:
            async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
                response = await client.get(
                    url,
                    headers={
                        "User-Agent": "Mozilla/5.0 (compatible; OpenCode-API/1.0)"
                    },
                )
                response.raise_for_status()
                html_content = response.text

            if output_format == "html":
                content = html_content[:50000]  # limit size
            elif output_format == "text":
                content = self._html_to_text(html_content)
            else:  # markdown
                content = self._html_to_markdown(html_content)

            if len(content) > 50000:
                content = content[:50000] + "\n\n[Content truncated...]"

            return ToolResult(
                title=f"Fetched: {url}",
                output=content,
                metadata={"url": url, "format": output_format, "length": len(content)},
            )
        except httpx.HTTPStatusError as e:
            return ToolResult(
                title=f"Fetch failed: {url}",
                output=f"HTTP Error {e.response.status_code}: {e.response.reason_phrase}",
                metadata={"error": "http_error", "status_code": e.response.status_code},
            )
        except httpx.RequestError as e:
            return ToolResult(
                title=f"Fetch failed: {url}",
                output=f"Request error: {e}",
                metadata={"error": "request_error"},
            )
        except Exception as e:
            return ToolResult(
                title=f"Fetch failed: {url}",
                output=f"Error: {e}",
                metadata={"error": str(e)},
            )
    def _html_to_text(self, html: str) -> str:
        # Prefer BeautifulSoup when installed; fall back to a crude regex strip.
        try:
            from bs4 import BeautifulSoup

            soup = BeautifulSoup(html, "html.parser")
            # Drop non-content elements before extracting text.
            for tag in soup(["script", "style", "nav", "footer", "header"]):
                tag.decompose()
            return soup.get_text(separator="\n", strip=True)
        except ImportError:
            import re

            text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r"<[^>]+>", " ", text)
            text = re.sub(r"\s+", " ", text)
            return text.strip()
    def _html_to_markdown(self, html: str) -> str:
        # html2text is optional; degrade to plain-text extraction if it is missing.
        try:
            import html2text

            h = html2text.HTML2Text()
            h.ignore_links = False  # keep hyperlinks in the markdown output
            h.ignore_images = True  # image markup is rarely useful downstream
            h.body_width = 0  # disable hard line wrapping
            return h.handle(html)
        except ImportError:
            return self._html_to_text(html)
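

if __name__ == "__main__":
    # Hedged usage sketch, not part of the tool itself: invoke WebFetchTool
    # directly. Assumes ToolContext() can be constructed with no arguments;
    # adjust to the real API if it differs.
    import asyncio

    async def _demo() -> None:
        tool = WebFetchTool()
        result = await tool.execute(
            {"url": "example.com", "format": "markdown"},  # scheme is added automatically
            ToolContext(),
        )
        print(result.title)
        print(result.output[:500])

    asyncio.run(_demo())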