Commit 26aec96
Parent(s): 4754c75

get_webpage_content handles PDF + LLM decide if query_image should use reasoning model

Files changed:
- langgraph_dir/agent.py +3 -2
- langgraph_dir/custom_tools.py +27 -13
langgraph_dir/agent.py CHANGED

@@ -28,7 +28,7 @@ class LangGraphAgent:
         community_tools = [
             BraveSearch.from_api_key(  # Web search (more performant than DuckDuckGo)
                 api_key=os.getenv("BRAVE_SEARCH_API_KEY"),  # needs BRAVE_SEARCH_API_KEY in env
-                search_kwargs={"count":
+                search_kwargs={"count": 5}),
         ]
         custom_tools = [
             multiply, add, subtract, divide, modulus, power,  # Basic arithmetic

@@ -124,7 +124,8 @@ class LangGraphAgent:

         # Invoke
         messages = [HumanMessage(content=question)]
-        messages = self.agent.invoke({"messages": messages}
+        messages = self.agent.invoke({"messages": messages},
+                                     {"recursion_limit": 30})  # maximum number of steps before hitting a stop condition
         for m in messages["messages"]:
             m.pretty_print()
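The second positional argument to a compiled LangGraph graph's invoke() is the standard runnable config, and recursion_limit is the documented key that caps how many steps a run may take before LangGraph raises GraphRecursionError. A minimal sketch of the new call pattern, assuming agent is the compiled graph and question is a plain string (both names are placeholders):

    from langchain_core.messages import HumanMessage

    # Ask the agent and cap the run at 30 graph steps (LLM turns + tool calls).
    result = agent.invoke(
        {"messages": [HumanMessage(content=question)]},
        {"recursion_limit": 30},  # raises GraphRecursionError when exceeded
    )
    # The returned state carries the full message history; print the final reply.
    result["messages"][-1].pretty_print()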
langgraph_dir/custom_tools.py CHANGED

@@ -6,6 +6,8 @@ from bs4 import BeautifulSoup
 from markdownify import markdownify as md
 from langchain_core.tools import tool, Tool
 from langchain_experimental.utilities import PythonREPL
+from pypdf import PdfReader
+from io import BytesIO


 # --- Basic operations --- #

@@ -81,12 +83,13 @@ def power(a: float, b: float) -> float:
 # --- Functions --- #

 @tool
-def query_image(query: str, image_url: str) -> str:
+def query_image(query: str, image_url: str, need_reasoning: bool = False) -> str:
     """Ask anything about an image using a Vision Language Model

     Args:
-        query (str):
-        image_url (str):
+        query (str): The query about the image, e.g. how many persons are on the image?
+        image_url (str): The URL to the image
+        need_reasoning (bool): Set to True for complex query that require a reasoning model to answer properly. Set to False otherwise.
     """

     # PROVIDER = 'huggingface'

@@ -120,11 +123,13 @@ def query_image(query: str, image_url: str) -> str:
         return completion.choices[0].message

     elif PROVIDER == 'openai':
-
-
+        if need_reasoning:
+            model_name = "o4-mini"
+        else:
+            model_name = "gpt-4.1-mini"
         client = OpenAI()
         response = client.responses.create(
-            model=
+            model=model_name,
             input=[{
                 "role": "user",
                 "content": [
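With this change the calling LLM decides per request whether the vision call should go through a reasoning model: need_reasoning is exposed in the tool schema, and the OpenAI branch picks o4-mini when it is True and gpt-4.1-mini otherwise. A rough usage sketch of the tool invoked directly (placeholder image URLs; assumes PROVIDER == 'openai' and OPENAI_API_KEY set):

    # Simple perception question -> default vision model
    query_image.invoke({
        "query": "How many persons are on the image?",
        "image_url": "https://example.com/photo.jpg",  # placeholder
        "need_reasoning": False,
    })

    # Multi-step visual question -> reasoning model
    query_image.invoke({
        "query": "What is the best next move for white in this chess position?",
        "image_url": "https://example.com/board.png",  # placeholder
        "need_reasoning": True,
    })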
@@ -196,14 +201,23 @@ def get_webpage_content(page_url: str) -> str:
     """
     try:
         r = requests.get(page_url)
-
-
-
-
+        r.raise_for_status()
+        text = ""
+        # special case if page is a PDF file
+        if r.headers.get('Content-Type', '') == 'application/pdf':
+            pdf_file = BytesIO(r.content)
+            reader = PdfReader(pdf_file)
+            for page in reader.pages:
+                text += page.extract_text()
         else:
-
-
-
+            soup = BeautifulSoup((r.text), 'html.parser')
+            if soup.body:
+                # convert to markdown
+                text = md(str(soup.body))
+            else:
+                # return the raw content
+                text = r.text
+        return text
     except Exception as e:
         return f"get_webpage_content failed: {e}"
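The new branch inspects the response's Content-Type and, for PDFs, extracts text page by page with pypdf instead of running the HTML-to-markdown path. Note that the committed check compares Content-Type for exact equality, so a header such as "application/pdf; charset=binary" would still fall through to the BeautifulSoup branch; stripping the parameters first is a slightly more forgiving variant, as in this standalone sketch (placeholder URL; requests and pypdf assumed installed):

    import requests
    from io import BytesIO
    from pypdf import PdfReader

    url = "https://example.com/report.pdf"  # placeholder PDF URL
    r = requests.get(url)
    r.raise_for_status()
    # Ignore any "; charset=..." parameters when checking the media type.
    if r.headers.get("Content-Type", "").split(";")[0].strip() == "application/pdf":
        # Read the PDF from memory and concatenate the text of every page.
        text = "".join(page.extract_text() or "" for page in PdfReader(BytesIO(r.content)).pages)
    else:
        text = r.text  # not a PDF: fall back to the raw body
    print(text[:500])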