Working Ctrl F tool
Browse files
- app.py +9 -4
- e2bqwen.py +19 -0
app.py
CHANGED
|
@@ -585,6 +585,7 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 585 |
examples=[
|
| 586 |
"Check the commuting time between Bern and Zurich on Google maps",
|
| 587 |
"Write 'Hello World' in a text editor",
|
|
|
|
| 588 |
"Search a flight Rome - Berlin for tomorrow",
|
| 589 |
"What' s the name of the pond just south of Château de Fontainebleau in Google maps?",
|
| 590 |
"Go generate a picture of the Golden Gate bridge on a FLUX1.dev space",
|
|
@@ -657,7 +658,6 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 657 |
label="Header"
|
| 658 |
)
|
| 659 |
|
| 660 |
-
stop_btn = gr.Button("Stop the agent!", variant="stop")
|
| 661 |
|
| 662 |
chatbot_display = gr.Chatbot(
|
| 663 |
elem_id="chatbot",
|
|
@@ -672,6 +672,8 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 672 |
|
| 673 |
agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=None, name="ok", description="ok"))
|
| 674 |
|
|
|
|
|
|
|
| 675 |
def read_log_content(log_file, tail=4):
|
| 676 |
"""Read the contents of a log file for a specific session"""
|
| 677 |
if not log_file:
|
|
@@ -696,7 +698,7 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 696 |
return update_html(True, request)
|
| 697 |
|
| 698 |
def reactivate_stop_btn():
|
| 699 |
-
return gr.Button("Stop the agent!", variant="stop")
|
| 700 |
|
| 701 |
is_interactive = gr.Checkbox(value=True, visible=False)
|
| 702 |
|
|
@@ -718,8 +720,11 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
|
|
| 718 |
)
|
| 719 |
|
| 720 |
def interrupt_agent(session_state):
|
| 721 |
-
session_state["agent"].
|
| 722 |
-
|
|
|
|
|
|
|
|
|
|
| 723 |
|
| 724 |
stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[stop_btn])
|
| 725 |
|
|
|
|
| 585 |
examples=[
|
| 586 |
"Check the commuting time between Bern and Zurich on Google maps",
|
| 587 |
"Write 'Hello World' in a text editor",
|
| 588 |
+
"When was Temple Grandin introduced to the American Academy of Arts and Sciences, according to Wikipedia?",
|
| 589 |
"Search a flight Rome - Berlin for tomorrow",
|
| 590 |
"What' s the name of the pond just south of Château de Fontainebleau in Google maps?",
|
| 591 |
"Go generate a picture of the Golden Gate bridge on a FLUX1.dev space",
|
|
|
|
| 658 |
label="Header"
|
| 659 |
)
|
| 660 |
|
|
|
|
| 661 |
|
| 662 |
chatbot_display = gr.Chatbot(
|
| 663 |
elem_id="chatbot",
|
|
|
|
| 672 |
|
| 673 |
agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=None, name="ok", description="ok"))
|
| 674 |
|
| 675 |
+
stop_btn = gr.Button("Stop the agent!", variant="huggingface")
|
| 676 |
+
|
| 677 |
def read_log_content(log_file, tail=4):
|
| 678 |
"""Read the contents of a log file for a specific session"""
|
| 679 |
if not log_file:
|
|
|
|
| 698 |
return update_html(True, request)
|
| 699 |
|
| 700 |
def reactivate_stop_btn():
|
| 701 |
+
return gr.Button("Stop the agent!", variant="huggingface")
|
| 702 |
|
| 703 |
is_interactive = gr.Checkbox(value=True, visible=False)
|
| 704 |
|
|
|
|
| 720 |
)
|
| 721 |
|
| 722 |
def interrupt_agent(session_state):
|
| 723 |
+
if not session_state["agent"].interrupt_switch:
|
| 724 |
+
session_state["agent"].interrupt()
|
| 725 |
+
return gr.Button("Stopping agent... (could take time)", variant="secondary")
|
| 726 |
+
else:
|
| 727 |
+
return gr.Button("Stop the agent!", variant="huggingface")
|
| 728 |
|
| 729 |
stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[stop_btn])
|
| 730 |
|
e2bqwen.py
CHANGED
|
@@ -356,6 +356,24 @@ class E2BVisionAgent(CodeAgent):
|
|
| 356 |
self.logger.log(f"Opening URL: {url}")
|
| 357 |
return f"Opened URL: {url}"
|
| 358 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
# Register the tools
|
| 361 |
self.tools["click"] = click
|
|
@@ -369,6 +387,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 369 |
self.tools["open_url"] = open_url
|
| 370 |
self.tools["go_back"] = go_back
|
| 371 |
self.tools["drag_and_drop"] = drag_and_drop
|
|
|
|
| 372 |
|
| 373 |
|
| 374 |
def take_screenshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|
|
|
|
| 356 |
self.logger.log(f"Opening URL: {url}")
|
| 357 |
return f"Opened URL: {url}"
|
| 358 |
|
| 359 |
+
@tool
|
| 360 |
+
def find_on_page_ctrl_f(search_string: str) -> str:
|
| 361 |
+
"""
|
| 362 |
+
Scroll the viewport to the first occurrence of the search string. This is equivalent to Ctrl+F.
|
| 363 |
+
Args:
|
| 364 |
+
search_string: The string to search for on the page.
|
| 365 |
+
"""
|
| 366 |
+
self.desktop.press(["ctrl", "f"])
|
| 367 |
+
time.sleep(0.3)
|
| 368 |
+
clean_text = normalize_text(search_string)
|
| 369 |
+
self.desktop.write(clean_text, delay_in_ms=75)
|
| 370 |
+
time.sleep(0.3)
|
| 371 |
+
self.desktop.press("enter")
|
| 372 |
+
time.sleep(0.3)
|
| 373 |
+
self.desktop.press("esc")
|
| 374 |
+
output_message = f"Scrolled to the first occurrence of '{clean_text}'"
|
| 375 |
+
self.logger.log(output_message)
|
| 376 |
+
return output_message
|
| 377 |
|
| 378 |
# Register the tools
|
| 379 |
self.tools["click"] = click
|
|
|
|
| 387 |
self.tools["open_url"] = open_url
|
| 388 |
self.tools["go_back"] = go_back
|
| 389 |
self.tools["drag_and_drop"] = drag_and_drop
|
| 390 |
+
self.tools["find_on_page_ctrl_f"] = find_on_page_ctrl_f
|
| 391 |
|
| 392 |
|
| 393 |
def take_screenshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|