Switch to agent logging
Browse files
- app.py +78 -52
- e2bqwen.py +14 -38
app.py
CHANGED
|
@@ -35,25 +35,17 @@ model = QwenVLAPIModel(
|
|
| 35 |
|
| 36 |
|
| 37 |
custom_css = """
|
| 38 |
-
:root {
|
| 39 |
-
--body-background-fill: black!important;
|
| 40 |
-
--background-fill-secondary: #fad391!important;
|
| 41 |
-
--body-text-color: #f59e0b!important;
|
| 42 |
-
--block-text-color: #f59e0b!important;
|
| 43 |
-
}
|
| 44 |
.sandbox-container {
|
| 45 |
position: relative;
|
| 46 |
width: 910px;
|
| 47 |
overflow: hidden;
|
| 48 |
margin: auto;
|
| 49 |
}
|
| 50 |
-
.
|
| 51 |
height: 800px;
|
| 52 |
}
|
| 53 |
-
.minimal {
|
| 54 |
-
height: 700px;
|
| 55 |
-
}
|
| 56 |
.sandbox-frame {
|
|
|
|
| 57 |
position: absolute;
|
| 58 |
top: 0;
|
| 59 |
left: 0;
|
|
@@ -61,9 +53,6 @@ custom_css = """
|
|
| 61 |
height: 800px;
|
| 62 |
pointer-events:none;
|
| 63 |
}
|
| 64 |
-
.minimal .sandbox-frame {
|
| 65 |
-
display: none;
|
| 66 |
-
}
|
| 67 |
|
| 68 |
.sandbox-iframe, .bsod-image {
|
| 69 |
position: absolute;
|
|
@@ -72,19 +61,6 @@ custom_css = """
|
|
| 72 |
border: 4px solid #444444;
|
| 73 |
transform-origin: 0 0;
|
| 74 |
}
|
| 75 |
-
.cyberpunk .sandbox-iframe, .bsod-image {
|
| 76 |
-
/* top: 73px; */
|
| 77 |
-
top: 99px;
|
| 78 |
-
/* left: 74px; */
|
| 79 |
-
left: 110px;
|
| 80 |
-
}
|
| 81 |
-
.cyberpunk .sandbox-iframe {
|
| 82 |
-
transform: scale(0.535);
|
| 83 |
-
/* transform: scale(0.59); */
|
| 84 |
-
}
|
| 85 |
-
.minimal .sandbox-iframe {
|
| 86 |
-
transform: scale(0.65);
|
| 87 |
-
}
|
| 88 |
|
| 89 |
/* Colored label for task textbox */
|
| 90 |
.primary-color-label label span {
|
|
@@ -100,11 +76,7 @@ custom_css = """
|
|
| 100 |
flex-align:center;
|
| 101 |
z-index: 100;
|
| 102 |
}
|
| 103 |
-
|
| 104 |
-
position: absolute;
|
| 105 |
-
bottom: 86px;
|
| 106 |
-
left: 355px;
|
| 107 |
-
}
|
| 108 |
.status-indicator {
|
| 109 |
width: 15px;
|
| 110 |
height: 15px;
|
|
@@ -117,9 +89,7 @@ custom_css = """
|
|
| 117 |
padding: 0 10px;
|
| 118 |
text-shadow: none;
|
| 119 |
}
|
| 120 |
-
|
| 121 |
-
color: #fed244;
|
| 122 |
-
}
|
| 123 |
.status-interactive {
|
| 124 |
background-color: #2ecc71;
|
| 125 |
animation: blink 2s infinite;
|
|
@@ -186,7 +156,7 @@ sandbox_html_template = """
|
|
| 186 |
<style>
|
| 187 |
@import url('https://fonts.googleapis.com/css2?family=Oxanium:wght@200..800&display=swap');
|
| 188 |
</style>
|
| 189 |
-
<div class="sandbox-container {theme}">
|
| 190 |
<div class="status-bar">
|
| 191 |
<div class="status-indicator {status_class}"></div>
|
| 192 |
<div class="status-text">{status_text}</div>
|
|
@@ -202,8 +172,9 @@ sandbox_html_template = """
|
|
| 202 |
</div>
|
| 203 |
""".replace("<<WIDTH>>", str(WIDTH+15)).replace("<<HEIGHT>>", str(HEIGHT+10))
|
| 204 |
|
| 205 |
-
custom_js = """
|
| 206 |
-
|
|
|
|
| 207 |
// Function to check if sandbox is timing out
|
| 208 |
const checkSandboxTimeout = function() {
|
| 209 |
const timeElement = document.getElementById('sandbox-creation-time');
|
|
@@ -427,7 +398,7 @@ def get_or_create_sandbox(session_hash):
|
|
| 427 |
|
| 428 |
return desktop
|
| 429 |
|
| 430 |
-
def update_html(interactive_mode: bool, theme_checkbox: bool, request: gr.Request):
|
| 431 |
session_hash = request.session_hash
|
| 432 |
desktop = get_or_create_sandbox(session_hash)
|
| 433 |
auth_key = desktop.stream.get_auth_key()
|
|
@@ -443,7 +414,6 @@ def update_html(interactive_mode: bool, theme_checkbox: bool, request: gr.Reques
|
|
| 443 |
creation_time = SANDBOX_METADATA[session_hash]['created_at'] if session_hash in SANDBOX_METADATA else time.time()
|
| 444 |
|
| 445 |
sandbox_html_content = sandbox_html_template.format(
|
| 446 |
-
theme="cyberpunk" if theme_checkbox else "minimal",
|
| 447 |
stream_url=stream_url,
|
| 448 |
status_class=status_class,
|
| 449 |
status_text=status_text,
|
|
@@ -483,7 +453,7 @@ def initialize_session(interactive_mode, request: gr.Request):
|
|
| 483 |
with open(log_path, 'w') as f:
|
| 484 |
f.write(f"Ready to go...\n")
|
| 485 |
# Return HTML and session hash
|
| 486 |
-
return update_html(interactive_mode,
|
| 487 |
|
| 488 |
|
| 489 |
# Function to read log content that gets the path from session hash
|
|
@@ -564,7 +534,7 @@ class EnrichedGradioUI(GradioUI):
|
|
| 564 |
finally:
|
| 565 |
upload_to_hf_and_remove(data_dir)
|
| 566 |
|
| 567 |
-
theme = gr.themes.Default(font=[
|
| 568 |
|
| 569 |
# Create a Gradio app with Blocks
|
| 570 |
with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as demo:
|
|
@@ -576,7 +546,6 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as de
|
|
| 576 |
with gr.Row():
|
| 577 |
sandbox_html = gr.HTML(
|
| 578 |
value=sandbox_html_template.format(
|
| 579 |
-
theme="cyberpunk",
|
| 580 |
stream_url="",
|
| 581 |
status_class="status-interactive",
|
| 582 |
status_text="Interactive"
|
|
@@ -613,7 +582,64 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as de
|
|
| 613 |
)
|
| 614 |
|
| 615 |
update_btn = gr.Button("Let's go!", variant="primary")
|
| 616 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
|
| 618 |
chatbot_display = gr.Chatbot(
|
| 619 |
label="Agent's execution logs",
|
|
@@ -650,35 +676,35 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as de
|
|
| 650 |
# Function to set view-only mode
|
| 651 |
def clear_and_set_view_only(task_input, request: gr.Request):
|
| 652 |
# First clear the results, then set view-only mode
|
| 653 |
-
return "", update_html(False,
|
| 654 |
|
| 655 |
def set_interactive(request: gr.Request):
|
| 656 |
-
return update_html(True,
|
| 657 |
|
| 658 |
is_interactive = gr.Checkbox(value=True, visible=False)
|
| 659 |
|
| 660 |
# Chain the events
|
| 661 |
view_only_event = update_btn.click(
|
| 662 |
fn=clear_and_set_view_only,
|
| 663 |
-
inputs=[task_input
|
| 664 |
outputs=[results_output, sandbox_html, results_container]
|
| 665 |
)
|
| 666 |
view_only_event.then(agent_ui.interact_with_agent, [task_input, stored_messages, session_state, session_hash_state], [chatbot_display]).then(
|
| 667 |
fn=set_interactive,
|
| 668 |
-
inputs=[
|
| 669 |
outputs=sandbox_html
|
| 670 |
)
|
| 671 |
|
| 672 |
-
theme_checkbox.change(
|
| 673 |
-
fn=update_html,
|
| 674 |
-
inputs=[is_interactive, theme_checkbox],
|
| 675 |
-
outputs=[sandbox_html]
|
| 676 |
-
)
|
| 677 |
|
| 678 |
demo.load(
|
| 679 |
fn=initialize_session,
|
| 680 |
inputs=[is_interactive],
|
| 681 |
outputs=[sandbox_html, session_hash_state],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
)
|
| 683 |
|
| 684 |
# Launch the app
|
|
|
|
| 35 |
|
| 36 |
|
| 37 |
custom_css = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
.sandbox-container {
|
| 39 |
position: relative;
|
| 40 |
width: 910px;
|
| 41 |
overflow: hidden;
|
| 42 |
margin: auto;
|
| 43 |
}
|
| 44 |
+
.sandbox-container {
|
| 45 |
height: 800px;
|
| 46 |
}
|
|
|
|
|
|
|
|
|
|
| 47 |
.sandbox-frame {
|
| 48 |
+
display: none;
|
| 49 |
position: absolute;
|
| 50 |
top: 0;
|
| 51 |
left: 0;
|
|
|
|
| 53 |
height: 800px;
|
| 54 |
pointer-events:none;
|
| 55 |
}
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
.sandbox-iframe, .bsod-image {
|
| 58 |
position: absolute;
|
|
|
|
| 61 |
border: 4px solid #444444;
|
| 62 |
transform-origin: 0 0;
|
| 63 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
/* Colored label for task textbox */
|
| 66 |
.primary-color-label label span {
|
|
|
|
| 76 |
flex-align:center;
|
| 77 |
z-index: 100;
|
| 78 |
}
|
| 79 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
.status-indicator {
|
| 81 |
width: 15px;
|
| 82 |
height: 15px;
|
|
|
|
| 89 |
padding: 0 10px;
|
| 90 |
text-shadow: none;
|
| 91 |
}
|
| 92 |
+
|
|
|
|
|
|
|
| 93 |
.status-interactive {
|
| 94 |
background-color: #2ecc71;
|
| 95 |
animation: blink 2s infinite;
|
|
|
|
| 156 |
<style>
|
| 157 |
@import url('https://fonts.googleapis.com/css2?family=Oxanium:wght@200..800&display=swap');
|
| 158 |
</style>
|
| 159 |
+
<div class="sandbox-container">
|
| 160 |
<div class="status-bar">
|
| 161 |
<div class="status-indicator {status_class}"></div>
|
| 162 |
<div class="status-text">{status_text}</div>
|
|
|
|
| 172 |
</div>
|
| 173 |
""".replace("<<WIDTH>>", str(WIDTH+15)).replace("<<HEIGHT>>", str(HEIGHT+10))
|
| 174 |
|
| 175 |
+
custom_js = """function() {
|
| 176 |
+
document.body.classList.toggle('dark');
|
| 177 |
+
|
| 178 |
// Function to check if sandbox is timing out
|
| 179 |
const checkSandboxTimeout = function() {
|
| 180 |
const timeElement = document.getElementById('sandbox-creation-time');
|
|
|
|
| 398 |
|
| 399 |
return desktop
|
| 400 |
|
| 401 |
+
def update_html(interactive_mode: bool, request: gr.Request):
|
| 402 |
session_hash = request.session_hash
|
| 403 |
desktop = get_or_create_sandbox(session_hash)
|
| 404 |
auth_key = desktop.stream.get_auth_key()
|
|
|
|
| 414 |
creation_time = SANDBOX_METADATA[session_hash]['created_at'] if session_hash in SANDBOX_METADATA else time.time()
|
| 415 |
|
| 416 |
sandbox_html_content = sandbox_html_template.format(
|
|
|
|
| 417 |
stream_url=stream_url,
|
| 418 |
status_class=status_class,
|
| 419 |
status_text=status_text,
|
|
|
|
| 453 |
with open(log_path, 'w') as f:
|
| 454 |
f.write(f"Ready to go...\n")
|
| 455 |
# Return HTML and session hash
|
| 456 |
+
return update_html(interactive_mode, request), session_hash
|
| 457 |
|
| 458 |
|
| 459 |
# Function to read log content that gets the path from session hash
|
|
|
|
| 534 |
finally:
|
| 535 |
upload_to_hf_and_remove(data_dir)
|
| 536 |
|
| 537 |
+
theme = gr.themes.Default(font=["sans-serif"], primary_hue="amber", secondary_hue="blue")
|
| 538 |
|
| 539 |
# Create a Gradio app with Blocks
|
| 540 |
with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as demo:
|
|
|
|
| 546 |
with gr.Row():
|
| 547 |
sandbox_html = gr.HTML(
|
| 548 |
value=sandbox_html_template.format(
|
|
|
|
| 549 |
stream_url="",
|
| 550 |
status_class="status-interactive",
|
| 551 |
status_text="Interactive"
|
|
|
|
| 582 |
)
|
| 583 |
|
| 584 |
update_btn = gr.Button("Let's go!", variant="primary")
|
| 585 |
+
|
| 586 |
+
cyberpunk_toggle = gr.Checkbox(label="Go Cyberpunk!", value=False)
|
| 587 |
+
|
| 588 |
+
def apply_theme(cyberpunk_mode: bool):
|
| 589 |
+
if cyberpunk_mode:
|
| 590 |
+
return """
|
| 591 |
+
<style>
|
| 592 |
+
:root {
|
| 593 |
+
--body-background-fill: black!important;
|
| 594 |
+
--background-fill-secondary: #fad391!important;
|
| 595 |
+
--body-text-color: #f59e0b!important;
|
| 596 |
+
--block-text-color: #f59e0b!important;
|
| 597 |
+
--font: Oxanium;
|
| 598 |
+
}
|
| 599 |
+
.sandbox-frame {
|
| 600 |
+
display: block!important;
|
| 601 |
+
}
|
| 602 |
+
|
| 603 |
+
.sandbox-iframe, .bsod-image {
|
| 604 |
+
/* top: 73px; */
|
| 605 |
+
top: 99px;
|
| 606 |
+
/* left: 74px; */
|
| 607 |
+
left: 110px;
|
| 608 |
+
}
|
| 609 |
+
.sandbox-iframe {
|
| 610 |
+
transform: scale(0.535);
|
| 611 |
+
/* transform: scale(0.59); */
|
| 612 |
+
}
|
| 613 |
+
|
| 614 |
+
.status-bar {
|
| 615 |
+
position: absolute;
|
| 616 |
+
bottom: 86px;
|
| 617 |
+
left: 355px;
|
| 618 |
+
}
|
| 619 |
+
.status-text {
|
| 620 |
+
color: #fed244;
|
| 621 |
+
}
|
| 622 |
+
</style>
|
| 623 |
+
"""
|
| 624 |
+
else:
|
| 625 |
+
return """
|
| 626 |
+
<style>
|
| 627 |
+
.sandbox-container {
|
| 628 |
+
height: 700px!important;
|
| 629 |
+
}
|
| 630 |
+
.sandbox-iframe {
|
| 631 |
+
transform: scale(0.65);
|
| 632 |
+
}
|
| 633 |
+
</style>
|
| 634 |
+
"""
|
| 635 |
+
|
| 636 |
+
# Hidden HTML element to inject CSS dynamically
|
| 637 |
+
theme_styles = gr.HTML(apply_theme(False), visible=False)
|
| 638 |
+
cyberpunk_toggle.change(
|
| 639 |
+
fn=apply_theme,
|
| 640 |
+
inputs=[cyberpunk_toggle],
|
| 641 |
+
outputs=[theme_styles]
|
| 642 |
+
)
|
| 643 |
|
| 644 |
chatbot_display = gr.Chatbot(
|
| 645 |
label="Agent's execution logs",
|
|
|
|
| 676 |
# Function to set view-only mode
|
| 677 |
def clear_and_set_view_only(task_input, request: gr.Request):
|
| 678 |
# First clear the results, then set view-only mode
|
| 679 |
+
return "", update_html(False, request), gr.update(visible=False)
|
| 680 |
|
| 681 |
def set_interactive(request: gr.Request):
|
| 682 |
+
return update_html(True, request)
|
| 683 |
|
| 684 |
is_interactive = gr.Checkbox(value=True, visible=False)
|
| 685 |
|
| 686 |
# Chain the events
|
| 687 |
view_only_event = update_btn.click(
|
| 688 |
fn=clear_and_set_view_only,
|
| 689 |
+
inputs=[task_input],
|
| 690 |
outputs=[results_output, sandbox_html, results_container]
|
| 691 |
)
|
| 692 |
view_only_event.then(agent_ui.interact_with_agent, [task_input, stored_messages, session_state, session_hash_state], [chatbot_display]).then(
|
| 693 |
fn=set_interactive,
|
| 694 |
+
inputs=[],
|
| 695 |
outputs=sandbox_html
|
| 696 |
)
|
| 697 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
|
| 699 |
demo.load(
|
| 700 |
fn=initialize_session,
|
| 701 |
inputs=[is_interactive],
|
| 702 |
outputs=[sandbox_html, session_hash_state],
|
| 703 |
+
js="""
|
| 704 |
+
() => {
|
| 705 |
+
document.body.classList.toggle('dark');
|
| 706 |
+
}
|
| 707 |
+
""",
|
| 708 |
)
|
| 709 |
|
| 710 |
# Launch the app
|
e2bqwen.py
CHANGED
|
@@ -18,26 +18,6 @@ from smolagents import CodeAgent, tool, HfApiModel
|
|
| 18 |
from smolagents.memory import ActionStep
|
| 19 |
from smolagents.models import ChatMessage, MessageRole, Model
|
| 20 |
from smolagents.monitoring import LogLevel
|
| 21 |
-
|
| 22 |
-
def write_to_console_log(log_file_path, message):
|
| 23 |
-
"""
|
| 24 |
-
Appends a message to the specified log file with a newline character.
|
| 25 |
-
|
| 26 |
-
Parameters:
|
| 27 |
-
log_file_path (str): Path to the log file
|
| 28 |
-
message (str): Message to append to the log file
|
| 29 |
-
"""
|
| 30 |
-
if log_file_path is None:
|
| 31 |
-
return False
|
| 32 |
-
try:
|
| 33 |
-
# Open the file in append mode
|
| 34 |
-
with open(log_file_path, 'a') as log_file:
|
| 35 |
-
# Write the message followed by a newline
|
| 36 |
-
log_file.write(f"{message}\n")
|
| 37 |
-
return True
|
| 38 |
-
except Exception as e:
|
| 39 |
-
print(f"Error writing to log file: {str(e)}")
|
| 40 |
-
return False
|
| 41 |
|
| 42 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
| 43 |
On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
|
|
@@ -125,13 +105,10 @@ class E2BVisionAgent(CodeAgent):
|
|
| 125 |
self.desktop = desktop
|
| 126 |
self.data_dir = data_dir
|
| 127 |
self.log_path = log_file
|
| 128 |
-
write_to_console_log(self.log_path, "Booting agent...")
|
| 129 |
self.planning_interval = planning_interval
|
| 130 |
# Initialize Desktop
|
| 131 |
self.width, self.height = self.desktop.get_screen_size()
|
| 132 |
print(f"Screen size: {self.width}x{self.height}")
|
| 133 |
-
write_to_console_log(self.log_path, f"Desktop resolution detected: {self.width}x{self.height}")
|
| 134 |
-
|
| 135 |
|
| 136 |
# Set up temp directory
|
| 137 |
os.makedirs(self.data_dir, exist_ok=True)
|
|
@@ -157,9 +134,9 @@ class E2BVisionAgent(CodeAgent):
|
|
| 157 |
|
| 158 |
# Add default tools
|
| 159 |
self._setup_desktop_tools()
|
| 160 |
-
|
| 161 |
self.step_callbacks.append(self.take_snapshot_callback)
|
| 162 |
-
|
| 163 |
|
| 164 |
def _setup_desktop_tools(self):
|
| 165 |
"""Register all desktop tools"""
|
|
@@ -173,7 +150,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 173 |
"""
|
| 174 |
self.desktop.move_mouse(x, y)
|
| 175 |
self.desktop.left_click()
|
| 176 |
-
|
| 177 |
return f"Clicked at coordinates ({x}, {y})"
|
| 178 |
|
| 179 |
@tool
|
|
@@ -186,7 +163,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 186 |
"""
|
| 187 |
self.desktop.move_mouse(x, y)
|
| 188 |
self.desktop.right_click()
|
| 189 |
-
|
| 190 |
return f"Right-clicked at coordinates ({x}, {y})"
|
| 191 |
|
| 192 |
@tool
|
|
@@ -199,7 +176,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 199 |
"""
|
| 200 |
self.desktop.move_mouse(x, y)
|
| 201 |
self.desktop.double_click()
|
| 202 |
-
|
| 203 |
return f"Double-clicked at coordinates ({x}, {y})"
|
| 204 |
|
| 205 |
@tool
|
|
@@ -211,7 +188,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 211 |
y: The y coordinate (vertical position)
|
| 212 |
"""
|
| 213 |
self.desktop.move_mouse(x, y)
|
| 214 |
-
|
| 215 |
return f"Moved mouse to coordinates ({x}, {y})"
|
| 216 |
|
| 217 |
@tool
|
|
@@ -223,7 +200,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 223 |
delay_in_ms: Delay between keystrokes in milliseconds
|
| 224 |
"""
|
| 225 |
self.desktop.write(text, delay_in_ms=delay_in_ms)
|
| 226 |
-
|
| 227 |
return f"Typed text: '{text}'"
|
| 228 |
|
| 229 |
@tool
|
|
@@ -236,7 +213,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 236 |
if key == "enter":
|
| 237 |
key = "Return"
|
| 238 |
self.desktop.press(key)
|
| 239 |
-
|
| 240 |
return f"Pressed key: {key}"
|
| 241 |
|
| 242 |
@tool
|
|
@@ -246,7 +223,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 246 |
Args:
|
| 247 |
"""
|
| 248 |
self.desktop.press(["alt", "left"])
|
| 249 |
-
|
| 250 |
return "Went back one page"
|
| 251 |
|
| 252 |
@tool
|
|
@@ -261,7 +238,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 261 |
"""
|
| 262 |
self.desktop.drag([x1, y1], [x2, y2])
|
| 263 |
message = f"Dragged and dropped from [{x1}, {y1}] to [{x2}, {y2}]"
|
| 264 |
-
|
| 265 |
return message
|
| 266 |
|
| 267 |
@tool
|
|
@@ -273,7 +250,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 273 |
amount: The amount to scroll. A good amount is 1 or 2.
|
| 274 |
"""
|
| 275 |
self.desktop.scroll(direction=direction, amount=amount)
|
| 276 |
-
|
| 277 |
return f"Scrolled {direction} by {amount}"
|
| 278 |
|
| 279 |
@tool
|
|
@@ -284,7 +261,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 284 |
seconds: Number of seconds to wait
|
| 285 |
"""
|
| 286 |
time.sleep(seconds)
|
| 287 |
-
|
| 288 |
return f"Waited for {seconds} seconds"
|
| 289 |
|
| 290 |
@tool
|
|
@@ -301,7 +278,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 301 |
self.desktop.open(url)
|
| 302 |
# Give it time to load
|
| 303 |
time.sleep(2)
|
| 304 |
-
|
| 305 |
return f"Opened URL: {url}"
|
| 306 |
|
| 307 |
|
|
@@ -330,8 +307,7 @@ class E2BVisionAgent(CodeAgent):
|
|
| 330 |
|
| 331 |
def take_snapshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|
| 332 |
"""Callback that takes a screenshot + memory snapshot after a step completes"""
|
| 333 |
-
|
| 334 |
-
write_to_console_log(self.log_path, "Analyzing screen content...")
|
| 335 |
|
| 336 |
current_step = memory_step.step_number
|
| 337 |
print(f"Taking screenshot for step {current_step}")
|
|
|
|
| 18 |
from smolagents.memory import ActionStep
|
| 19 |
from smolagents.models import ChatMessage, MessageRole, Model
|
| 20 |
from smolagents.monitoring import LogLevel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
| 23 |
On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
|
|
|
|
| 105 |
self.desktop = desktop
|
| 106 |
self.data_dir = data_dir
|
| 107 |
self.log_path = log_file
|
|
|
|
| 108 |
self.planning_interval = planning_interval
|
| 109 |
# Initialize Desktop
|
| 110 |
self.width, self.height = self.desktop.get_screen_size()
|
| 111 |
print(f"Screen size: {self.width}x{self.height}")
|
|
|
|
|
|
|
| 112 |
|
| 113 |
# Set up temp directory
|
| 114 |
os.makedirs(self.data_dir, exist_ok=True)
|
|
|
|
| 134 |
|
| 135 |
# Add default tools
|
| 136 |
self._setup_desktop_tools()
|
| 137 |
+
self.logger.log("Setting up agent tools...")
|
| 138 |
self.step_callbacks.append(self.take_snapshot_callback)
|
| 139 |
+
self.logger.log("Studying an action plan... that will take a bit.")
|
| 140 |
|
| 141 |
def _setup_desktop_tools(self):
|
| 142 |
"""Register all desktop tools"""
|
|
|
|
| 150 |
"""
|
| 151 |
self.desktop.move_mouse(x, y)
|
| 152 |
self.desktop.left_click()
|
| 153 |
+
self.logger.log(self.log_path, f"Clicked at coordinates ({x}, {y})")
|
| 154 |
return f"Clicked at coordinates ({x}, {y})"
|
| 155 |
|
| 156 |
@tool
|
|
|
|
| 163 |
"""
|
| 164 |
self.desktop.move_mouse(x, y)
|
| 165 |
self.desktop.right_click()
|
| 166 |
+
self.logger.log(self.log_path, f"Right-clicked at coordinates ({x}, {y})")
|
| 167 |
return f"Right-clicked at coordinates ({x}, {y})"
|
| 168 |
|
| 169 |
@tool
|
|
|
|
| 176 |
"""
|
| 177 |
self.desktop.move_mouse(x, y)
|
| 178 |
self.desktop.double_click()
|
| 179 |
+
self.logger.log(self.log_path, f"Double-clicked at coordinates ({x}, {y})")
|
| 180 |
return f"Double-clicked at coordinates ({x}, {y})"
|
| 181 |
|
| 182 |
@tool
|
|
|
|
| 188 |
y: The y coordinate (vertical position)
|
| 189 |
"""
|
| 190 |
self.desktop.move_mouse(x, y)
|
| 191 |
+
self.logger.log(self.log_path, f"Moved mouse to coordinates ({x}, {y})")
|
| 192 |
return f"Moved mouse to coordinates ({x}, {y})"
|
| 193 |
|
| 194 |
@tool
|
|
|
|
| 200 |
delay_in_ms: Delay between keystrokes in milliseconds
|
| 201 |
"""
|
| 202 |
self.desktop.write(text, delay_in_ms=delay_in_ms)
|
| 203 |
+
self.logger.log(self.log_path, f"Typed text: '{text}'")
|
| 204 |
return f"Typed text: '{text}'"
|
| 205 |
|
| 206 |
@tool
|
|
|
|
| 213 |
if key == "enter":
|
| 214 |
key = "Return"
|
| 215 |
self.desktop.press(key)
|
| 216 |
+
self.logger.log(self.log_path, f"Pressed key: {key}")
|
| 217 |
return f"Pressed key: {key}"
|
| 218 |
|
| 219 |
@tool
|
|
|
|
| 223 |
Args:
|
| 224 |
"""
|
| 225 |
self.desktop.press(["alt", "left"])
|
| 226 |
+
self.logger.log(self.log_path, "Went back one page")
|
| 227 |
return "Went back one page"
|
| 228 |
|
| 229 |
@tool
|
|
|
|
| 238 |
"""
|
| 239 |
self.desktop.drag([x1, y1], [x2, y2])
|
| 240 |
message = f"Dragged and dropped from [{x1}, {y1}] to [{x2}, {y2}]"
|
| 241 |
+
self.logger.log(self.log_path, message)
|
| 242 |
return message
|
| 243 |
|
| 244 |
@tool
|
|
|
|
| 250 |
amount: The amount to scroll. A good amount is 1 or 2.
|
| 251 |
"""
|
| 252 |
self.desktop.scroll(direction=direction, amount=amount)
|
| 253 |
+
self.logger.log(self.log_path, f"Scrolled {direction} by {amount}")
|
| 254 |
return f"Scrolled {direction} by {amount}"
|
| 255 |
|
| 256 |
@tool
|
|
|
|
| 261 |
seconds: Number of seconds to wait
|
| 262 |
"""
|
| 263 |
time.sleep(seconds)
|
| 264 |
+
self.logger.log(self.log_path, f"Waited for {seconds} seconds")
|
| 265 |
return f"Waited for {seconds} seconds"
|
| 266 |
|
| 267 |
@tool
|
|
|
|
| 278 |
self.desktop.open(url)
|
| 279 |
# Give it time to load
|
| 280 |
time.sleep(2)
|
| 281 |
+
self.logger.log(self.log_path, f"Opening URL: {url}")
|
| 282 |
return f"Opened URL: {url}"
|
| 283 |
|
| 284 |
|
|
|
|
| 307 |
|
| 308 |
def take_snapshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|
| 309 |
"""Callback that takes a screenshot + memory snapshot after a step completes"""
|
| 310 |
+
self.logger.log(self.log_path, "Analyzing screen content...")
|
|
|
|
| 311 |
|
| 312 |
current_step = memory_step.step_number
|
| 313 |
print(f"Taking screenshot for step {current_step}")
|