A-Mahla commited on
Commit
1e72ba8
·
1 Parent(s): 51747e6

NEW agent (#10)

Browse files
cua2-core/src/cua2_core/models/models.py CHANGED
@@ -77,7 +77,11 @@ class AgentAction(FunctionCall):
77
  seconds = args.get("seconds") or args.get("arg_0")
78
  return f"Wait for {seconds} seconds"
79
 
80
- elif action_type == "open":
 
 
 
 
81
  url = args.get("url") or args.get("arg_0")
82
  return f"Open: {url}"
83
 
@@ -100,7 +104,7 @@ class AgentStep(BaseModel):
100
  step_evaluation: Literal["like", "dislike", "neutral"]
101
  error: Optional[str] = None
102
  thought: Optional[str] = None
103
- actions: Optional[list[AgentAction]] = None
104
 
105
  @field_serializer("actions")
106
  def serialize_actions(self, actions: list[AgentAction], _info):
 
77
  seconds = args.get("seconds") or args.get("arg_0")
78
  return f"Wait for {seconds} seconds"
79
 
80
+ elif action_type == "open_url":
81
+ url = args.get("url") or args.get("arg_0")
82
+ return f"Open: {url}"
83
+
84
+ elif action_type == "launch":
85
  url = args.get("url") or args.get("arg_0")
86
  return f"Open: {url}"
87
 
 
104
  step_evaluation: Literal["like", "dislike", "neutral"]
105
  error: Optional[str] = None
106
  thought: Optional[str] = None
107
+ actions: list[AgentAction] = []
108
 
109
  @field_serializer("actions")
110
  def serialize_actions(self, actions: list[AgentAction], _info):
cua2-core/src/cua2_core/services/agent_utils/desktop_agent.py CHANGED
@@ -24,11 +24,13 @@ class E2BVisionAgent(CodeAgent):
24
  verbosity_level: LogLevel = 2,
25
  planning_interval: int | None = None,
26
  use_v1_prompt: bool = False,
 
27
  **kwargs,
28
  ):
29
  self.desktop = desktop
30
  self.data_dir = data_dir
31
  self.planning_interval = planning_interval
 
32
  # Initialize Desktop
33
  self.width, self.height = self.desktop.get_screen_size()
34
  print(f"Screen size: {self.width}x{self.height}")
@@ -60,6 +62,27 @@ class E2BVisionAgent(CodeAgent):
60
  self.logger.log("Setting up agent tools...")
61
  self._setup_desktop_tools()
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def _setup_desktop_tools(self):
64
  """Register all desktop tools"""
65
 
@@ -71,6 +94,9 @@ class E2BVisionAgent(CodeAgent):
71
  x: The x coordinate (horizontal position)
72
  y: The y coordinate (vertical position)
73
  """
 
 
 
74
  self.desktop.move_mouse(x, y)
75
  self.desktop.left_click()
76
  self.click_coordinates = [x, y]
@@ -85,6 +111,9 @@ class E2BVisionAgent(CodeAgent):
85
  x: The x coordinate (horizontal position)
86
  y: The y coordinate (vertical position)
87
  """
 
 
 
88
  self.desktop.move_mouse(x, y)
89
  self.desktop.right_click()
90
  self.click_coordinates = [x, y]
@@ -99,6 +128,9 @@ class E2BVisionAgent(CodeAgent):
99
  x: The x coordinate (horizontal position)
100
  y: The y coordinate (vertical position)
101
  """
 
 
 
102
  self.desktop.move_mouse(x, y)
103
  self.desktop.double_click()
104
  self.click_coordinates = [x, y]
@@ -113,6 +145,9 @@ class E2BVisionAgent(CodeAgent):
113
  x: The x coordinate (horizontal position)
114
  y: The y coordinate (vertical position)
115
  """
 
 
 
116
  self.desktop.move_mouse(x, y)
117
  self.logger.log(f"Moved mouse to coordinates ({x}, {y})")
118
  return f"Moved mouse to coordinates ({x}, {y})"
@@ -167,6 +202,11 @@ class E2BVisionAgent(CodeAgent):
167
  x2: end x coordinate
168
  y2: end y coordinate
169
  """
 
 
 
 
 
170
  self.desktop.drag([x1, y1], [x2, y2])
171
  message = f"Dragged and dropped from [{x1}, {y1}] to [{x2}, {y2}]"
172
  self.logger.log(message)
@@ -182,6 +222,9 @@ class E2BVisionAgent(CodeAgent):
182
  direction: The direction to scroll ("up" or "down"), defaults to "down". For zoom, "up" zooms in, "down" zooms out.
183
  amount: The amount to scroll. A good amount is 1 or 2.
184
  """
 
 
 
185
  self.desktop.move_mouse(x, y)
186
  self.desktop.scroll(direction=direction, amount=amount)
187
  message = f"Scrolled {direction} by {amount}"
@@ -200,18 +243,30 @@ class E2BVisionAgent(CodeAgent):
200
  return f"Waited for {seconds} seconds"
201
 
202
  @tool
203
- def open(url: str) -> str:
204
  """
205
  Directly opens a browser with the specified url: use this at start of web searches rather than trying to click the browser.
206
  Args:
207
  url: The URL to open
208
  """
 
 
209
  self.desktop.open(url)
210
 
211
  time.sleep(2)
212
  self.logger.log(f"Opening URL: {url}")
213
  return f"Opened URL: {url}"
214
 
 
 
 
 
 
 
 
 
 
 
215
  # Register the tools
216
  self.tools["click"] = click
217
  self.tools["right_click"] = right_click
@@ -221,7 +276,8 @@ class E2BVisionAgent(CodeAgent):
221
  self.tools["press"] = press
222
  self.tools["scroll"] = scroll
223
  self.tools["wait"] = wait
224
- self.tools["open"] = open
 
225
  self.tools["go_back"] = go_back
226
  self.tools["drag"] = drag
227
  self.tools["scroll"] = scroll
 
24
  verbosity_level: LogLevel = 2,
25
  planning_interval: int | None = None,
26
  use_v1_prompt: bool = False,
27
+ qwen_normalization: bool = True,
28
  **kwargs,
29
  ):
30
  self.desktop = desktop
31
  self.data_dir = data_dir
32
  self.planning_interval = planning_interval
33
+ self.qwen_normalization = qwen_normalization
34
  # Initialize Desktop
35
  self.width, self.height = self.desktop.get_screen_size()
36
  print(f"Screen size: {self.width}x{self.height}")
 
62
  self.logger.log("Setting up agent tools...")
63
  self._setup_desktop_tools()
64
 
65
+ def _qwen_unnormalization(self, arguments: dict[str, int]) -> dict[str, int]:
66
+ """
67
+ Unnormalize coordinates from 0-999 range to actual screen pixel coordinates.
68
+ Coordinates are identified by keys containing 'x' or 'y'.
69
+
70
+ Args:
71
+ arguments: Dictionary with coordinate parameters (keys containing 'x' or 'y')
72
+
73
+ Returns:
74
+ Dictionary with unnormalized pixel coordinates
75
+ """
76
+ unnormalized: dict[str, int] = {}
77
+ for key, value in arguments.items():
78
+ if "x" in key.lower() and "y" not in key.lower():
79
+ unnormalized[key] = int((value / 1000) * self.width)
80
+ elif "y" in key.lower():
81
+ unnormalized[key] = int((value / 1000) * self.height)
82
+ else:
83
+ unnormalized[key] = value
84
+ return unnormalized
85
+
86
  def _setup_desktop_tools(self):
87
  """Register all desktop tools"""
88
 
 
94
  x: The x coordinate (horizontal position)
95
  y: The y coordinate (vertical position)
96
  """
97
+ if self.qwen_normalization:
98
+ coords = self._qwen_unnormalization({"x": x, "y": y})
99
+ x, y = coords["x"], coords["y"]
100
  self.desktop.move_mouse(x, y)
101
  self.desktop.left_click()
102
  self.click_coordinates = [x, y]
 
111
  x: The x coordinate (horizontal position)
112
  y: The y coordinate (vertical position)
113
  """
114
+ if self.qwen_normalization:
115
+ coords = self._qwen_unnormalization({"x": x, "y": y})
116
+ x, y = coords["x"], coords["y"]
117
  self.desktop.move_mouse(x, y)
118
  self.desktop.right_click()
119
  self.click_coordinates = [x, y]
 
128
  x: The x coordinate (horizontal position)
129
  y: The y coordinate (vertical position)
130
  """
131
+ if self.qwen_normalization:
132
+ coords = self._qwen_unnormalization({"x": x, "y": y})
133
+ x, y = coords["x"], coords["y"]
134
  self.desktop.move_mouse(x, y)
135
  self.desktop.double_click()
136
  self.click_coordinates = [x, y]
 
145
  x: The x coordinate (horizontal position)
146
  y: The y coordinate (vertical position)
147
  """
148
+ if self.qwen_normalization:
149
+ coords = self._qwen_unnormalization({"x": x, "y": y})
150
+ x, y = coords["x"], coords["y"]
151
  self.desktop.move_mouse(x, y)
152
  self.logger.log(f"Moved mouse to coordinates ({x}, {y})")
153
  return f"Moved mouse to coordinates ({x}, {y})"
 
202
  x2: end x coordinate
203
  y2: end y coordinate
204
  """
205
+ if self.qwen_normalization:
206
+ coords = self._qwen_unnormalization(
207
+ {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
208
+ )
209
+ x1, y1, x2, y2 = coords["x1"], coords["y1"], coords["x2"], coords["y2"]
210
  self.desktop.drag([x1, y1], [x2, y2])
211
  message = f"Dragged and dropped from [{x1}, {y1}] to [{x2}, {y2}]"
212
  self.logger.log(message)
 
222
  direction: The direction to scroll ("up" or "down"), defaults to "down". For zoom, "up" zooms in, "down" zooms out.
223
  amount: The amount to scroll. A good amount is 1 or 2.
224
  """
225
+ if self.qwen_normalization:
226
+ coords = self._qwen_unnormalization({"x": x, "y": y})
227
+ x, y = coords["x"], coords["y"]
228
  self.desktop.move_mouse(x, y)
229
  self.desktop.scroll(direction=direction, amount=amount)
230
  message = f"Scrolled {direction} by {amount}"
 
243
  return f"Waited for {seconds} seconds"
244
 
245
  @tool
246
+ def open_url(url: str) -> str:
247
  """
248
  Directly opens a browser with the specified url: use this at start of web searches rather than trying to click the browser.
249
  Args:
250
  url: The URL to open
251
  """
252
+ if not url.startswith("http") and not url.startswith("https"):
253
+ url = f"https://{url}"
254
  self.desktop.open(url)
255
 
256
  time.sleep(2)
257
  self.logger.log(f"Opening URL: {url}")
258
  return f"Opened URL: {url}"
259
 
260
@tool
def launch(app: str) -> str:
    """
    Launches the specified application
    Args:
        app: The application to launch
    """
    # `app` is passed to the sandbox as a full command line, so it may carry
    # flags (e.g. "libreoffice --writer"). background=True presumably returns
    # without waiting for the application to exit — confirm against the
    # sandbox SDK.
    self.desktop.commands.run(str(app), background=True)
    message = f"Launched application: {app}"
    # Log the action like the other desktop tools do.
    self.logger.log(message)
    return message
269
+
270
  # Register the tools
271
  self.tools["click"] = click
272
  self.tools["right_click"] = right_click
 
276
  self.tools["press"] = press
277
  self.tools["scroll"] = scroll
278
  self.tools["wait"] = wait
279
+ self.tools["open_url"] = open_url
280
+ self.tools["launch"] = launch
281
  self.tools["go_back"] = go_back
282
  self.tools["drag"] = drag
283
  self.tools["scroll"] = scroll
cua2-core/src/cua2_core/services/agent_utils/prompt.py CHANGED
@@ -32,6 +32,32 @@ click(x, y)
32
 
33
  <environment>
34
  The desktop resolution is <<resolution_x>>x<<resolution_y>> pixels.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  You can only interact through the following tools:
36
 
37
  {%- for tool in tools.values() %}
@@ -42,7 +68,11 @@ You can only interact through the following tools:
42
 
43
  If a task requires a specific application or website, **use**:
44
  ```python
45
- open("app_or_url")
 
 
 
 
46
  ```
47
  to launch it before interacting.
48
  Never manually click the browser icon — use `open_url()` directly for web pages.
@@ -51,8 +81,9 @@ Never manually click the browser icon — use `open_url()` directly for web page
51
  ---
52
 
53
  <click_guidelines>
54
- - Always click using **real, visible coordinates** based on the current screenshot.
55
  - Click precisely **in the center** of the intended target (button, text, icon).
 
56
  - Avoid random or approximate coordinates.
57
  - If nothing changes after a click, check if you misclicked (green crosshair = last click position).
58
  - If a menu item shows a ▶ (triangle), it means it expands—click directly on the text, not the icon.
@@ -64,7 +95,7 @@ Never manually click the browser icon — use `open_url()` directly for web page
64
  <workflow_guidelines>
65
  - **ALWAYS START** by analyzing if the task requires opening an application or URL. If so, your **first action** must be:
66
  - For websites: `open_url("https://google.com")`
67
- - For applications: `open("app_name")`
68
  - Never manually navigate to apps via clicking icons—use the open tools directly.
69
  - Complete one atomic action per step: e.g., **click**, **type**, or **wait**.
70
  - Never combine multiple tool calls in one step.
@@ -89,7 +120,7 @@ What I see: “Text Editor” visible under Accessories.
89
  Reflection: Clicking directly on “Text Editor”.
90
  Action:
91
  ```python
92
- open("text_editor")
93
  ```<end_code>
94
 
95
  Step 2
@@ -98,7 +129,7 @@ What I see: Text editor page.
98
  Reflection: Click on the text editor page to write "Hello World".
99
  Action:
100
  ```python
101
- click(52, 10)
102
  ```<end_code>
103
 
104
  Step 3
 
32
 
33
  <environment>
34
  The desktop resolution is <<resolution_x>>x<<resolution_y>> pixels.
35
+
36
+ **Coordinate System:**
37
+ - **IMPORTANT**: All coordinates must be specified in a **normalized range from 0 to 1000**.
38
+ - The x-axis goes from 0 (left edge) to 1000 (right edge).
39
+ - The y-axis goes from 0 (top edge) to 1000 (bottom edge).
40
+ - The system will automatically convert these normalized coordinates to actual screen pixels.
41
+ - Example: To click the center of the screen, use `click(500, 500)`.
42
+
43
+ **System Information:**
44
+ You are running on **Xubuntu** (Ubuntu with XFCE desktop environment).
45
+ This is a lightweight setup with essential applications.
46
+
47
+ **Available Default Applications:**
48
+ - **File Manager**: use the terminal to browse and manage files
49
+ - **Document/Calc Editor**: LibreOffice (document/calculator editor)
50
+ - **Note-taking**: mousepad
51
+ - **Terminal**: xfce4-terminal (command-line interface)
52
+ - **Web Browser**: Firefox (use `open_url()` for websites)
53
+ - **Image Viewer**: ristretto (image viewer)
54
+ - **PDF Viewer**: xpdf (pdf viewer)
55
+
56
+ **Important Notes:**
57
+ - This is a **lightweight desktop environment** — do not assume specialized software is installed.
58
+ - For tasks requiring specific applications not listed above, you may need to adapt or use available alternatives.
59
+ - Always verify what's actually visible on the screen rather than assuming applications exist.
60
+
61
  You can only interact through the following tools:
62
 
63
  {%- for tool in tools.values() %}
 
68
 
69
  If a task requires a specific application or website, **use**:
70
  ```python
71
+ open_url("https://google.com")
72
+ launch("xfce4-terminal")
73
+ launch("libreoffice --writer")
74
+ launch("libreoffice --calc")
75
+ launch("mousepad")
76
  ```
77
  to launch it before interacting.
78
  Never manually click the browser icon — use `open_url()` directly for web pages.
 
81
  ---
82
 
83
  <click_guidelines>
84
+ - Always use **normalized coordinates (0-1000 range)** based on the current screenshot.
85
  - Click precisely **in the center** of the intended target (button, text, icon).
86
+ - Coordinates must be integers between 0 and 1000 for both x and y axes.
87
  - Avoid random or approximate coordinates.
88
  - If nothing changes after a click, check if you misclicked (green crosshair = last click position).
89
  - If a menu item shows a ▶ (triangle), it means it expands—click directly on the text, not the icon.
 
95
  <workflow_guidelines>
96
  - **ALWAYS START** by analyzing if the task requires opening an application or URL. If so, your **first action** must be:
97
  - For websites: `open_url("https://google.com")`
98
+ - For applications: `launch("app_name")`
99
  - Never manually navigate to apps via clicking icons—use `open_url()` or `launch()` directly.
100
  - Complete one atomic action per step: e.g., **click**, **type**, or **wait**.
101
  - Never combine multiple tool calls in one step.
 
120
  Reflection: Clicking directly on “Text Editor”.
121
  Action:
122
  ```python
123
+ launch("text_editor")
124
  ```<end_code>
125
 
126
  Step 2
 
129
  Reflection: Click on the text editor page to write "Hello World".
130
  Action:
131
  ```python
132
+ click(150, 100)
133
  ```<end_code>
134
 
135
  Step 3
cua2-core/src/cua2_core/services/instruction_service.py CHANGED
@@ -14,12 +14,9 @@ class InstructionService:
14
  available_models = AVAILABLE_MODELS
15
  seed_topics = [
16
  "web browsing",
17
- "email management",
18
- "calendar scheduling",
19
- "file management",
20
  "note-taking",
21
- "system settings",
22
- "text editing",
23
  "terminal commands",
24
  ]
25
 
@@ -60,32 +57,37 @@ class InstructionService:
60
  (
61
  "Generate a clear and specific web browsing task instruction for a desktop automation agent. "
62
  "The task should be goal-centric, focused on retrieving information or performing an action online. "
63
- "You can specify a URL or website to visit. "
 
64
  "Return only the task instruction, nothing else. Keep it simple and focused on a single goal."
65
  ),
66
  (
67
  "Create a practical web browsing task for desktop automation. "
68
  "The task should focus on finding specific information or completing an online action. "
69
- "Include a specific URL or website name if relevant to the goal. "
 
70
  "Provide only the task description without any additional explanation."
71
  ),
72
  (
73
  "Generate a specific web browsing task that a desktop automation agent can perform. "
74
  "The task should be about retrieving information or performing an action on a website. "
75
- "You may specify URLs or web addresses. Keep it concrete and single-purpose. "
76
- "Return just the task instruction."
 
77
  ),
78
  (
79
  "Provide a goal-oriented web browsing task instruction for a desktop agent. "
80
  "Focus on what information to find or what action to perform online. "
81
- "Specify a URL or website if it helps achieve the goal. "
 
82
  "Output only the instruction."
83
  ),
84
  (
85
  "Think of a realistic web browsing task suitable for desktop automation. "
86
  "The task should be about accessing online information or performing a web-based action. "
87
- "Include specific URLs or websites as needed. Keep it simple and goal-focused. "
88
- "Return only the task."
 
89
  ),
90
  ]
91
 
 
14
  available_models = AVAILABLE_MODELS
15
  seed_topics = [
16
  "web browsing",
17
+ "file management (linux)",
 
 
18
  "note-taking",
19
+ "text/document editing (no existing document required, create a new one if needed, use libreoffice)",
 
20
  "terminal commands",
21
  ]
22
 
 
57
  (
58
  "Generate a clear and specific web browsing task instruction for a desktop automation agent. "
59
  "The task should be goal-centric, focused on retrieving information or performing an action online. "
60
+ "Directly specify a URL or website to visit (e.g., 'Go to google.com and search for...'). "
61
+ "Do NOT instruct the agent to open a browser application first - just specify the URL or web task directly. "
62
  "Return only the task instruction, nothing else. Keep it simple and focused on a single goal."
63
  ),
64
  (
65
  "Create a practical web browsing task for desktop automation. "
66
  "The task should focus on finding specific information or completing an online action. "
67
+ "Include a specific URL or website name and what to do there (e.g., 'Visit github.com and...'). "
68
+ "Do NOT include steps about opening a browser - just specify the web task directly. "
69
  "Provide only the task description without any additional explanation."
70
  ),
71
  (
72
  "Generate a specific web browsing task that a desktop automation agent can perform. "
73
  "The task should be about retrieving information or performing an action on a website. "
74
+ "Specify URLs or web addresses directly (e.g., 'Navigate to wikipedia.org and...'). "
75
+ "Do NOT mention opening a browser application - assume the agent will handle that automatically. "
76
+ "Keep it concrete and single-purpose. Return just the task instruction."
77
  ),
78
  (
79
  "Provide a goal-oriented web browsing task instruction for a desktop agent. "
80
  "Focus on what information to find or what action to perform online. "
81
+ "Specify a URL or website directly as part of the task (e.g., 'Go to amazon.com and...'). "
82
+ "Do NOT instruct to open a browser first - just state the URL and the web task. "
83
  "Output only the instruction."
84
  ),
85
  (
86
  "Think of a realistic web browsing task suitable for desktop automation. "
87
  "The task should be about accessing online information or performing a web-based action. "
88
+ "Include specific URLs or websites with the action to perform (e.g., 'Visit youtube.com and...'). "
89
+ "Do NOT include opening a browser as a separate step - just specify the web task directly. "
90
+ "Keep it simple and goal-focused. Return only the task."
91
  ),
92
  ]
93
 
cua2-core/src/cua2_core/services/sandbox_service.py CHANGED
@@ -88,3 +88,32 @@ class SandboxService:
88
  await asyncio.to_thread(self.sandboxes[session_hash].kill)
89
  del self.sandboxes[session_hash]
90
  del self.sandbox_metadata[session_hash]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  await asyncio.to_thread(self.sandboxes[session_hash].kill)
89
  del self.sandboxes[session_hash]
90
  del self.sandbox_metadata[session_hash]
91
+
92
+
93
if __name__ == "__main__":
    # Manual helper: create a sandbox, print its stream URL, then launch
    # whatever applications are typed at the prompt until interrupted.
    desktop: Sandbox = Sandbox.create(
        api_key=os.getenv("E2B_API_KEY"),
        resolution=(WIDTH, HEIGHT),
        dpi=96,
        timeout=SANDBOX_TIMEOUT,
        template="k0wmnzir0zuzye6dndlw",
    )
    # Everything after creation runs under try/finally so the sandbox is
    # killed even when stream start-up or the setup command fails.
    try:
        desktop.stream.start(require_auth=True)
        # Write a Firefox policies.json that overrides the first-run and
        # post-update pages and disables profile import / default-browser checks.
        setup_cmd = """sudo mkdir -p /usr/lib/firefox-esr/distribution && echo '{"policies":{"OverrideFirstRunPage":"","OverridePostUpdatePage":"","DisableProfileImport":true,"DontCheckDefaultBrowser":true}}' | sudo tee /usr/lib/firefox-esr/distribution/policies.json > /dev/null"""
        desktop.commands.run(setup_cmd)
        print(
            desktop.stream.get_url(
                auto_connect=True,
                view_only=False,
                resize="scale",
                auth_key=desktop.stream.get_auth_key(),
            )
        )
        try:
            while True:
                application = input("Enter application to launch: ")
                desktop.commands.run(f"{application} &")
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C / end of input is the normal way to end the session.
            pass
        except Exception as exc:
            # Any other failure still ends the session, but is reported
            # instead of being silently discarded.
            print(f"Stopping sandbox session after error: {exc!r}")
    finally:
        desktop.kill()
cua2-core/src/cua2_core/websocket/websocket_manager.py CHANGED
@@ -52,7 +52,7 @@ class WebSocketManager:
52
  try:
53
  await websocket.send_text(
54
  json.dumps(
55
- message.model_dump(mode="json", context={"actions_as_json": False})
56
  )
57
  )
58
  except Exception as e:
 
52
  try:
53
  await websocket.send_text(
54
  json.dumps(
55
+ message.model_dump(mode="json", context={"actions_as_json": True})
56
  )
57
  )
58
  except Exception as e:
cua2-front/src/components/WelcomeScreen.tsx CHANGED
@@ -1,12 +1,12 @@
1
- import React, { useState, useEffect, useRef } from 'react';
2
- import { Box, Typography, Button, Container, Paper, TextField, IconButton, Select, MenuItem, FormControl, InputLabel, CircularProgress } from '@mui/material';
3
- import ShuffleIcon from '@mui/icons-material/Shuffle';
4
- import SendIcon from '@mui/icons-material/Send';
5
- import LightModeOutlined from '@mui/icons-material/LightModeOutlined';
6
  import DarkModeOutlined from '@mui/icons-material/DarkModeOutlined';
 
 
 
7
  import SmartToyIcon from '@mui/icons-material/SmartToy';
8
- import { useAgentStore, selectSelectedModelId, selectIsDarkMode, selectAvailableModels, selectIsLoadingModels } from '@/stores/agentStore';
9
- import { fetchAvailableModels, generateRandomQuestion } from '@/services/api';
10
 
11
  interface WelcomeScreenProps {
12
  onStartTask: (instruction: string, modelId: string) => void;
@@ -147,7 +147,7 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onStartTask, isCon
147
  color: 'text.primary',
148
  }}
149
  >
150
- CUA2 Agent
151
  </Typography>
152
 
153
  {/* Powered by smolagents */}
 
1
+ import { fetchAvailableModels, generateRandomQuestion } from '@/services/api';
2
+ import { selectAvailableModels, selectIsDarkMode, selectIsLoadingModels, selectSelectedModelId, useAgentStore } from '@/stores/agentStore';
 
 
 
3
  import DarkModeOutlined from '@mui/icons-material/DarkModeOutlined';
4
+ import LightModeOutlined from '@mui/icons-material/LightModeOutlined';
5
+ import SendIcon from '@mui/icons-material/Send';
6
+ import ShuffleIcon from '@mui/icons-material/Shuffle';
7
  import SmartToyIcon from '@mui/icons-material/SmartToy';
8
+ import { Box, Button, CircularProgress, Container, FormControl, IconButton, InputLabel, MenuItem, Paper, Select, TextField, Typography } from '@mui/material';
9
+ import React, { useEffect, useRef, useState } from 'react';
10
 
11
  interface WelcomeScreenProps {
12
  onStartTask: (instruction: string, modelId: string) => void;
 
147
  color: 'text.primary',
148
  }}
149
  >
150
+ Computer Use Agent
151
  </Typography>
152
 
153
  {/* Powered by smolagents */}
cua2-front/src/stores/agentStore.ts CHANGED
@@ -1,6 +1,6 @@
 
1
  import { create } from 'zustand';
2
  import { devtools } from 'zustand/middleware';
3
- import { AgentTrace, AgentStep, AgentTraceMetadata, FinalStep } from '@/types/agent';
4
 
5
  interface AgentState {
6
  // State
@@ -39,7 +39,7 @@ const initialState = {
39
  isAgentProcessing: false,
40
  isConnectingToE2B: false,
41
  vncUrl: '',
42
- selectedModelId: 'Qwen/Qwen3-VL-8B-Instruct',
43
  availableModels: [],
44
  isLoadingModels: false,
45
  isConnected: false,
 
1
+ import { AgentStep, AgentTrace, AgentTraceMetadata, FinalStep } from '@/types/agent';
2
  import { create } from 'zustand';
3
  import { devtools } from 'zustand/middleware';
 
4
 
5
  interface AgentState {
6
  // State
 
39
  isAgentProcessing: false,
40
  isConnectingToE2B: false,
41
  vncUrl: '',
42
+ selectedModelId: 'Qwen/Qwen3-VL-30B-A3B-Instruct',
43
  availableModels: [],
44
  isLoadingModels: false,
45
  isConnected: false,