tfrere HF Staff Cursor commited on
Commit
3dd281d
·
1 Parent(s): 9dbf093

Refactor tool lifecycle: single state field, proper state machine

Browse files

- Added ToolState type: calling → pending_approval → approved → running → completed/failed/rejected/timed_out
- TraceLog.state is now the single source of truth (legacy fields kept for backward compat)
- Backend sends tool_state_change events immediately after approval decisions
- Backend validates malformed tool arguments before execution (json.loads try/except)
- Frontend optimistic update uses state: 'approved' (not completed: false)
- ToolCallGroup resolves state from new field with legacy fallback
- StatusIcon and statusLabel driven by resolved state, not field combinations

Co-authored-by: Cursor <cursoragent@cursor.com>

agent/core/agent_loop.py CHANGED
@@ -495,7 +495,31 @@ class Handlers:
495
 
496
  for tc in tool_calls:
497
  tool_name = tc.function.name
498
- tool_args = json.loads(tc.function.arguments)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  approval_decision = approval_map.get(tc.id, {"approved": False})
500
 
501
  if approval_decision.get("approved", False):
@@ -503,6 +527,30 @@ class Handlers:
503
  else:
504
  rejected_tasks.append((tc, tool_name, approval_decision))
505
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
506
  # Execute all approved tools concurrently
507
  async def execute_tool(tc, tool_name, tool_args):
508
  """Execute a single tool and return its result"""
 
495
 
496
  for tc in tool_calls:
497
  tool_name = tc.function.name
498
+ try:
499
+ tool_args = json.loads(tc.function.arguments)
500
+ except (json.JSONDecodeError, TypeError) as e:
501
+ # Malformed arguments — treat as failed, notify agent
502
+ logger.warning(f"Malformed tool arguments for {tool_name}: {e}")
503
+ tool_msg = Message(
504
+ role="tool",
505
+ content=f"Malformed arguments: {e}",
506
+ tool_call_id=tc.id,
507
+ name=tool_name,
508
+ )
509
+ session.context_manager.add_message(tool_msg)
510
+ await session.send_event(
511
+ Event(
512
+ event_type="tool_output",
513
+ data={
514
+ "tool": tool_name,
515
+ "tool_call_id": tc.id,
516
+ "output": f"Malformed arguments: {e}",
517
+ "success": False,
518
+ },
519
+ )
520
+ )
521
+ continue
522
+
523
  approval_decision = approval_map.get(tc.id, {"approved": False})
524
 
525
  if approval_decision.get("approved", False):
 
527
  else:
528
  rejected_tasks.append((tc, tool_name, approval_decision))
529
 
530
+ # Notify frontend of approval decisions immediately (before execution)
531
+ for tc, tool_name, tool_args in approved_tasks:
532
+ await session.send_event(
533
+ Event(
534
+ event_type="tool_state_change",
535
+ data={
536
+ "tool_call_id": tc.id,
537
+ "tool": tool_name,
538
+ "state": "approved",
539
+ },
540
+ )
541
+ )
542
+ for tc, tool_name, approval_decision in rejected_tasks:
543
+ await session.send_event(
544
+ Event(
545
+ event_type="tool_state_change",
546
+ data={
547
+ "tool_call_id": tc.id,
548
+ "tool": tool_name,
549
+ "state": "rejected",
550
+ },
551
+ )
552
+ )
553
+
554
  # Execute all approved tools concurrently
555
  async def execute_tool(tc, tool_name, tool_args):
556
  """Execute a single tool and return its result"""
frontend/src/components/Chat/ToolCallGroup.tsx CHANGED
@@ -12,72 +12,88 @@ import { useLayoutStore } from '@/store/layoutStore';
12
  import { useSessionStore } from '@/store/sessionStore';
13
  import { apiFetch } from '@/utils/api';
14
  import { logger } from '@/utils/logger';
15
- import type { TraceLog } from '@/types/agent';
16
 
17
  interface ToolCallGroupProps {
18
  tools: TraceLog[];
19
  }
20
 
21
- /** Check if a running tool has been stuck for too long (5 minutes). */
22
  const TOOL_TIMEOUT_MS = 5 * 60 * 1000;
23
- function isTimedOut(log: TraceLog): boolean {
24
- if (log.completed || log.approvalStatus === 'pending') return false;
 
 
 
 
 
 
 
 
 
 
 
 
25
  const elapsed = Date.now() - new Date(log.timestamp).getTime();
26
- return elapsed > TOOL_TIMEOUT_MS;
 
 
27
  }
28
 
29
- // ── Status icon based on tool state ─────────────────────────────────
30
- function StatusIcon({ log }: { log: TraceLog }) {
31
- // Awaiting approval
32
- if (log.approvalStatus === 'pending') {
33
- return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;
34
- }
35
- // Rejected
36
- if (log.approvalStatus === 'rejected') {
37
- return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'error.main' }} />;
38
- }
39
- // Timed out
40
- if (isTimedOut(log)) {
41
- return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'var(--muted-text)' }} />;
42
- }
43
- // Running (not completed yet)
44
- if (!log.completed) {
45
- return (
46
- <MoreHorizIcon
47
- sx={{
48
- fontSize: 16,
49
- color: 'var(--muted-text)',
50
- animation: 'pulse 1.5s ease-in-out infinite',
51
- '@keyframes pulse': {
52
- '0%, 100%': { opacity: 0.4 },
53
- '50%': { opacity: 1 },
54
- },
55
- }}
56
- />
57
- );
58
- }
59
- // Failed
60
- if (log.success === false) {
61
- return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'error.main' }} />;
62
  }
63
- // Completed successfully
64
- return <CheckCircleOutlineIcon sx={{ fontSize: 16, color: 'success.main' }} />;
65
  }
66
 
67
  // ── Status chip label ───────────────────────────────────────────────
68
- function statusLabel(log: TraceLog): string | null {
69
- if (log.approvalStatus === 'pending') return 'awaiting approval';
70
- if (log.approvalStatus === 'rejected') return 'rejected';
71
- if (isTimedOut(log)) return 'timed out';
72
- if (!log.completed) return 'running';
73
- return null;
 
 
 
 
74
  }
75
 
76
- function statusColor(log: TraceLog): string {
77
- if (log.approvalStatus === 'pending') return 'var(--accent-yellow)';
78
- if (log.approvalStatus === 'rejected') return 'var(--accent-red)';
79
- if (isTimedOut(log)) return 'var(--muted-text)';
80
- return 'var(--accent-yellow)';
 
 
 
 
81
  }
82
 
83
  // ── Inline approval UI ──────────────────────────────────────────────
@@ -218,8 +234,9 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
218
  return;
219
  }
220
 
221
- // Show output if completed, or args if still running
222
- if (log.completed && log.output) {
 
223
  showToolOutput(log);
224
  } else if (log.args) {
225
  const content = JSON.stringify(log.args, null, 2);
@@ -249,11 +266,11 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
249
  });
250
 
251
  if (res.ok) {
252
- // Optimistic update: immediately reflect approval status in the UI
253
  const { updateTraceLog, updateCurrentTurnTrace, setProcessing } = useAgentStore.getState();
254
  updateTraceLog(toolCallId, '', {
255
- approvalStatus: approved ? 'approved' : 'rejected',
256
- completed: !approved, // Rejected tools are done; approved ones will run
257
  });
258
  updateCurrentTurnTrace(activeSessionId);
259
  if (approved) setProcessing(true);
@@ -277,9 +294,10 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
277
  >
278
  <Stack divider={<Box sx={{ borderBottom: '1px solid var(--tool-border)' }} />}>
279
  {tools.map((log) => {
280
- const clickable = (log.completed && !!log.output) || !!log.args;
281
- const label = statusLabel(log);
282
- const isPendingApproval = log.approvalStatus === 'pending';
 
283
 
284
  return (
285
  <Box key={log.id}>
@@ -297,7 +315,7 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
297
  '&:hover': clickable && !isPendingApproval ? { bgcolor: 'var(--hover-bg)' } : {},
298
  }}
299
  >
300
- <StatusIcon log={log} />
301
 
302
  <Typography
303
  variant="body2"
@@ -325,7 +343,7 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
325
  fontSize: '0.65rem',
326
  fontWeight: 600,
327
  bgcolor: 'var(--accent-yellow-weak)',
328
- color: statusColor(log),
329
  letterSpacing: '0.03em',
330
  }}
331
  />
 
12
  import { useSessionStore } from '@/store/sessionStore';
13
  import { apiFetch } from '@/utils/api';
14
  import { logger } from '@/utils/logger';
15
+ import type { TraceLog, ToolState } from '@/types/agent';
16
 
17
  interface ToolCallGroupProps {
18
  tools: TraceLog[];
19
  }
20
 
 
21
  const TOOL_TIMEOUT_MS = 5 * 60 * 1000;
22
+
23
+ /**
24
+ * Resolve the effective state of a TraceLog.
25
+ * Uses `state` field if present, otherwise infers from legacy fields
26
+ * (backward compat with data persisted before the state refactor).
27
+ */
28
+ function resolveState(log: TraceLog): ToolState {
29
+ if (log.state) return log.state;
30
+ // Legacy inference
31
+ if (log.approvalStatus === 'pending') return 'pending_approval';
32
+ if (log.approvalStatus === 'rejected') return 'rejected';
33
+ if (log.completed && log.success === false) return 'failed';
34
+ if (log.completed) return 'completed';
35
+ // Check timeout
36
  const elapsed = Date.now() - new Date(log.timestamp).getTime();
37
+ if (elapsed > TOOL_TIMEOUT_MS) return 'timed_out';
38
+ if (log.approvalStatus === 'approved') return 'running';
39
+ return 'calling';
40
  }
41
 
42
+ // ── Status icon based on resolved state ──────────────────────────────
43
+ function StatusIcon({ state }: { state: ToolState }) {
44
+ switch (state) {
45
+ case 'pending_approval':
46
+ return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;
47
+ case 'approved':
48
+ return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-green)', opacity: 0.7 }} />;
49
+ case 'rejected':
50
+ case 'failed':
51
+ return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'error.main' }} />;
52
+ case 'timed_out':
53
+ return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'var(--muted-text)' }} />;
54
+ case 'completed':
55
+ return <CheckCircleOutlineIcon sx={{ fontSize: 16, color: 'success.main' }} />;
56
+ case 'calling':
57
+ case 'running':
58
+ default:
59
+ return (
60
+ <MoreHorizIcon
61
+ sx={{
62
+ fontSize: 16,
63
+ color: 'var(--muted-text)',
64
+ animation: 'pulse 1.5s ease-in-out infinite',
65
+ '@keyframes pulse': {
66
+ '0%, 100%': { opacity: 0.4 },
67
+ '50%': { opacity: 1 },
68
+ },
69
+ }}
70
+ />
71
+ );
 
 
 
72
  }
 
 
73
  }
74
 
75
  // ── Status chip label ───────────────────────────────────────────────
76
+ function statusLabel(state: ToolState): string | null {
77
+ switch (state) {
78
+ case 'pending_approval': return 'awaiting approval';
79
+ case 'approved': return 'approved';
80
+ case 'rejected': return 'rejected';
81
+ case 'timed_out': return 'timed out';
82
+ case 'calling':
83
+ case 'running': return 'running';
84
+ default: return null;
85
+ }
86
  }
87
 
88
+ function statusColor(state: ToolState): string {
89
+ switch (state) {
90
+ case 'pending_approval': return 'var(--accent-yellow)';
91
+ case 'approved': return 'var(--accent-green)';
92
+ case 'rejected':
93
+ case 'failed': return 'var(--accent-red)';
94
+ case 'timed_out': return 'var(--muted-text)';
95
+ default: return 'var(--accent-yellow)';
96
+ }
97
  }
98
 
99
  // ── Inline approval UI ──────────────────────────────────────────────
 
234
  return;
235
  }
236
 
237
+ // Show output if completed/failed, or args if still running
238
+ const s = resolveState(log);
239
+ if ((s === 'completed' || s === 'failed') && log.output) {
240
  showToolOutput(log);
241
  } else if (log.args) {
242
  const content = JSON.stringify(log.args, null, 2);
 
266
  });
267
 
268
  if (res.ok) {
269
+ // Optimistic update with proper state transitions
270
  const { updateTraceLog, updateCurrentTurnTrace, setProcessing } = useAgentStore.getState();
271
  updateTraceLog(toolCallId, '', {
272
+ state: approved ? 'approved' : 'rejected',
273
+ approvalStatus: approved ? 'approved' : 'rejected', // legacy compat
274
  });
275
  updateCurrentTurnTrace(activeSessionId);
276
  if (approved) setProcessing(true);
 
294
  >
295
  <Stack divider={<Box sx={{ borderBottom: '1px solid var(--tool-border)' }} />}>
296
  {tools.map((log) => {
297
+ const state = resolveState(log);
298
+ const clickable = state === 'completed' || state === 'failed' || !!log.args;
299
+ const label = statusLabel(state);
300
+ const isPendingApproval = state === 'pending_approval';
301
 
302
  return (
303
  <Box key={log.id}>
 
315
  '&:hover': clickable && !isPendingApproval ? { bgcolor: 'var(--hover-bg)' } : {},
316
  }}
317
  >
318
+ <StatusIcon state={state} />
319
 
320
  <Typography
321
  variant="body2"
 
343
  fontSize: '0.65rem',
344
  fontWeight: 600,
345
  bgcolor: 'var(--accent-yellow-weak)',
346
+ color: statusColor(state),
347
  letterSpacing: '0.03em',
348
  }}
349
  />
frontend/src/hooks/useAgentWebSocket.ts CHANGED
@@ -187,7 +187,8 @@ export function useAgentWebSocket({
187
  text: `Agent is executing ${toolName}...`,
188
  tool: toolName,
189
  timestamp: new Date().toISOString(),
190
- completed: false,
 
191
  args,
192
  };
193
  addTraceLog(log);
@@ -248,17 +249,13 @@ export function useAgentWebSocket({
248
  const output = (event.data?.output as string) || '';
249
  const success = event.data?.success as boolean;
250
 
251
- // Mark the corresponding trace log as completed and store the output.
252
- // If it had a pending approval, mark it as approved (tool_output means it ran).
253
- const prevLog = useAgentStore.getState().traceLogs.find(
254
- (l) => l.toolCallId === toolCallId
255
- );
256
- const wasApproval = prevLog?.approvalStatus === 'pending';
257
  updateTraceLog(toolCallId, toolName, {
258
- completed: true,
 
259
  output,
260
  success,
261
- ...(wasApproval ? { approvalStatus: 'approved' as const } : {}),
262
  });
263
  updateCurrentTurnTrace(sessionId);
264
 
@@ -367,13 +364,15 @@ export function useAgentWebSocket({
367
  text: `Approval required for ${t.tool}`,
368
  tool: t.tool,
369
  timestamp: new Date().toISOString(),
370
- completed: false,
 
371
  args: t.arguments as Record<string, unknown>,
372
- approvalStatus: 'pending',
373
  });
374
  } else {
375
  updateTraceLog(t.tool_call_id, t.tool, {
376
- approvalStatus: 'pending',
 
377
  args: t.arguments as Record<string, unknown>,
378
  });
379
  }
@@ -444,9 +443,27 @@ export function useAgentWebSocket({
444
  break;
445
  }
446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  case 'turn_complete':
448
  setProcessing(false);
449
- setCurrentTurnMessageId(null); // Clear the current turn
450
  break;
451
 
452
  case 'compacted': {
 
187
  text: `Agent is executing ${toolName}...`,
188
  tool: toolName,
189
  timestamp: new Date().toISOString(),
190
+ state: 'running',
191
+ completed: false, // legacy compat
192
  args,
193
  };
194
  addTraceLog(log);
 
249
  const output = (event.data?.output as string) || '';
250
  const success = event.data?.success as boolean;
251
 
252
+ // Mark the tool as completed/failed with its output
 
 
 
 
 
253
  updateTraceLog(toolCallId, toolName, {
254
+ state: success ? 'completed' : 'failed',
255
+ completed: true, // legacy
256
  output,
257
  success,
258
+ approvalStatus: 'approved', // legacy: if we got output, it was approved/auto
259
  });
260
  updateCurrentTurnTrace(sessionId);
261
 
 
364
  text: `Approval required for ${t.tool}`,
365
  tool: t.tool,
366
  timestamp: new Date().toISOString(),
367
+ state: 'pending_approval',
368
+ completed: false, // legacy
369
  args: t.arguments as Record<string, unknown>,
370
+ approvalStatus: 'pending', // legacy
371
  });
372
  } else {
373
  updateTraceLog(t.tool_call_id, t.tool, {
374
+ state: 'pending_approval',
375
+ approvalStatus: 'pending', // legacy
376
  args: t.arguments as Record<string, unknown>,
377
  });
378
  }
 
443
  break;
444
  }
445
 
446
+ // ── Tool state change (sent by backend after approval decisions) ──
447
+ case 'tool_state_change': {
448
+ const tcId = (event.data?.tool_call_id as string) || '';
449
+ const tcTool = (event.data?.tool as string) || '';
450
+ const newState = (event.data?.state as string) || '';
451
+
452
+ if (tcId && newState) {
453
+ updateTraceLog(tcId, tcTool, {
454
+ state: newState as import('@/types/agent').ToolState,
455
+ // Legacy compat
456
+ ...(newState === 'approved' ? { approvalStatus: 'approved' as const } : {}),
457
+ ...(newState === 'rejected' ? { approvalStatus: 'rejected' as const, completed: true } : {}),
458
+ });
459
+ if (sessionId) updateCurrentTurnTrace(sessionId);
460
+ }
461
+ break;
462
+ }
463
+
464
  case 'turn_complete':
465
  setProcessing(false);
466
+ setCurrentTurnMessageId(null);
467
  break;
468
 
469
  case 'compacted': {
frontend/src/types/agent.ts CHANGED
@@ -52,20 +52,37 @@ export interface ApprovalBatch {
52
  count: number;
53
  }
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  export type ApprovalStatus = 'none' | 'pending' | 'approved' | 'rejected';
56
 
57
  export interface TraceLog {
58
  id: string;
59
- toolCallId?: string; // Backend tool_call_id for reliable matching
60
  type: 'call' | 'output';
61
  text: string;
62
  tool: string;
63
  timestamp: string;
 
 
 
 
 
 
64
  completed?: boolean;
65
- args?: Record<string, unknown>; // Store args for auto-exec jobs
66
- output?: string; // Store tool output for display
67
- success?: boolean; // Whether the tool call succeeded
68
- /** Approval state for tools that need user confirmation */
69
  approvalStatus?: ApprovalStatus;
70
  /** Parsed job info (URL, status, logs) for hf_jobs */
71
  jobUrl?: string;
 
52
  count: number;
53
  }
54
 
55
+ /**
56
+ * Single state field for each tool call lifecycle.
57
+ * Follows the Vercel AI SDK pattern: clear, non-overlapping states.
58
+ */
59
+ export type ToolState =
60
+ | 'calling' // tool_call received, execution starting
61
+ | 'pending_approval' // waiting for user to approve/reject
62
+ | 'approved' // user approved, waiting for execution to start
63
+ | 'running' // execution in progress
64
+ | 'completed' // execution finished successfully
65
+ | 'failed' // execution finished with error
66
+ | 'rejected' // user rejected the tool call
67
+ | 'timed_out'; // no response after timeout
68
+
69
+ // Keep backward compat alias
70
  export type ApprovalStatus = 'none' | 'pending' | 'approved' | 'rejected';
71
 
72
  export interface TraceLog {
73
  id: string;
74
+ toolCallId?: string;
75
  type: 'call' | 'output';
76
  text: string;
77
  tool: string;
78
  timestamp: string;
79
+ /** Single source of truth for tool lifecycle state */
80
+ state: ToolState;
81
+ args?: Record<string, unknown>;
82
+ output?: string;
83
+ success?: boolean;
84
+ // Legacy fields — kept for backward compat with persisted data
85
  completed?: boolean;
 
 
 
 
86
  approvalStatus?: ApprovalStatus;
87
  /** Parsed job info (URL, status, logs) for hf_jobs */
88
  jobUrl?: string;
frontend/src/types/events.ts CHANGED
@@ -12,6 +12,7 @@ export type EventType =
12
  | 'tool_output'
13
  | 'tool_log'
14
  | 'approval_required'
 
15
  | 'turn_complete'
16
  | 'compacted'
17
  | 'error'
 
12
  | 'tool_output'
13
  | 'tool_log'
14
  | 'approval_required'
15
+ | 'tool_state_change'
16
  | 'turn_complete'
17
  | 'compacted'
18
  | 'error'