Spaces:

smolagents
/

ml-agent

Running

tfrere HF Staff Cursor committed 5 days ago

Commit

3dd281d

1 Parent(s): 9dbf093

Refactor tool lifecycle: single state field, proper state machine

- Added ToolState type: calling → pending_approval → approved → running → completed/failed/rejected/timed_out
- TraceLog.state is now the single source of truth (legacy fields kept for backward compat)
- Backend sends tool_state_change events immediately after approval decisions
- Backend validates malformed tool arguments before execution (json.loads try/except)
- Frontend optimistic update uses state: 'approved' (not completed: false)
- ToolCallGroup resolves state from new field with legacy fallback
- StatusIcon and statusLabel driven by resolved state, not field combinations

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (5) hide show

agent/core/agent_loop.py +49 -1
frontend/src/components/Chat/ToolCallGroup.tsx +79 -61
frontend/src/hooks/useAgentWebSocket.ts +30 -13
frontend/src/types/agent.ts +22 -5
frontend/src/types/events.ts +1 -0

agent/core/agent_loop.py CHANGED Viewed

@@ -495,7 +495,31 @@ class Handlers:
         for tc in tool_calls:
             tool_name = tc.function.name
-            tool_args = json.loads(tc.function.arguments)
             approval_decision = approval_map.get(tc.id, {"approved": False})
             if approval_decision.get("approved", False):
@@ -503,6 +527,30 @@ class Handlers:
             else:
                 rejected_tasks.append((tc, tool_name, approval_decision))
         # Execute all approved tools concurrently
         async def execute_tool(tc, tool_name, tool_args):
             """Execute a single tool and return its result"""

         for tc in tool_calls:
             tool_name = tc.function.name
+            try:
+                tool_args = json.loads(tc.function.arguments)
+            except (json.JSONDecodeError, TypeError) as e:
+                # Malformed arguments — treat as failed, notify agent
+                logger.warning(f"Malformed tool arguments for {tool_name}: {e}")
+                tool_msg = Message(
+                    role="tool",
+                    content=f"Malformed arguments: {e}",
+                    tool_call_id=tc.id,
+                    name=tool_name,
+                )
+                session.context_manager.add_message(tool_msg)
+                await session.send_event(
+                    Event(
+                        event_type="tool_output",
+                        data={
+                            "tool": tool_name,
+                            "tool_call_id": tc.id,
+                            "output": f"Malformed arguments: {e}",
+                            "success": False,
+                        },
+                    )
+                )
+                continue
             approval_decision = approval_map.get(tc.id, {"approved": False})
             if approval_decision.get("approved", False):
             else:
                 rejected_tasks.append((tc, tool_name, approval_decision))
+        # Notify frontend of approval decisions immediately (before execution)
+        for tc, tool_name, tool_args in approved_tasks:
+            await session.send_event(
+                Event(
+                    event_type="tool_state_change",
+                    data={
+                        "tool_call_id": tc.id,
+                        "tool": tool_name,
+                        "state": "approved",
+                    },
+                )
+            )
+        for tc, tool_name, approval_decision in rejected_tasks:
+            await session.send_event(
+                Event(
+                    event_type="tool_state_change",
+                    data={
+                        "tool_call_id": tc.id,
+                        "tool": tool_name,
+                        "state": "rejected",
+                    },
+                )
+            )
         # Execute all approved tools concurrently
         async def execute_tool(tc, tool_name, tool_args):
             """Execute a single tool and return its result"""

frontend/src/components/Chat/ToolCallGroup.tsx CHANGED Viewed

@@ -12,72 +12,88 @@ import { useLayoutStore } from '@/store/layoutStore';
 import { useSessionStore } from '@/store/sessionStore';
 import { apiFetch } from '@/utils/api';
 import { logger } from '@/utils/logger';
-import type { TraceLog } from '@/types/agent';
 interface ToolCallGroupProps {
   tools: TraceLog[];
 }
-/** Check if a running tool has been stuck for too long (5 minutes). */
 const TOOL_TIMEOUT_MS = 5 * 60 * 1000;
-function isTimedOut(log: TraceLog): boolean {
-  if (log.completed || log.approvalStatus === 'pending') return false;
   const elapsed = Date.now() - new Date(log.timestamp).getTime();
-  return elapsed > TOOL_TIMEOUT_MS;
 }
-// ── Status icon based on tool state ─────────────────────────────────
-function StatusIcon({ log }: { log: TraceLog }) {
-  // Awaiting approval
-  if (log.approvalStatus === 'pending') {
-    return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;
-  }
-  // Rejected
-  if (log.approvalStatus === 'rejected') {
-    return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'error.main' }} />;
-  }
-  // Timed out
-  if (isTimedOut(log)) {
-    return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'var(--muted-text)' }} />;
-  }
-  // Running (not completed yet)
-  if (!log.completed) {
-    return (
-      <MoreHorizIcon
-        sx={{
-          fontSize: 16,
-          color: 'var(--muted-text)',
-          animation: 'pulse 1.5s ease-in-out infinite',
-          '@keyframes pulse': {
-            '0%, 100%': { opacity: 0.4 },
-            '50%': { opacity: 1 },
-          },
-        }}
-      />
-    );
-  }
-  // Failed
-  if (log.success === false) {
-    return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'error.main' }} />;
   }
-  // Completed successfully
-  return <CheckCircleOutlineIcon sx={{ fontSize: 16, color: 'success.main' }} />;
 }
 // ── Status chip label ───────────────────────────────────────────────
-function statusLabel(log: TraceLog): string | null {
-  if (log.approvalStatus === 'pending') return 'awaiting approval';
-  if (log.approvalStatus === 'rejected') return 'rejected';
-  if (isTimedOut(log)) return 'timed out';
-  if (!log.completed) return 'running';
-  return null;
 }
-function statusColor(log: TraceLog): string {
-  if (log.approvalStatus === 'pending') return 'var(--accent-yellow)';
-  if (log.approvalStatus === 'rejected') return 'var(--accent-red)';
-  if (isTimedOut(log)) return 'var(--muted-text)';
-  return 'var(--accent-yellow)';
 }
 // ── Inline approval UI ──────────────────────────────────────────────
@@ -218,8 +234,9 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
         return;
       }
-      // Show output if completed, or args if still running
-      if (log.completed && log.output) {
         showToolOutput(log);
       } else if (log.args) {
         const content = JSON.stringify(log.args, null, 2);
@@ -249,11 +266,11 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
         });
         if (res.ok) {
-          // Optimistic update: immediately reflect approval status in the UI
           const { updateTraceLog, updateCurrentTurnTrace, setProcessing } = useAgentStore.getState();
           updateTraceLog(toolCallId, '', {
-            approvalStatus: approved ? 'approved' : 'rejected',
-            completed: !approved, // Rejected tools are done; approved ones will run
           });
           updateCurrentTurnTrace(activeSessionId);
           if (approved) setProcessing(true);
@@ -277,9 +294,10 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
     >
       <Stack divider={<Box sx={{ borderBottom: '1px solid var(--tool-border)' }} />}>
         {tools.map((log) => {
-          const clickable = (log.completed && !!log.output) || !!log.args;
-          const label = statusLabel(log);
-          const isPendingApproval = log.approvalStatus === 'pending';
           return (
             <Box key={log.id}>
@@ -297,7 +315,7 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
                   '&:hover': clickable && !isPendingApproval ? { bgcolor: 'var(--hover-bg)' } : {},
                 }}
               >
-                <StatusIcon log={log} />
                 <Typography
                   variant="body2"
@@ -325,7 +343,7 @@ export default function ToolCallGroup({ tools }: ToolCallGroupProps) {
                       fontSize: '0.65rem',
                       fontWeight: 600,
                       bgcolor: 'var(--accent-yellow-weak)',
-                      color: statusColor(log),
                       letterSpacing: '0.03em',
                     }}
                   />

 import { useSessionStore } from '@/store/sessionStore';
 import { apiFetch } from '@/utils/api';
 import { logger } from '@/utils/logger';
+import type { TraceLog, ToolState } from '@/types/agent';
 interface ToolCallGroupProps {
   tools: TraceLog[];
 }
 const TOOL_TIMEOUT_MS = 5 * 60 * 1000;
+/**
+ * Resolve the effective lifecycle state of a TraceLog.
+ *
+ * Prefers the `state` field; when it is absent (data persisted before the
+ * state refactor) the state is inferred from the legacy `approvalStatus`,
+ * `completed` and `success` fields.
+ *
+ * The timeout check is applied to every in-flight state ('calling',
+ * 'approved', 'running') no matter which path produced it, so a tool whose
+ * `state` is 'running' but never receives output is still reported as
+ * 'timed_out' once TOOL_TIMEOUT_MS has elapsed since `log.timestamp`.
+ * 'pending_approval' and the terminal states are returned unchanged.
+ *
+ * @param log - The trace log entry to inspect.
+ * @returns The resolved ToolState.
+ */
+function resolveState(log: TraceLog): ToolState {
+  let state: ToolState;
+  if (log.state) {
+    state = log.state;
+  } else if (log.approvalStatus === 'pending') {
+    // Legacy inference from here on
+    state = 'pending_approval';
+  } else if (log.approvalStatus === 'rejected') {
+    state = 'rejected';
+  } else if (log.completed) {
+    state = log.success === false ? 'failed' : 'completed';
+  } else if (log.approvalStatus === 'approved') {
+    state = 'running';
+  } else {
+    state = 'calling';
+  }
+
+  // Tools waiting on the user and tools that already finished never time out.
+  const inFlight = state === 'calling' || state === 'approved' || state === 'running';
+  if (!inFlight) return state;
+
+  // An unparsable timestamp yields NaN, which compares false below,
+  // so such logs are never reported as timed out.
   const elapsed = Date.now() - new Date(log.timestamp).getTime();
+  return elapsed > TOOL_TIMEOUT_MS ? 'timed_out' : state;
 }
+// ── Status icon based on resolved state ──────────────────────────────
+/**
+ * Small (16px) glyph representing a tool call's resolved lifecycle state.
+ * Any state without a dedicated icon (including 'calling' and 'running')
+ * falls through to the pulsing ellipsis.
+ */
+function StatusIcon({ state }: { state: ToolState }) {
+  if (state === 'pending_approval') {
+    return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-yellow)' }} />;
+  }
+  if (state === 'approved') {
+    return <HourglassEmptyIcon sx={{ fontSize: 16, color: 'var(--accent-green)', opacity: 0.7 }} />;
+  }
+  if (state === 'rejected' || state === 'failed') {
+    return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'error.main' }} />;
+  }
+  if (state === 'timed_out') {
+    return <ErrorOutlineIcon sx={{ fontSize: 16, color: 'var(--muted-text)' }} />;
+  }
+  if (state === 'completed') {
+    return <CheckCircleOutlineIcon sx={{ fontSize: 16, color: 'success.main' }} />;
   }
+
+  // In-flight ('calling' / 'running') or unrecognised state: animated ellipsis.
+  return (
+    <MoreHorizIcon
+      sx={{
+        fontSize: 16,
+        color: 'var(--muted-text)',
+        animation: 'pulse 1.5s ease-in-out infinite',
+        '@keyframes pulse': {
+          '0%, 100%': { opacity: 0.4 },
+          '50%': { opacity: 1 },
+        },
+      }}
+    />
+  );
 }
 // ── Status chip label ───────────────────────────────────────────────
+/**
+ * Text for the status chip of a tool call.
+ *
+ * @param state - Resolved lifecycle state.
+ * @returns The label, or null for states that have none
+ *          ('completed', 'failed', and any unrecognised value).
+ */
+function statusLabel(state: ToolState): string | null {
+  if (state === 'calling' || state === 'running') return 'running';
+  if (state === 'pending_approval') return 'awaiting approval';
+  if (state === 'timed_out') return 'timed out';
+  // These two states are displayed verbatim.
+  if (state === 'approved' || state === 'rejected') return state;
+  return null;
 }
+/**
+ * CSS colour used for the status chip text.
+ *
+ * @param state - Resolved lifecycle state.
+ * @returns A CSS `var(--…)` expression; yellow is the fallback for
+ *          'pending_approval' and every state not listed explicitly.
+ */
+function statusColor(state: ToolState): string {
+  if (state === 'approved') return 'var(--accent-green)';
+  if (state === 'rejected' || state === 'failed') return 'var(--accent-red)';
+  if (state === 'timed_out') return 'var(--muted-text)';
+  return 'var(--accent-yellow)';
 }
 // ── Inline approval UI ──────────────────────────────────────────────
         return;
       }
+      // Show output if completed/failed, or args if still running
+      const s = resolveState(log);
+      if ((s === 'completed' || s === 'failed') && log.output) {
         showToolOutput(log);
       } else if (log.args) {
         const content = JSON.stringify(log.args, null, 2);
         });
         if (res.ok) {
+          // Optimistic update with proper state transitions
           const { updateTraceLog, updateCurrentTurnTrace, setProcessing } = useAgentStore.getState();
           updateTraceLog(toolCallId, '', {
+            state: approved ? 'approved' : 'rejected',
+            approvalStatus: approved ? 'approved' : 'rejected', // legacy compat
           });
           updateCurrentTurnTrace(activeSessionId);
           if (approved) setProcessing(true);
     >
       <Stack divider={<Box sx={{ borderBottom: '1px solid var(--tool-border)' }} />}>
         {tools.map((log) => {
+          const state = resolveState(log);
+          const clickable = state === 'completed' || state === 'failed' || !!log.args;
+          const label = statusLabel(state);
+          const isPendingApproval = state === 'pending_approval';
           return (
             <Box key={log.id}>
                   '&:hover': clickable && !isPendingApproval ? { bgcolor: 'var(--hover-bg)' } : {},
                 }}
               >
+                <StatusIcon state={state} />
                 <Typography
                   variant="body2"
                       fontSize: '0.65rem',
                       fontWeight: 600,
                       bgcolor: 'var(--accent-yellow-weak)',
+                      color: statusColor(state),
                       letterSpacing: '0.03em',
                     }}
                   />

frontend/src/hooks/useAgentWebSocket.ts CHANGED Viewed

@@ -187,7 +187,8 @@ export function useAgentWebSocket({
               text: `Agent is executing ${toolName}...`,
               tool: toolName,
               timestamp: new Date().toISOString(),
-              completed: false,
               args,
             };
             addTraceLog(log);
@@ -248,17 +249,13 @@ export function useAgentWebSocket({
           const output = (event.data?.output as string) || '';
           const success = event.data?.success as boolean;
-          // Mark the corresponding trace log as completed and store the output.
-          // If it had a pending approval, mark it as approved (tool_output means it ran).
-          const prevLog = useAgentStore.getState().traceLogs.find(
-            (l) => l.toolCallId === toolCallId
-          );
-          const wasApproval = prevLog?.approvalStatus === 'pending';
           updateTraceLog(toolCallId, toolName, {
-            completed: true,
             output,
             success,
-            ...(wasApproval ? { approvalStatus: 'approved' as const } : {}),
           });
           updateCurrentTurnTrace(sessionId);
@@ -367,13 +364,15 @@ export function useAgentWebSocket({
                   text: `Approval required for ${t.tool}`,
                   tool: t.tool,
                   timestamp: new Date().toISOString(),
-                  completed: false,
                   args: t.arguments as Record<string, unknown>,
-                  approvalStatus: 'pending',
                 });
               } else {
                 updateTraceLog(t.tool_call_id, t.tool, {
-                  approvalStatus: 'pending',
                   args: t.arguments as Record<string, unknown>,
                 });
               }
@@ -444,9 +443,27 @@ export function useAgentWebSocket({
           break;
         }
         case 'turn_complete':
           setProcessing(false);
-          setCurrentTurnMessageId(null); // Clear the current turn
           break;
         case 'compacted': {

               text: `Agent is executing ${toolName}...`,
               tool: toolName,
               timestamp: new Date().toISOString(),
+              state: 'running',
+              completed: false, // legacy compat
               args,
             };
             addTraceLog(log);
           const output = (event.data?.output as string) || '';
           const success = event.data?.success as boolean;
+          // Mark the tool as completed/failed with its output
           updateTraceLog(toolCallId, toolName, {
+            state: success ? 'completed' : 'failed',
+            completed: true, // legacy
             output,
             success,
+            approvalStatus: 'approved', // legacy: if we got output, it was approved/auto
           });
           updateCurrentTurnTrace(sessionId);
                   text: `Approval required for ${t.tool}`,
                   tool: t.tool,
                   timestamp: new Date().toISOString(),
+                  state: 'pending_approval',
+                  completed: false, // legacy
                   args: t.arguments as Record<string, unknown>,
+                  approvalStatus: 'pending', // legacy
                 });
               } else {
                 updateTraceLog(t.tool_call_id, t.tool, {
+                  state: 'pending_approval',
+                  approvalStatus: 'pending', // legacy
                   args: t.arguments as Record<string, unknown>,
                 });
               }
           break;
         }
+        // ── Tool state change (sent by backend after approval decisions) ──
+        case 'tool_state_change': {
+          const tcId = (event.data?.tool_call_id as string) || '';
+          const tcTool = (event.data?.tool as string) || '';
+          const newState = (event.data?.state as string) || '';
+          if (tcId && newState) {
+            updateTraceLog(tcId, tcTool, {
+              state: newState as import('@/types/agent').ToolState,
+              // Legacy compat
+              ...(newState === 'approved' ? { approvalStatus: 'approved' as const } : {}),
+              ...(newState === 'rejected' ? { approvalStatus: 'rejected' as const, completed: true } : {}),
+            });
+            if (sessionId) updateCurrentTurnTrace(sessionId);
+          }
+          break;
+        }
         case 'turn_complete':
           setProcessing(false);
+          setCurrentTurnMessageId(null);
           break;
         case 'compacted': {

frontend/src/types/agent.ts CHANGED Viewed

@@ -52,20 +52,37 @@ export interface ApprovalBatch {
   count: number;
 }
 export type ApprovalStatus = 'none' | 'pending' | 'approved' | 'rejected';
 export interface TraceLog {
   id: string;
-  toolCallId?: string; // Backend tool_call_id for reliable matching
   type: 'call' | 'output';
   text: string;
   tool: string;
   timestamp: string;
   completed?: boolean;
-  args?: Record<string, unknown>; // Store args for auto-exec jobs
-  output?: string; // Store tool output for display
-  success?: boolean; // Whether the tool call succeeded
-  /** Approval state for tools that need user confirmation */
   approvalStatus?: ApprovalStatus;
   /** Parsed job info (URL, status, logs) for hf_jobs */
   jobUrl?: string;

   count: number;
 }
+/**
+ * Single state field for each tool call lifecycle.
+ * Follows the Vercel AI SDK pattern: clear, non-overlapping states.
+ *
+ * Intended progression (per the commit description):
+ *   calling → pending_approval → approved → running
+ *           → completed | failed | rejected | timed_out
+ *
+ * NOTE(review): in the hunks visible here, no event handler assigns
+ * 'calling' or 'timed_out'; both are only produced by the frontend's
+ * legacy-field inference. Confirm whether the backend is expected to
+ * send them.
+ */
+export type ToolState =
+  | 'calling'           // tool_call received, execution starting
+  | 'pending_approval'  // waiting for user to approve/reject
+  | 'approved'          // user approved, waiting for execution to start
+  | 'running'           // execution in progress
+  | 'completed'         // execution finished successfully
+  | 'failed'            // execution finished with error
+  | 'rejected'          // user rejected the tool call
+  | 'timed_out';        // no response after timeout
+// Keep backward compat alias
 export type ApprovalStatus = 'none' | 'pending' | 'approved' | 'rejected';
 export interface TraceLog {
   id: string;
+  toolCallId?: string;
   type: 'call' | 'output';
   text: string;
   tool: string;
   timestamp: string;
+  /** Single source of truth for tool lifecycle state */
+  state: ToolState;
+  args?: Record<string, unknown>;
+  output?: string;
+  success?: boolean;
+  // Legacy fields — kept for backward compat with persisted data
   completed?: boolean;
   approvalStatus?: ApprovalStatus;
   /** Parsed job info (URL, status, logs) for hf_jobs */
   jobUrl?: string;

frontend/src/types/events.ts CHANGED Viewed

@@ -12,6 +12,7 @@ export type EventType =
   | 'tool_output'
   | 'tool_log'
   | 'approval_required'
   | 'turn_complete'
   | 'compacted'
   | 'error'

   | 'tool_output'
   | 'tool_log'
   | 'approval_required'
+  | 'tool_state_change'
   | 'turn_complete'
   | 'compacted'
   | 'error'