Maheen001 committed · verified
Commit da51f27 · 1 Parent(s): 135bd9e

Update ui/voice_agent_ui.py

Files changed (1)
  1. ui/voice_agent_ui.py +123 -232
ui/voice_agent_ui.py CHANGED
@@ -1,5 +1,5 @@
 """
-Voice Agent UI - Autonomous voice-controlled agent
+Voice Agent UI - Autonomous voice-controlled agent (Gradio 6 Safe Version)
 """
 
 import gradio as gr
@@ -10,309 +10,200 @@ import time
 
 
 def create_voice_agent_ui(agent):
-    """Create voice agent interface"""
-
     with gr.Row():
-        # Left column - Voice control
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 🎤 Voice Control
 
-            Click the microphone button and speak your command.
-            The agent will autonomously execute your request.
             """)
-
-            # Audio input
             audio_input = gr.Audio(
                 sources=["microphone"],
                 type="filepath",
-                label="Speak Your Command"
             )
-
-            # Manual text input as fallback
             text_input = gr.Textbox(
-                label="Or Type Your Command",
-                placeholder="Example: Extract deadlines from my PDFs and create calendar events",
                 lines=3
             )
-
             # Execute button
             execute_btn = gr.Button(
                 "🚀 Execute Command",
-                variant="primary",
-                size="lg"
             )
-
            # Status indicator
             status_box = gr.Textbox(
                 label="Status",
                 value="Ready",
                 interactive=False
             )
-
             gr.Markdown("---")
-
-            # Upload files for agent to process
-            voice_file_upload = gr.File(
-                label="Upload Files for Agent",
                 file_count="multiple",
                 file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
             )
-
             uploaded_files_list = gr.Textbox(
                 label="Available Files",
-                placeholder="No files uploaded",
-                interactive=False,
-                lines=4
             )
-
-        # Right column - Agent execution trace
         with gr.Column(scale=2):
-            gr.Markdown("### 🤖 Agent Thoughts & Execution")
-
-            # Chat-like interface for agent thoughts
             thought_trace = gr.Chatbot(
                 label="Agent Reasoning",
-                height=400,
-                type="messages"
             )
-
-            # Final response
             final_response = gr.Textbox(
                 label="Final Response",
                 lines=6,
-                placeholder="Agent's final answer will appear here..."
             )
-
-            # Audio output
             audio_output = gr.Audio(
                 label="Voice Response",
                 type="filepath",
                 autoplay=True
             )
-
-            # Download outputs
             with gr.Accordion("📥 Generated Files", open=False):
-                outputs_files = gr.File(
-                    label="Download Generated Files",
                     file_count="multiple"
                 )
-
-    # State variables
     uploaded_files_state = gr.State([])
-
-    # Example commands
-    with gr.Row():
-        gr.Markdown("""
-        ### 💡 Example Commands
-
-        Try these voice commands:
-        - "Extract all deadlines from my PDFs and add them to my calendar"
-        - "Summarize this document and send me a professional email summary"
-        - "Organize my uploaded files by type"
-        - "Find all documents mentioning invoices and extract amounts"
-        - "Create a calendar event for tomorrow at 2 PM titled Team Meeting"
-        - "Draft a friendly email to John about the project update"
-        """)
-
-    # Event handlers
     async def handle_voice_file_upload(files):
-        """Handle file uploads for voice agent"""
         if not files:
             return "No files uploaded", []
-
-        file_list = []
         file_info_text = []
-
         for file in files:
-            from utils.file_utils import copy_file, get_file_info
-
             dest_path = f"data/uploads/{Path(file.name).name}"
             copy_file(file.name, dest_path)
-
             info = get_file_info(dest_path)
-            file_list.append(dest_path)
-            file_info_text.append(f"✓ {info['name']} ({info['size_mb']} MB)")
-
-            # Add to RAG
-            await agent.process_files_to_rag([{'path': dest_path, 'name': info['name']}])
-
-        return "\n".join(file_info_text), file_list
-
     async def process_audio_command(audio_file, text_command, files_list):
-        """Process voice or text command"""
-
-        # Determine input
         if audio_file and not text_command:
-            # Transcribe audio
-            yield [], "🎤 Transcribing audio...", "", None, None
-            command_text = await speech_to_text(audio_file)
-
-            if not command_text:
-                yield [], "❌ Failed to transcribe audio", "", None, None
                 return
-
-            yield [], f"✓ Transcribed: {command_text}", "", None, None
-            await asyncio.sleep(0.5)
-
         elif text_command:
-            command_text = text_command
-
         else:
-            yield [], "⚠️ Please provide a voice or text command", "", None, None
             return
-
-        # Update status
-        yield [], f"🤖 Planning: {command_text}", "", None, None
-
-        # Execute with agent
-        thoughts_display = []
-        final_answer = ""
-
         try:
-            # Stream agent execution
-            async for thought in agent.execute(command_text, files_list, stream_thoughts=True):
                 if thought:
-                    # Format thought for display
-                    thought_msg = format_thought_message(thought)
-                    thoughts_display.append(thought_msg)
-
-                    # Update UI
-                    status = get_status_from_thought(thought)
-                    yield thoughts_display, status, "", None, None
-
-                    await asyncio.sleep(0.1)  # Small delay for UI update
-
-            # Get final answer
-            final_answer, all_thoughts = await agent.execute(command_text, files_list, stream_thoughts=False)
-
-            # Generate voice response
-            yield thoughts_display, "🔊 Generating voice response...", final_answer, None, None
-
-            if final_answer:
-                audio_path = await text_to_speech(final_answer)
-
-                # Collect generated files
-                output_files = collect_output_files()
-
-                yield thoughts_display, "✓ Complete!", final_answer, audio_path, output_files
-            else:
-                yield thoughts_display, "✓ Complete!", "Task executed successfully.", None, None
-
         except Exception as e:
-            error_msg = f"❌ Error: {str(e)}"
-            yield thoughts_display, error_msg, error_msg, None, None
-
-    def format_thought_message(thought):
-        """Format thought as chat message"""
-        thought_type = thought.type
-        content = thought.content
-
-        # Choose role and styling based on thought type
-        if thought_type == 'planning':
-            role = "assistant"
-            icon = "🧠"
-            metadata = {"title": "🧠 Planning"}
-        elif thought_type == 'tool_call':
-            role = "assistant"
-            icon = "🔧"
-            tool_name = thought.tool_name or "unknown"
-            metadata = {"title": f"🔧 Using Tool: {tool_name}"}
-        elif thought_type == 'reflection':
-            role = "assistant"
-            icon = "💭"
-            metadata = {"title": "💭 Reflecting"}
-        elif thought_type == 'answer':
-            role = "assistant"
-            icon = "✅"
-            metadata = {"title": "✅ Final Answer"}
-        else:
-            role = "assistant"
-            icon = "ℹ️"
-            metadata = {"title": "ℹ️ Info"}
-
-        return {
-            "role": role,
-            "content": f"{icon} {content}",
-            "metadata": metadata
-        }
-
-    def get_status_from_thought(thought):
-        """Get status message from thought"""
-        if thought.type == 'planning':
-            return "🧠 Planning execution..."
-        elif thought.type == 'tool_call':
-            return f"🔧 Executing: {thought.tool_name or 'tool'}..."
-        elif thought.type == 'reflection':
-            return "💭 Analyzing results..."
-        elif thought.type == 'answer':
-            return "✅ Complete!"
-        else:
-            return "🤖 Processing..."
-
     def collect_output_files():
-        """Collect generated output files"""
         output_dir = Path("data/outputs")
-        if not output_dir.exists():
-            return None
-
-        # Get recent files (last 5 minutes)
-        recent_files = []
-        cutoff_time = time.time() - 300
-
-        for file_path in output_dir.glob("*"):
-            if file_path.is_file() and file_path.stat().st_mtime > cutoff_time:
-                recent_files.append(str(file_path))
-
-        return recent_files if recent_files else None
-
-    # Wire up events
     voice_file_upload.change(
         fn=handle_voice_file_upload,
         inputs=[voice_file_upload],
         outputs=[uploaded_files_list, uploaded_files_state]
     )
-
     execute_btn.click(
         fn=process_audio_command,
        inputs=[audio_input, text_input, uploaded_files_state],
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )
-
-    # Quick action buttons
-    gr.Markdown("### ⚡ Quick Actions")
-
-    with gr.Row():
-        quick_summarize = gr.Button("📝 Summarize All Documents", size="sm")
-        quick_calendar = gr.Button("📅 Extract & Create Events", size="sm")
-        quick_organize = gr.Button("🗂️ Organize Files", size="sm")
-        quick_search = gr.Button("🔍 Search Documents", size="sm")
-
-    async def quick_action(action_text, files_list):
-        """Execute quick action"""
-        async for update in process_audio_command(None, action_text, files_list):
-            yield update
-
-    quick_summarize.click(
-        fn=lambda f: quick_action("Summarize all my uploaded documents", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
-
-    quick_calendar.click(
-        fn=lambda f: quick_action("Extract all dates and deadlines from my documents and create calendar events", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
-
-    quick_organize.click(
-        fn=lambda f: quick_action("Organize all my files by type", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
-
-    quick_search.click(
-        fn=lambda f: quick_action("Search my documents for important information and summarize findings", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
 
 
 def create_voice_agent_ui(agent):
+    """Create voice agent interface (error-free for Gradio 6)"""
+
     with gr.Row():
+        # --------------------------------------
+        # LEFT COLUMN - INPUTS
+        # --------------------------------------
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 🎤 Voice Control
 
+            Speak your command or type it manually.
+            The agent will autonomously execute tasks using MCP tools.
             """)
+
+            # Audio input (microphone)
             audio_input = gr.Audio(
                 sources=["microphone"],
                 type="filepath",
+                label="🎙️ Speak Your Command"
             )
+
+            # Manual command input
             text_input = gr.Textbox(
+                label="⌨️ Or Type Command",
+                placeholder="Example: Extract deadlines from all PDFs",
                 lines=3
             )
+
             # Execute button
             execute_btn = gr.Button(
                 "🚀 Execute Command",
+                variant="primary"
             )
+
+            # Status
             status_box = gr.Textbox(
                 label="Status",
                 value="Ready",
                 interactive=False
             )
+
             gr.Markdown("---")
+
+            # File uploader
+            voice_file_upload = gr.Files(
+                label="📁 Upload Files for Agent",
                 file_count="multiple",
                 file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
             )
+
             uploaded_files_list = gr.Textbox(
                 label="Available Files",
+                placeholder="No files uploaded yet",
+                lines=4,
+                interactive=False
             )
+
+    # --------------------------------------
+    # RIGHT COLUMN - AGENT EXECUTION TRACE
+    # --------------------------------------
+    with gr.Row():
         with gr.Column(scale=2):
+            gr.Markdown("### 🤖 Agent Reasoning & Execution Trace")
+
+            # FIX: No "type=messages" (removed)
             thought_trace = gr.Chatbot(
                 label="Agent Reasoning",
+                height=400
             )
+
             final_response = gr.Textbox(
                 label="Final Response",
                 lines=6,
             )
+
             audio_output = gr.Audio(
                 label="Voice Response",
                 type="filepath",
                 autoplay=True
             )
+
             with gr.Accordion("📥 Generated Files", open=False):
+                outputs_files = gr.Files(
+                    label="Download Outputs",
                     file_count="multiple"
                 )
+
+    # STATE: store uploaded files
     uploaded_files_state = gr.State([])
+
+    # ---------------------------------------------------------
+    # FILE UPLOAD HANDLER
+    # ---------------------------------------------------------
     async def handle_voice_file_upload(files):
+        """Handle file uploads"""
         if not files:
             return "No files uploaded", []
+
+        file_paths = []
         file_info_text = []
+
+        from utils.file_utils import copy_file, get_file_info
+
         for file in files:
             dest_path = f"data/uploads/{Path(file.name).name}"
             copy_file(file.name, dest_path)
+
             info = get_file_info(dest_path)
+            file_paths.append(dest_path)
+            file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")
+
+            await agent.process_files_to_rag([
+                {"path": dest_path, "name": info['name']}
+            ])
+
+        return "\n".join(file_info_text), file_paths
+
+    # ---------------------------------------------------------
+    # MAIN COMMAND PROCESSOR
+    # ---------------------------------------------------------
     async def process_audio_command(audio_file, text_command, files_list):
+        """Process both voice + text commands"""
+
+        # Step 1 - Identify user command
         if audio_file and not text_command:
+            yield [], "🎤 Transcribing...", "", None, None
+            cmd = await speech_to_text(audio_file)
+
+            if not cmd:
+                yield [], "❌ Failed to transcribe", "", None, None
                 return
+
+            yield [], f"🎤 Transcribed: {cmd}", "", None, None
         elif text_command:
+            cmd = text_command
         else:
+            yield [], "⚠️ Provide voice or text", "", None, None
             return
+
+        # Step 2 - Show planning
+        yield [], "🧠 Planning...", "", None, None
+
+        thoughts_ui = []
+
+        # Step 3 - Stream agent thoughts
         try:
+            async for thought in agent.execute(cmd, files_list, stream_thoughts=True):
                 if thought:
+                    msg = {
+                        "role": "assistant",
+                        "content": f"🧠 {thought.content}"
+                    }
+                    thoughts_ui.append(msg)
+                    yield thoughts_ui, "🔧 Working...", "", None, None
+
+            # Step 4 - Final answer
+            final_answer, _ = await agent.execute(cmd, files_list, stream_thoughts=False)
+
+            yield thoughts_ui, "🔊 Generating voice...", final_answer, None, None
+
+            audio_file_path = await text_to_speech(final_answer)
+            files_generated = collect_output_files()
+
+            yield thoughts_ui, "✅ Complete!", final_answer, audio_file_path, files_generated
+
         except Exception as e:
+            err = f"❌ Error: {str(e)}"
+            yield thoughts_ui, err, err, None, None
+
+    # ---------------------------------------------------------
+    # COLLECT OUTPUT FILES
+    # ---------------------------------------------------------
     def collect_output_files():
         output_dir = Path("data/outputs")
+        if not output_dir.exists(): return None
+
+        cutoff = time.time() - 300
+        files = [
+            str(f) for f in output_dir.glob("*")
+            if f.is_file() and f.stat().st_mtime > cutoff
+        ]
+        return files or None
+
+    # ---------------------------------------------------------
+    # CONNECT EVENTS
+    # ---------------------------------------------------------
     voice_file_upload.change(
         fn=handle_voice_file_upload,
         inputs=[voice_file_upload],
         outputs=[uploaded_files_list, uploaded_files_state]
     )
+
     execute_btn.click(
         fn=process_audio_command,
         inputs=[audio_input, text_input, uploaded_files_state],
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )
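
For orientation, here is a minimal sketch of how the updated create_voice_agent_ui might be mounted in an app. Only create_voice_agent_ui(agent) comes from the file changed in this commit; the agents.voice_agent module, the VoiceAgent class, its constructor, and the launch settings below are illustrative assumptions, since the agent implementation is not part of this diff.

# app.py - hypothetical entry point (not part of this commit)
import gradio as gr

from ui.voice_agent_ui import create_voice_agent_ui
# Assumed location and class name; the agent must expose execute(...) and
# process_files_to_rag(...), which the UI code above calls.
from agents.voice_agent import VoiceAgent

agent = VoiceAgent()  # constructor arguments are project-specific

# The UI function opens gr.Row()/gr.Column() contexts, so it must be called
# inside a gr.Blocks() context that owns the page layout.
with gr.Blocks(title="Voice Agent") as demo:
    create_voice_agent_ui(agent)

if __name__ == "__main__":
    demo.launch()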