Maheen001 committed on
Commit
3163565
·
verified ·
1 Parent(s): 398cd77

Update ui/voice_agent_ui.py

Browse files
Files changed (1) hide show
  1. ui/voice_agent_ui.py +62 -39
ui/voice_agent_ui.py CHANGED
@@ -1,5 +1,6 @@
1
  """
2
- Voice Agent UI - Autonomous voice-controlled agent (Gradio 6.0 compatible)
 
3
  """
4
 
5
  import gradio as gr
@@ -18,8 +19,16 @@ def create_voice_agent_ui(agent):
18
  gr.Markdown("""
19
  ### 🎀 Voice Control
20
 
21
- Speak your command or type it manually.
22
- The agent will autonomously execute tasks using MCP tools.
 
 
 
 
 
 
 
 
23
  """)
24
 
25
  # Audio input
@@ -39,13 +48,14 @@ def create_voice_agent_ui(agent):
39
  # Execute button
40
  execute_btn = gr.Button(
41
  "πŸš€ Execute Command",
42
- variant="primary"
 
43
  )
44
 
45
  # Status
46
  status_box = gr.Textbox(
47
- label="Status",
48
- value="Ready",
49
  interactive=False
50
  )
51
 
@@ -59,33 +69,39 @@ def create_voice_agent_ui(agent):
59
  )
60
 
61
  uploaded_files_list = gr.Textbox(
62
- label="Available Files",
63
  placeholder="No files uploaded yet",
64
  lines=4,
65
  interactive=False
66
  )
67
 
68
- # RIGHT COLUMN — AGENT EXECUTION TRACE
69
  with gr.Column(scale=2):
70
- gr.Markdown("### πŸ€– Agent Reasoning & Execution Trace")
71
 
72
- # Chatbot (Gradio 6.0 - no type parameter)
73
  thought_trace = gr.Chatbot(
74
- label="Agent Reasoning",
75
- height=400
 
 
76
  )
77
 
 
78
  final_response = gr.Textbox(
79
- label="Final Response",
80
  lines=6,
 
81
  )
82
 
 
83
  audio_output = gr.Audio(
84
- label="πŸ”Š Voice Response",
85
  type="filepath",
86
  autoplay=True
87
  )
88
 
 
89
  with gr.Accordion("πŸ“₯ Generated Files", open=False):
90
  outputs_files = gr.Files(
91
  label="Download Outputs",
@@ -126,33 +142,34 @@ def create_voice_agent_ui(agent):
126
 
127
  # MAIN COMMAND PROCESSOR
128
  async def process_audio_command(audio_file, text_command, files_list):
129
- """Process voice + text commands"""
130
 
131
  # Step 1 — Identify user command
132
  if audio_file and not text_command:
133
  # Transcribe
134
- yield [], "🎀 Transcribing...", "", None, None
135
  cmd = await speech_to_text(audio_file)
136
 
137
  if not cmd:
138
- yield [], "⚠️ Failed to transcribe", "", None, None
139
  return
140
  else:
141
  yield [], f"🎀 Transcribed: {cmd}", "", None, None
142
  elif text_command:
143
  cmd = text_command
144
  else:
145
- yield [], "⚠️ Provide voice or text command", "", None, None
146
  return
147
 
148
  # Show planning state
149
- yield [], "🧠 Planning...", "", None, None
150
 
151
  try:
152
  # Call agent (non-streaming)
153
  final_answer, thoughts = await agent.execute(cmd, files_list)
154
 
155
- # Convert AgentThought objects into Gradio 6.0 format (tuples)
 
156
  messages = []
157
  for t in thoughts:
158
  # Handle both AgentThought objects and dicts
@@ -169,31 +186,32 @@ def create_voice_agent_ui(agent):
169
  t_content = str(t)
170
  t_tool = None
171
 
 
172
  icon = "ℹ️"
173
  title = ""
174
  if t_type == "planning":
175
  icon = "🧠"
176
- title = " Planning"
177
  elif t_type == "tool_call":
178
  icon = "πŸ”§"
179
- title = f" Tool: {t_tool}" if t_tool else " Tool"
180
  elif t_type == "reflection":
181
  icon = "πŸ’­"
182
- title = " Reflection"
183
  elif t_type == "answer":
184
  icon = "βœ…"
185
- title = " Answer"
186
 
187
- # Gradio 6.0 format: tuple (role, content)
188
- messages.append((
189
- "assistant",
190
- f"{icon}{title} β€” {t_content}"
191
- ))
192
 
193
  # Show results
194
- yield messages, "πŸ”Š Generating voice...", final_answer, None, None
195
 
196
- # TTS
197
  try:
198
  audio_path = await text_to_speech(final_answer)
199
  except Exception:
@@ -203,18 +221,23 @@ def create_voice_agent_ui(agent):
203
  output_dir = Path("data/outputs")
204
  files_generated = []
205
  if output_dir.exists():
206
- cutoff = time.time() - 300
207
  files_generated = [str(f) for f in output_dir.glob("*") if f.is_file() and f.stat().st_mtime > cutoff]
208
 
209
  yield messages, "βœ… Complete!", final_answer, audio_path, files_generated
210
 
211
  except Exception as e:
212
- err = f"⚠️ Error: {str(e)}"
213
- # Error message in Gradio 6.0 format: tuple
214
- error_messages = [("assistant", err)]
215
- yield error_messages, err, err, None, None
216
-
217
- # CONNECT EVENTS (using run_sync wrapper for async functions)
 
 
 
 
 
218
  def handle_voice_file_upload_sync(files):
219
  """Sync wrapper for async function"""
220
  return asyncio.run(handle_voice_file_upload(files))
@@ -231,4 +254,4 @@ def create_voice_agent_ui(agent):
231
  outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
232
  )
233
 
234
- return gr.Column() # Return a component to avoid Gradio warning
 
1
  """
2
+ Voice Agent UI - Autonomous voice-controlled agent
3
+ FIXED: Proper Gradio 6.0 messages format with type="messages"
4
  """
5
 
6
  import gradio as gr
 
19
  gr.Markdown("""
20
  ### 🎀 Voice Control
21
 
22
+ **How to use:**
23
+ 1. Upload files (optional)
24
+ 2. Speak OR type your command
25
+ 3. Click Execute
26
+ 4. Watch agent work!
27
+
28
+ **Example commands:**
29
+ - "Extract text from my PDF"
30
+ - "Summarize this document"
31
+ - "Organize my files"
32
  """)
33
 
34
  # Audio input
 
48
  # Execute button
49
  execute_btn = gr.Button(
50
  "πŸš€ Execute Command",
51
+ variant="primary",
52
+ size="lg"
53
  )
54
 
55
  # Status
56
  status_box = gr.Textbox(
57
+ label="πŸ“Š Status",
58
+ value="Ready to execute commands",
59
  interactive=False
60
  )
61
 
 
69
  )
70
 
71
  uploaded_files_list = gr.Textbox(
72
+ label="πŸ“‚ Available Files",
73
  placeholder="No files uploaded yet",
74
  lines=4,
75
  interactive=False
76
  )
77
 
78
+ # RIGHT COLUMN — AGENT EXECUTION
79
  with gr.Column(scale=2):
80
+ gr.Markdown("### πŸ€– Agent Execution & Results")
81
 
82
+ # Agent Reasoning Trace (Chatbot with type="messages")
83
  thought_trace = gr.Chatbot(
84
+ label="🧠 Agent Reasoning Steps",
85
+ height=400,
86
+ type="messages", # REQUIRED for dict format
87
+ show_copy_button=True
88
  )
89
 
90
+ # Final Response
91
  final_response = gr.Textbox(
92
+ label="βœ… Final Answer",
93
  lines=6,
94
+ show_copy_button=True
95
  )
96
 
97
+ # Voice Output
98
  audio_output = gr.Audio(
99
+ label="πŸ”Š Voice Response (if available)",
100
  type="filepath",
101
  autoplay=True
102
  )
103
 
104
+ # Generated Files
105
  with gr.Accordion("πŸ“₯ Generated Files", open=False):
106
  outputs_files = gr.Files(
107
  label="Download Outputs",
 
142
 
143
  # MAIN COMMAND PROCESSOR
144
  async def process_audio_command(audio_file, text_command, files_list):
145
+ """Process voice + text commands - FIXED for Gradio 6.0"""
146
 
147
  # Step 1 — Identify user command
148
  if audio_file and not text_command:
149
  # Transcribe
150
+ yield [], "🎀 Transcribing audio...", "", None, None
151
  cmd = await speech_to_text(audio_file)
152
 
153
  if not cmd:
154
+ yield [], "⚠️ Failed to transcribe audio", "", None, None
155
  return
156
  else:
157
  yield [], f"🎀 Transcribed: {cmd}", "", None, None
158
  elif text_command:
159
  cmd = text_command
160
  else:
161
+ yield [], "⚠️ Please provide a voice or text command", "", None, None
162
  return
163
 
164
  # Show planning state
165
+ yield [], "🧠 Agent is planning...", "", None, None
166
 
167
  try:
168
  # Call agent (non-streaming)
169
  final_answer, thoughts = await agent.execute(cmd, files_list)
170
 
171
+ # Convert AgentThought objects to Gradio 6.0 messages format
172
+ # MUST be dict with "role" and "content" keys
173
  messages = []
174
  for t in thoughts:
175
  # Handle both AgentThought objects and dicts
 
186
  t_content = str(t)
187
  t_tool = None
188
 
189
+ # Format message with icon
190
  icon = "ℹ️"
191
  title = ""
192
  if t_type == "planning":
193
  icon = "🧠"
194
+ title = "Planning"
195
  elif t_type == "tool_call":
196
  icon = "πŸ”§"
197
+ title = f"Tool: {t_tool}" if t_tool else "Tool Call"
198
  elif t_type == "reflection":
199
  icon = "πŸ’­"
200
+ title = "Reflection"
201
  elif t_type == "answer":
202
  icon = "βœ…"
203
+ title = "Answer"
204
 
205
+ # Gradio 6.0 format: dict with "role" and "content"
206
+ messages.append({
207
+ "role": "assistant",
208
+ "content": f"{icon} **{title}**\n\n{t_content}"
209
+ })
210
 
211
  # Show results
212
+ yield messages, "πŸ”Š Generating voice response...", final_answer, None, None
213
 
214
+ # TTS (optional - may fail if no API key)
215
  try:
216
  audio_path = await text_to_speech(final_answer)
217
  except Exception:
 
221
  output_dir = Path("data/outputs")
222
  files_generated = []
223
  if output_dir.exists():
224
+ cutoff = time.time() - 300 # Last 5 minutes
225
  files_generated = [str(f) for f in output_dir.glob("*") if f.is_file() and f.stat().st_mtime > cutoff]
226
 
227
  yield messages, "βœ… Complete!", final_answer, audio_path, files_generated
228
 
229
  except Exception as e:
230
+ import traceback
231
+ err_msg = f"⚠️ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
232
+
233
+ # Error message in Gradio 6.0 format
234
+ error_messages = [{
235
+ "role": "assistant",
236
+ "content": f"❌ **Error**\n\n{str(e)}"
237
+ }]
238
+ yield error_messages, f"❌ Error: {str(e)}", err_msg, None, None
239
+
240
+ # CONNECT EVENTS
241
  def handle_voice_file_upload_sync(files):
242
  """Sync wrapper for async function"""
243
  return asyncio.run(handle_voice_file_upload(files))
 
254
  outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
255
  )
256
 
257
+ return gr.Column() # Return component to satisfy Gradio