Maheen001 committed on
Commit
61fccc8
·
verified ·
1 Parent(s): 72aa415

Update ui/voice_agent_ui.py

Browse files
Files changed (1) hide show
  1. ui/voice_agent_ui.py +42 -38
ui/voice_agent_ui.py CHANGED
@@ -1,7 +1,5 @@
1
  """
2
  Voice Agent UI - Autonomous voice-controlled agent (Gradio-compatible)
3
- This version calls the non-streaming agent.execute(...) and converts AgentThought
4
- objects into Chatbot messages for display.
5
  """
6
 
7
  import gradio as gr
@@ -12,12 +10,10 @@ import time
12
 
13
 
14
  def create_voice_agent_ui(agent):
15
- """Create voice agent interface (error-free for Gradio 6)"""
16
 
17
  with gr.Row():
18
- # --------------------------------------
19
  # LEFT COLUMN — INPUTS
20
- # --------------------------------------
21
  with gr.Column(scale=1):
22
  gr.Markdown("""
23
  ### 🎀 Voice Control
@@ -26,7 +22,7 @@ def create_voice_agent_ui(agent):
26
  The agent will autonomously execute tasks using MCP tools.
27
  """)
28
 
29
- # Audio input (microphone)
30
  audio_input = gr.Audio(
31
  sources=["microphone"],
32
  type="filepath",
@@ -69,17 +65,15 @@ def create_voice_agent_ui(agent):
69
  interactive=False
70
  )
71
 
72
- # --------------------------------------
73
- # RIGHT COLUMN — AGENT EXECUTION TRACE
74
- # --------------------------------------
75
- with gr.Row():
76
  with gr.Column(scale=2):
77
  gr.Markdown("### πŸ€– Agent Reasoning & Execution Trace")
78
 
79
- # Chatbot
80
  thought_trace = gr.Chatbot(
81
  label="Agent Reasoning",
82
- height=400
 
83
  )
84
 
85
  final_response = gr.Textbox(
@@ -88,7 +82,7 @@ def create_voice_agent_ui(agent):
88
  )
89
 
90
  audio_output = gr.Audio(
91
- label="Voice Response",
92
  type="filepath",
93
  autoplay=True
94
  )
@@ -102,9 +96,7 @@ def create_voice_agent_ui(agent):
102
  # STATE: store uploaded files
103
  uploaded_files_state = gr.State([])
104
 
105
- # ---------------------------------------------------------
106
  # FILE UPLOAD HANDLER
107
- # ---------------------------------------------------------
108
  async def handle_voice_file_upload(files):
109
  """Handle file uploads"""
110
  if not files:
@@ -114,16 +106,18 @@ def create_voice_agent_ui(agent):
114
  file_info_text = []
115
 
116
  from utils.file_utils import copy_file, get_file_info
 
117
 
118
  for file in files:
119
- dest_path = f"data/uploads/{Path(file.name).name}"
120
- copy_file(file.name, dest_path)
 
121
 
122
  info = get_file_info(dest_path)
123
  file_paths.append(dest_path)
124
  file_info_text.append(f"β€’ {info['name']} ({info['size_mb']} MB)")
125
 
126
- # Add to RAG (best-effort)
127
  try:
128
  await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
129
  except Exception:
@@ -131,28 +125,25 @@ def create_voice_agent_ui(agent):
131
 
132
  return "\n".join(file_info_text), file_paths
133
 
134
- # ---------------------------------------------------------
135
- # MAIN COMMAND PROCESSOR (non-streaming agent)
136
- # ---------------------------------------------------------
137
  async def process_audio_command(audio_file, text_command, files_list):
138
- """Process both voice + text commands"""
139
 
140
  # Step 1 — Identify user command
141
  if audio_file and not text_command:
142
- # transcribe
143
- status_msg = "🎀 Transcribing..."
144
- yield [], status_msg, "", None, None
145
  cmd = await speech_to_text(audio_file)
146
 
147
  if not cmd:
148
- yield [], "❌ Failed to transcribe", "", None, None
149
  return
150
  else:
151
  yield [], f"🎀 Transcribed: {cmd}", "", None, None
152
  elif text_command:
153
  cmd = text_command
154
  else:
155
- yield [], "⚠️ Provide voice or text", "", None, None
156
  return
157
 
158
  # Show planning state
@@ -162,10 +153,10 @@ def create_voice_agent_ui(agent):
162
  # Call agent (non-streaming)
163
  final_answer, thoughts = await agent.execute(cmd, files_list)
164
 
165
- # Convert AgentThought objects into chatbot messages (role, content)
166
  messages = []
167
  for t in thoughts:
168
- # t might be AgentThought instance or dict (if agent serialized); handle both
169
  if hasattr(t, "type"):
170
  t_type = t.type
171
  t_content = t.content
@@ -194,9 +185,13 @@ def create_voice_agent_ui(agent):
194
  icon = "βœ…"
195
  title = " Answer"
196
 
197
- messages.append(( "assistant", f"{icon}{title} β€” {t_content}" ))
 
 
 
 
198
 
199
- # show results
200
  yield messages, "πŸ”Š Generating voice...", final_answer, None, None
201
 
202
  # TTS
@@ -205,7 +200,7 @@ def create_voice_agent_ui(agent):
205
  except Exception:
206
  audio_path = None
207
 
208
- # collect recent outputs
209
  output_dir = Path("data/outputs")
210
  files_generated = []
211
  if output_dir.exists():
@@ -215,14 +210,21 @@ def create_voice_agent_ui(agent):
215
  yield messages, "βœ… Complete!", final_answer, audio_path, files_generated
216
 
217
  except Exception as e:
218
- err = f"❌ Error: {str(e)}"
219
- yield [], err, err, None, None
 
 
 
 
 
 
 
 
 
 
220
 
221
- # ---------------------------------------------------------
222
- # CONNECT EVENTS
223
- # ---------------------------------------------------------
224
  voice_file_upload.change(
225
- fn=handle_voice_file_upload,
226
  inputs=[voice_file_upload],
227
  outputs=[uploaded_files_list, uploaded_files_state]
228
  )
@@ -232,3 +234,5 @@ def create_voice_agent_ui(agent):
232
  inputs=[audio_input, text_input, uploaded_files_state],
233
  outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
234
  )
 
 
 
1
  """
2
  Voice Agent UI - Autonomous voice-controlled agent (Gradio-compatible)
 
 
3
  """
4
 
5
  import gradio as gr
 
10
 
11
 
12
  def create_voice_agent_ui(agent):
13
+ """Create voice agent interface"""
14
 
15
  with gr.Row():
 
16
  # LEFT COLUMN — INPUTS
 
17
  with gr.Column(scale=1):
18
  gr.Markdown("""
19
  ### 🎀 Voice Control
 
22
  The agent will autonomously execute tasks using MCP tools.
23
  """)
24
 
25
+ # Audio input
26
  audio_input = gr.Audio(
27
  sources=["microphone"],
28
  type="filepath",
 
65
  interactive=False
66
  )
67
 
68
+ # RIGHT COLUMN — AGENT EXECUTION TRACE
 
 
 
69
  with gr.Column(scale=2):
70
  gr.Markdown("### πŸ€– Agent Reasoning & Execution Trace")
71
 
72
+ # Chatbot (FIXED FORMAT)
73
  thought_trace = gr.Chatbot(
74
  label="Agent Reasoning",
75
+ height=400,
76
+ type="messages" # Use messages format
77
  )
78
 
79
  final_response = gr.Textbox(
 
82
  )
83
 
84
  audio_output = gr.Audio(
85
+ label="πŸ”Š Voice Response",
86
  type="filepath",
87
  autoplay=True
88
  )
 
96
  # STATE: store uploaded files
97
  uploaded_files_state = gr.State([])
98
 
 
99
  # FILE UPLOAD HANDLER
 
100
  async def handle_voice_file_upload(files):
101
  """Handle file uploads"""
102
  if not files:
 
106
  file_info_text = []
107
 
108
  from utils.file_utils import copy_file, get_file_info
109
+ import os
110
 
111
  for file in files:
112
+ filename = os.path.basename(file)
113
+ dest_path = f"data/uploads/{filename}"
114
+ copy_file(file, dest_path)
115
 
116
  info = get_file_info(dest_path)
117
  file_paths.append(dest_path)
118
  file_info_text.append(f"β€’ {info['name']} ({info['size_mb']} MB)")
119
 
120
+ # Add to RAG
121
  try:
122
  await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
123
  except Exception:
 
125
 
126
  return "\n".join(file_info_text), file_paths
127
 
128
+ # MAIN COMMAND PROCESSOR (FIXED FORMAT)
 
 
129
  async def process_audio_command(audio_file, text_command, files_list):
130
+ """Process voice + text commands"""
131
 
132
  # Step 1 — Identify user command
133
  if audio_file and not text_command:
134
+ # Transcribe
135
+ yield [], "🎀 Transcribing...", "", None, None
 
136
  cmd = await speech_to_text(audio_file)
137
 
138
  if not cmd:
139
+ yield [], "⚠️ Failed to transcribe", "", None, None
140
  return
141
  else:
142
  yield [], f"🎀 Transcribed: {cmd}", "", None, None
143
  elif text_command:
144
  cmd = text_command
145
  else:
146
+ yield [], "⚠️ Provide voice or text command", "", None, None
147
  return
148
 
149
  # Show planning state
 
153
  # Call agent (non-streaming)
154
  final_answer, thoughts = await agent.execute(cmd, files_list)
155
 
156
+ # Convert AgentThought objects into CORRECT chatbot messages format
157
  messages = []
158
  for t in thoughts:
159
+ # Handle both AgentThought objects and dicts
160
  if hasattr(t, "type"):
161
  t_type = t.type
162
  t_content = t.content
 
185
  icon = "βœ…"
186
  title = " Answer"
187
 
188
+ # CORRECT FORMAT: dict with 'role' and 'content'
189
+ messages.append({
190
+ "role": "assistant",
191
+ "content": f"{icon}{title} β€” {t_content}"
192
+ })
193
 
194
+ # Show results
195
  yield messages, "πŸ”Š Generating voice...", final_answer, None, None
196
 
197
  # TTS
 
200
  except Exception:
201
  audio_path = None
202
 
203
+ # Collect recent outputs
204
  output_dir = Path("data/outputs")
205
  files_generated = []
206
  if output_dir.exists():
 
210
  yield messages, "βœ… Complete!", final_answer, audio_path, files_generated
211
 
212
  except Exception as e:
213
+ err = f"⚠️ Error: {str(e)}"
214
+ # Error message in correct format
215
+ error_messages = [{
216
+ "role": "assistant",
217
+ "content": err
218
+ }]
219
+ yield error_messages, err, err, None, None
220
+
221
+ # CONNECT EVENTS (using run_sync wrapper for async functions)
222
+ def handle_voice_file_upload_sync(files):
223
+ """Sync wrapper for async function"""
224
+ return asyncio.run(handle_voice_file_upload(files))
225
 
 
 
 
226
  voice_file_upload.change(
227
+ fn=handle_voice_file_upload_sync,
228
  inputs=[voice_file_upload],
229
  outputs=[uploaded_files_list, uploaded_files_state]
230
  )
 
234
  inputs=[audio_input, text_input, uploaded_files_state],
235
  outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
236
  )
237
+
238
+ return ui