Maheen001 commited on
Commit
02476c0
Β·
verified Β·
1 Parent(s): a3d5221

Create ui/voice_agent_ui.py

Browse files
Files changed (1) hide show
  1. ui/voice_agent_ui.py +318 -0
ui/voice_agent_ui.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Voice Agent UI - Autonomous voice-controlled agent
3
+ """
4
+
5
+ import gradio as gr
6
+ import asyncio
7
+ from pathlib import Path
8
+ from utils.audio_utils import speech_to_text, text_to_speech
9
+ import time
10
+
11
+
12
+ def create_voice_agent_ui(agent):
13
+ """Create voice agent interface"""
14
+
15
+ with gr.Row():
16
+ # Left column - Voice control
17
+ with gr.Column(scale=1):
18
+ gr.Markdown("""
19
+ ### 🎀 Voice Control
20
+
21
+ Click the microphone button and speak your command.
22
+ The agent will autonomously execute your request.
23
+ """)
24
+
25
+ # Audio input
26
+ audio_input = gr.Audio(
27
+ sources=["microphone"],
28
+ type="filepath",
29
+ label="Speak Your Command"
30
+ )
31
+
32
+ # Manual text input as fallback
33
+ text_input = gr.Textbox(
34
+ label="Or Type Your Command",
35
+ placeholder="Example: Extract deadlines from my PDFs and create calendar events",
36
+ lines=3
37
+ )
38
+
39
+ # Execute button
40
+ execute_btn = gr.Button(
41
+ "πŸš€ Execute Command",
42
+ variant="primary",
43
+ size="lg"
44
+ )
45
+
46
+ # Status indicator
47
+ status_box = gr.Textbox(
48
+ label="Status",
49
+ value="Ready",
50
+ interactive=False
51
+ )
52
+
53
+ gr.Markdown("---")
54
+
55
+ # Upload files for agent to process
56
+ voice_file_upload = gr.File(
57
+ label="Upload Files for Agent",
58
+ file_count="multiple",
59
+ file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
60
+ )
61
+
62
+ uploaded_files_list = gr.Textbox(
63
+ label="Available Files",
64
+ placeholder="No files uploaded",
65
+ interactive=False,
66
+ lines=4
67
+ )
68
+
69
+ # Right column - Agent execution trace
70
+ with gr.Column(scale=2):
71
+ gr.Markdown("### πŸ€– Agent Thoughts & Execution")
72
+
73
+ # Chat-like interface for agent thoughts
74
+ thought_trace = gr.Chatbot(
75
+ label="Agent Reasoning",
76
+ height=400,
77
+ type="messages"
78
+ )
79
+
80
+ # Final response
81
+ final_response = gr.Textbox(
82
+ label="Final Response",
83
+ lines=6,
84
+ placeholder="Agent's final answer will appear here..."
85
+ )
86
+
87
+ # Audio output
88
+ audio_output = gr.Audio(
89
+ label="Voice Response",
90
+ type="filepath",
91
+ autoplay=True
92
+ )
93
+
94
+ # Download outputs
95
+ with gr.Accordion("πŸ“₯ Generated Files", open=False):
96
+ outputs_files = gr.File(
97
+ label="Download Generated Files",
98
+ file_count="multiple"
99
+ )
100
+
101
+ # State variables
102
+ uploaded_files_state = gr.State([])
103
+
104
+ # Example commands
105
+ with gr.Row():
106
+ gr.Markdown("""
107
+ ### πŸ’‘ Example Commands
108
+
109
+ Try these voice commands:
110
+ - "Extract all deadlines from my PDFs and add them to my calendar"
111
+ - "Summarize this document and send me a professional email summary"
112
+ - "Organize my uploaded files by type"
113
+ - "Find all documents mentioning invoices and extract amounts"
114
+ - "Create a calendar event for tomorrow at 2 PM titled Team Meeting"
115
+ - "Draft a friendly email to John about the project update"
116
+ """)
117
+
118
+ # Event handlers
119
+ async def handle_voice_file_upload(files):
120
+ """Handle file uploads for voice agent"""
121
+ if not files:
122
+ return "No files uploaded", []
123
+
124
+ file_list = []
125
+ file_info_text = []
126
+
127
+ for file in files:
128
+ from utils.file_utils import copy_file, get_file_info
129
+
130
+ dest_path = f"data/uploads/{Path(file.name).name}"
131
+ copy_file(file.name, dest_path)
132
+
133
+ info = get_file_info(dest_path)
134
+ file_list.append(dest_path)
135
+ file_info_text.append(f"βœ“ {info['name']} ({info['size_mb']} MB)")
136
+
137
+ # Add to RAG
138
+ await agent.process_files_to_rag([{'path': dest_path, 'name': info['name']}])
139
+
140
+ return "\n".join(file_info_text), file_list
141
+
142
+ async def process_audio_command(audio_file, text_command, files_list):
143
+ """Process voice or text command"""
144
+
145
+ # Determine input
146
+ if audio_file and not text_command:
147
+ # Transcribe audio
148
+ yield [], "🎀 Transcribing audio...", "", None, None
149
+ command_text = await speech_to_text(audio_file)
150
+
151
+ if not command_text:
152
+ yield [], "❌ Failed to transcribe audio", "", None, None
153
+ return
154
+
155
+ yield [], f"βœ“ Transcribed: {command_text}", "", None, None
156
+ await asyncio.sleep(0.5)
157
+
158
+ elif text_command:
159
+ command_text = text_command
160
+
161
+ else:
162
+ yield [], "⚠️ Please provide a voice or text command", "", None, None
163
+ return
164
+
165
+ # Update status
166
+ yield [], f"πŸ€– Planning: {command_text}", "", None, None
167
+
168
+ # Execute with agent
169
+ thoughts_display = []
170
+ final_answer = ""
171
+
172
+ try:
173
+ # Stream agent execution
174
+ async for thought in agent.execute(command_text, files_list, stream_thoughts=True):
175
+ if thought:
176
+ # Format thought for display
177
+ thought_msg = format_thought_message(thought)
178
+ thoughts_display.append(thought_msg)
179
+
180
+ # Update UI
181
+ status = get_status_from_thought(thought)
182
+ yield thoughts_display, status, "", None, None
183
+
184
+ await asyncio.sleep(0.1) # Small delay for UI update
185
+
186
+ # Get final answer
187
+ final_answer, all_thoughts = await agent.execute(command_text, files_list, stream_thoughts=False)
188
+
189
+ # Generate voice response
190
+ yield thoughts_display, "πŸ”Š Generating voice response...", final_answer, None, None
191
+
192
+ if final_answer:
193
+ audio_path = await text_to_speech(final_answer)
194
+
195
+ # Collect generated files
196
+ output_files = collect_output_files()
197
+
198
+ yield thoughts_display, "βœ“ Complete!", final_answer, audio_path, output_files
199
+ else:
200
+ yield thoughts_display, "βœ“ Complete!", "Task executed successfully.", None, None
201
+
202
+ except Exception as e:
203
+ error_msg = f"❌ Error: {str(e)}"
204
+ yield thoughts_display, error_msg, error_msg, None, None
205
+
206
+ def format_thought_message(thought):
207
+ """Format thought as chat message"""
208
+ thought_type = thought.type
209
+ content = thought.content
210
+
211
+ # Choose role and styling based on thought type
212
+ if thought_type == 'planning':
213
+ role = "assistant"
214
+ icon = "🧠"
215
+ metadata = {"title": "🧠 Planning"}
216
+ elif thought_type == 'tool_call':
217
+ role = "assistant"
218
+ icon = "πŸ”§"
219
+ tool_name = thought.tool_name or "unknown"
220
+ metadata = {"title": f"πŸ”§ Using Tool: {tool_name}"}
221
+ elif thought_type == 'reflection':
222
+ role = "assistant"
223
+ icon = "πŸ’­"
224
+ metadata = {"title": "πŸ’­ Reflecting"}
225
+ elif thought_type == 'answer':
226
+ role = "assistant"
227
+ icon = "βœ…"
228
+ metadata = {"title": "βœ… Final Answer"}
229
+ else:
230
+ role = "assistant"
231
+ icon = "ℹ️"
232
+ metadata = {"title": "ℹ️ Info"}
233
+
234
+ return {
235
+ "role": role,
236
+ "content": f"{icon} {content}",
237
+ "metadata": metadata
238
+ }
239
+
240
+ def get_status_from_thought(thought):
241
+ """Get status message from thought"""
242
+ if thought.type == 'planning':
243
+ return "🧠 Planning execution..."
244
+ elif thought.type == 'tool_call':
245
+ return f"πŸ”§ Executing: {thought.tool_name or 'tool'}..."
246
+ elif thought.type == 'reflection':
247
+ return "πŸ’­ Analyzing results..."
248
+ elif thought.type == 'answer':
249
+ return "βœ… Complete!"
250
+ else:
251
+ return "πŸ€– Processing..."
252
+
253
+ def collect_output_files():
254
+ """Collect generated output files"""
255
+ output_dir = Path("data/outputs")
256
+ if not output_dir.exists():
257
+ return None
258
+
259
+ # Get recent files (last 5 minutes)
260
+ recent_files = []
261
+ cutoff_time = time.time() - 300
262
+
263
+ for file_path in output_dir.glob("*"):
264
+ if file_path.is_file() and file_path.stat().st_mtime > cutoff_time:
265
+ recent_files.append(str(file_path))
266
+
267
+ return recent_files if recent_files else None
268
+
269
+ # Wire up events
270
+ voice_file_upload.change(
271
+ fn=handle_voice_file_upload,
272
+ inputs=[voice_file_upload],
273
+ outputs=[uploaded_files_list, uploaded_files_state]
274
+ )
275
+
276
+ execute_btn.click(
277
+ fn=process_audio_command,
278
+ inputs=[audio_input, text_input, uploaded_files_state],
279
+ outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
280
+ )
281
+
282
+ # Quick action buttons
283
+ gr.Markdown("### ⚑ Quick Actions")
284
+
285
+ with gr.Row():
286
+ quick_summarize = gr.Button("πŸ“ Summarize All Documents", size="sm")
287
+ quick_calendar = gr.Button("πŸ“… Extract & Create Events", size="sm")
288
+ quick_organize = gr.Button("πŸ—‚οΈ Organize Files", size="sm")
289
+ quick_search = gr.Button("πŸ” Search Documents", size="sm")
290
+
291
+ async def quick_action(action_text, files_list):
292
+ """Execute quick action"""
293
+ async for update in process_audio_command(None, action_text, files_list):
294
+ yield update
295
+
296
+ quick_summarize.click(
297
+ fn=lambda f: quick_action("Summarize all my uploaded documents", f),
298
+ inputs=[uploaded_files_state],
299
+ outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
300
+ )
301
+
302
+ quick_calendar.click(
303
+ fn=lambda f: quick_action("Extract all dates and deadlines from my documents and create calendar events", f),
304
+ inputs=[uploaded_files_state],
305
+ outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
306
+ )
307
+
308
+ quick_organize.click(
309
+ fn=lambda f: quick_action("Organize all my files by type", f),
310
+ inputs=[uploaded_files_state],
311
+ outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
312
+ )
313
+
314
+ quick_search.click(
315
+ fn=lambda f: quick_action("Search my documents for important information and summarize findings", f),
316
+ inputs=[uploaded_files_state],
317
+ outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
318
+ )