| """ | |
| Voice Agent UI - Autonomous voice-controlled agent | |
| """ | |
| import gradio as gr | |
| import asyncio | |
| from pathlib import Path | |
| from utils.audio_utils import speech_to_text, text_to_speech | |
| import time | |
def create_voice_agent_ui(agent):
    """Create voice agent interface"""
    with gr.Row():
        # Left column - Voice control
        with gr.Column(scale=1):
            gr.Markdown("""
            ### 🎤 Voice Control

            Click the microphone button and speak your command.
            The agent will autonomously execute your request.
            """)

            # Audio input
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Speak Your Command"
            )

            # Manual text input as fallback
            text_input = gr.Textbox(
                label="Or Type Your Command",
                placeholder="Example: Extract deadlines from my PDFs and create calendar events",
                lines=3
            )

            # Execute button
            execute_btn = gr.Button(
                "🚀 Execute Command",
                variant="primary",
                size="lg"
            )

            # Status indicator
            status_box = gr.Textbox(
                label="Status",
                value="Ready",
                interactive=False
            )

            gr.Markdown("---")

            # Upload files for agent to process
            voice_file_upload = gr.File(
                label="Upload Files for Agent",
                file_count="multiple",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
            )

            uploaded_files_list = gr.Textbox(
                label="Available Files",
                placeholder="No files uploaded",
                interactive=False,
                lines=4
            )

        # Right column - Agent execution trace
        with gr.Column(scale=2):
            gr.Markdown("### 🤖 Agent Thoughts & Execution")

            # Chat-like interface for agent thoughts
            thought_trace = gr.Chatbot(
                label="Agent Reasoning",
                height=400,
                type="messages"
            )

            # Final response
            final_response = gr.Textbox(
                label="Final Response",
                lines=6,
                placeholder="Agent's final answer will appear here..."
            )

            # Audio output
            audio_output = gr.Audio(
                label="Voice Response",
                type="filepath",
                autoplay=True
            )

            # Download outputs
            with gr.Accordion("📥 Generated Files", open=False):
                outputs_files = gr.File(
                    label="Download Generated Files",
                    file_count="multiple"
                )

    # State variables
    uploaded_files_state = gr.State([])

    # Example commands
    with gr.Row():
        gr.Markdown("""
        ### 💡 Example Commands

        Try these voice commands:

        - "Extract all deadlines from my PDFs and add them to my calendar"
        - "Summarize this document and send me a professional email summary"
        - "Organize my uploaded files by type"
        - "Find all documents mentioning invoices and extract amounts"
        - "Create a calendar event for tomorrow at 2 PM titled Team Meeting"
        - "Draft a friendly email to John about the project update"
        """)

    # Event handlers
    async def handle_voice_file_upload(files):
        """Handle file uploads for voice agent"""
        if not files:
            return "No files uploaded", []

        file_list = []
        file_info_text = []

        for file in files:
            # gr.File passes filepath strings by default; fall back to .name for file objects
            src_path = file if isinstance(file, str) else file.name
            dest_path = f"data/uploads/{Path(src_path).name}"
            copy_file(src_path, dest_path)
            info = get_file_info(dest_path)

            file_list.append(dest_path)
            file_info_text.append(f"✅ {info['name']} ({info['size_mb']} MB)")

            # Add to RAG
            await agent.process_files_to_rag([{'path': dest_path, 'name': info['name']}])

        return "\n".join(file_info_text), file_list

    async def process_audio_command(audio_file, text_command, files_list):
        """Process voice or text command"""
        # Determine input
        if audio_file and not text_command:
            # Transcribe audio
            yield [], "🎤 Transcribing audio...", "", None, None
            command_text = await speech_to_text(audio_file)

            if not command_text:
                yield [], "❌ Failed to transcribe audio", "", None, None
                return

            yield [], f"✅ Transcribed: {command_text}", "", None, None
            await asyncio.sleep(0.5)
        elif text_command:
            command_text = text_command
        else:
            yield [], "⚠️ Please provide a voice or text command", "", None, None
            return

        # Update status
        yield [], f"🤖 Planning: {command_text}", "", None, None

        # Execute with agent
        thoughts_display = []
        final_answer = ""

        try:
            # Stream agent execution
            async for thought in agent.execute(command_text, files_list, stream_thoughts=True):
                if thought:
                    # Format thought for display
                    thought_msg = format_thought_message(thought)
                    thoughts_display.append(thought_msg)

                    # Update UI
                    status = get_status_from_thought(thought)
                    yield thoughts_display, status, "", None, None
                    await asyncio.sleep(0.1)  # Small delay for UI update

            # Get final answer
            final_answer, all_thoughts = await agent.execute(command_text, files_list, stream_thoughts=False)

            # Generate voice response
            yield thoughts_display, "🔊 Generating voice response...", final_answer, None, None

            if final_answer:
                audio_path = await text_to_speech(final_answer)

                # Collect generated files
                output_files = collect_output_files()

                yield thoughts_display, "✅ Complete!", final_answer, audio_path, output_files
            else:
                yield thoughts_display, "✅ Complete!", "Task executed successfully.", None, None
        except Exception as e:
            error_msg = f"❌ Error: {str(e)}"
            yield thoughts_display, error_msg, error_msg, None, None

    def format_thought_message(thought):
        """Format thought as chat message"""
        thought_type = thought.type
        content = thought.content

        # Choose role and styling based on thought type
        if thought_type == 'planning':
            role = "assistant"
            icon = "🧠"
            metadata = {"title": "🧠 Planning"}
        elif thought_type == 'tool_call':
            role = "assistant"
            icon = "🔧"
            tool_name = thought.tool_name or "unknown"
            metadata = {"title": f"🔧 Using Tool: {tool_name}"}
        elif thought_type == 'reflection':
            role = "assistant"
            icon = "🔍"
            metadata = {"title": "🔍 Reflecting"}
        elif thought_type == 'answer':
            role = "assistant"
            icon = "✅"
            metadata = {"title": "✅ Final Answer"}
        else:
            role = "assistant"
            icon = "ℹ️"
            metadata = {"title": "ℹ️ Info"}

        return {
            "role": role,
            "content": f"{icon} {content}",
            "metadata": metadata
        }

    def get_status_from_thought(thought):
        """Get status message from thought"""
        if thought.type == 'planning':
            return "🧠 Planning execution..."
        elif thought.type == 'tool_call':
            return f"🔧 Executing: {thought.tool_name or 'tool'}..."
        elif thought.type == 'reflection':
            return "🔍 Analyzing results..."
        elif thought.type == 'answer':
            return "✅ Complete!"
        else:
            return "🤖 Processing..."

    def collect_output_files():
        """Collect generated output files"""
        output_dir = Path("data/outputs")
        if not output_dir.exists():
            return None

        # Get recent files (last 5 minutes)
        recent_files = []
        cutoff_time = time.time() - 300

        for file_path in output_dir.glob("*"):
            if file_path.is_file() and file_path.stat().st_mtime > cutoff_time:
                recent_files.append(str(file_path))

        return recent_files if recent_files else None

    # Wire up events
    voice_file_upload.change(
        fn=handle_voice_file_upload,
        inputs=[voice_file_upload],
        outputs=[uploaded_files_list, uploaded_files_state]
    )

    execute_btn.click(
        fn=process_audio_command,
        inputs=[audio_input, text_input, uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    # Quick action buttons
    gr.Markdown("### ⚡ Quick Actions")
    with gr.Row():
        quick_summarize = gr.Button("📄 Summarize All Documents", size="sm")
        quick_calendar = gr.Button("📅 Extract & Create Events", size="sm")
        quick_organize = gr.Button("🗂️ Organize Files", size="sm")
        quick_search = gr.Button("🔍 Search Documents", size="sm")

    def make_quick_handler(action_text):
        """Build a streaming handler bound to a fixed command.

        A plain lambda returning the async generator would not be detected
        by Gradio as a generator function, so streamed updates would be lost;
        returning a real async generator per button keeps streaming intact.
        """
        async def quick_action(files_list):
            """Execute quick action"""
            async for update in process_audio_command(None, action_text, files_list):
                yield update
        return quick_action

    quick_summarize.click(
        fn=make_quick_handler("Summarize all my uploaded documents"),
        inputs=[uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    quick_calendar.click(
        fn=make_quick_handler("Extract all dates and deadlines from my documents and create calendar events"),
        inputs=[uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    quick_organize.click(
        fn=make_quick_handler("Organize all my files by type"),
        inputs=[uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    quick_search.click(
        fn=make_quick_handler("Search my documents for important information and summarize findings"),
        inputs=[uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )
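

# Usage sketch (assumption, not part of this module): create_voice_agent_ui builds its
# components inside whatever Blocks/Tab context the caller has open, so a host app might
# mount it roughly as below. The `VoiceAgent` class and import path are hypothetical;
# substitute the project's actual agent.
#
#     from agents.voice_agent import VoiceAgent  # hypothetical import
#
#     agent = VoiceAgent()
#     with gr.Blocks(title="Voice Agent") as demo:
#         create_voice_agent_ui(agent)
#     demo.launch()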