Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

App Files Files Community

Maheen001 committed 11 days ago

Commit

02476c0

verified ·

1 Parent(s): a3d5221

Create ui/voice_agent_ui.py

Browse files

Files changed (1) hide show

ui/voice_agent_ui.py +318 -0

ui/voice_agent_ui.py ADDED Viewed

	@@ -0,0 +1,318 @@

+"""
+Voice Agent UI - Autonomous voice-controlled agent
+"""
+import gradio as gr
+import asyncio
+from pathlib import Path
+from utils.audio_utils import speech_to_text, text_to_speech
+import time
+def create_voice_agent_ui(agent):
+    """Build the voice agent interface and wire its event handlers.
+    The layout is a two-column row: command input (microphone or text),
+    a status box and a file upload on the left; the agent's reasoning
+    trace, final response, spoken reply and generated files on the right.
+    Args:
+        agent: Object that runs the commands. The handlers defined in this
+            function call ``agent.process_files_to_rag(...)`` and
+            ``agent.execute(...)`` on it.
+    NOTE(review): components are created directly, so this presumably has
+    to be called inside an active ``gr.Blocks`` context - confirm with the
+    caller.
+    """
+    with gr.Row():
+        # Left column - Voice control
+        with gr.Column(scale=1):
+            gr.Markdown("""
+            ### 🎤 Voice Control
+            Click the microphone button and speak your command.
+            The agent will autonomously execute your request.
+            """)
+            # Audio input: microphone only, handed to handlers as a file path
+            audio_input = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="Speak Your Command"
+            )
+            # Manual text input as fallback
+            text_input = gr.Textbox(
+                label="Or Type Your Command",
+                placeholder="Example: Extract deadlines from my PDFs and create calendar events",
+                lines=3
+            )
+            # Execute button
+            execute_btn = gr.Button(
+                "🚀 Execute Command",
+                variant="primary",
+                size="lg"
+            )
+            # Status indicator: read-only, updated by the command handler
+            status_box = gr.Textbox(
+                label="Status",
+                value="Ready",
+                interactive=False
+            )
+            gr.Markdown("---")
+            # Upload files for agent to process
+            voice_file_upload = gr.File(
+                label="Upload Files for Agent",
+                file_count="multiple",
+                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
+            )
+            # Read-only summary of uploaded files, filled by the upload handler
+            uploaded_files_list = gr.Textbox(
+                label="Available Files",
+                placeholder="No files uploaded",
+                interactive=False,
+                lines=4
+            )
+        # Right column - Agent execution trace
+        with gr.Column(scale=2):
+            gr.Markdown("### 🤖 Agent Thoughts & Execution")
+            # Chat-like interface for agent thoughts; type="messages" matches
+            # the role/content dicts built by format_thought_message
+            thought_trace = gr.Chatbot(
+                label="Agent Reasoning",
+                height=400,
+                type="messages"
+            )
+            # Final response
+            final_response = gr.Textbox(
+                label="Final Response",
+                lines=6,
+                placeholder="Agent's final answer will appear here..."
+            )
+            # Audio output: plays the generated voice response automatically
+            audio_output = gr.Audio(
+                label="Voice Response",
+                type="filepath",
+                autoplay=True
+            )
+            # Download outputs
+            with gr.Accordion("📥 Generated Files", open=False):
+                outputs_files = gr.File(
+                    label="Download Generated Files",
+                    file_count="multiple"
+                )
+    # State variables: holds the list of copied upload paths returned by the
+    # upload handler and passed to every command handler
+    uploaded_files_state = gr.State([])
+    # Example commands
+    with gr.Row():
+        gr.Markdown("""
+        ### 💡 Example Commands
+        Try these voice commands:
+        - "Extract all deadlines from my PDFs and add them to my calendar"
+        - "Summarize this document and send me a professional email summary"
+        - "Organize my uploaded files by type"
+        - "Find all documents mentioning invoices and extract amounts"
+        - "Create a calendar event for tomorrow at 2 PM titled Team Meeting"
+        - "Draft a friendly email to John about the project update"
+        """)
+    # Event handlers
+    async def handle_voice_file_upload(files):
+        """Handle file uploads for voice agent"""
+        if not files:
+            return "No files uploaded", []
+        file_list = []
+        file_info_text = []
+        for file in files:
+            from utils.file_utils import copy_file, get_file_info
+            dest_path = f"data/uploads/{Path(file.name).name}"
+            copy_file(file.name, dest_path)
+            info = get_file_info(dest_path)
+            file_list.append(dest_path)
+            file_info_text.append(f"✓ {info['name']} ({info['size_mb']} MB)")
+            # Add to RAG
+            await agent.process_files_to_rag([{'path': dest_path, 'name': info['name']}])
+        return "\n".join(file_info_text), file_list
+    async def process_audio_command(audio_file, text_command, files_list):
+        """Process voice or text command"""
+        # Determine input
+        if audio_file and not text_command:
+            # Transcribe audio
+            yield [], "🎤 Transcribing audio...", "", None, None
+            command_text = await speech_to_text(audio_file)
+            if not command_text:
+                yield [], "❌ Failed to transcribe audio", "", None, None
+                return
+            yield [], f"✓ Transcribed: {command_text}", "", None, None
+            await asyncio.sleep(0.5)
+        elif text_command:
+            command_text = text_command
+        else:
+            yield [], "⚠️ Please provide a voice or text command", "", None, None
+            return
+        # Update status
+        yield [], f"🤖 Planning: {command_text}", "", None, None
+        # Execute with agent
+        thoughts_display = []
+        final_answer = ""
+        try:
+            # Stream agent execution
+            async for thought in agent.execute(command_text, files_list, stream_thoughts=True):
+                if thought:
+                    # Format thought for display
+                    thought_msg = format_thought_message(thought)
+                    thoughts_display.append(thought_msg)
+                    # Update UI
+                    status = get_status_from_thought(thought)
+                    yield thoughts_display, status, "", None, None
+                    await asyncio.sleep(0.1)  # Small delay for UI update
+            # Get final answer
+            final_answer, all_thoughts = await agent.execute(command_text, files_list, stream_thoughts=False)
+            # Generate voice response
+            yield thoughts_display, "🔊 Generating voice response...", final_answer, None, None
+            if final_answer:
+                audio_path = await text_to_speech(final_answer)
+                # Collect generated files
+                output_files = collect_output_files()
+                yield thoughts_display, "✓ Complete!", final_answer, audio_path, output_files
+            else:
+                yield thoughts_display, "✓ Complete!", "Task executed successfully.", None, None
+        except Exception as e:
+            error_msg = f"❌ Error: {str(e)}"
+            yield thoughts_display, error_msg, error_msg, None, None
+    def format_thought_message(thought):
+        """Format thought as chat message"""
+        thought_type = thought.type
+        content = thought.content
+        # Choose role and styling based on thought type
+        if thought_type == 'planning':
+            role = "assistant"
+            icon = "🧠"
+            metadata = {"title": "🧠 Planning"}
+        elif thought_type == 'tool_call':
+            role = "assistant"
+            icon = "🔧"
+            tool_name = thought.tool_name or "unknown"
+            metadata = {"title": f"🔧 Using Tool: {tool_name}"}
+        elif thought_type == 'reflection':
+            role = "assistant"
+            icon = "💭"
+            metadata = {"title": "💭 Reflecting"}
+        elif thought_type == 'answer':
+            role = "assistant"
+            icon = "✅"
+            metadata = {"title": "✅ Final Answer"}
+        else:
+            role = "assistant"
+            icon = "ℹ️"
+            metadata = {"title": "ℹ️ Info"}
+        return {
+            "role": role,
+            "content": f"{icon} {content}",
+            "metadata": metadata
+        }
+    def get_status_from_thought(thought):
+        """Get status message from thought"""
+        if thought.type == 'planning':
+            return "🧠 Planning execution..."
+        elif thought.type == 'tool_call':
+            return f"🔧 Executing: {thought.tool_name or 'tool'}..."
+        elif thought.type == 'reflection':
+            return "💭 Analyzing results..."
+        elif thought.type == 'answer':
+            return "✅ Complete!"
+        else:
+            return "🤖 Processing..."
+    def collect_output_files():
+        """Collect generated output files"""
+        output_dir = Path("data/outputs")
+        if not output_dir.exists():
+            return None
+        # Get recent files (last 5 minutes)
+        recent_files = []
+        cutoff_time = time.time() - 300
+        for file_path in output_dir.glob("*"):
+            if file_path.is_file() and file_path.stat().st_mtime > cutoff_time:
+                recent_files.append(str(file_path))
+        return recent_files if recent_files else None
+    # Wire up events
+    voice_file_upload.change(
+        fn=handle_voice_file_upload,
+        inputs=[voice_file_upload],
+        outputs=[uploaded_files_list, uploaded_files_state]
+    )
+    execute_btn.click(
+        fn=process_audio_command,
+        inputs=[audio_input, text_input, uploaded_files_state],
+        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
+    )
+    # Quick action buttons
+    gr.Markdown("### ⚡ Quick Actions")
+    with gr.Row():
+        quick_summarize = gr.Button("📝 Summarize All Documents", size="sm")
+        quick_calendar = gr.Button("📅 Extract & Create Events", size="sm")
+        quick_organize = gr.Button("🗂️ Organize Files", size="sm")
+        quick_search = gr.Button("🔍 Search Documents", size="sm")
+    async def quick_action(action_text, files_list):
+        """Execute quick action"""
+        async for update in process_audio_command(None, action_text, files_list):
+            yield update
+    quick_summarize.click(
+        fn=lambda f: quick_action("Summarize all my uploaded documents", f),
+        inputs=[uploaded_files_state],
+        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
+    )
+    quick_calendar.click(
+        fn=lambda f: quick_action("Extract all dates and deadlines from my documents and create calendar events", f),
+        inputs=[uploaded_files_state],
+        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
+    )
+    quick_organize.click(
+        fn=lambda f: quick_action("Organize all my files by type", f),
+        inputs=[uploaded_files_state],
+        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
+    )
+    quick_search.click(
+        fn=lambda f: quick_action("Search my documents for important information and summarize findings", f),
+        inputs=[uploaded_files_state],
+        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
+    )