Maheen001 committed · verified
Commit da51f27 · 1 Parent(s): 135bd9e

Update ui/voice_agent_ui.py

Files changed (1)
  1. ui/voice_agent_ui.py +123 -232
ui/voice_agent_ui.py CHANGED
@@ -1,5 +1,5 @@
 """
-Voice Agent UI - Autonomous voice-controlled agent
+Voice Agent UI - Autonomous voice-controlled agent (Gradio 6 Safe Version)
 """
 
 import gradio as gr
@@ -10,309 +10,200 @@ import time
 
 
 def create_voice_agent_ui(agent):
-    """Create voice agent interface"""
-
     with gr.Row():
-        # Left column - Voice control
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 🎤 Voice Control
 
-            Click the microphone button and speak your command.
-            The agent will autonomously execute your request.
             """)
-
-            # Audio input
             audio_input = gr.Audio(
                 sources=["microphone"],
                 type="filepath",
-                label="Speak Your Command"
             )
-
-            # Manual text input as fallback
             text_input = gr.Textbox(
-                label="Or Type Your Command",
-                placeholder="Example: Extract deadlines from my PDFs and create calendar events",
                 lines=3
             )
-
             # Execute button
             execute_btn = gr.Button(
                 "🚀 Execute Command",
-                variant="primary",
-                size="lg"
             )
-
            # Status indicator
             status_box = gr.Textbox(
                 label="Status",
                 value="Ready",
                 interactive=False
             )
-
             gr.Markdown("---")
-
-            # Upload files for agent to process
-            voice_file_upload = gr.File(
-                label="Upload Files for Agent",
                 file_count="multiple",
                 file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
             )
-
             uploaded_files_list = gr.Textbox(
                 label="Available Files",
-                placeholder="No files uploaded",
-                interactive=False,
-                lines=4
             )
-
-        # Right column - Agent execution trace
         with gr.Column(scale=2):
-            gr.Markdown("### 🤖 Agent Thoughts & Execution")
-
-            # Chat-like interface for agent thoughts
             thought_trace = gr.Chatbot(
                 label="Agent Reasoning",
-                height=400,
-                type="messages"
             )
-
-            # Final response
             final_response = gr.Textbox(
                 label="Final Response",
                 lines=6,
-                placeholder="Agent's final answer will appear here..."
             )
-
-            # Audio output
             audio_output = gr.Audio(
                 label="Voice Response",
                 type="filepath",
                 autoplay=True
             )
-
-            # Download outputs
             with gr.Accordion("📥 Generated Files", open=False):
-                outputs_files = gr.File(
-                    label="Download Generated Files",
                     file_count="multiple"
                 )
-
-    # State variables
     uploaded_files_state = gr.State([])
-
-    # Example commands
-    with gr.Row():
-        gr.Markdown("""
-        ### 💡 Example Commands
-
-        Try these voice commands:
-        - "Extract all deadlines from my PDFs and add them to my calendar"
-        - "Summarize this document and send me a professional email summary"
-        - "Organize my uploaded files by type"
-        - "Find all documents mentioning invoices and extract amounts"
-        - "Create a calendar event for tomorrow at 2 PM titled Team Meeting"
-        - "Draft a friendly email to John about the project update"
-        """)
-
-    # Event handlers
     async def handle_voice_file_upload(files):
-        """Handle file uploads for voice agent"""
         if not files:
             return "No files uploaded", []
-
-        file_list = []
         file_info_text = []
-
         for file in files:
-            from utils.file_utils import copy_file, get_file_info
-
             dest_path = f"data/uploads/{Path(file.name).name}"
             copy_file(file.name, dest_path)
-
             info = get_file_info(dest_path)
-            file_list.append(dest_path)
-            file_info_text.append(f"✓ {info['name']} ({info['size_mb']} MB)")
-
-            # Add to RAG
-            await agent.process_files_to_rag([{'path': dest_path, 'name': info['name']}])
-
-        return "\n".join(file_info_text), file_list
-
     async def process_audio_command(audio_file, text_command, files_list):
-        """Process voice or text command"""
-
-        # Determine input
         if audio_file and not text_command:
-            # Transcribe audio
-            yield [], "🎤 Transcribing audio...", "", None, None
-            command_text = await speech_to_text(audio_file)
-
-            if not command_text:
-                yield [], "❌ Failed to transcribe audio", "", None, None
                 return
-
-            yield [], f"✓ Transcribed: {command_text}", "", None, None
-            await asyncio.sleep(0.5)
-
         elif text_command:
-            command_text = text_command
-
         else:
-            yield [], "⚠️ Please provide a voice or text command", "", None, None
             return
-
-        # Update status
-        yield [], f"🤖 Planning: {command_text}", "", None, None
-
-        # Execute with agent
-        thoughts_display = []
-        final_answer = ""
-
         try:
-            # Stream agent execution
-            async for thought in agent.execute(command_text, files_list, stream_thoughts=True):
                 if thought:
-                    # Format thought for display
-                    thought_msg = format_thought_message(thought)
-                    thoughts_display.append(thought_msg)
-
-                    # Update UI
-                    status = get_status_from_thought(thought)
-                    yield thoughts_display, status, "", None, None
-
-                    await asyncio.sleep(0.1)  # Small delay for UI update
-
-            # Get final answer
-            final_answer, all_thoughts = await agent.execute(command_text, files_list, stream_thoughts=False)
-
-            # Generate voice response
-            yield thoughts_display, "🔊 Generating voice response...", final_answer, None, None
-
-            if final_answer:
-                audio_path = await text_to_speech(final_answer)
-
-                # Collect generated files
-                output_files = collect_output_files()
-
-                yield thoughts_display, "✓ Complete!", final_answer, audio_path, output_files
-            else:
-                yield thoughts_display, "✓ Complete!", "Task executed successfully.", None, None
-
         except Exception as e:
-            error_msg = f"❌ Error: {str(e)}"
-            yield thoughts_display, error_msg, error_msg, None, None
-
-    def format_thought_message(thought):
-        """Format thought as chat message"""
-        thought_type = thought.type
-        content = thought.content
-
-        # Choose role and styling based on thought type
-        if thought_type == 'planning':
-            role = "assistant"
-            icon = "🧠"
-            metadata = {"title": "🧠 Planning"}
-        elif thought_type == 'tool_call':
-            role = "assistant"
-            icon = "🔧"
-            tool_name = thought.tool_name or "unknown"
-            metadata = {"title": f"🔧 Using Tool: {tool_name}"}
-        elif thought_type == 'reflection':
-            role = "assistant"
-            icon = "💭"
-            metadata = {"title": "💭 Reflecting"}
-        elif thought_type == 'answer':
-            role = "assistant"
-            icon = "✅"
-            metadata = {"title": "✅ Final Answer"}
-        else:
-            role = "assistant"
-            icon = "ℹ️"
-            metadata = {"title": "ℹ️ Info"}
-
-        return {
-            "role": role,
-            "content": f"{icon} {content}",
-            "metadata": metadata
-        }
-
-    def get_status_from_thought(thought):
-        """Get status message from thought"""
-        if thought.type == 'planning':
-            return "🧠 Planning execution..."
-        elif thought.type == 'tool_call':
-            return f"🔧 Executing: {thought.tool_name or 'tool'}..."
-        elif thought.type == 'reflection':
-            return "💭 Analyzing results..."
-        elif thought.type == 'answer':
-            return "✅ Complete!"
-        else:
-            return "🤖 Processing..."
-
     def collect_output_files():
-        """Collect generated output files"""
         output_dir = Path("data/outputs")
-        if not output_dir.exists():
-            return None
-
-        # Get recent files (last 5 minutes)
-        recent_files = []
-        cutoff_time = time.time() - 300
-
-        for file_path in output_dir.glob("*"):
-            if file_path.is_file() and file_path.stat().st_mtime > cutoff_time:
-                recent_files.append(str(file_path))
-
-        return recent_files if recent_files else None
-
-    # Wire up events
     voice_file_upload.change(
         fn=handle_voice_file_upload,
         inputs=[voice_file_upload],
         outputs=[uploaded_files_list, uploaded_files_state]
     )
-
     execute_btn.click(
         fn=process_audio_command,
        inputs=[audio_input, text_input, uploaded_files_state],
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )
-
-    # Quick action buttons
-    gr.Markdown("### ⚡ Quick Actions")
-
-    with gr.Row():
-        quick_summarize = gr.Button("📝 Summarize All Documents", size="sm")
-        quick_calendar = gr.Button("📅 Extract & Create Events", size="sm")
-        quick_organize = gr.Button("🗂️ Organize Files", size="sm")
-        quick_search = gr.Button("🔍 Search Documents", size="sm")
-
-    async def quick_action(action_text, files_list):
-        """Execute quick action"""
-        async for update in process_audio_command(None, action_text, files_list):
-            yield update
-
-    quick_summarize.click(
-        fn=lambda f: quick_action("Summarize all my uploaded documents", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
-
-    quick_calendar.click(
-        fn=lambda f: quick_action("Extract all dates and deadlines from my documents and create calendar events", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
-
-    quick_organize.click(
-        fn=lambda f: quick_action("Organize all my files by type", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
-
-    quick_search.click(
-        fn=lambda f: quick_action("Search my documents for important information and summarize findings", f),
-        inputs=[uploaded_files_state],
-        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
-    )
 
 
 def create_voice_agent_ui(agent):
+    """Create voice agent interface (error-free for Gradio 6)"""
+
     with gr.Row():
+        # --------------------------------------
+        # LEFT COLUMN - INPUTS
+        # --------------------------------------
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 🎤 Voice Control
 
+            Speak your command or type it manually.
+            The agent will autonomously execute tasks using MCP tools.
             """)
+
+            # Audio input (microphone)
             audio_input = gr.Audio(
                 sources=["microphone"],
                 type="filepath",
+                label="🎙️ Speak Your Command"
             )
+
+            # Manual command input
             text_input = gr.Textbox(
+                label="⌨️ Or Type Command",
+                placeholder="Example: Extract deadlines from all PDFs",
                 lines=3
             )
+
             # Execute button
             execute_btn = gr.Button(
                 "🚀 Execute Command",
+                variant="primary"
             )
+
+            # Status
             status_box = gr.Textbox(
                 label="Status",
                 value="Ready",
                 interactive=False
             )
+
             gr.Markdown("---")
+
+            # File uploader
+            voice_file_upload = gr.Files(
+                label="📁 Upload Files for Agent",
                 file_count="multiple",
                 file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
             )
+
             uploaded_files_list = gr.Textbox(
                 label="Available Files",
+                placeholder="No files uploaded yet",
+                lines=4,
+                interactive=False
             )
+
+    # --------------------------------------
+    # RIGHT COLUMN - AGENT EXECUTION TRACE
+    # --------------------------------------
+    with gr.Row():
         with gr.Column(scale=2):
+            gr.Markdown("### 🤖 Agent Reasoning & Execution Trace")
+
+            # FIX: No "type=messages" (removed)
             thought_trace = gr.Chatbot(
                 label="Agent Reasoning",
+                height=400
             )
+
             final_response = gr.Textbox(
                 label="Final Response",
                 lines=6,
             )
+
             audio_output = gr.Audio(
                 label="Voice Response",
                 type="filepath",
                 autoplay=True
             )
+
             with gr.Accordion("📥 Generated Files", open=False):
+                outputs_files = gr.Files(
+                    label="Download Outputs",
                     file_count="multiple"
                 )
+
+    # STATE: store uploaded files
     uploaded_files_state = gr.State([])
+
+    # ---------------------------------------------------------
+    # FILE UPLOAD HANDLER
+    # ---------------------------------------------------------
     async def handle_voice_file_upload(files):
+        """Handle file uploads"""
         if not files:
             return "No files uploaded", []
+
+        file_paths = []
         file_info_text = []
+
+        from utils.file_utils import copy_file, get_file_info
+
         for file in files:
             dest_path = f"data/uploads/{Path(file.name).name}"
             copy_file(file.name, dest_path)
+
             info = get_file_info(dest_path)
+            file_paths.append(dest_path)
+            file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")
+
+            await agent.process_files_to_rag([
+                {"path": dest_path, "name": info['name']}
+            ])
+
+        return "\n".join(file_info_text), file_paths
+
+    # ---------------------------------------------------------
+    # MAIN COMMAND PROCESSOR
+    # ---------------------------------------------------------
     async def process_audio_command(audio_file, text_command, files_list):
+        """Process both voice + text commands"""
+
+        # Step 1 - Identify user command
         if audio_file and not text_command:
+            yield [], "🎤 Transcribing...", "", None, None
+            cmd = await speech_to_text(audio_file)
+
+            if not cmd:
+                yield [], "❌ Failed to transcribe", "", None, None
                 return
+
+            yield [], f"🎤 Transcribed: {cmd}", "", None, None
         elif text_command:
+            cmd = text_command
         else:
+            yield [], "⚠️ Provide voice or text", "", None, None
             return
+
+        # Step 2 - Show planning
+        yield [], "🧠 Planning...", "", None, None
+
+        thoughts_ui = []
+
+        # Step 3 - Stream agent thoughts
         try:
+            async for thought in agent.execute(cmd, files_list, stream_thoughts=True):
                 if thought:
+                    msg = {
+                        "role": "assistant",
+                        "content": f"🧠 {thought.content}"
+                    }
+                    thoughts_ui.append(msg)
+                    yield thoughts_ui, "🔧 Working...", "", None, None
+
+            # Step 4 - Final answer
+            final_answer, _ = await agent.execute(cmd, files_list, stream_thoughts=False)
+
+            yield thoughts_ui, "🔊 Generating voice...", final_answer, None, None
+
+            audio_file_path = await text_to_speech(final_answer)
+            files_generated = collect_output_files()
+
+            yield thoughts_ui, "✅ Complete!", final_answer, audio_file_path, files_generated
+
         except Exception as e:
+            err = f"❌ Error: {str(e)}"
+            yield thoughts_ui, err, err, None, None
+
+    # ---------------------------------------------------------
+    # COLLECT OUTPUT FILES
+    # ---------------------------------------------------------
     def collect_output_files():
         output_dir = Path("data/outputs")
+        if not output_dir.exists(): return None
+
+        cutoff = time.time() - 300
+        files = [
+            str(f) for f in output_dir.glob("*")
+            if f.is_file() and f.stat().st_mtime > cutoff
+        ]
+        return files or None
+
+    # ---------------------------------------------------------
+    # CONNECT EVENTS
+    # ---------------------------------------------------------
     voice_file_upload.change(
         fn=handle_voice_file_upload,
         inputs=[voice_file_upload],
         outputs=[uploaded_files_list, uploaded_files_state]
     )
+
     execute_btn.click(
         fn=process_audio_command,
         inputs=[audio_input, text_input, uploaded_files_state],
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )
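
For orientation, here is a minimal sketch of how the updated create_voice_agent_ui might be mounted in an app. Only create_voice_agent_ui(agent) comes from the file changed in this commit; the agents.voice_agent module, the VoiceAgent class, its constructor, and the launch settings below are illustrative assumptions, since the agent implementation is not part of this diff.

# app.py - hypothetical entry point (not part of this commit)
import gradio as gr

from ui.voice_agent_ui import create_voice_agent_ui
# Assumed location and class name; the agent must expose execute(...) and
# process_files_to_rag(...), which the UI code above calls.
from agents.voice_agent import VoiceAgent

agent = VoiceAgent()  # constructor arguments are project-specific

# The UI function opens gr.Row()/gr.Column() contexts, so it must be called
# inside a gr.Blocks() context that owns the page layout.
with gr.Blocks(title="Voice Agent") as demo:
    create_voice_agent_ui(agent)

if __name__ == "__main__":
    demo.launch()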