Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

App Files Files Community

Maheen001 committed 11 days ago

Commit

3163565

verified ·

1 Parent(s): 398cd77

Update ui/voice_agent_ui.py

Browse files

Files changed (1) hide show

ui/voice_agent_ui.py +62 -39

ui/voice_agent_ui.py CHANGED Viewed

@@ -1,5 +1,6 @@
 """
-Voice Agent UI - Autonomous voice-controlled agent (Gradio 6.0 compatible)
 """
 import gradio as gr
@@ -18,8 +19,16 @@ def create_voice_agent_ui(agent):
             gr.Markdown("""
             ### 🎤 Voice Control
-            Speak your command or type it manually.
-            The agent will autonomously execute tasks using MCP tools.
             """)
             # Audio input
@@ -39,13 +48,14 @@ def create_voice_agent_ui(agent):
             # Execute button
             execute_btn = gr.Button(
                 "🚀 Execute Command",
-                variant="primary"
             )
             # Status
             status_box = gr.Textbox(
-                label="Status",
-                value="Ready",
                 interactive=False
             )
@@ -59,33 +69,39 @@ def create_voice_agent_ui(agent):
             )
             uploaded_files_list = gr.Textbox(
-                label="Available Files",
                 placeholder="No files uploaded yet",
                 lines=4,
                 interactive=False
             )
-        # RIGHT COLUMN — AGENT EXECUTION TRACE
         with gr.Column(scale=2):
-            gr.Markdown("### 🤖 Agent Reasoning & Execution Trace")
-            # Chatbot (Gradio 6.0 - no type parameter)
             thought_trace = gr.Chatbot(
-                label="Agent Reasoning",
-                height=400
             )
             final_response = gr.Textbox(
-                label="Final Response",
                 lines=6,
             )
             audio_output = gr.Audio(
-                label="🔊 Voice Response",
                 type="filepath",
                 autoplay=True
             )
             with gr.Accordion("📥 Generated Files", open=False):
                 outputs_files = gr.Files(
                     label="Download Outputs",
@@ -126,33 +142,34 @@ def create_voice_agent_ui(agent):
     # MAIN COMMAND PROCESSOR
     async def process_audio_command(audio_file, text_command, files_list):
-        """Process voice + text commands"""
         # Step 1 — Identify user command
         if audio_file and not text_command:
             # Transcribe
-            yield [], "🎤 Transcribing...", "", None, None
             cmd = await speech_to_text(audio_file)
             if not cmd:
-                yield [], "⚠️ Failed to transcribe", "", None, None
                 return
             else:
                 yield [], f"🎤 Transcribed: {cmd}", "", None, None
         elif text_command:
             cmd = text_command
         else:
-            yield [], "⚠️ Provide voice or text command", "", None, None
             return
         # Show planning state
-        yield [], "🧠 Planning...", "", None, None
         try:
             # Call agent (non-streaming)
             final_answer, thoughts = await agent.execute(cmd, files_list)
-            # Convert AgentThought objects into Gradio 6.0 format (tuples)
             messages = []
             for t in thoughts:
                 # Handle both AgentThought objects and dicts
@@ -169,31 +186,32 @@ def create_voice_agent_ui(agent):
                     t_content = str(t)
                     t_tool = None
                 icon = "ℹ️"
                 title = ""
                 if t_type == "planning":
                     icon = "🧠"
-                    title = " Planning"
                 elif t_type == "tool_call":
                     icon = "🔧"
-                    title = f" Tool: {t_tool}" if t_tool else " Tool"
                 elif t_type == "reflection":
                     icon = "💭"
-                    title = " Reflection"
                 elif t_type == "answer":
                     icon = "✅"
-                    title = " Answer"
-                # Gradio 6.0 format: tuple (role, content)
-                messages.append((
-                    "assistant",
-                    f"{icon}{title} — {t_content}"
-                ))
             # Show results
-            yield messages, "🔊 Generating voice...", final_answer, None, None
-            # TTS
             try:
                 audio_path = await text_to_speech(final_answer)
             except Exception:
@@ -203,18 +221,23 @@ def create_voice_agent_ui(agent):
             output_dir = Path("data/outputs")
             files_generated = []
             if output_dir.exists():
-                cutoff = time.time() - 300
                 files_generated = [str(f) for f in output_dir.glob("*") if f.is_file() and f.stat().st_mtime > cutoff]
             yield messages, "✅ Complete!", final_answer, audio_path, files_generated
         except Exception as e:
-            err = f"⚠️ Error: {str(e)}"
-            # Error message in Gradio 6.0 format: tuple
-            error_messages = [("assistant", err)]
-            yield error_messages, err, err, None, None
-    # CONNECT EVENTS (using run_sync wrapper for async functions)
     def handle_voice_file_upload_sync(files):
         """Sync wrapper for async function"""
         return asyncio.run(handle_voice_file_upload(files))
@@ -231,4 +254,4 @@ def create_voice_agent_ui(agent):
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )
-    return gr.Column()  # Return a component to avoid Gradio warning

 """
+Voice Agent UI - Autonomous voice-controlled agent
+FIXED: Proper Gradio 6.0 messages format with type="messages"
 """
 import gradio as gr
             gr.Markdown("""
             ### 🎤 Voice Control
+            **How to use:**
+            1. Upload files (optional)
+            2. Speak OR type your command
+            3. Click Execute
+            4. Watch agent work!
+            **Example commands:**
+            - "Extract text from my PDF"
+            - "Summarize this document"
+            - "Organize my files"
             """)
             # Audio input
             # Execute button
             execute_btn = gr.Button(
                 "🚀 Execute Command",
+                variant="primary",
+                size="lg"
             )
             # Status
             status_box = gr.Textbox(
+                label="📊 Status",
+                value="Ready to execute commands",
                 interactive=False
             )
             )
             uploaded_files_list = gr.Textbox(
+                label="📂 Available Files",
                 placeholder="No files uploaded yet",
                 lines=4,
                 interactive=False
             )
+        # RIGHT COLUMN — AGENT EXECUTION
         with gr.Column(scale=2):
+            gr.Markdown("### 🤖 Agent Execution & Results")
+            # Agent Reasoning Trace (Chatbot with type="messages")
             thought_trace = gr.Chatbot(
+                label="🧠 Agent Reasoning Steps",
+                height=400,
+                type="messages",  # REQUIRED for dict format
+                show_copy_button=True
             )
+            # Final Response
             final_response = gr.Textbox(
+                label="✅ Final Answer",
                 lines=6,
+                show_copy_button=True
             )
+            # Voice Output
             audio_output = gr.Audio(
+                label="🔊 Voice Response (if available)",
                 type="filepath",
                 autoplay=True
             )
+            # Generated Files
             with gr.Accordion("📥 Generated Files", open=False):
                 outputs_files = gr.Files(
                     label="Download Outputs",
     # MAIN COMMAND PROCESSOR
     async def process_audio_command(audio_file, text_command, files_list):
+        """Process voice + text commands - FIXED for Gradio 6.0"""
         # Step 1 — Identify user command
         if audio_file and not text_command:
             # Transcribe
+            yield [], "🎤 Transcribing audio...", "", None, None
             cmd = await speech_to_text(audio_file)
             if not cmd:
+                yield [], "⚠️ Failed to transcribe audio", "", None, None
                 return
             else:
                 yield [], f"🎤 Transcribed: {cmd}", "", None, None
         elif text_command:
             cmd = text_command
         else:
+            yield [], "⚠️ Please provide a voice or text command", "", None, None
             return
         # Show planning state
+        yield [], "🧠 Agent is planning...", "", None, None
         try:
             # Call agent (non-streaming)
             final_answer, thoughts = await agent.execute(cmd, files_list)
+            # Convert AgentThought objects to Gradio 6.0 messages format
+            # MUST be dict with "role" and "content" keys
             messages = []
             for t in thoughts:
                 # Handle both AgentThought objects and dicts
                     t_content = str(t)
                     t_tool = None
+                # Format message with icon
                 icon = "ℹ️"
                 title = ""
                 if t_type == "planning":
                     icon = "🧠"
+                    title = "Planning"
                 elif t_type == "tool_call":
                     icon = "🔧"
+                    title = f"Tool: {t_tool}" if t_tool else "Tool Call"
                 elif t_type == "reflection":
                     icon = "💭"
+                    title = "Reflection"
                 elif t_type == "answer":
                     icon = "✅"
+                    title = "Answer"
+                # Gradio 6.0 format: dict with "role" and "content"
+                messages.append({
+                    "role": "assistant",
+                    "content": f"{icon} **{title}**\n\n{t_content}"
+                })
             # Show results
+            yield messages, "🔊 Generating voice response...", final_answer, None, None
+            # TTS (optional - may fail if no API key)
             try:
                 audio_path = await text_to_speech(final_answer)
             except Exception:
             output_dir = Path("data/outputs")
             files_generated = []
             if output_dir.exists():
+                cutoff = time.time() - 300  # Last 5 minutes
                 files_generated = [str(f) for f in output_dir.glob("*") if f.is_file() and f.stat().st_mtime > cutoff]
             yield messages, "✅ Complete!", final_answer, audio_path, files_generated
         except Exception as e:
+            import traceback
+            err_msg = f"⚠️ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
+            # Error message in Gradio 6.0 format
+            error_messages = [{
+                "role": "assistant",
+                "content": f"❌ **Error**\n\n{str(e)}"
+            }]
+            yield error_messages, f"❌ Error: {str(e)}", err_msg, None, None
+    # CONNECT EVENTS
     def handle_voice_file_upload_sync(files):
         """Sync wrapper for async function"""
         return asyncio.run(handle_voice_file_upload(files))
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )
+    return gr.Column()  # Return component to satisfy Gradio