Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

App Files Files Community

Maheen001 committed 10 days ago

Commit

61fccc8

verified ·

1 Parent(s): 72aa415

Update ui/voice_agent_ui.py

Browse files

Files changed (1) hide show

ui/voice_agent_ui.py +42 -38

ui/voice_agent_ui.py CHANGED Viewed

@@ -1,7 +1,5 @@
 """
 Voice Agent UI - Autonomous voice-controlled agent (Gradio-compatible)
-This version calls the non-streaming agent.execute(...) and converts AgentThought
-objects into Chatbot messages for display.
 """
 import gradio as gr
@@ -12,12 +10,10 @@ import time
 def create_voice_agent_ui(agent):
-    """Create voice agent interface (error-free for Gradio 6)"""
     with gr.Row():
-        # --------------------------------------
         # LEFT COLUMN — INPUTS
-        # --------------------------------------
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 🎤 Voice Control
@@ -26,7 +22,7 @@ def create_voice_agent_ui(agent):
             The agent will autonomously execute tasks using MCP tools.
             """)
-            # Audio input (microphone)
             audio_input = gr.Audio(
                 sources=["microphone"],
                 type="filepath",
@@ -69,17 +65,15 @@ def create_voice_agent_ui(agent):
                 interactive=False
             )
-    # --------------------------------------
-    # RIGHT COLUMN — AGENT EXECUTION TRACE
-    # --------------------------------------
-    with gr.Row():
         with gr.Column(scale=2):
             gr.Markdown("### 🤖 Agent Reasoning & Execution Trace")
-            # Chatbot
             thought_trace = gr.Chatbot(
                 label="Agent Reasoning",
-                height=400
             )
             final_response = gr.Textbox(
@@ -88,7 +82,7 @@ def create_voice_agent_ui(agent):
             )
             audio_output = gr.Audio(
-                label="Voice Response",
                 type="filepath",
                 autoplay=True
             )
@@ -102,9 +96,7 @@ def create_voice_agent_ui(agent):
     # STATE: store uploaded files
     uploaded_files_state = gr.State([])
-    # ---------------------------------------------------------
     # FILE UPLOAD HANDLER
-    # ---------------------------------------------------------
     async def handle_voice_file_upload(files):
         """Handle file uploads"""
         if not files:
@@ -114,16 +106,18 @@ def create_voice_agent_ui(agent):
         file_info_text = []
         from utils.file_utils import copy_file, get_file_info
         for file in files:
-            dest_path = f"data/uploads/{Path(file.name).name}"
-            copy_file(file.name, dest_path)
             info = get_file_info(dest_path)
             file_paths.append(dest_path)
             file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")
-            # Add to RAG (best-effort)
             try:
                 await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
             except Exception:
@@ -131,28 +125,25 @@ def create_voice_agent_ui(agent):
         return "\n".join(file_info_text), file_paths
-    # ---------------------------------------------------------
-    # MAIN COMMAND PROCESSOR (non-streaming agent)
-    # ---------------------------------------------------------
     async def process_audio_command(audio_file, text_command, files_list):
-        """Process both voice + text commands"""
         # Step 1 — Identify user command
         if audio_file and not text_command:
-            # transcribe
-            status_msg = "🎤 Transcribing..."
-            yield [], status_msg, "", None, None
             cmd = await speech_to_text(audio_file)
             if not cmd:
-                yield [], "❌ Failed to transcribe", "", None, None
                 return
             else:
                 yield [], f"🎤 Transcribed: {cmd}", "", None, None
         elif text_command:
             cmd = text_command
         else:
-            yield [], "⚠️ Provide voice or text", "", None, None
             return
         # Show planning state
@@ -162,10 +153,10 @@ def create_voice_agent_ui(agent):
             # Call agent (non-streaming)
             final_answer, thoughts = await agent.execute(cmd, files_list)
-            # Convert AgentThought objects into chatbot messages (role, content)
             messages = []
             for t in thoughts:
-                # t might be AgentThought instance or dict (if agent serialized); handle both
                 if hasattr(t, "type"):
                     t_type = t.type
                     t_content = t.content
@@ -194,9 +185,13 @@ def create_voice_agent_ui(agent):
                     icon = "✅"
                     title = " Answer"
-                messages.append(( "assistant", f"{icon}{title} — {t_content}" ))
-            # show results
             yield messages, "🔊 Generating voice...", final_answer, None, None
             # TTS
@@ -205,7 +200,7 @@ def create_voice_agent_ui(agent):
             except Exception:
                 audio_path = None
-            # collect recent outputs
             output_dir = Path("data/outputs")
             files_generated = []
             if output_dir.exists():
@@ -215,14 +210,21 @@ def create_voice_agent_ui(agent):
             yield messages, "✅ Complete!", final_answer, audio_path, files_generated
         except Exception as e:
-            err = f"❌ Error: {str(e)}"
-            yield [], err, err, None, None
-    # ---------------------------------------------------------
-    # CONNECT EVENTS
-    # ---------------------------------------------------------
     voice_file_upload.change(
-        fn=handle_voice_file_upload,
         inputs=[voice_file_upload],
         outputs=[uploaded_files_list, uploaded_files_state]
     )
@@ -232,3 +234,5 @@ def create_voice_agent_ui(agent):
         inputs=[audio_input, text_input, uploaded_files_state],
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )

 """
 Voice Agent UI - Autonomous voice-controlled agent (Gradio-compatible)
 """
 import gradio as gr
 def create_voice_agent_ui(agent):
+    """Create voice agent interface"""
     with gr.Row():
         # LEFT COLUMN — INPUTS
         with gr.Column(scale=1):
             gr.Markdown("""
             ### 🎤 Voice Control
             The agent will autonomously execute tasks using MCP tools.
             """)
+            # Audio input
             audio_input = gr.Audio(
                 sources=["microphone"],
                 type="filepath",
                 interactive=False
             )
+        # RIGHT COLUMN — AGENT EXECUTION TRACE
         with gr.Column(scale=2):
             gr.Markdown("### 🤖 Agent Reasoning & Execution Trace")
+            # Chatbot (FIXED FORMAT)
             thought_trace = gr.Chatbot(
                 label="Agent Reasoning",
+                height=400,
+                type="messages"  # Use messages format
             )
             final_response = gr.Textbox(
             )
             audio_output = gr.Audio(
+                label="🔊 Voice Response",
                 type="filepath",
                 autoplay=True
             )
     # STATE: store uploaded files
     uploaded_files_state = gr.State([])
     # FILE UPLOAD HANDLER
     async def handle_voice_file_upload(files):
         """Handle file uploads"""
         if not files:
         file_info_text = []
         from utils.file_utils import copy_file, get_file_info
+        import os
         for file in files:
+            filename = os.path.basename(file)
+            dest_path = f"data/uploads/{filename}"
+            copy_file(file, dest_path)
             info = get_file_info(dest_path)
             file_paths.append(dest_path)
             file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")
+            # Add to RAG
             try:
                 await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
             except Exception:
         return "\n".join(file_info_text), file_paths
+    # MAIN COMMAND PROCESSOR (FIXED FORMAT)
     async def process_audio_command(audio_file, text_command, files_list):
+        """Process voice + text commands"""
         # Step 1 — Identify user command
         if audio_file and not text_command:
+            # Transcribe
+            yield [], "🎤 Transcribing...", "", None, None
             cmd = await speech_to_text(audio_file)
             if not cmd:
+                yield [], "⚠️ Failed to transcribe", "", None, None
                 return
             else:
                 yield [], f"🎤 Transcribed: {cmd}", "", None, None
         elif text_command:
             cmd = text_command
         else:
+            yield [], "⚠️ Provide voice or text command", "", None, None
             return
         # Show planning state
             # Call agent (non-streaming)
             final_answer, thoughts = await agent.execute(cmd, files_list)
+            # Convert AgentThought objects into CORRECT chatbot messages format
             messages = []
             for t in thoughts:
+                # Handle both AgentThought objects and dicts
                 if hasattr(t, "type"):
                     t_type = t.type
                     t_content = t.content
                     icon = "✅"
                     title = " Answer"
+                # CORRECT FORMAT: dict with 'role' and 'content'
+                messages.append({
+                    "role": "assistant",
+                    "content": f"{icon}{title} — {t_content}"
+                })
+            # Show results
             yield messages, "🔊 Generating voice...", final_answer, None, None
             # TTS
             except Exception:
                 audio_path = None
+            # Collect recent outputs
             output_dir = Path("data/outputs")
             files_generated = []
             if output_dir.exists():
             yield messages, "✅ Complete!", final_answer, audio_path, files_generated
         except Exception as e:
+            err = f"⚠️ Error: {str(e)}"
+            # Error message in correct format
+            error_messages = [{
+                "role": "assistant",
+                "content": err
+            }]
+            yield error_messages, err, err, None, None
+    # CONNECT EVENTS (the async upload handler is wrapped in a sync function that calls asyncio.run)
+    def handle_voice_file_upload_sync(files):
+        """Sync wrapper for async function"""
+        return asyncio.run(handle_voice_file_upload(files))
     voice_file_upload.change(
+        fn=handle_voice_file_upload_sync,
         inputs=[voice_file_upload],
         outputs=[uploaded_files_list, uploaded_files_state]
     )
         inputs=[audio_input, text_input, uploaded_files_state],
         outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
     )
+    return ui