Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

File size: 8,924 Bytes

02476c0
3163565
bead2fc
02476c0
 
 
 
 
c9b0c9c
0396ae3
c9b0c9c
02476c0
 
61fccc8
da51f27
02476c0
0396ae3
02476c0
0396ae3
 
 
3163565
 
 
 
 
 
 
 
 
 
0396ae3
 
61fccc8
0396ae3
 
 
 
 
da51f27
0396ae3
 
 
 
 
 
da51f27
0396ae3
 
 
3163565
 
0396ae3
c9b0c9c
0396ae3
 
3163565
 
0396ae3
 
da51f27
02476c0
da51f27
0396ae3
 
 
02476c0
 
 
da51f27
0396ae3
3163565
0396ae3
 
 
 
da51f27
3163565
02476c0
3163565
da51f27
bead2fc
0396ae3
3163565
5c77b23
0396ae3
da51f27
3163565
0396ae3
3163565
5c77b23
0396ae3
da51f27
3163565
0396ae3
3163565
0396ae3
 
 
da51f27
3163565
02476c0
0396ae3
 
 
 
da51f27
0396ae3
02476c0
da51f27
0396ae3
02476c0
0396ae3
02476c0
 
da51f27
 
0396ae3
 
 
61fccc8
0396ae3
 
61fccc8
 
 
0396ae3
 
 
 
 
61fccc8
c9b0c9c
0396ae3
 
 
 
 
 
398cd77
02476c0
bead2fc
0396ae3
 
 
61fccc8
3163565
0396ae3
 
 
3163565
02476c0
0396ae3
 
 
 
02476c0
3163565
02476c0
da51f27
0396ae3
3163565
da51f27
02476c0
0396ae3
 
 
ca3e20d
 
0396ae3
d6124d1
b9e63ff
61fccc8
0396ae3
 
 
 
 
 
 
 
 
 
 
 
 
3163565
0396ae3
 
 
 
3163565
0396ae3
 
3163565
0396ae3
 
3163565
0396ae3
 
3163565
0396ae3
ca3e20d
 
 
 
 
0396ae3
61fccc8
bead2fc
0396ae3
3163565
bead2fc
0396ae3
 
bead2fc
 
 
 
 
 
 
0396ae3
 
bead2fc
0396ae3
 
 
3163565
bead2fc
 
 
 
 
 
0396ae3
 
 
02476c0
3163565
 
 
ca3e20d
 
 
 
 
3163565
 
 
61fccc8
 
 
c9b0c9c
02476c0
61fccc8
02476c0
 
 
da51f27
02476c0
 
 
 
 
61fccc8
d6124d1

"""
Voice Agent UI - Autonomous voice-controlled agent
COMPLETE FIXED VERSION
"""

import gradio as gr
import asyncio
from pathlib import Path
from utils.audio_utils import speech_to_text, text_to_speech
import time


def _thought_to_message(thought):
    """Convert one agent reasoning step into a Chatbot message dict.

    Args:
        thought: Either an object exposing ``type`` and ``content``
            attributes (``tool_name`` is optional), a dict with the same
            keys, or any other value, which is stringified and shown as a
            generic info step.

    Returns:
        A ``{"role": "assistant", "content": ...}`` dict whose content is
        an icon and bold title followed by the step text.
    """
    if hasattr(thought, "type"):
        kind = thought.type
        body = thought.content
        tool = getattr(thought, "tool_name", None)
    elif isinstance(thought, dict):
        kind = thought.get("type", "info")
        body = thought.get("content", "")
        tool = thought.get("tool_name")
    else:
        kind = "info"
        body = str(thought)
        tool = None

    # Default header for unknown/"info" steps. A non-empty title is required:
    # an empty one would render as a stray "****" in the chat.
    icon, title = "ℹ️", "Info"
    if kind == "planning":
        icon, title = "🧠", "Planning"
    elif kind == "tool_call":
        icon, title = "🔧", (f"Tool: {tool}" if tool else "Tool Call")
    elif kind == "reflection":
        icon, title = "💭", "Reflection"
    elif kind == "answer":
        icon, title = "✅", "Answer"

    return {
        "role": "assistant",
        "content": f"{icon} **{title}**\n\n{body}"
    }


def _recent_output_files(output_dir, max_age_seconds=300):
    """List regular files directly inside *output_dir* modified recently.

    Args:
        output_dir: ``pathlib.Path`` of the directory to scan (non-recursive).
        max_age_seconds: Only files modified within this many seconds of
            now are returned.

    Returns:
        A list of file paths as strings; empty if the directory does not
        exist. A filesystem error stops the scan and returns what was
        collected so far.
    """
    if not output_dir.exists():
        return []

    cutoff = time.time() - max_age_seconds
    recent = []
    try:
        for entry in output_dir.glob("*"):
            # Directories are skipped: only concrete files are offered for download.
            if entry.is_file() and entry.stat().st_mtime > cutoff:
                recent.append(str(entry))
    except OSError as e:
        # Best-effort: a failed scan must not discard the agent's answer.
        print(f"Output scan error (non-critical): {e}")
    return recent


def create_voice_agent_ui(agent):
    """Build the voice agent interface and wire up its event handlers.

    The layout has an input column (microphone, text box, execute button,
    status, file upload) and a results column (reasoning trace, final
    answer, spoken response, generated files).

    Args:
        agent: Object providing the awaitable methods
            ``execute(command, files)`` (returning ``(final_answer, thoughts)``)
            and ``process_files_to_rag(list_of_dicts)``.

    Returns:
        A newly created ``gr.Column()``.
    """

    with gr.Row():
        # LEFT COLUMN — INPUTS
        with gr.Column(scale=1):
            gr.Markdown("""
            ### 🎤 Voice Control
            
            **How to use:**
            1. Upload files (optional)
            2. Speak OR type your command
            3. Click Execute
            4. Watch agent work!
            
            **Example commands:**
            - "Extract text from my PDF"
            - "Summarize this document"
            - "Organize my files"
            """)

            # Audio input
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎙️ Speak Your Command"
            )

            # Manual command input
            text_input = gr.Textbox(
                label="⌨️ Or Type Command",
                placeholder="Example: Extract deadlines from all PDFs",
                lines=3
            )

            # Execute button
            execute_btn = gr.Button(
                "🚀 Execute Command",
                variant="primary",
                size="lg"
            )

            # Status
            status_box = gr.Textbox(
                label="📊 Status",
                value="Ready to execute commands",
                interactive=False
            )

            gr.Markdown("---")

            # File uploader
            voice_file_upload = gr.Files(
                label="📁 Upload Files for Agent",
                file_count="multiple",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
            )

            uploaded_files_list = gr.Textbox(
                label="📂 Available Files",
                placeholder="No files uploaded yet",
                lines=4,
                interactive=False
            )

        # RIGHT COLUMN — AGENT EXECUTION
        with gr.Column(scale=2):
            gr.Markdown("### 🤖 Agent Execution & Results")

            # Agent reasoning trace (Chatbot with minimal parameters)
            thought_trace = gr.Chatbot(
                label="🧠 Agent Reasoning Steps",
                height=400
            )

            # Final response
            final_response = gr.Textbox(
                label="✅ Final Answer",
                lines=6
            )

            # Voice output
            audio_output = gr.Audio(
                label="🔊 Voice Response (if available)",
                type="filepath",
                autoplay=True
            )

            # Generated files
            with gr.Accordion("📥 Generated Files", open=False):
                outputs_files = gr.Files(
                    label="Download Outputs",
                    file_count="multiple"
                )

    # STATE: paths of the files uploaded so far in this session
    uploaded_files_state = gr.State([])

    # FILE UPLOAD HANDLER
    async def handle_voice_file_upload(files):
        """Copy uploads into ``data/uploads`` and index each one with the agent.

        Returns:
            ``(summary_text, file_paths)``: a bullet list describing the
            copied files, and the list of their destination paths.
        """
        if not files:
            return "No files uploaded", []

        # Imported at call time rather than at module import.
        from utils.file_utils import copy_file, get_file_info
        import os

        file_paths = []
        file_info_text = []

        for file in files:
            # basename drops any directory part of the uploaded path.
            filename = os.path.basename(file)
            dest_path = f"data/uploads/{filename}"
            copy_file(file, dest_path)

            info = get_file_info(dest_path)
            file_paths.append(dest_path)
            file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")

            # Add to RAG. Best-effort: the file stays available to the agent
            # even if indexing fails, but the failure is reported.
            try:
                await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
            except Exception as e:
                print(f"RAG indexing error for {info['name']} (non-critical): {e}")

        return "\n".join(file_info_text), file_paths

    # MAIN COMMAND PROCESSOR
    async def process_audio_command(audio_file, text_command, files_list):
        """Run a typed or spoken command through the agent, streaming progress.

        Each yield is a 5-tuple matching the click outputs:
        ``(chat_messages, status, final_answer, audio_path, generated_files)``.
        A typed command takes precedence over a recording.
        """
        # Whitespace-only text counts as "no text", so it can neither override
        # a recording nor be sent to the agent as a blank command.
        text_command = (text_command or "").strip()

        # Step 1 — Identify user command
        if text_command:
            cmd = text_command
        elif audio_file:
            yield [], "🎤 Transcribing audio...", "", None, None
            cmd = await speech_to_text(audio_file)

            if not cmd:
                yield [], "⚠️ Failed to transcribe audio", "", None, None
                return
            yield [], f"🎤 Transcribed: {cmd}", "", None, None
        else:
            yield [], "⚠️ Please provide a voice or text command", "", None, None
            return

        # Show planning state
        yield [], "🧠 Agent is planning...", "", None, None

        try:
            # Call agent (non-streaming)
            final_answer, thoughts = await agent.execute(cmd, files_list)

            # The Chatbot receives a list of dicts with "role" and "content" keys.
            messages = [_thought_to_message(t) for t in thoughts]

            # Show results before the (possibly slow) speech synthesis.
            yield messages, "📊 Processing complete...", final_answer, None, None

            # TTS (optional - may fail if no API key)
            audio_path = None
            try:
                audio_path = await text_to_speech(final_answer)
                # Only yield audio if it's a valid file path, not a directory
                if audio_path and Path(audio_path).is_file():
                    yield messages, "✅ Complete!", final_answer, audio_path, None
                else:
                    audio_path = None
            except Exception as e:
                print(f"TTS Error (non-critical): {e}")
                audio_path = None

            # Collect recent outputs (files from the last 5 minutes)
            files_generated = _recent_output_files(Path("data/outputs"), 300)

            yield messages, "✅ Complete!", final_answer, audio_path, files_generated

        except Exception as e:
            import traceback
            err_msg = f"⚠️ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"

            # Error message in the same dict format as regular chat messages
            error_messages = [{
                "role": "assistant",
                "content": f"❌ **Error**\n\n{str(e)}"
            }]
            yield error_messages, f"❌ Error: {str(e)}", err_msg, None, None

    # CONNECT EVENTS
    def handle_voice_file_upload_sync(files):
        """Sync wrapper that runs the async upload handler to completion."""
        return asyncio.run(handle_voice_file_upload(files))

    voice_file_upload.change(
        fn=handle_voice_file_upload_sync,
        inputs=[voice_file_upload],
        outputs=[uploaded_files_list, uploaded_files_state]
    )

    execute_btn.click(
        fn=process_audio_command,
        inputs=[audio_input, text_input, uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    return gr.Column()