# ui/voice_agent_ui.py
"""
Voice Agent UI - Autonomous voice-controlled agent
COMPLETE FIXED VERSION
"""
import gradio as gr
import asyncio
from pathlib import Path
from utils.audio_utils import speech_to_text, text_to_speech
import time
def create_voice_agent_ui(agent):
"""Create voice agent interface"""
with gr.Row():
# LEFT COLUMN - INPUTS
with gr.Column(scale=1):
gr.Markdown("""
### 🎤 Voice Control
**How to use:**
1. Upload files (optional)
2. Speak OR type your command
3. Click Execute
4. Watch agent work!
**Example commands:**
- "Extract text from my PDF"
- "Summarize this document"
- "Organize my files"
""")
# Audio input
audio_input = gr.Audio(
sources=["microphone"],
type="filepath",
label="πŸŽ™οΈ Speak Your Command"
)
# Manual command input
text_input = gr.Textbox(
label="⌨️ Or Type Command",
placeholder="Example: Extract deadlines from all PDFs",
lines=3
)
# Execute button
execute_btn = gr.Button(
"πŸš€ Execute Command",
variant="primary",
size="lg"
)
# Status
status_box = gr.Textbox(
label="πŸ“Š Status",
value="Ready to execute commands",
interactive=False
)
gr.Markdown("---")
# File uploader
voice_file_upload = gr.Files(
label="πŸ“ Upload Files for Agent",
file_count="multiple",
file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
)
uploaded_files_list = gr.Textbox(
label="πŸ“‚ Available Files",
placeholder="No files uploaded yet",
lines=4,
interactive=False
)
# RIGHT COLUMN - AGENT EXECUTION
with gr.Column(scale=2):
gr.Markdown("### πŸ€– Agent Execution & Results")
# Agent reasoning trace (Chatbot in "messages" mode: dicts with "role"/"content" keys)
thought_trace = gr.Chatbot(
label="🧠 Agent Reasoning Steps",
type="messages",
height=400
)
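# Example of the payload this component receives (messages format):
#   [{"role": "assistant", "content": "🧠 **Planning**\n\nStep 1: ..."}]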
# Final Response
final_response = gr.Textbox(
label="βœ… Final Answer",
lines=6
)
# Voice Output
audio_output = gr.Audio(
label="πŸ”Š Voice Response (if available)",
type="filepath",
autoplay=True
)
# Generated Files
with gr.Accordion("📥 Generated Files", open=False):
outputs_files = gr.Files(
label="Download Outputs",
file_count="multiple"
)
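# Filled with the paths of files created under data/outputs during the run
# (see process_audio_command below).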
# STATE: store uploaded files
uploaded_files_state = gr.State([])
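# Populated by handle_voice_file_upload below and passed into
# process_audio_command when the Execute button is clicked.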
# FILE UPLOAD HANDLER
async def handle_voice_file_upload(files):
"""Handle file uploads"""
if not files:
return "No files uploaded", []
file_paths = []
file_info_text = []
from utils.file_utils import copy_file, get_file_info
import os
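# copy_file() and get_file_info() are project helpers from utils.file_utils;
# get_file_info() is expected to return at least {"name": ..., "size_mb": ...}
# (as used below). Files land in data/uploads/, which is assumed to exist
# or to be created by copy_file().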
for file in files:
filename = os.path.basename(file)
dest_path = f"data/uploads/{filename}"
copy_file(file, dest_path)
info = get_file_info(dest_path)
file_paths.append(dest_path)
file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")
# Add to RAG
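# (best-effort: indexing failures are swallowed so the upload itself still succeeds)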
try:
await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
except Exception:
pass
return "\n".join(file_info_text), file_paths
# MAIN COMMAND PROCESSOR
async def process_audio_command(audio_file, text_command, files_list):
"""Process voice + text commands - COMPLETE FIXED VERSION"""
# Step 1 - Identify user command
if audio_file and not text_command:
# Transcribe
yield [], "🎀 Transcribing audio...", "", None, None
cmd = await speech_to_text(audio_file)
if not cmd:
yield [], "⚠️ Failed to transcribe audio", "", None, None
return
else:
yield [], f"🎀 Transcribed: {cmd}", "", None, None
elif text_command:
cmd = text_command
else:
yield [], "⚠️ Please provide a voice or text command", "", None, None
return
# Show planning state
yield [], "🧠 Agent is planning...", "", None, None
try:
# Call agent (non-streaming)
final_answer, thoughts = await agent.execute(cmd, files_list)
# Convert AgentThought objects to the Chatbot "messages" format:
# a list of dicts with "role" and "content" keys
messages = []
for t in thoughts:
# Handle both AgentThought objects and dicts
if hasattr(t, "type"):
t_type = t.type
t_content = t.content
t_tool = getattr(t, "tool_name", None)
elif isinstance(t, dict):
t_type = t.get("type", "info")
t_content = t.get("content", "")
t_tool = t.get("tool_name")
else:
t_type = "info"
t_content = str(t)
t_tool = None
# Format message with icon
icon = "ℹ️"
title = ""
if t_type == "planning":
icon = "🧠"
title = "Planning"
elif t_type == "tool_call":
icon = "πŸ”§"
title = f"Tool: {t_tool}" if t_tool else "Tool Call"
elif t_type == "reflection":
icon = "πŸ’­"
title = "Reflection"
elif t_type == "answer":
icon = "βœ…"
title = "Answer"
# Add as assistant message
messages.append({
"role": "assistant",
"content": f"{icon} **{title}**\n\n{t_content}"
})
# Show results
yield messages, "πŸ“Š Processing complete...", final_answer, None, None
# TTS (optional - may fail if no API key)
audio_path = None
try:
audio_path = await text_to_speech(final_answer)
# Only yield audio if it's a valid file path, not a directory
if audio_path and Path(audio_path).is_file():
yield messages, "βœ… Complete!", final_answer, audio_path, None
else:
audio_path = None
except Exception as e:
print(f"TTS Error (non-critical): {e}")
audio_path = None
# Collect recent outputs (only files, not directories)
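# Heuristic: anything in data/outputs modified within the last 5 minutes
# is treated as an output of this run.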
output_dir = Path("data/outputs")
files_generated = []
if output_dir.exists():
cutoff = time.time() - 300 # Last 5 minutes
try:
for f in output_dir.glob("*"):
if f.is_file() and f.stat().st_mtime > cutoff:
files_generated.append(str(f))
except Exception:
pass
yield messages, "βœ… Complete!", final_answer, audio_path, files_generated
except Exception as e:
import traceback
err_msg = f"⚠️ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
# Error message in dictionary format
error_messages = [{
"role": "assistant",
"content": f"❌ **Error**\n\n{str(e)}"
}]
yield error_messages, f"❌ Error: {str(e)}", err_msg, None, None
# CONNECT EVENTS
def handle_voice_file_upload_sync(files):
"""Sync wrapper for async function"""
return asyncio.run(handle_voice_file_upload(files))
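# Note: Gradio also accepts async callables as event handlers directly,
# so the async function could be wired up without this wrapper; it is kept
# only to make the event-loop handling explicit.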
voice_file_upload.change(
fn=handle_voice_file_upload_sync,
inputs=[voice_file_upload],
outputs=[uploaded_files_list, uploaded_files_state]
)
execute_btn.click(
fn=process_audio_command,
inputs=[audio_input, text_input, uploaded_files_state],
outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
)
return gr.Column()
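# Typical usage (sketch; assumes an `agent` instance and a surrounding Blocks app):
#
#   with gr.Blocks() as demo:
#       with gr.Tab("Voice Agent"):
#           create_voice_agent_ui(agent)
#   demo.launch()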