# LifeAdmin-AI — ui/voice_agent_ui.py
"""
Voice Agent UI - Autonomous voice-controlled agent
"""
import gradio as gr
import asyncio
from pathlib import Path
from utils.audio_utils import speech_to_text, text_to_speech
import time
def create_voice_agent_ui(agent):
    """Create voice agent interface.

    Builds the two-column Gradio layout for the voice-controlled agent:
    a control column (mic/text input, execute button, status, file upload)
    and a trace column (agent reasoning chatbot, final answer, voice reply,
    generated-file downloads).

    Args:
        agent: the autonomous agent object; must expose ``execute(...)`` and
            ``process_files_to_rag(...)`` (used by the event handlers below).

    NOTE(review): this function is expected to be called inside an open
    ``gr.Blocks()`` context by the caller — it creates rows/columns directly
    without opening a Blocks scope of its own; confirm against the app entry
    point.
    """
    with gr.Row():
        # Left column - Voice control
        with gr.Column(scale=1):
            gr.Markdown("""
### 🎀 Voice Control
Click the microphone button and speak your command.
The agent will autonomously execute your request.
""")
            # Audio input — type="filepath" means handlers receive a path
            # to the recorded clip, which speech_to_text() consumes.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Speak Your Command"
            )
            # Manual text input as fallback (takes precedence handling is
            # decided inside process_audio_command)
            text_input = gr.Textbox(
                label="Or Type Your Command",
                placeholder="Example: Extract deadlines from my PDFs and create calendar events",
                lines=3
            )
            # Execute button
            execute_btn = gr.Button(
                "πŸš€ Execute Command",
                variant="primary",
                size="lg"
            )
            # Status indicator — read-only, updated by every yield of the
            # command pipeline
            status_box = gr.Textbox(
                label="Status",
                value="Ready",
                interactive=False
            )
            gr.Markdown("---")
            # Upload files for agent to process
            voice_file_upload = gr.File(
                label="Upload Files for Agent",
                file_count="multiple",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
            )
            # Read-only listing of files copied into data/uploads
            uploaded_files_list = gr.Textbox(
                label="Available Files",
                placeholder="No files uploaded",
                interactive=False,
                lines=4
            )
        # Right column - Agent execution trace
        with gr.Column(scale=2):
            gr.Markdown("### πŸ€– Agent Thoughts & Execution")
            # Chat-like interface for agent thoughts; type="messages" expects
            # role/content dicts, which format_thought_message() produces.
            thought_trace = gr.Chatbot(
                label="Agent Reasoning",
                height=400,
                type="messages"
            )
            # Final response
            final_response = gr.Textbox(
                label="Final Response",
                lines=6,
                placeholder="Agent's final answer will appear here..."
            )
            # Audio output — autoplays the synthesized voice reply
            audio_output = gr.Audio(
                label="Voice Response",
                type="filepath",
                autoplay=True
            )
            # Download outputs
            with gr.Accordion("πŸ“₯ Generated Files", open=False):
                outputs_files = gr.File(
                    label="Download Generated Files",
                    file_count="multiple"
                )
    # State variables: list of server-side paths of uploaded files,
    # shared between the upload handler and the command pipeline.
    uploaded_files_state = gr.State([])
    # Example commands
    with gr.Row():
        gr.Markdown("""
### πŸ’‘ Example Commands
Try these voice commands:
- "Extract all deadlines from my PDFs and add them to my calendar"
- "Summarize this document and send me a professional email summary"
- "Organize my uploaded files by type"
- "Find all documents mentioning invoices and extract amounts"
- "Create a calendar event for tomorrow at 2 PM titled Team Meeting"
- "Draft a friendly email to John about the project update"
""")
    # Event handlers
async def handle_voice_file_upload(files):
    """Handle file uploads for the voice agent.

    Copies each uploaded file into ``data/uploads/``, indexes it into the
    agent's RAG store, and returns a display summary.

    Args:
        files: list of Gradio upload objects (each has a ``.name`` temp path),
            or a falsy value when nothing was uploaded.

    Returns:
        tuple[str, list[str]]: (newline-joined per-file status text,
        list of destination paths under data/uploads).
    """
    if not files:
        return "No files uploaded", []
    # FIX: this import previously ran inside the per-file loop; hoisted so it
    # executes once per upload batch.
    from utils.file_utils import copy_file, get_file_info
    file_list = []
    file_info_text = []
    for file in files:
        dest_path = f"data/uploads/{Path(file.name).name}"
        copy_file(file.name, dest_path)
        info = get_file_info(dest_path)
        file_list.append(dest_path)
        file_info_text.append(f"βœ“ {info['name']} ({info['size_mb']} MB)")
        # Add to RAG so the agent can retrieve this document later
        await agent.process_files_to_rag([{'path': dest_path, 'name': info['name']}])
    return "\n".join(file_info_text), file_list
async def process_audio_command(audio_file, text_command, files_list):
    """Process a voice or text command through the agent.

    Async generator driving the UI: every ``yield`` is a 5-tuple of
    (thought_trace messages, status text, final_response text, audio path,
    output files) matching the outputs wired to the execute button.

    Args:
        audio_file: filepath of the recorded clip, or None.
        text_command: typed command; when present it takes priority over audio.
        files_list: server-side paths of previously uploaded files.
    """
    # Determine input: transcribe audio only when no typed command was given.
    if audio_file and not text_command:
        # Transcribe audio
        yield [], "🎀 Transcribing audio...", "", None, None
        command_text = await speech_to_text(audio_file)
        if not command_text:
            yield [], "❌ Failed to transcribe audio", "", None, None
            return
        yield [], f"βœ“ Transcribed: {command_text}", "", None, None
        await asyncio.sleep(0.5)
    elif text_command:
        command_text = text_command
    else:
        # Neither input provided — surface a warning and stop.
        yield [], "⚠️ Please provide a voice or text command", "", None, None
        return
    # Update status
    yield [], f"πŸ€– Planning: {command_text}", "", None, None
    # Execute with agent
    thoughts_display = []
    final_answer = ""
    try:
        # Stream agent execution: each thought is rendered as a chat message
        # and reflected in the status bar as it arrives.
        async for thought in agent.execute(command_text, files_list, stream_thoughts=True):
            if thought:
                # Format thought for display
                thought_msg = format_thought_message(thought)
                thoughts_display.append(thought_msg)
                # Update UI
                status = get_status_from_thought(thought)
                yield thoughts_display, status, "", None, None
                # Small delay for UI update
                await asyncio.sleep(0.1)
        # Get final answer.
        # NOTE(review): agent.execute() is invoked a second time here with
        # stream_thoughts=False, presumably because streaming mode does not
        # return the final answer — confirm this doesn't re-run the whole
        # (potentially expensive) agent loop.
        final_answer, all_thoughts = await agent.execute(command_text, files_list, stream_thoughts=False)
        # Generate voice response
        yield thoughts_display, "πŸ”Š Generating voice response...", final_answer, None, None
        if final_answer:
            audio_path = await text_to_speech(final_answer)
            # Collect generated files (anything written to data/outputs recently)
            output_files = collect_output_files()
            yield thoughts_display, "βœ“ Complete!", final_answer, audio_path, output_files
        else:
            yield thoughts_display, "βœ“ Complete!", "Task executed successfully.", None, None
    except Exception as e:
        # Surface any agent/transcription failure in both status and response
        error_msg = f"❌ Error: {str(e)}"
        yield thoughts_display, error_msg, error_msg, None, None
def format_thought_message(thought):
    """Render an agent thought as a Gradio 'messages'-format chat entry.

    Args:
        thought: object with ``.type``, ``.content`` and (for tool calls)
            ``.tool_name`` attributes.

    Returns:
        dict with ``role``/``content``/``metadata`` keys; ``metadata.title``
        labels the message bubble in the Chatbot component.
    """
    # Icon + bubble title for the fixed thought categories.
    presets = {
        'planning': ("🧠", "🧠 Planning"),
        'reflection': ("πŸ’­", "πŸ’­ Reflecting"),
        'answer': ("βœ…", "βœ… Final Answer"),
    }
    kind = thought.type
    if kind == 'tool_call':
        # Tool calls embed the tool name in the title.
        icon = "πŸ”§"
        title = f"πŸ”§ Using Tool: {thought.tool_name or 'unknown'}"
    elif kind in presets:
        icon, title = presets[kind]
    else:
        icon, title = "ℹ️", "ℹ️ Info"
    return {
        "role": "assistant",
        "content": f"{icon} {thought.content}",
        "metadata": {"title": title},
    }
def get_status_from_thought(thought):
    """Map an agent thought to a short status-bar message."""
    kind = thought.type
    # Tool calls interpolate the tool name, so they can't live in the table.
    if kind == 'tool_call':
        return f"πŸ”§ Executing: {thought.tool_name or 'tool'}..."
    static_status = {
        'planning': "🧠 Planning execution...",
        'reflection': "πŸ’­ Analyzing results...",
        'answer': "βœ… Complete!",
    }
    return static_status.get(kind, "πŸ€– Processing...")
def collect_output_files():
    """Collect files generated in data/outputs within the last 5 minutes.

    Returns:
        list[str] of file paths, or None when the directory is missing or
        holds no recent files (None keeps the gr.File output empty).
    """
    out_dir = Path("data/outputs")
    if not out_dir.exists():
        return None
    # Anything modified in the last 300 seconds counts as "generated now".
    threshold = time.time() - 300
    fresh = [
        str(entry)
        for entry in out_dir.glob("*")
        if entry.is_file() and entry.stat().st_mtime > threshold
    ]
    return fresh or None
# Wire up events
# Upload widget: copy files to data/uploads, index into RAG, then show the
# summary text and store the server-side paths in state.
voice_file_upload.change(
    fn=handle_voice_file_upload,
    inputs=[voice_file_upload],
    outputs=[uploaded_files_list, uploaded_files_state]
)
# Execute button streams the async generator: each yield updates the trace,
# status, final answer, voice reply and generated-file download in turn.
execute_btn.click(
    fn=process_audio_command,
    inputs=[audio_input, text_input, uploaded_files_state],
    outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
)
# Quick action buttons
gr.Markdown("### ⚑ Quick Actions")
with gr.Row():
    quick_summarize = gr.Button("πŸ“ Summarize All Documents", size="sm")
    quick_calendar = gr.Button("πŸ“… Extract & Create Events", size="sm")
    quick_organize = gr.Button("πŸ—‚οΈ Organize Files", size="sm")
    quick_search = gr.Button("πŸ” Search Documents", size="sm")

async def quick_action(action_text, files_list):
    """Execute a canned command by delegating to the main pipeline."""
    async for update in process_audio_command(None, action_text, files_list):
        yield update

def _make_quick_handler(action_text):
    """Return an async-generator click handler bound to a fixed command.

    FIX: the buttons were previously wired with ``fn=lambda f:
    quick_action(text, f)``. A lambda is an ordinary function, so Gradio
    received the *async generator object* as its return value instead of
    streaming the yielded UI updates. Binding the command inside a real
    async generator function restores streaming.
    """
    async def _handler(files_list):
        async for update in quick_action(action_text, files_list):
            yield update
    return _handler

# All quick actions drive the same five outputs as the execute button.
_quick_outputs = [thought_trace, status_box, final_response, audio_output, outputs_files]
quick_summarize.click(
    fn=_make_quick_handler("Summarize all my uploaded documents"),
    inputs=[uploaded_files_state],
    outputs=_quick_outputs
)
quick_calendar.click(
    fn=_make_quick_handler("Extract all dates and deadlines from my documents and create calendar events"),
    inputs=[uploaded_files_state],
    outputs=_quick_outputs
)
quick_organize.click(
    fn=_make_quick_handler("Organize all my files by type"),
    inputs=[uploaded_files_state],
    outputs=_quick_outputs
)
quick_search.click(
    fn=_make_quick_handler("Search my documents for important information and summarize findings"),
    inputs=[uploaded_files_state],
    outputs=_quick_outputs
)