Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

App Files Files Community

LifeAdmin-AI / ui /voice_agent_ui.py

Maheen001

Create ui/voice_agent_ui.py

02476c0 verified 13 days ago

raw

history blame

11.4 kB

	"""
	Voice Agent UI - Autonomous voice-controlled agent
	"""

	import gradio as gr
	import asyncio
	from pathlib import Path
	from utils.audio_utils import speech_to_text, text_to_speech
	import time


	def create_voice_agent_ui(agent):
	"""Create voice agent interface"""

	with gr.Row():
	# Left column - Voice control
	with gr.Column(scale=1):
	gr.Markdown("""
	### 🎤 Voice Control

	Click the microphone button and speak your command.
	The agent will autonomously execute your request.
	""")

	# Audio input
	audio_input = gr.Audio(
	sources=["microphone"],
	type="filepath",
	label="Speak Your Command"
	)

	# Manual text input as fallback
	text_input = gr.Textbox(
	label="Or Type Your Command",
	placeholder="Example: Extract deadlines from my PDFs and create calendar events",
	lines=3
	)

	# Execute button
	execute_btn = gr.Button(
	"🚀 Execute Command",
	variant="primary",
	size="lg"
	)

	# Status indicator
	status_box = gr.Textbox(
	label="Status",
	value="Ready",
	interactive=False
	)

	gr.Markdown("---")

	# Upload files for agent to process
	voice_file_upload = gr.File(
	label="Upload Files for Agent",
	file_count="multiple",
	file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
	)

	uploaded_files_list = gr.Textbox(
	label="Available Files",
	placeholder="No files uploaded",
	interactive=False,
	lines=4
	)

	# Right column - Agent execution trace
	with gr.Column(scale=2):
	gr.Markdown("### 🤖 Agent Thoughts & Execution")

	# Chat-like interface for agent thoughts
	thought_trace = gr.Chatbot(
	label="Agent Reasoning",
	height=400,
	type="messages"
	)

	# Final response
	final_response = gr.Textbox(
	label="Final Response",
	lines=6,
	placeholder="Agent's final answer will appear here..."
	)

	# Audio output
	audio_output = gr.Audio(
	label="Voice Response",
	type="filepath",
	autoplay=True
	)

	# Download outputs
	with gr.Accordion("📥 Generated Files", open=False):
	outputs_files = gr.File(
	label="Download Generated Files",
	file_count="multiple"
	)

	# State variables
	uploaded_files_state = gr.State([])

	# Example commands
	with gr.Row():
	gr.Markdown("""
	### 💡 Example Commands

	Try these voice commands:
	- "Extract all deadlines from my PDFs and add them to my calendar"
	- "Summarize this document and send me a professional email summary"
	- "Organize my uploaded files by type"
	- "Find all documents mentioning invoices and extract amounts"
	- "Create a calendar event for tomorrow at 2 PM titled Team Meeting"
	- "Draft a friendly email to John about the project update"
	""")

	# Event handlers
	async def handle_voice_file_upload(files):
	"""Handle file uploads for voice agent"""
	if not files:
	return "No files uploaded", []

	file_list = []
	file_info_text = []

	for file in files:
	from utils.file_utils import copy_file, get_file_info

	dest_path = f"data/uploads/{Path(file.name).name}"
	copy_file(file.name, dest_path)

	info = get_file_info(dest_path)
	file_list.append(dest_path)
	file_info_text.append(f"✓ {info['name']} ({info['size_mb']} MB)")

	# Add to RAG
	await agent.process_files_to_rag([{'path': dest_path, 'name': info['name']}])

	return "\n".join(file_info_text), file_list

	async def process_audio_command(audio_file, text_command, files_list):
	"""Process voice or text command"""

	# Determine input
	if audio_file and not text_command:
	# Transcribe audio
	yield [], "🎤 Transcribing audio...", "", None, None
	command_text = await speech_to_text(audio_file)

	if not command_text:
	yield [], "❌ Failed to transcribe audio", "", None, None
	return

	yield [], f"✓ Transcribed: {command_text}", "", None, None
	await asyncio.sleep(0.5)

	elif text_command:
	command_text = text_command

	else:
	yield [], "⚠️ Please provide a voice or text command", "", None, None
	return

	# Update status
	yield [], f"🤖 Planning: {command_text}", "", None, None

	# Execute with agent
	thoughts_display = []
	final_answer = ""

	try:
	# Stream agent execution
	async for thought in agent.execute(command_text, files_list, stream_thoughts=True):
	if thought:
	# Format thought for display
	thought_msg = format_thought_message(thought)
	thoughts_display.append(thought_msg)

	# Update UI
	status = get_status_from_thought(thought)
	yield thoughts_display, status, "", None, None

	await asyncio.sleep(0.1) # Small delay for UI update

	# Get final answer
	final_answer, all_thoughts = await agent.execute(command_text, files_list, stream_thoughts=False)

	# Generate voice response
	yield thoughts_display, "🔊 Generating voice response...", final_answer, None, None

	if final_answer:
	audio_path = await text_to_speech(final_answer)

	# Collect generated files
	output_files = collect_output_files()

	yield thoughts_display, "✓ Complete!", final_answer, audio_path, output_files
	else:
	yield thoughts_display, "✓ Complete!", "Task executed successfully.", None, None

	except Exception as e:
	error_msg = f"❌ Error: {str(e)}"
	yield thoughts_display, error_msg, error_msg, None, None

	def format_thought_message(thought):
	"""Format thought as chat message"""
	thought_type = thought.type
	content = thought.content

	# Choose role and styling based on thought type
	if thought_type == 'planning':
	role = "assistant"
	icon = "🧠"
	metadata = {"title": "🧠 Planning"}
	elif thought_type == 'tool_call':
	role = "assistant"
	icon = "🔧"
	tool_name = thought.tool_name or "unknown"
	metadata = {"title": f"🔧 Using Tool: {tool_name}"}
	elif thought_type == 'reflection':
	role = "assistant"
	icon = "💭"
	metadata = {"title": "💭 Reflecting"}
	elif thought_type == 'answer':
	role = "assistant"
	icon = "✅"
	metadata = {"title": "✅ Final Answer"}
	else:
	role = "assistant"
	icon = "ℹ️"
	metadata = {"title": "ℹ️ Info"}

	return {
	"role": role,
	"content": f"{icon} {content}",
	"metadata": metadata
	}

	def get_status_from_thought(thought):
	"""Get status message from thought"""
	if thought.type == 'planning':
	return "🧠 Planning execution..."
	elif thought.type == 'tool_call':
	return f"🔧 Executing: {thought.tool_name or 'tool'}..."
	elif thought.type == 'reflection':
	return "💭 Analyzing results..."
	elif thought.type == 'answer':
	return "✅ Complete!"
	else:
	return "🤖 Processing..."

	def collect_output_files():
	"""Collect generated output files"""
	output_dir = Path("data/outputs")
	if not output_dir.exists():
	return None

	# Get recent files (last 5 minutes)
	recent_files = []
	cutoff_time = time.time() - 300

	for file_path in output_dir.glob("*"):
	if file_path.is_file() and file_path.stat().st_mtime > cutoff_time:
	recent_files.append(str(file_path))

	return recent_files if recent_files else None

	# Wire up events
	voice_file_upload.change(
	fn=handle_voice_file_upload,
	inputs=[voice_file_upload],
	outputs=[uploaded_files_list, uploaded_files_state]
	)

	execute_btn.click(
	fn=process_audio_command,
	inputs=[audio_input, text_input, uploaded_files_state],
	outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
	)

	# Quick action buttons
	gr.Markdown("### ⚡ Quick Actions")

	with gr.Row():
	quick_summarize = gr.Button("📝 Summarize All Documents", size="sm")
	quick_calendar = gr.Button("📅 Extract & Create Events", size="sm")
	quick_organize = gr.Button("🗂️ Organize Files", size="sm")
	quick_search = gr.Button("🔍 Search Documents", size="sm")

	async def quick_action(action_text, files_list):
	"""Execute quick action"""
	async for update in process_audio_command(None, action_text, files_list):
	yield update

	quick_summarize.click(
	fn=lambda f: quick_action("Summarize all my uploaded documents", f),
	inputs=[uploaded_files_state],
	outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
	)

	quick_calendar.click(
	fn=lambda f: quick_action("Extract all dates and deadlines from my documents and create calendar events", f),
	inputs=[uploaded_files_state],
	outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
	)

	quick_organize.click(
	fn=lambda f: quick_action("Organize all my files by type", f),
	inputs=[uploaded_files_state],
	outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
	)

	quick_search.click(
	fn=lambda f: quick_action("Search my documents for important information and summarize findings", f),
	inputs=[uploaded_files_state],
	outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
	)