| """ | |
| Voice Agent UI - Autonomous voice-controlled agent | |
| COMPLETE FIXED VERSION | |
| """ | |
| import gradio as gr | |
| import asyncio | |
| from pathlib import Path | |
| from utils.audio_utils import speech_to_text, text_to_speech | |
| import time | |
def create_voice_agent_ui(agent):
    """Create voice agent interface"""
    with gr.Row():
        # LEFT COLUMN - INPUTS
        with gr.Column(scale=1):
            gr.Markdown("""
            ### 🎤 Voice Control

            **How to use:**
            1. Upload files (optional)
            2. Speak OR type your command
            3. Click Execute
            4. Watch agent work!

            **Example commands:**
            - "Extract text from my PDF"
            - "Summarize this document"
            - "Organize my files"
            """)

            # Audio input
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎙️ Speak Your Command"
            )

            # Manual command input
            text_input = gr.Textbox(
                label="⌨️ Or Type Command",
                placeholder="Example: Extract deadlines from all PDFs",
                lines=3
            )

            # Execute button
            execute_btn = gr.Button(
                "🚀 Execute Command",
                variant="primary",
                size="lg"
            )

            # Status
            status_box = gr.Textbox(
                label="📊 Status",
                value="Ready to execute commands",
                interactive=False
            )

            gr.Markdown("---")

            # File uploader
            voice_file_upload = gr.Files(
                label="📁 Upload Files for Agent",
                file_count="multiple",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
            )

            uploaded_files_list = gr.Textbox(
                label="📋 Available Files",
                placeholder="No files uploaded yet",
                lines=4,
                interactive=False
            )

        # RIGHT COLUMN - AGENT EXECUTION
        with gr.Column(scale=2):
            gr.Markdown("### 🤖 Agent Execution & Results")

            # Agent reasoning trace (simple Chatbot - minimal parameters)
            thought_trace = gr.Chatbot(
                label="🧠 Agent Reasoning Steps",
                height=400
            )

            # Final response
            final_response = gr.Textbox(
                label="✅ Final Answer",
                lines=6
            )

            # Voice output
            audio_output = gr.Audio(
                label="🔊 Voice Response (if available)",
                type="filepath",
                autoplay=True
            )

            # Generated files
            with gr.Accordion("📥 Generated Files", open=False):
                outputs_files = gr.Files(
                    label="Download Outputs",
                    file_count="multiple"
                )

    # STATE: store uploaded files
    uploaded_files_state = gr.State([])

    # FILE UPLOAD HANDLER
    async def handle_voice_file_upload(files):
        """Handle file uploads"""
        if not files:
            return "No files uploaded", []

        file_paths = []
        file_info_text = []

        from utils.file_utils import copy_file, get_file_info
        import os

        for file in files:
            filename = os.path.basename(file)
            dest_path = f"data/uploads/{filename}"
            copy_file(file, dest_path)

            info = get_file_info(dest_path)
            file_paths.append(dest_path)
            file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")

            # Add to RAG
            try:
                await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
            except Exception:
                pass

        return "\n".join(file_info_text), file_paths

    # MAIN COMMAND PROCESSOR
    async def process_audio_command(audio_file, text_command, files_list):
        """Process voice + text commands - COMPLETE FIXED VERSION"""
        # Step 1 - Identify user command
        if audio_file and not text_command:
            # Transcribe
            yield [], "🎤 Transcribing audio...", "", None, None
            cmd = await speech_to_text(audio_file)
            if not cmd:
                yield [], "⚠️ Failed to transcribe audio", "", None, None
                return
            else:
                yield [], f"🎤 Transcribed: {cmd}", "", None, None
        elif text_command:
            cmd = text_command
        else:
            yield [], "⚠️ Please provide a voice or text command", "", None, None
            return

        # Show planning state
        yield [], "🧠 Agent is planning...", "", None, None
        try:
            # Call agent (non-streaming)
            final_answer, thoughts = await agent.execute(cmd, files_list)

            # Convert AgentThought objects to Gradio 6.0 format
            # MUST be list of dicts with "role" and "content" keys
            messages = []
            for t in thoughts:
                # Handle both AgentThought objects and dicts
                if hasattr(t, "type"):
                    t_type = t.type
                    t_content = t.content
                    t_tool = getattr(t, "tool_name", None)
                elif isinstance(t, dict):
                    t_type = t.get("type", "info")
                    t_content = t.get("content", "")
                    t_tool = t.get("tool_name")
                else:
                    t_type = "info"
                    t_content = str(t)
                    t_tool = None

                # Format message with icon
                icon = "ℹ️"
                title = ""
                if t_type == "planning":
                    icon = "🧠"
                    title = "Planning"
                elif t_type == "tool_call":
                    icon = "🔧"
                    title = f"Tool: {t_tool}" if t_tool else "Tool Call"
                elif t_type == "reflection":
                    icon = "🔍"
                    title = "Reflection"
                elif t_type == "answer":
                    icon = "✅"
                    title = "Answer"

                # Add as assistant message
                messages.append({
                    "role": "assistant",
                    "content": f"{icon} **{title}**\n\n{t_content}"
                })

            # Show results
            yield messages, "📝 Processing complete...", final_answer, None, None

            # TTS (optional - may fail if no API key)
            audio_path = None
            try:
                audio_path = await text_to_speech(final_answer)
                # Only yield audio if it's a valid file path, not a directory
                if audio_path and Path(audio_path).is_file():
                    yield messages, "✅ Complete!", final_answer, audio_path, None
                else:
                    audio_path = None
            except Exception as e:
                print(f"TTS Error (non-critical): {e}")
                audio_path = None

            # Collect recent outputs (only files, not directories)
            output_dir = Path("data/outputs")
            files_generated = []
            if output_dir.exists():
                cutoff = time.time() - 300  # Last 5 minutes
                try:
                    for f in output_dir.glob("*"):
                        if f.is_file() and f.stat().st_mtime > cutoff:
                            files_generated.append(str(f))
                except Exception:
                    pass

            yield messages, "✅ Complete!", final_answer, audio_path, files_generated

        except Exception as e:
            import traceback
            err_msg = f"⚠️ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"

            # Error message in dictionary format
            error_messages = [{
                "role": "assistant",
                "content": f"❌ **Error**\n\n{str(e)}"
            }]
            yield error_messages, f"❌ Error: {str(e)}", err_msg, None, None

    # CONNECT EVENTS
    def handle_voice_file_upload_sync(files):
        """Sync wrapper for async function"""
        return asyncio.run(handle_voice_file_upload(files))
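    # asyncio.run() assumes no event loop is already running in this thread;
    # Gradio normally executes sync handlers in a worker thread, so this is fine.
    # Alternatively, the async handle_voice_file_upload could be wired directly,
    # since Gradio also accepts coroutine functions as event handlers.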

    voice_file_upload.change(
        fn=handle_voice_file_upload_sync,
        inputs=[voice_file_upload],
        outputs=[uploaded_files_list, uploaded_files_state]
    )

    execute_btn.click(
        fn=process_audio_command,
        inputs=[audio_input, text_input, uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    return gr.Column()
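

# --- Usage sketch (illustrative only) ---
# A minimal way to mount this builder, assuming an `agent` object that exposes
# the async `execute(cmd, files)` and `process_files_to_rag(files)` methods used
# above; the agent and UI import paths shown here are hypothetical:
#
#     import gradio as gr
#     from voice_agent import VoiceAgent                      # hypothetical
#     from ui.voice_agent_ui import create_voice_agent_ui     # hypothetical
#
#     agent = VoiceAgent()
#     with gr.Blocks(title="Voice Agent") as demo:
#         create_voice_agent_ui(agent)
#     demo.launch()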