| """ | |
| Voice Agent UI - Autonomous voice-controlled agent | |
| COMPLETE FIXED VERSION | |
| """ | |
| import gradio as gr | |
| import asyncio | |
| from pathlib import Path | |
| from utils.audio_utils import speech_to_text, text_to_speech | |
| import time | |
def create_voice_agent_ui(agent):
    """Create voice agent interface"""
    with gr.Row():
        # LEFT COLUMN - INPUTS
        with gr.Column(scale=1):
            gr.Markdown("""
            ### 🎤 Voice Control

            **How to use:**
            1. Upload files (optional)
            2. Speak OR type your command
            3. Click Execute
            4. Watch agent work!

            **Example commands:**
            - "Extract text from my PDF"
            - "Summarize this document"
            - "Organize my files"
            """)

            # Audio input
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎙️ Speak Your Command"
            )

            # Manual command input
            text_input = gr.Textbox(
                label="⌨️ Or Type Command",
                placeholder="Example: Extract deadlines from all PDFs",
                lines=3
            )

            # Execute button
            execute_btn = gr.Button(
                "🚀 Execute Command",
                variant="primary",
                size="lg"
            )

            # Status
            status_box = gr.Textbox(
                label="📊 Status",
                value="Ready to execute commands",
                interactive=False
            )

            gr.Markdown("---")

            # File uploader
            voice_file_upload = gr.Files(
                label="📁 Upload Files for Agent",
                file_count="multiple",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
            )

            uploaded_files_list = gr.Textbox(
                label="📋 Available Files",
                placeholder="No files uploaded yet",
                lines=4,
                interactive=False
            )

        # RIGHT COLUMN - AGENT EXECUTION
        with gr.Column(scale=2):
            gr.Markdown("### 🤖 Agent Execution & Results")

            # Agent reasoning trace (simple Chatbot - minimal parameters)
            thought_trace = gr.Chatbot(
                label="🧠 Agent Reasoning Steps",
                height=400
            )

            # Final response
            final_response = gr.Textbox(
                label="✅ Final Answer",
                lines=6
            )

            # Voice output
            audio_output = gr.Audio(
                label="🔊 Voice Response (if available)",
                type="filepath",
                autoplay=True
            )

            # Generated files
            with gr.Accordion("📥 Generated Files", open=False):
                outputs_files = gr.Files(
                    label="Download Outputs",
                    file_count="multiple"
                )

    # STATE: store uploaded files
    uploaded_files_state = gr.State([])

    # FILE UPLOAD HANDLER
    async def handle_voice_file_upload(files):
        """Handle file uploads"""
        if not files:
            return "No files uploaded", []

        file_paths = []
        file_info_text = []

        from utils.file_utils import copy_file, get_file_info
        import os

        for file in files:
            filename = os.path.basename(file)
            dest_path = f"data/uploads/{filename}"
            copy_file(file, dest_path)

            info = get_file_info(dest_path)
            file_paths.append(dest_path)
            file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")

            # Add to RAG
            try:
                await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
            except Exception:
                pass

        return "\n".join(file_info_text), file_paths

    # MAIN COMMAND PROCESSOR
    async def process_audio_command(audio_file, text_command, files_list):
        """Process voice + text commands - COMPLETE FIXED VERSION"""
        # Step 1 - Identify user command
        if audio_file and not text_command:
            # Transcribe
            yield [], "🎤 Transcribing audio...", "", None, None
            cmd = await speech_to_text(audio_file)
            if not cmd:
                yield [], "⚠️ Failed to transcribe audio", "", None, None
                return
            else:
                yield [], f"🎤 Transcribed: {cmd}", "", None, None
        elif text_command:
            cmd = text_command
        else:
            yield [], "⚠️ Please provide a voice or text command", "", None, None
            return

        # Show planning state
        yield [], "🧠 Agent is planning...", "", None, None
        try:
            # Call agent (non-streaming)
            final_answer, thoughts = await agent.execute(cmd, files_list)

            # Convert AgentThought objects to Gradio 6.0 format
            # MUST be list of dicts with "role" and "content" keys
            messages = []
            for t in thoughts:
                # Handle both AgentThought objects and dicts
                if hasattr(t, "type"):
                    t_type = t.type
                    t_content = t.content
                    t_tool = getattr(t, "tool_name", None)
                elif isinstance(t, dict):
                    t_type = t.get("type", "info")
                    t_content = t.get("content", "")
                    t_tool = t.get("tool_name")
                else:
                    t_type = "info"
                    t_content = str(t)
                    t_tool = None

                # Format message with icon
                icon = "ℹ️"
                title = ""
                if t_type == "planning":
                    icon = "🧠"
                    title = "Planning"
                elif t_type == "tool_call":
                    icon = "🔧"
                    title = f"Tool: {t_tool}" if t_tool else "Tool Call"
                elif t_type == "reflection":
                    icon = "🔍"
                    title = "Reflection"
                elif t_type == "answer":
                    icon = "✅"
                    title = "Answer"

                # Add as assistant message
                messages.append({
                    "role": "assistant",
                    "content": f"{icon} **{title}**\n\n{t_content}"
                })

            # Show results
            yield messages, "📝 Processing complete...", final_answer, None, None

            # TTS (optional - may fail if no API key)
            audio_path = None
            try:
                audio_path = await text_to_speech(final_answer)
                # Only yield audio if it's a valid file path, not a directory
                if audio_path and Path(audio_path).is_file():
                    yield messages, "✅ Complete!", final_answer, audio_path, None
                else:
                    audio_path = None
            except Exception as e:
                print(f"TTS Error (non-critical): {e}")
                audio_path = None

            # Collect recent outputs (only files, not directories)
            output_dir = Path("data/outputs")
            files_generated = []
            if output_dir.exists():
                cutoff = time.time() - 300  # Last 5 minutes
                try:
                    for f in output_dir.glob("*"):
                        if f.is_file() and f.stat().st_mtime > cutoff:
                            files_generated.append(str(f))
                except Exception:
                    pass

            yield messages, "✅ Complete!", final_answer, audio_path, files_generated

        except Exception as e:
            import traceback
            err_msg = f"⚠️ Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"

            # Error message in dictionary format
            error_messages = [{
                "role": "assistant",
                "content": f"❌ **Error**\n\n{str(e)}"
            }]
            yield error_messages, f"❌ Error: {str(e)}", err_msg, None, None

    # CONNECT EVENTS
    def handle_voice_file_upload_sync(files):
        """Sync wrapper for async function"""
        return asyncio.run(handle_voice_file_upload(files))
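    # asyncio.run() assumes no event loop is already running in this thread;
    # Gradio normally executes sync handlers in a worker thread, so this is fine.
    # Alternatively, the async handle_voice_file_upload could be wired directly,
    # since Gradio also accepts coroutine functions as event handlers.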

    voice_file_upload.change(
        fn=handle_voice_file_upload_sync,
        inputs=[voice_file_upload],
        outputs=[uploaded_files_list, uploaded_files_state]
    )

    execute_btn.click(
        fn=process_audio_command,
        inputs=[audio_input, text_input, uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    return gr.Column()
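

# --- Usage sketch (illustrative only) ---
# A minimal way to mount this builder, assuming an `agent` object that exposes
# the async `execute(cmd, files)` and `process_files_to_rag(files)` methods used
# above; the agent and UI import paths shown here are hypothetical:
#
#     import gradio as gr
#     from voice_agent import VoiceAgent                      # hypothetical
#     from ui.voice_agent_ui import create_voice_agent_ui     # hypothetical
#
#     agent = VoiceAgent()
#     with gr.Blocks(title="Voice Agent") as demo:
#         create_voice_agent_ui(agent)
#     demo.launch()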