File size: 8,924 Bytes
02476c0
3163565
bead2fc
02476c0
 
 
 
 
c9b0c9c
0396ae3
c9b0c9c
02476c0
 
61fccc8
da51f27
02476c0
0396ae3
02476c0
0396ae3
 
 
3163565
 
 
 
 
 
 
 
 
 
0396ae3
 
61fccc8
0396ae3
 
 
 
 
da51f27
0396ae3
 
 
 
 
 
da51f27
0396ae3
 
 
3163565
 
0396ae3
c9b0c9c
0396ae3
 
3163565
 
0396ae3
 
da51f27
02476c0
da51f27
0396ae3
 
 
02476c0
 
 
da51f27
0396ae3
3163565
0396ae3
 
 
 
da51f27
3163565
02476c0
3163565
da51f27
bead2fc
0396ae3
3163565
5c77b23
0396ae3
da51f27
3163565
0396ae3
3163565
5c77b23
0396ae3
da51f27
3163565
0396ae3
3163565
0396ae3
 
 
da51f27
3163565
02476c0
0396ae3
 
 
 
da51f27
0396ae3
02476c0
da51f27
0396ae3
02476c0
0396ae3
02476c0
 
da51f27
 
0396ae3
 
 
61fccc8
0396ae3
 
61fccc8
 
 
0396ae3
 
 
 
 
61fccc8
c9b0c9c
0396ae3
 
 
 
 
 
398cd77
02476c0
bead2fc
0396ae3
 
 
61fccc8
3163565
0396ae3
 
 
3163565
02476c0
0396ae3
 
 
 
02476c0
3163565
02476c0
da51f27
0396ae3
3163565
da51f27
02476c0
0396ae3
 
 
ca3e20d
 
0396ae3
d6124d1
b9e63ff
61fccc8
0396ae3
 
 
 
 
 
 
 
 
 
 
 
 
3163565
0396ae3
 
 
 
3163565
0396ae3
 
3163565
0396ae3
 
3163565
0396ae3
 
3163565
0396ae3
ca3e20d
 
 
 
 
0396ae3
61fccc8
bead2fc
0396ae3
3163565
bead2fc
0396ae3
 
bead2fc
 
 
 
 
 
 
0396ae3
 
bead2fc
0396ae3
 
 
3163565
bead2fc
 
 
 
 
 
0396ae3
 
 
02476c0
3163565
 
 
ca3e20d
 
 
 
 
3163565
 
 
61fccc8
 
 
c9b0c9c
02476c0
61fccc8
02476c0
 
 
da51f27
02476c0
 
 
 
 
61fccc8
d6124d1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
"""
Voice Agent UI - Autonomous voice-controlled agent
COMPLETE FIXED VERSION
"""

import gradio as gr
import asyncio
from pathlib import Path
from utils.audio_utils import speech_to_text, text_to_speech
import time


def create_voice_agent_ui(agent):
    """Build the voice-agent interface and wire up its event handlers.

    The layout is a two-column row: command inputs and file upload on the
    left, the agent's reasoning trace, final answer, spoken response and
    generated files on the right.

    NOTE(review): components are created directly with ``gr.Row()``, so this
    presumably has to be called inside an active ``gr.Blocks`` context --
    confirm against the caller.

    Args:
        agent: Object exposing two coroutines used by the handlers:
            ``process_files_to_rag(list_of_dicts)`` and
            ``execute(command, files_list)``, the latter returning a
            ``(final_answer, thoughts)`` pair.

    Returns:
        A newly created, empty ``gr.Column`` (not the layout built here);
        kept unchanged for caller compatibility.
    """

    with gr.Row():
        # LEFT COLUMN — INPUTS
        with gr.Column(scale=1):
            gr.Markdown("""
            ### 🎤 Voice Control
            
            **How to use:**
            1. Upload files (optional)
            2. Speak OR type your command
            3. Click Execute
            4. Watch agent work!
            
            **Example commands:**
            - "Extract text from my PDF"
            - "Summarize this document"
            - "Organize my files"
            """)

            # Audio input
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎙️ Speak Your Command"
            )

            # Manual command input
            text_input = gr.Textbox(
                label="⌨️ Or Type Command",
                placeholder="Example: Extract deadlines from all PDFs",
                lines=3
            )

            # Execute button
            execute_btn = gr.Button(
                "🚀 Execute Command",
                variant="primary",
                size="lg"
            )

            # Status
            status_box = gr.Textbox(
                label="📊 Status",
                value="Ready to execute commands",
                interactive=False
            )

            gr.Markdown("---")

            # File uploader
            voice_file_upload = gr.Files(
                label="📁 Upload Files for Agent",
                file_count="multiple",
                file_types=[".pdf", ".png", ".jpg", ".jpeg", ".docx", ".txt", ".csv"]
            )

            uploaded_files_list = gr.Textbox(
                label="📂 Available Files",
                placeholder="No files uploaded yet",
                lines=4,
                interactive=False
            )

        # RIGHT COLUMN — AGENT EXECUTION
        with gr.Column(scale=2):
            gr.Markdown("### 🤖 Agent Execution & Results")

            # Agent reasoning trace (plain Chatbot with minimal parameters)
            thought_trace = gr.Chatbot(
                label="🧠 Agent Reasoning Steps",
                height=400
            )

            # Final response
            final_response = gr.Textbox(
                label="✅ Final Answer",
                lines=6
            )

            # Voice output
            audio_output = gr.Audio(
                label="🔊 Voice Response (if available)",
                type="filepath",
                autoplay=True
            )

            # Generated files
            with gr.Accordion("📥 Generated Files", open=False):
                outputs_files = gr.Files(
                    label="Download Outputs",
                    file_count="multiple"
                )

    # STATE: paths of the files uploaded so far, passed to the agent on execute
    uploaded_files_state = gr.State([])

    # FILE UPLOAD HANDLER
    async def handle_voice_file_upload(files):
        """Copy uploaded files into ``data/uploads`` and index them for RAG.

        Args:
            files: Uploaded file paths from the ``gr.Files`` component, or a
                falsy value when nothing is uploaded.

        Returns:
            A ``(summary_text, file_paths)`` tuple: one bullet line per file
            for display, and the list of destination paths for the state.
        """
        if not files:
            return "No files uploaded", []

        # Local imports, as in the original handler.
        import os
        from utils.file_utils import copy_file, get_file_info

        file_paths = []
        file_info_text = []

        for file in files:
            filename = os.path.basename(file)
            dest_path = f"data/uploads/{filename}"
            copy_file(file, dest_path)

            info = get_file_info(dest_path)
            file_paths.append(dest_path)
            file_info_text.append(f"• {info['name']} ({info['size_mb']} MB)")

            # RAG indexing is best-effort: a failure must not block the
            # upload, but it is reported instead of being silently dropped.
            try:
                await agent.process_files_to_rag([{"path": dest_path, "name": info['name']}])
            except Exception as exc:
                print(f"RAG indexing error for {info['name']} (non-critical): {exc}")

        return "\n".join(file_info_text), file_paths

    # MAIN COMMAND PROCESSOR
    async def process_audio_command(audio_file, text_command, files_list):
        """Run one command through the agent, streaming UI updates.

        A typed command takes priority; the recording is transcribed only
        when no (non-blank) text was entered.

        Args:
            audio_file: Path to the recorded audio, or a falsy value.
            text_command: Text typed by the user, possibly empty.
            files_list: Uploaded file paths held in the UI state.

        Yields:
            5-tuples ``(messages, status, final_answer, audio_path, files)``
            matching the outputs wired to the execute button.
        """

        # Step 1 — Identify user command
        typed_command = (text_command or "").strip()
        if typed_command:
            cmd = typed_command
        elif audio_file:
            yield [], "🎤 Transcribing audio...", "", None, None
            cmd = await speech_to_text(audio_file)

            if not cmd:
                yield [], "⚠️ Failed to transcribe audio", "", None, None
                return
            yield [], f"🎤 Transcribed: {cmd}", "", None, None
        else:
            yield [], "⚠️ Please provide a voice or text command", "", None, None
            return

        # Show planning state
        yield [], "🧠 Agent is planning...", "", None, None

        try:
            # Call agent (non-streaming)
            final_answer, thoughts = await agent.execute(cmd, files_list)

            # The Chatbot is fed a list of {"role", "content"} dicts.
            messages = [_thought_to_message(thought) for thought in thoughts]

            # Show results before the (potentially slow) speech synthesis
            yield messages, "📊 Processing complete...", final_answer, None, None

            # TTS is optional and may fail (e.g. missing API key)
            audio_path = None
            try:
                candidate = await text_to_speech(final_answer)
                # Only accept a real file path, never a directory
                if candidate and Path(candidate).is_file():
                    audio_path = candidate
            except Exception as e:
                print(f"TTS Error (non-critical): {e}")

            files_generated = _recent_output_files(Path("data/outputs"))

            # Single final update: the audio is sent once so the autoplaying
            # player is not handed the same file twice in a row.
            yield messages, "✅ Complete!", final_answer, audio_path, files_generated

        except Exception as e:
            import traceback
            err_msg = f"⚠️ Error: {e}\n\nTraceback:\n{traceback.format_exc()}"

            # Error message in the same dict format as regular messages
            error_messages = [{
                "role": "assistant",
                "content": f"❌ **Error**\n\n{e}"
            }]
            yield error_messages, f"❌ Error: {e}", err_msg, None, None

    # CONNECT EVENTS
    def handle_voice_file_upload_sync(files):
        """Run the async upload handler to completion via ``asyncio.run``."""
        return asyncio.run(handle_voice_file_upload(files))

    voice_file_upload.change(
        fn=handle_voice_file_upload_sync,
        inputs=[voice_file_upload],
        outputs=[uploaded_files_list, uploaded_files_state]
    )

    execute_btn.click(
        fn=process_audio_command,
        inputs=[audio_input, text_input, uploaded_files_state],
        outputs=[thought_trace, status_box, final_response, audio_output, outputs_files]
    )

    # NOTE(review): this returns a fresh, empty Column rather than the layout
    # built above; left as-is because callers may rely on the return type.
    return gr.Column()


def _thought_to_message(thought):
    """Convert one agent thought into an assistant message dict.

    Accepts an object with ``type``/``content`` (and optional ``tool_name``)
    attributes, a dict with the same keys, or any other value, which is
    shown as plain informational text.
    """
    if hasattr(thought, "type"):
        kind = thought.type
        content = thought.content
        tool = getattr(thought, "tool_name", None)
    elif isinstance(thought, dict):
        kind = thought.get("type", "info")
        content = thought.get("content", "")
        tool = thought.get("tool_name")
    else:
        kind = "info"
        content = str(thought)
        tool = None

    # Default heading is non-empty so unknown types never render as "****".
    icon = "ℹ️"
    title = "Info"
    if kind == "planning":
        icon = "🧠"
        title = "Planning"
    elif kind == "tool_call":
        icon = "🔧"
        title = f"Tool: {tool}" if tool else "Tool Call"
    elif kind == "reflection":
        icon = "💭"
        title = "Reflection"
    elif kind == "answer":
        icon = "✅"
        title = "Answer"

    return {
        "role": "assistant",
        "content": f"{icon} **{title}**\n\n{content}"
    }


def _recent_output_files(output_dir, max_age_seconds=300):
    """Return paths of regular files in *output_dir* modified recently.

    Only direct children are considered; directories are skipped. A file
    that cannot be inspected is reported and skipped rather than aborting
    the whole scan.
    """
    recent = []
    if not output_dir.exists():
        return recent

    cutoff = time.time() - max_age_seconds
    for entry in output_dir.glob("*"):
        try:
            if entry.is_file() and entry.stat().st_mtime > cutoff:
                recent.append(str(entry))
        except OSError as exc:
            print(f"Skipping output file {entry} (non-critical): {exc}")
    return recent