TheZakynthian committed on
Commit 8e99e29 · verified · 1 Parent(s): de5c364

Update app.py

Files changed (1)
  1. app.py +609 -24
app.py CHANGED
@@ -1,9 +1,29 @@
1
  import os
2
  import gradio as gr
3
- from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
4
  import requests
5
- import inspect
6
  import pandas as pd
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
@@ -13,18 +33,557 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
  # --- Final Agent Definition ---
15
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
16
- class FinalAgent:
17
-     def __init__(self):
18
-         print("FinalAgent initialized.")
19
-     def __call__(self, question: str) -> str:
20
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
21

22

23

24

25
-         fixed_answer = "This is a default answer."
26
-         print(f"Agent returning fixed answer: {fixed_answer}")
27
-         return fixed_answer
28
 
29
  def run_and_submit_all( profile: gr.OAuthProfile | None):
30
  """
@@ -47,13 +606,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
47
 
48
  # 1. Instantiate Agent ( modify this part to create your agent)
49
  try:
50
- agent = FinalAgent()
51
  except Exception as e:
52
  print(f"Error instantiating agent: {e}")
53
  return f"Error initializing agent: {e}", None
54
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
55
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
56
- print(agent_code)
57
 
58
  # 2. Fetch Questions
59
  print(f"Fetching questions from: {questions_url}")
@@ -80,30 +638,56 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  results_log = []
81
  answers_payload = []
82
  print(f"Running agent on {len(questions_data)} questions...")
83

84
  for item in questions_data:
85
- print(item.keys())
86
- print(item.get("task_id"))
87
- print(item.get("question"))
88
  """
89
- task_id = item.get("task_id")
90
- question_text = item.get("question")
91
- if not task_id or question_text is None:
 
92
  print(f"Skipping item with missing task_id or question: {item}")
93
  continue
94
  try:
95
- submitted_answer = agent(question_text)
 
96
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
97
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
98
  except Exception as e:
99
  print(f"Error running agent on task {task_id}: {e}")
100
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
101
- """
102
 
103
  if not answers_payload:
104
  print("Agent did not produce any answers to submit.")
105
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
106
- """
107
  # 4. Prepare Submission
108
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
109
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
@@ -151,7 +735,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
151
  print(status_message)
152
  results_df = pd.DataFrame(results_log)
153
  return status_message, results_df
154
- """
155
 
156
  # --- Build Gradio Interface using Blocks ---
157
  with gr.Blocks() as demo:
@@ -185,6 +769,7 @@ with gr.Blocks() as demo:
185
  )
186
 
187
  if __name__ == "__main__":
 
188
  print("\n" + "-"*30 + " App Starting " + "-"*30)
189
  # Check for SPACE_HOST and SPACE_ID at startup for information
190
  space_host_startup = os.getenv("SPACE_HOST")
 
1
  import os
2
  import gradio as gr
3
+ import base64
4
+ import ffmpeg, cv2, numpy as np, tempfile, io, base64, os, pathlib
5
+ import openai
6
+ from pathlib import Path
7
+ from typing import List, TypedDict, Dict, Any
8
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
9
+ from pytube import YouTube
10
+ from langchain.tools import tool
11
+ from langchain_community.utilities import WikipediaAPIWrapper
12
+ from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
13
+ from langchain_openai import ChatOpenAI
14
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
15
+ from langgraph.graph import START, StateGraph, END
16
+ from langchain_community.tools.tavily_search import TavilySearchResults
17
+ import PIL.Image as Image
18
+ import subprocess
19
+ import requests, os, tempfile, shutil
20
  import requests
 
21
  import pandas as pd
22
+ import time
23
+
24
+ os.environ["OPENAI_API_KEY"] = "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"
25
+ os.environ['TAVILY_API_KEY'] = "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"
26
+ openai.api_key = "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"
27
 
28
  # (Keep Constants as is)
29
  # --- Constants ---
 
33
 
34
  # --- Final Agent Definition ---
35
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
36
+
37
+ general_llm = ChatOpenAI(model="gpt-4o-mini")
38
+ audio_llm = "whisper-1"
39
+
40
+ class AgentState(TypedDict, total=False):
41
+ file_path: str | None # Path to the task's attached file, if any
42
+ question: str # The question text for the current task
43
+ answer: str | None
44
+ agent_type: str | None
45
+ messages: list[AIMessage | HumanMessage | SystemMessage]
46
+
47
+ @tool
48
+ def addition_tool(list: List[float]) -> float:
49
+ """
50
+ Description:
51
+ A simple addition tool that takes a list of numbers and returns their sum.
52
+
53
+ Arguments:
54
+ • list (List[float]): List of numbers to add.
55
+
56
+ Return:
57
+ float – The sum of the numbers in the list.
58
+ """
59
+
60
+ return sum(list)
61
+
62
+ @tool
63
+ def xlsx_handler(filepath: str) -> List[Dict[str, Any]]:
64
+ """
65
+ Description:
66
+ Load the first sheet of an Excel workbook and convert it into
67
+ a JSON-serialisable list of row dictionaries (records).
68
+
69
+ Arguments:
70
+ • filepath (str): Absolute or relative path to the .xlsx file.
71
+
72
+ Return:
73
+ List[Dict[str, Any]] – One dictionary per column with its name and its list of values.
74
+ """
75
+ # Load the Excel file
76
+ df = pd.read_excel(filepath)
77
+
78
+ columns = df.columns.tolist()
79
+
80
+ result = []
81
+ for col in columns:
82
+ result.append({"column": col, "values": df[col].tolist()})
83
+ # Convert to list of dictionaries (records)
84
+ #data = df.to_dict(orient="records")
85
+
86
+ # Convert to JSON string (pretty-printed)
87
+ #return json.dumps(data, indent=4)
88
+ return result
89
+
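For orientation, the column-wise records that xlsx_handler returns would look roughly like this for a hypothetical two-column sheet (illustration, not output from the commit):

# xlsx_handler.invoke({"filepath": "sales.xlsx"})  # hypothetical file
# -> [{"column": "item",  "values": ["apple", "pear"]},
#     {"column": "price", "values": [1.5, 2.0]}]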
90
+ @tool
91
+ def python_handler(filepath: str) -> str:
92
+ """
93
+ Description:
94
+ Execute a stand-alone Python script in a sandboxed subprocess and
95
+ capture anything the script prints to stdout. Stderr is returned
96
+ instead if the script exits with a non-zero status.
97
+
98
+ Arguments:
99
+ • filepath (str): Path to the .py file to run.
100
+
101
+ Return:
102
+ str – The final output of the .py file.
103
+ """
104
+ try:
105
+ result = subprocess.run(
106
+ ["python", filepath],
107
+ capture_output=True,
108
+ text=True,
109
+ timeout=10 # Safety
110
+ )
111
+ return result.stdout.strip() if result.returncode == 0 else result.stderr
112
+ except Exception as e:
113
+ return f"Execution failed: {str(e)}"
114
+
115
+ @tool
116
+ def video_decomposition(url: str, task: str) -> str:
117
+ """
118
+ Description:
119
+ Download a YouTube video, extract ≤ 10 visually distinct key frames
120
+ and a Whisper transcript, feed them plus the user’s task to a
121
+ vision-capable LLM, and return the model’s answer.
122
+
123
+ Arguments:
124
+ • url (str) : Full YouTube link.
125
+ • task (str) : The question the model should answer about the clip.
126
+
127
+ Return:
128
+ str – The final response to the user question derived from both audio and visuals.
129
+ """
130
+
131
+ with tempfile.TemporaryDirectory() as tmp:
132
+ tmp_dir = pathlib.Path(tmp)
133
+
134
+ # 1) Fetch clip
135
+ vid_path = download_youtube(url, tmp_dir)
136
+
137
+ # 2) Key-frame extraction
138
+ frames = key_frames_retrieval(vid_path)
139
+
140
+ # 3) Audio extraction
141
+ transcript = audio_retrieval(vid_path)
142
+
143
+ system_msg = SystemMessage(
144
+ content=("You are a Vision AI assistant that can process videos and answer correctly the user's questions"
145
+ "You are provided with key video frames, an audio transcript and a task related with those"
146
+ "Read the task **carefully**, examine all the video frames and the audio transcript and your final response **MUST** be only the final answer to the task's question"
147
+ "The content and format of your final respose is dictated by the task and only that")
148
+ )
149
+
150
+ # 4) Build multimodal prompt
151
+ parts = [
152
+ {
153
+ "type": "text",
154
+ "Task": (f"{task}")
155
+ },
156
+ {
157
+ "type": "text",
158
+ "Transcript": (f"{transcript[:4000]}")
159
+ }
160
+ ]
161
+ for im in frames:
162
+ parts.append(
163
+ {
164
+ "type": "image_url",
165
+ "image_url": {"url": img_to_data(im)},
166
+ }
167
+ )
168
+
169
+ messages = [
170
+ system_msg,
171
+ HumanMessage(
172
+ content=parts
173
+ )
174
+ ]
175
+
176
+ response = general_llm.invoke(messages)
177
+
178
+ return response.content
179
+
180
+ @tool
181
+ def reverse_string(text: str) -> str:
182
+ """
183
+ Description:
184
+ Reverse the order of words *and* the letters inside each word.
185
+ Converts a fully reversed sentence back to readable form.
186
+
187
+ Arguments:
188
+ • text (str): Original sentence to transform.
189
+
190
+ Return:
191
+ str – The readable reversed sentence.
192
+ """
193
+ # 1️⃣ split into words, 2️⃣ reverse word order,
194
+ # 3️⃣ reverse letters in each word, 4️⃣ re-join
195
+ reversed_words = [word[::-1] for word in reversed(text.split())]
196
+ return " ".join(reversed_words)
197
+
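A quick illustration with a made-up input, assuming a LangChain version where tools expose .invoke:

# hypothetical example, not part of the commit
reverse_string.invoke(".dlrow olleH")  # -> "Hello world."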
198
+ @tool
199
+ def web_search(query: str):
200
+ """
201
+ Description:
202
+ A web search tool. Scrapes the top results and returns each on its own line.
203
+
204
+ Arguments:
205
+ • query (str) : question you want to web search.
206
+
207
+ Return:
208
+ str – A newline-separated summary, one result per line in the form '- <content> (<url>)'.
209
+ """
210
+ search = TavilySearchResults()
211
+ results = search.run(query)
212
+ return "\n".join([f"- {r['content']} ({r['url']})" for r in results])
213
+
214
+ @tool
215
+ def wikipedia_search(query: str):
216
+ """
217
+ Description:
218
+ Query the English-language Wikipedia via the MediaWiki API and
219
+ return a short plain-text extract.
220
+
221
+ Arguments:
222
+ • query (str) : Page title or free-text search string.
223
+
224
+ Return:
225
+ str – Extracted summary paragraph.
226
+ """
227
+
228
+ wiki = WikipediaAPIWrapper()
229
+ return wiki.run(query)
230
+
231
+ def download_youtube(url: str, out_dir: pathlib.Path) -> pathlib.Path:
232
+ delay = 2
233
+ yt = YouTube(url)
234
+ stream = yt.streams.filter(progressive=True, file_extension="mp4")\
235
+ .order_by("resolution").desc().first()
236
+ return pathlib.Path(stream.download(output_path=out_dir))
237
+
238
+ def key_frames_retrieval(video: pathlib.Path, max: int = 6, thresh: float = 0.35, max_frame_mb: float = 0.25):
239
+ """
240
+ Scan *all* frames in `video`, keep every frame whose colour-histogram
241
+ differs from the previous scene by more than `thresh`, then return the first
242
+ `max` most-distinct ones (highest histogram distance).
243
+
244
+ Returns
245
+ -------
246
+ List[PIL.Image] # ≤ `max` images, sorted by descending “scene change” score
247
+ """
248
+ cap = cv2.VideoCapture(str(video))
249
+ ok, frame = cap.read()
250
+
251
+ if not ok:
252
+ cap.release()
253
+ return []
254
+
255
+ def hsv_hist(img) -> np.ndarray:
256
+ return cv2.calcHist(
257
+ [cv2.cvtColor(img, cv2.COLOR_BGR2HSV)],
258
+ [0, 1], None, [50, 60], [0, 180, 0, 256]
259
+ )
260
+
261
+ def bgr_to_pil(bgr) -> Image.Image:
262
+ img = Image.fromarray(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
263
+ # shrink oversized frames so base64 prompt stays small
264
+ if (img.width * img.height * 3 / 1_048_576) > max_frame_mb:
265
+ img.thumbnail((800, 800))
266
+ return img
267
+
268
+ prev_hist = hsv_hist(frame)
269
+ candidates: list[tuple[float, Image.Image]] = [(1.0, bgr_to_pil(frame))] # always keep first
270
+
271
+ while ok:
272
+
273
+ ok, frame = cap.read()
274
+
275
+ if not ok:
276
+ break
277
+
278
+ hist = hsv_hist(frame)
279
+
280
+ diff = cv2.compareHist(prev_hist, hist, cv2.HISTCMP_BHATTACHARYYA)
281
+
282
+ if diff > thresh:
283
+
284
+ candidates.append((diff, bgr_to_pil(frame)))
285
+ prev_hist = hist
286
+
287
+ cap.release()
288
+
289
+ candidates.sort(key=lambda t: t[0], reverse=True)
290
+
291
+ top_frames = [img for _, img in candidates[:max]]
292
+
293
+ return top_frames
294
+
295
+ def audio_retrieval(video: pathlib.Path) -> str:
296
+ """
297
+ Extract the audio track from `video`, save it as a temporary MP3,
298
+ and return the transcript produced by `audio_llm.audio_to_text`.
299
+ """
300
+ with tempfile.NamedTemporaryFile(suffix=".mp3") as tmp_mp3:
301
+ (
302
+ ffmpeg
303
+ .input(str(video))
304
+ .output(
305
+ tmp_mp3.name,
306
+ ac=1, ar="16000", # mono, 16 kHz (keeps Whisper happy)
307
+ audio_bitrate="128k",
308
+ format="mp3",
309
+ loglevel="quiet"
310
+ )
311
+ .overwrite_output()
312
+ .run()
313
+ )
314
+ tmp_mp3.seek(0) # rewind before passing the handle
315
+ transcript = openai.audio.transcriptions.create(model=audio_llm, file=tmp_mp3, response_format="text")
316
+
317
+ return transcript
318
+
319
+ def img_to_data(img: Image.Image) -> str:
320
+ buf = io.BytesIO(); img.save(buf, format="PNG", optimize=True)
321
+ b64 = base64.b64encode(buf.getvalue()).decode()
322
+ return f"data:image/png;base64,{b64}"
323
+
324
+ def task_examiner(state: AgentState):
325
+ file_path = state["file_path"]
326
+
327
+ if file_path is not None:
328
+ p = Path(file_path)
329
+ suffix = p.suffix
330
+ if suffix == ".png":
331
+ state["agent_type"] = "vision"
332
+ elif suffix == ".mp3":
333
+ state["agent_type"] = "audio"
334
+ elif suffix == ".py" or suffix == ".xlsx":
335
+ state["agent_type"] = "code"
336
+ else:
337
+ #if "video" in state["question"]:
338
+ # state["agent_type"] = "vision"
339
+ #else:
340
+ state["agent_type"] = "general"
341
+ return state
342
+
343
+ def task_router(state: AgentState) -> str:
344
+
345
+ return state["agent_type"]
346
+
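The examiner/router pair boils down to a suffix-based dispatch table; a compact sketch of the same decision (illustration only, assuming file_path may be None) is:

# sketch of the routing above, not the committed implementation
SUFFIX_ROUTES = {".png": "vision", ".mp3": "audio", ".py": "code", ".xlsx": "code"}
def route(file_path: str | None) -> str:
    return SUFFIX_ROUTES.get(Path(file_path).suffix, "general") if file_path else "general"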
347
+ def general_agent(state: AgentState):
348
+
349
+ question = state["question"]
350
+
351
+ tools = [web_search, wikipedia_search, reverse_string]
352
+
353
+ system_prompt = ChatPromptTemplate.from_messages([
354
+ ("system",
355
+ """
356
+ SYSTEM GUIDELINES:
357
+ - You are a general AI assistant that is tasked with answering correctly the user's questions.
358
+ - You have several tools at your disposal for different kinds of tasks.
359
+ - You **MUST** think step by step before using any tool and call the tools only when you are sure that you need them.
360
+ **Tool-reuse rule:**
361
+ - Keep an internal list of tool names you have already called in this answer
362
+ - If a name is on that list you MUST NOT call it again. (You may still call a different tool once.)
363
+ TOOLS:
364
+ - reverse_string: This is a tool that reverses a sentence, so if a question is not readable, try passing it to this tool.
365
+ - web_search: This tool takes a question as input, searches the web for up-to-date information and returns an answer.
366
+ - wikipedia_search: This searches exclusively the English Wikipedia for up-to-date information that may not be available in your training data.
367
+ INPUT FORMAT:
368
+ - A question (text) that you should answer correctly.
369
+ OUTPUT FORMAT:
370
+ Output **ONLY** the final answer dictated by the user's question and nothing more
371
+ **IMPORTANT** If the question contains a youtube link (https://www.youtube.com/watch?...) and **ONLY THEN** output this "Don't know".
372
+ If the question tells you to output 'How many ...' you **MUST** respond with **only** a single numeral and absolutely nothing else (no punctuation, no sentence, no units).
373
+ If the question tells you to output 'What number ...' you **MUST** respond with **only** a single numeral and absolutely nothing else (no punctuation, no sentence, no units).
374
+ If the question tells you to output 'Who did ...' you **MUST** respond with **only** the full name unless the question directs you otherwise and absolutely nothing else (no punctuation, no sentence, no units).
375
+ If the question tells you to output 'Provide a comma-separated list that ...' you **MUST** respond with **only** a comma-separated list '[...,...,...]' as instructed and absolutely nothing else (no punctuation, no sentence, no units).
376
+ If the question asks to output a list -> Output: [item1,item2,item3]
377
+ If the question tells you to output 'What does the person A say when ...' you **MUST** respond with **only** the phrase that person says and absolutely nothing else (no punctuation, no sentence, no units).
378
+ """),
379
+ ("user", "{input}"),
380
+ MessagesPlaceholder("agent_scratchpad"),
381
+ ])
382
+
383
+
384
+ agent = OpenAIFunctionsAgent(
385
+ llm=general_llm,
386
+ tools=tools,
387
+ prompt=system_prompt
388
+ )
389
+
390
+ agent_executor = AgentExecutor.from_agent_and_tools(
391
+ agent=agent,
392
+ tools=tools,
393
+ verbose=True,
394
+ )
395
+
396
+ response = agent_executor.invoke({"input": question})
397
+
398
+ state["answer"] = response["output"]
399
+
400
+ return state
401
+
402
+ def audio_agent(state: AgentState):
403
+
404
+ with open(state["file_path"], "rb") as f:
405
+ transcript = openai.audio.transcriptions.create(model=audio_llm, file=f, response_format="text")
406
+
407
+ question = state["question"]
408
+
409
+ system_msg = SystemMessage(
410
+ content=("You are an AI assistant that answers the user's question based solely on the provided transcript."
411
+ "When the user asks for a “comma-delimited / comma-separated list”, you must:"
412
+ " - Filter the items exactly as requested."
413
+ " - Output one single line that contains the items separated by commas and a space enclosed in square brackets."
414
+ " - Output nothing else- no extra words or explanations"
415
+ "OUTPUT FORMAT EXAMPLES:"
416
+ "If asked to output a list -> Output: [item1,item2,item3]"
417
+ "If asked something else -> Output: text answering exactly that question and nothing more"
418
+ )
419
+ )
420
+
421
+ messages = [
422
+ system_msg,
423
+ HumanMessage(
424
+ content=[
425
+ {
426
+ "type": "text",
427
+ "text": f"Transcript:\n{transcript}\n\nQuestion:\n{question}"
428
+ }
429
+ ]
430
+ )
431
+ ]
432
+
433
+ response = general_llm.invoke(messages)
434
+
435
+ state["answer"] = response.content.strip()
436
+
437
+ return state
438
+
439
+ def vision_agent(state: AgentState):
440
+
441
+ file_path = state["file_path"]
442
+ question = state["question"]
443
+
444
+ with open(file_path, "rb") as image_file:
445
 
446
+ image_bytes = image_file.read()
447
+
448
+ image_base64 = base64.b64encode(image_bytes).decode("utf-8")
449
+
450
+ system_msg = SystemMessage(
451
+ content=("""
452
+ You are a Vision AI assistant that can process images and answer the user's questions correctly.
453
+ **OUTPUT** only the final answer and absolutely nothing else (no punctuation, no sentence, no units).
454
+ """)
455
+ )
456
+
457
+ messages = [
458
+ system_msg,
459
+ HumanMessage(
460
+ content=[
461
+ {
462
+ "type": "text",
463
+ "text": (f"{question}")
464
+ },
465
+ {
466
+ "type": "image_url",
467
+ "image_url": {
468
+ "url": f"data:image/png;base64,{image_base64}"
469
+ },
470
+ }
471
+ ]
472
+ )
473
+ ]
474
 
475
+ response = general_llm.invoke(messages)
476
 
477
+ state["answer"] = response.content.strip()
478
+
479
+ return state
480
 
481
+ def code_agent(state: AgentState):
482
+
483
+ file_path = state["file_path"]
484
+ question = state["question"]
485
+
486
+ tools = [xlsx_handler, python_handler, addition_tool]
487
+
488
+ system_prompt = ChatPromptTemplate.from_messages([
489
+ ("system",
490
+ """ SYSTEM GUIDELINES:
491
+ - You are a data AI assistant and your job is to answer questions that depend on .xlsx or .py files.
492
+ - You have 3 tools at your disposal that are mandatory for solving the tasks.
493
+ - You **MUST** use the tools as instructed below and you **MUST** output only the final numeric result of the task.
494
+ INPUT FORMAT:
495
+ - A question (text) based on a file which will be either .py or .xlsx.
496
+ - The path of the file related to the question.
497
+ TOOLS:
498
+ - Tool name: xlsx_handler, Purpose: This is the tool you should use if the file contained in the file_path is an .xlsx file; its purpose is to return the contents of the file as a list of dictionaries for you to process. Reason **INTERNALLY** and output only the final numeric result.
499
+ - Tool name: python_handler, Purpose: This is the tool you should use if the file contained in the file_path is a .py file; its purpose is to execute the Python file and return its final numeric result.
500
+ - Tool name: addition_tool, Purpose: This is the tool you should use if the question asks you to sum a list of numbers and return the final numeric result.
501
+ EXAMPLE OUTPUTS:
502
+ - Input: "What is the result of the code in the file?" Output: "5"
503
+ - Input: "What is the total sales mentioned in the file. Your answer must have 2 decimal places?" Output: "305.00"
504
+ - YOU MUST OUTPUT ONLY THE FINAL NUMBER.
505
+
506
+ The file relevant to the task is at: {file_path}."""),
507
+ ("user", "{input}"),
508
+ MessagesPlaceholder("agent_scratchpad"),
509
+ ])
510
+
511
+
512
+ agent = OpenAIFunctionsAgent(
513
+ llm=general_llm,
514
+ tools=tools,
515
+ prompt=system_prompt
516
+ )
517
+
518
+ agent_executor = AgentExecutor.from_agent_and_tools(
519
+ agent=agent,
520
+ tools=tools,
521
+ verbose=True,
522
+ )
523
+
524
+ #agent_executor = agent_executor.partial(file_path=file_path)
525
+
526
+ response = agent_executor.invoke({"input": question, "file_path": file_path})
527
+
528
+ state["answer"] = response["output"]
529
+
530
+ return state
531
+
532
+ class Agent_Workflow:
533
+ def __init__(self):
534
+ print("Agent Workflow initialized.")
535
+ def __call__(self, question: str, filepath: str | None) -> str:
536
+
537
+ builder = StateGraph(AgentState)
538
+
539
+ # Agent Nodes
540
+ builder.add_node("task_examiner", task_examiner)
541
+ builder.add_node("general_agent", general_agent)
542
+ builder.add_node("audio_agent", audio_agent)
543
+ builder.add_node("vision_agent", vision_agent)
544
+ builder.add_node("code_agent", code_agent)
545
+
546
+ # Edges that connect agent nodes
547
+ builder.add_edge(START, "task_examiner")
548
+ builder.add_conditional_edges("task_examiner", task_router,
549
+ {
550
+ "general": "general_agent",
551
+ "audio": "audio_agent",
552
+ "vision": "vision_agent",
553
+ "code": "code_agent"
554
+ }
555
+ )
556
+ builder.add_edge("general_agent", END)
557
+ builder.add_edge("audio_agent", END)
558
+ builder.add_edge("vision_agent", END)
559
+ builder.add_edge("code_agent", END)
560
+
561
+ workflow_graph = builder.compile()
562
+
563
+ state = workflow_graph.invoke({"file_path": filepath, "question": question, "answer": "",})
564
+
565
+ return state["answer"]
566
+
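A minimal usage sketch of the workflow wrapper, with a hypothetical question and no attached file, assuming valid OpenAI and Tavily credentials (this mirrors how run_and_submit_all calls it below):

# illustration only; the real call sites pass questions fetched from the scoring API
agent = Agent_Workflow()
answer = agent(question="How many continents are there?", filepath=None)
print(answer)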
567
+ def fetch_task_file_static(task_id: str, file_name: str | None = None, session: requests.Session | None = None) -> Path:
568
+ """
569
+ Download the attachment for `task_id` to temp/<task_id><suffix> and return its path (or None if no file is attached).
570
+ """
571
+ if file_name is None:
572
+ return None
573
+
574
+ # Decide the suffix
575
+ suffix = Path(file_name).suffix if file_name else ""
576
+ dest = "temp/"+task_id+suffix
577
+
578
+ url = f"{DEFAULT_API_URL}/files/{task_id}"
579
+ s = session or requests
580
+
581
+ with s.get(url, stream=True, timeout=30) as r:
582
+ r.raise_for_status()
583
+ with open(dest, "wb") as f:
584
+ shutil.copyfileobj(r.raw, f)
585
+
586
+ return dest
587
 
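A usage sketch with made-up values; the real task_id and file_name come from the questions payload in the loop below:

# hypothetical values for illustration only
path = fetch_task_file_static("00000000-0000-0000-0000-000000000000", "table.xlsx")
print(path)  # -> temp/00000000-0000-0000-0000-000000000000.xlsx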
588
  def run_and_submit_all( profile: gr.OAuthProfile | None):
589
  """
 
606
 
607
  # 1. Instantiate Agent ( modify this part to create your agent)
608
  try:
609
+ agent = Agent_Workflow()
610
  except Exception as e:
611
  print(f"Error instantiating agent: {e}")
612
  return f"Error initializing agent: {e}", None
613
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
614
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
615
 
616
  # 2. Fetch Questions
617
  print(f"Fetching questions from: {questions_url}")
 
638
  results_log = []
639
  answers_payload = []
640
  print(f"Running agent on {len(questions_data)} questions...")
641
+ session = requests.Session()
642
 
643
+ j = 0  # question counter used by the progress prints below
644
  for item in questions_data:
645
+ task_id = item["task_id"]
646
+ question = item["question"]
647
+ file_name = item.get("file_name")
648
+
649
+ file_path = None
650
+
651
+ if file_name:
652
+ try:
653
+ file_path = fetch_task_file_static(task_id, file_name, session=session)
654
+ except requests.HTTPError as e:
655
+ print(f"⚠️ Couldn’t fetch file for {task_id}: {e}")
656
+
657
+ #print(f"Question is : {question}\n")
658
+ #[2,4,5,6,7,8,10,12,15,16,17]
659
+ """
660
+ if j in [2,4,5,6,7,8,10,12,15,16,17]:
661
+ time.sleep(5)
662
+ print(f"Question is : {question}")
663
+ print(f"File path is : {file_path}")
664
+ submitted_answer = agent(question=question, filepath=file_path)
665
+ print(f"Answer is : {submitted_answer}")
666
+
667
+ j=j+1
668
  """
669
+ print(f"Question {j+1} is : {question}")
670
+ print(f"File path is : {file_path}")
671
+
672
+ if not task_id or question is None:
673
  print(f"Skipping item with missing task_id or question: {item}")
674
  continue
675
  try:
676
+ submitted_answer = agent(question=question, filepath=file_path)
677
+ print(f"Answer for question {j+1} is: {submitted_answer}")
678
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
679
+ results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": submitted_answer})
680
  except Exception as e:
681
  print(f"Error running agent on task {task_id}: {e}")
682
+ results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"AGENT ERROR: {e}"})
683
+
684
+ j=j+1
685
+
686
 
687
  if not answers_payload:
688
  print("Agent did not produce any answers to submit.")
689
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
690
+
691
  # 4. Prepare Submission
692
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
693
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
 
735
  print(status_message)
736
  results_df = pd.DataFrame(results_log)
737
  return status_message, results_df
738
+
739
 
740
  # --- Build Gradio Interface using Blocks ---
741
  with gr.Blocks() as demo:
 
769
  )
770
 
771
  if __name__ == "__main__":
772
+ print(os.getenv("HF_TOKEN"))
773
  print("\n" + "-"*30 + " App Starting " + "-"*30)
774
  # Check for SPACE_HOST and SPACE_ID at startup for information
775
  space_host_startup = os.getenv("SPACE_HOST")