Spaces:

agents-course
/

Final_Assignment_Template

Running

App Files Files Community

289

Final_Assignment_Template / app.py

SantoshKumar1310

Update app.py

da40f6e verified about 2 months ago

raw

history blame

5.02 kB

	# app.py — Final GAIA Assignment Template (Enhanced)

	import json
	import os
	import subprocess
	import time
	import urllib.request

	import streamlit as st
	from huggingface_hub import login
	from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel

	# =========================
	# 1. Define the GAIA Agent
	# =========================
	class BasicAgent:
	    """Callable wrapper that answers one GAIA question per call.

	    Builds a smolagents ``CodeAgent`` backed by a hosted Qwen coder model and
	    two tools (web search and a Python tool), runs it on a short instruction
	    prompt, and normalises whatever comes back into a bare answer string.
	    Progress and failures are reported through the Streamlit UI.
	    """

	    # Lead-ins that models tend to put in front of the answer; ``sanitize``
	    # strips them in this order so the benchmark sees only the answer itself.
	    _ANSWER_PREFIXES = ("FINAL ANSWER:", "Final Answer:", "Answer:", "answer:")

	    def __init__(self):
	        """Create the model, the tool list and the underlying ``CodeAgent``."""
	        st.write("🔧 Initializing enhanced GAIA Agent...")

	        # Core model from Hugging Face
	        # NOTE(review): confirm that `HfApiModel` and `PythonREPLTool` are
	        # exported by the installed smolagents version — TODO verify.
	        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")

	        # Tools for reasoning and search
	        self.tools = [
	            DuckDuckGoSearchTool(),
	            PythonREPLTool(),
	        ]

	        # Create a CodeAgent instance
	        self.agent = CodeAgent(
	            tools=self.tools,
	            model=self.model,
	            name="GAIA_Level1_Agent",
	            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
	            max_steps=5,
	        )

	    def sanitize(self, text: str) -> str:
	        """Clean and simplify final outputs for benchmark scoring.

	        Args:
	            text: Raw agent output. Usually a string, but any object is
	                accepted and converted with ``str()`` so that numeric
	                answers (including ``0``) are not lost.

	        Returns:
	            The answer with surrounding whitespace, known answer prefixes and
	            one pair of enclosing double quotes removed, and every internal
	            whitespace run collapsed to a single space. ``None`` and empty
	            input yield ``""``.
	        """
	        # Only None means "no answer"; a falsy value such as 0 is a real answer.
	        if text is None:
	            return ""
	        text = str(text).strip()
	        for prefix in self._ANSWER_PREFIXES:
	            if text.startswith(prefix):
	                text = text[len(prefix):].strip()
	        if text.startswith('"') and text.endswith('"'):
	            text = text[1:-1]
	        return " ".join(text.split())

	    def __call__(self, question: str) -> str:
	        """Run the agent on a single GAIA question.

	        Args:
	            question: The question text; its first 80 characters are echoed
	                to the UI.

	        Returns:
	            The sanitized answer, or ``"N/A"`` when the agent raised or the
	            sanitized answer is empty.
	        """
	        st.write(f"🤖 Running agent on: {question[:80]}...")
	        prompt = (
	            "You are a concise reasoning agent. "
	            "Use your tools to find accurate answers. "
	            "Always return only the final answer (no explanations).\n\n"
	            f"Question: {question}"
	        )

	        # One failing question must not abort the whole evaluation run, so any
	        # agent error is shown in the UI and turned into the "N/A" placeholder.
	        try:
	            response = self.agent.run(prompt)
	            clean_answer = self.sanitize(response)
	            st.write(f"✅ Final Answer: {clean_answer}")
	            return clean_answer or "N/A"
	        except Exception as e:
	            st.error(f"⚠️ Agent failed: {e}")
	            return "N/A"


	# =======================================
	# 2. Streamlit UI and GAIA Leaderboard
	# =======================================
	# Page chrome first: Streamlit configuration, then the title and the intro text.
	st.set_page_config(layout="centered", page_title="GAIA Final Assignment")
	st.title("🤖 GAIA Benchmark Final Assignment")

	# Welcome text and the three-step walkthrough shown under the title.
	intro_markdown = """
	Welcome to your Final Assignment for the Agents course!

	This app evaluates your custom agent on a subset of GAIA Level 1 benchmark questions.
	To pass and earn your certificate 🏅, your agent must score ≥ 30% accuracy.

	---

	### 🧠 Steps
	1. Log in to your Hugging Face account.
	2. Run your agent on the GAIA dataset.
	3. Automatically submit your results for scoring.

	---
	"""
	st.markdown(intro_markdown)

	# =========================
	# 3. Login Section
	# =========================
	# Masked token field plus a button that authenticates with the entered value.
	hf_token = st.text_input(
	    "🔑 Enter your Hugging Face access token:",
	    type="password",
	)
	login_requested = st.button("Login to Hugging Face")
	if login_requested:
	    # Any login error is shown in the UI instead of stopping the script.
	    try:
	        login(token=hf_token)
	        st.success("✅ Logged in successfully!")
	    except Exception as login_error:
	        st.error(f"Login failed: {login_error}")

	# =========================
	# 4. Load GAIA Questions
	# =========================
	if st.button("🧩 Load GAIA Dataset"):
	    st.info("Fetching 20 GAIA Level 1 questions...")
	    questions_url = "https://huggingface.co/spaces/agents-course/Final_Assignment_Template/resolve/main/questions.json"
	    try:
	        # Download in-process so that a failure is detectable and the app does
	        # not depend on a `wget` binary being installed.
	        with urllib.request.urlopen(questions_url, timeout=30) as response:
	            payload = response.read()
	        # Validate before touching the disk: an HTML error page or truncated
	        # body must not be saved as questions.json.
	        json.loads(payload)
	    except (OSError, ValueError) as e:
	        # URLError/HTTPError and timeouts are OSError subclasses; JSON and
	        # decoding errors are ValueError subclasses.
	        st.error(f"Failed to download the GAIA dataset: {e}")
	    else:
	        with open("questions.json", "wb") as f:
	            f.write(payload)
	        st.success("✅ Dataset loaded!")

	# =========================
	# 5. Run Evaluation
	# =========================
	if st.button("🚀 Run Evaluation & Submit All Answers"):
	    if not os.path.exists("questions.json"):
	        st.warning("Please load the GAIA dataset first.")
	    else:
	        # An unreadable, malformed or unexpectedly shaped file is reported in
	        # the UI instead of crashing the script with a traceback.
	        try:
	            with open("questions.json", "r", encoding="utf-8") as f:
	                questions = json.load(f)["questions"]
	        except (OSError, ValueError, KeyError, TypeError) as e:
	            questions = None
	            st.error(f"Could not read questions from questions.json: {e}")

	        if questions is not None:
	            agent = BasicAgent()
	            results = {}  # maps each question to the agent's answer

	            for i, q in enumerate(questions):
	                st.write(f"### Question {i+1}:")
	                st.write(q)
	                results[q] = agent(q)
	                # presumably throttles calls to the model/search backends — TODO confirm
	                time.sleep(1)

	            # Save answers
	            with open("answers.json", "w", encoding="utf-8") as f:
	                json.dump(results, f, indent=2)
	            st.success("✅ All questions answered and saved as answers.json")

	            # Auto-submit via huggingface CLI (if supported)
	            # NOTE(review): confirm that the installed smolagents package
	            # actually provides an `eval_gaia` module with a `submit` command.
	            st.info("📤 Submitting answers to GAIA leaderboard...")
	            try:
	                # Argument list (no shell) and an inspected exit status, so a
	                # failed submission is never reported as a success.
	                submission = subprocess.run(
	                    ["python3", "-m", "smolagents.eval_gaia", "submit", "answers.json"],
	                    check=False,
	                )
	            except OSError as e:
	                st.error(f"Submission could not be started: {e}")
	            else:
	                if submission.returncode == 0:
	                    st.success("🎉 Submission complete! Check your score on the leaderboard.")
	                else:
	                    st.error(f"Submission failed (exit code {submission.returncode}).")

	# =========================
	# 6. Notes
	# =========================
	# Closing hints rendered at the bottom of the page.
	notes_markdown = """
	---
	### ℹ️ Notes
	- You can edit the agent logic inside the `BasicAgent` class to boost performance.
	- Use more reasoning, examples, or API calls for higher accuracy.
	- Make your Space public before submitting.

	Good luck on the GAIA leaderboard! 🌍
	"""
	st.markdown(notes_markdown)