# SantoshKumar1310's picture
# Update app.py
# da40f6e verified
# raw
# history blame
# 5.02 kB
# app.py — Final GAIA Assignment Template (Enhanced)
import json
import os
import subprocess
import sys
import time
import urllib.error
import urllib.request

import streamlit as st
from huggingface_hub import login
from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel
# =========================
# 1. Define the GAIA Agent
# =========================
class BasicAgent:
    """GAIA question-answering agent wrapping a smolagents ``CodeAgent``.

    Calling an instance with a question runs the wrapped agent on a short
    instruction prompt and returns a cleaned-up answer string, or ``"N/A"``
    when the agent fails or produces nothing. Progress and errors are written
    to the Streamlit page.
    """

    # Answer labels stripped from the start of a response. Stored lowercase
    # and compared case-insensitively.
    _ANSWER_PREFIXES = ("final answer:", "answer:")

    def __init__(self):
        """Build the model, the tool list and the wrapped ``CodeAgent``."""
        st.write("🔧 Initializing enhanced GAIA Agent...")
        # Core model from Hugging Face
        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
        # Tools for reasoning and search
        # NOTE(review): confirm the installed smolagents release exports
        # PythonREPLTool and HfApiModel under these names.
        self.tools = [
            DuckDuckGoSearchTool(),
            PythonREPLTool(),
        ]
        # Create a CodeAgent instance
        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            name="GAIA_Level1_Agent",
            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
            max_steps=5,
        )

    def sanitize(self, text: object) -> str:
        """Clean and simplify a raw agent output for benchmark scoring.

        Args:
            text: The raw agent result. Non-string values (for example a
                number) are converted with ``str`` instead of being rejected.

        Returns:
            The answer with leading answer labels removed, one pair of
            surrounding double quotes removed, and runs of whitespace
            collapsed to single spaces. ``None`` yields ``""``.
        """
        if text is None:
            return ""
        # Agents may hand back ints/floats; falsy values such as 0 are valid answers.
        text = str(text).strip()

        # Keep stripping until no label is left, so stacked labels
        # ("FINAL ANSWER: Answer: x") are fully removed.
        stripped = True
        while stripped:
            stripped = False
            for prefix in self._ANSWER_PREFIXES:
                # Slice before lowercasing so the cut position always refers
                # to the original text.
                if text[:len(prefix)].lower() == prefix:
                    text = text[len(prefix):].strip()
                    stripped = True

        if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
            text = text[1:-1]
        return " ".join(text.split())

    def __call__(self, question: str) -> str:
        """Run the agent on a single GAIA question.

        Args:
            question: The question text; its first 80 characters are echoed
                to the page.

        Returns:
            The sanitized answer, or ``"N/A"`` if the answer is empty or the
            agent raised.
        """
        st.write(f"🤖 Running agent on: {question[:80]}...")
        prompt = (
            "You are a concise reasoning agent. "
            "Use your tools to find accurate answers. "
            "Always return only the final answer (no explanations).\n\n"
            f"Question: {question}"
        )
        try:
            response = self.agent.run(prompt)
            clean_answer = self.sanitize(response)
            st.write(f"✅ Final Answer: {clean_answer}")
            return clean_answer or "N/A"
        except Exception as e:
            # Top-level boundary: one failing question must not abort the whole run.
            st.error(f"⚠️ Agent failed: {e}")
            return "N/A"
# =======================================
# 2. Streamlit UI and GAIA Leaderboard
# =======================================
# Page setup and introduction. set_page_config is kept as the first Streamlit
# call in the script.
st.set_page_config(page_title="GAIA Final Assignment", layout="centered")
st.title("🤖 GAIA Benchmark Final Assignment")
# Blank lines around the "---" rules keep Markdown from treating the
# preceding paragraph as a setext heading.
st.markdown(
    """
Welcome to your **Final Assignment** for the Agents course!
This app evaluates your custom agent on a subset of **GAIA Level 1** benchmark questions.
To pass and earn your certificate 🏅, your agent must score **≥ 30% accuracy**.

---

### 🧠 Steps
1. Log in to your **Hugging Face** account.
2. Run your **agent** on the GAIA dataset.
3. Automatically submit your results for scoring.

---
"""
)
# =========================
# 3. Login Section
# =========================
# Ask for the access token (masked input) and pass it to huggingface_hub.login
# when the button is pressed.
hf_token = st.text_input("🔑 Enter your Hugging Face access token:", type="password")
if st.button("Login to Hugging Face"):
    if not hf_token.strip():
        # Nothing to authenticate with; ask instead of sending an empty token.
        st.warning("Please enter your Hugging Face access token first.")
    else:
        try:
            login(token=hf_token.strip())
        except Exception as e:
            # UI boundary: show whatever the login call raised.
            st.error(f"Login failed: {e}")
        else:
            st.success("✅ Logged in successfully!")
# =========================
# 4. Load GAIA Questions
# =========================
# Download questions.json into the working directory. The payload is fetched
# and parsed before anything is written, so a failed or non-JSON download
# never leaves a broken questions.json behind, and success is only reported
# when the file was actually saved.
if st.button("🧩 Load GAIA Dataset"):
    st.info("Fetching 20 GAIA Level 1 questions...")
    questions_url = (
        "https://huggingface.co/spaces/agents-course/"
        "Final_Assignment_Template/resolve/main/questions.json"
    )
    try:
        with urllib.request.urlopen(questions_url, timeout=30) as resp:
            payload = resp.read()
        json.loads(payload)  # validate only; the raw bytes are what gets stored
        with open("questions.json", "wb") as out:
            out.write(payload)
    except (urllib.error.URLError, OSError, ValueError) as e:
        st.error(f"Failed to load dataset: {e}")
    else:
        st.success("✅ Dataset loaded!")
# =========================
# 5. Run Evaluation
# =========================
# Run the agent over every loaded question, save the answers to answers.json,
# then invoke the submission command and report its real outcome.
if st.button("🚀 Run Evaluation & Submit All Answers"):
    questions = None
    if not os.path.exists("questions.json"):
        st.warning("Please load the GAIA dataset first.")
    else:
        try:
            with open("questions.json", "r", encoding="utf-8") as f:
                data = json.load(f)
            # NOTE(review): assumes a top-level "questions" key holding a list
            # of question strings — confirm against the downloaded file.
            questions = data["questions"]
        except (OSError, ValueError, KeyError, TypeError) as e:
            st.error(f"Could not read questions.json: {e}")

    if questions is not None:
        agent = BasicAgent()
        results = {}
        for i, q in enumerate(questions, start=1):
            st.write(f"### Question {i}:")
            st.write(q)
            results[q] = agent(q)
            time.sleep(1)  # pause between questions

        # Save answers
        with open("answers.json", "w", encoding="utf-8") as f:
            json.dump(results, f, indent=2)
        st.success("✅ All questions answered and saved as answers.json")

        # Auto-submit (if supported)
        # NOTE(review): confirm the installed smolagents package provides an
        # `eval_gaia` module with a `submit` command.
        st.info("📤 Submitting answers to GAIA leaderboard...")
        completed = subprocess.run(
            [sys.executable or "python3", "-m", "smolagents.eval_gaia", "submit", "answers.json"],
            capture_output=True,
            text=True,
        )
        if completed.returncode == 0:
            st.success("🎉 Submission complete! Check your score on the leaderboard.")
        else:
            # Surface the failure instead of claiming success.
            details = completed.stderr.strip() or completed.stdout.strip()
            st.error(f"Submission failed (exit code {completed.returncode}): {details}")
# =========================
# 6. Notes
# =========================
# Closing notes. The blank line before the last sentence keeps Markdown from
# folding it into the final bullet as a continuation line.
st.markdown(
    """
---
### ℹ️ Notes
- You can edit the agent logic inside the `BasicAgent` class to boost performance.
- Use more reasoning, examples, or API calls for higher accuracy.
- Make your Space **public** before submitting.

Good luck on the GAIA leaderboard! 🌍
"""
)