File size: 5,024 Bytes
da40f6e 10e9b7d da40f6e e80aab9 da40f6e 31243f4 da40f6e 3c4371f da40f6e 3c4371f da40f6e e80aab9 31243f4 da40f6e 31243f4 da40f6e 31243f4 da40f6e 31243f4 da40f6e e80aab9 da40f6e e80aab9 da40f6e 7d65c66 da40f6e 7d65c66 da40f6e 7d65c66 da40f6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# app.py — Final GAIA Assignment Template (Enhanced)
import streamlit as st
from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel
from huggingface_hub import login
import json
import time
import os
# =========================
# 1. Define the GAIA Agent
# =========================
class BasicAgent:
    """GAIA question-answering agent built on a smolagents ``CodeAgent``.

    Constructing an instance creates the model, the tool list and the
    underlying ``CodeAgent``. Calling the instance with a question returns a
    cleaned-up answer string, or ``"N/A"`` when the run fails or yields
    nothing. Progress and errors are reported through Streamlit.

    NOTE(review): the emoji in the UI strings were reconstructed from
    mis-encoded text; confirm they are the intended glyphs.
    NOTE(review): confirm that ``PythonREPLTool`` and ``HfApiModel`` are
    exported by the installed smolagents version.
    """

    # Labels a model may put in front of its answer (matched case-insensitively).
    _ANSWER_PREFIXES = ("final answer:", "answer:")

    def __init__(self):
        """Create the model, the tools and the ``CodeAgent`` that uses them."""
        st.write("🧠 Initializing enhanced GAIA Agent...")
        # Core model from Hugging Face
        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
        # Tools for reasoning and search
        self.tools = [
            DuckDuckGoSearchTool(),
            PythonREPLTool(),
        ]
        # Create a CodeAgent instance
        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            name="GAIA_Level1_Agent",
            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
            max_steps=5,
        )

    def sanitize(self, text: object) -> str:
        """Clean and simplify a final output for benchmark scoring.

        Args:
            text: The raw agent output. Non-string values (for example a
                number) are converted with ``str`` instead of being rejected.

        Returns:
            The answer with leading "Final Answer:" / "Answer:" labels
            removed, one pair of enclosing double quotes stripped and runs of
            whitespace collapsed to single spaces. ``None`` and empty input
            give ``""``.
        """
        if text is None:
            return ""
        # Keep falsy-but-valid answers such as 0 by converting before testing.
        cleaned = str(text).strip()

        # Strip labels repeatedly so stacked ones ("FINAL ANSWER: Answer: 4")
        # are all removed.
        while True:
            for prefix in self._ANSWER_PREFIXES:
                if cleaned[:len(prefix)].lower() == prefix:
                    cleaned = cleaned[len(prefix):].strip()
                    break
            else:
                break

        # Require two characters so a lone '"' is not treated as a quoted pair.
        if len(cleaned) >= 2 and cleaned[0] == '"' and cleaned[-1] == '"':
            cleaned = cleaned[1:-1]

        return " ".join(cleaned.split())

    def __call__(self, question: str) -> str:
        """Run the agent on a single GAIA question.

        Args:
            question: The question text; its first 80 characters are echoed
                to the page.

        Returns:
            The sanitized answer, or ``"N/A"`` if the answer is empty or the
            agent raised.
        """
        st.write(f"🤖 Running agent on: {question[:80]}...")
        prompt = (
            "You are a concise reasoning agent. "
            "Use your tools to find accurate answers. "
            "Always return only the final answer (no explanations).\n\n"
            f"Question: {question}"
        )
        try:
            response = self.agent.run(prompt)
            clean_answer = self.sanitize(response)
            st.write(f"✅ Final Answer: {clean_answer}")
            return clean_answer or "N/A"
        except Exception as e:
            # Deliberate best-effort boundary: one failing question must not
            # abort the whole evaluation run.
            st.error(f"⚠️ Agent failed: {e}")
            return "N/A"
# =======================================
# 2. Streamlit UI and GAIA Leaderboard
# =======================================
# Page configuration, title and introductory instructions.
# NOTE(review): the emoji and the "≥" sign were reconstructed from mis-encoded
# text; confirm they are the intended glyphs.
st.set_page_config(page_title="GAIA Final Assignment", layout="centered")
st.title("🤖 GAIA Benchmark Final Assignment")
# Blank lines separate the Markdown blocks: a "---" directly under a text line
# is read as a heading underline rather than a horizontal rule.
st.markdown(
    """
Welcome to your **Final Assignment** for the Agents course!

This app evaluates your custom agent on a subset of **GAIA Level 1** benchmark questions.

To pass and earn your certificate 🏅, your agent must score **≥ 30% accuracy**.

---

### 🧭 Steps

1. Log in to your **Hugging Face** account.
2. Run your **agent** on the GAIA dataset.
3. Automatically submit your results for scoring.

---
"""
)
# =========================
# 3. Login Section
# =========================
# Ask for a Hugging Face token and log in when the button is pressed.
# NOTE(review): the emoji were reconstructed from mis-encoded text; confirm
# they are the intended glyphs.
hf_token = st.text_input("🔑 Enter your Hugging Face access token:", type="password")
if st.button("Login to Hugging Face"):
    # Pasted tokens often carry stray whitespace; an empty value is rejected
    # here instead of being sent to the Hub.
    token = (hf_token or "").strip()
    if not token:
        st.warning("Please enter your Hugging Face access token first.")
    else:
        try:
            login(token=token)
        except Exception as e:
            st.error(f"Login failed: {e}")
        else:
            st.success("✅ Logged in successfully!")
# =========================
# 4. Load GAIA Questions
# =========================
# Download the question file to questions.json when the button is pressed.
if st.button("🧩 Load GAIA Dataset"):
    # Local import keeps this section self-contained.
    import urllib.request

    st.info("Fetching 20 GAIA Level 1 questions...")
    questions_url = (
        "https://huggingface.co/spaces/agents-course/"
        "Final_Assignment_Template/resolve/main/questions.json"
    )
    try:
        # Read the whole body first so a failed download never leaves a
        # truncated or empty questions.json behind.
        with urllib.request.urlopen(questions_url, timeout=30) as response:
            payload = response.read()
        with open("questions.json", "wb") as f:
            f.write(payload)
    except OSError as e:
        # Covers URL/HTTP errors, timeouts and local write failures.
        st.error(f"Failed to load the GAIA dataset: {e}")
    else:
        st.success("✅ Dataset loaded!")
# =========================
# 5. Run Evaluation
# =========================
# Answer every question with the agent, save the answers and submit them.
# NOTE(review): the emoji were reconstructed from mis-encoded text; confirm
# they are the intended glyphs.
if st.button("🚀 Run Evaluation & Submit All Answers"):
    if not os.path.exists("questions.json"):
        st.warning("Please load the GAIA dataset first.")
    else:
        try:
            with open("questions.json", "r", encoding="utf-8") as f:
                data = json.load(f)
            # NOTE(review): assumes a top-level "questions" list of question
            # strings (each one is used as a dict key and sliced as text
            # below) — confirm against the actual file.
            questions = data["questions"]
        except (OSError, ValueError, KeyError, TypeError) as e:
            st.error(f"Could not read questions.json: {e}")
            questions = None

        if questions is not None:
            agent = BasicAgent()
            results = {}
            for number, q in enumerate(questions, start=1):
                st.write(f"### Question {number}:")
                st.write(q)
                results[q] = agent(q)
                # Pause between questions, as the original script did.
                time.sleep(1)

            # Save answers
            with open("answers.json", "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2)
            st.success("✅ All questions answered and saved as answers.json")

            # Auto-submit via huggingface CLI (if supported)
            st.info("📤 Submitting answers to GAIA leaderboard...")
            exit_status = os.system("python3 -m smolagents.eval_gaia submit answers.json")
            # Only claim success when the command actually succeeded.
            if exit_status == 0:
                st.success("🎉 Submission complete! Check your score on the leaderboard.")
            else:
                st.error(f"Submission command failed (exit status {exit_status}).")
# =========================
# 6. Notes
# =========================
# Closing notes shown at the bottom of the page.
# NOTE(review): the emoji were reconstructed from mis-encoded text; confirm
# they are the intended glyphs.
# Blank lines separate the Markdown blocks so the closing line is its own
# paragraph instead of being absorbed into the last bullet.
st.markdown(
    """
---

### ℹ️ Notes

- You can edit the agent logic inside the `BasicAgent` class to boost performance.
- Use more reasoning, examples, or API calls for higher accuracy.
- Make your Space **public** before submitting.

Good luck on the GAIA leaderboard! 🚀
"""
)
|