# app.py — Final GAIA Assignment Template (Enhanced)
"""Streamlit app that runs a smolagents ``CodeAgent`` on GAIA Level 1 questions.

The page lets the user log in to Hugging Face, download ``questions.json``,
run the agent on every question, save the answers to ``answers.json`` and
launch the submission command.
"""

import json
import os
import subprocess
import sys
import time
import urllib.request

import streamlit as st
from huggingface_hub import login

# NOTE(review): confirm that the installed smolagents release exports
# `PythonREPLTool` and `HfApiModel`; if it does not, this import fails at
# startup and the names must be swapped for the release's equivalents.
from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel

QUESTIONS_URL = (
    "https://huggingface.co/spaces/agents-course/Final_Assignment_Template"
    "/resolve/main/questions.json"
)
QUESTIONS_PATH = "questions.json"
ANSWERS_PATH = "answers.json"
DOWNLOAD_TIMEOUT_S = 30

# Prefixes that models tend to put in front of the answer; stripped in order.
_ANSWER_PREFIXES = ("FINAL ANSWER:", "Final Answer:", "Answer:", "answer:")


# =========================
# 1. Define the GAIA Agent
# =========================
class BasicAgent:
    """Wrap a ``CodeAgent`` (web search + code execution) behind a callable."""

    def __init__(self):
        st.write("🔧 Initializing enhanced GAIA Agent...")

        # Core model from Hugging Face
        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")

        # Tools for reasoning and search
        self.tools = [
            DuckDuckGoSearchTool(),
            PythonREPLTool(),
        ]

        # Create a CodeAgent instance
        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            name="GAIA_Level1_Agent",
            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
            max_steps=5,
        )

    def sanitize(self, text) -> str:
        """Clean and simplify final outputs for benchmark scoring.

        Args:
            text: Raw agent output. Usually a string, but any object is
                accepted and converted with ``str()``; ``None`` counts as empty.

        Returns:
            The answer with known prefixes, one pair of surrounding double
            quotes and redundant whitespace removed; ``""`` when there is
            nothing to return.
        """
        if text is None:
            return ""
        # The agent may hand back a non-string (e.g. a number); coerce it so
        # the answer is kept instead of failing on `.strip()`.
        text = str(text).strip()
        if not text:
            return ""
        for prefix in _ANSWER_PREFIXES:
            if text.startswith(prefix):
                text = text[len(prefix):].strip()
        if text.startswith('"') and text.endswith('"'):
            text = text[1:-1]
        # Collapse every run of whitespace into a single space.
        return " ".join(text.split())

    def __call__(self, question: str) -> str:
        """Run the agent on a single GAIA question.

        Returns the sanitized answer, or ``"N/A"`` when the agent raises or
        produces an empty answer.
        """
        st.write(f"🤖 Running agent on: {question[:80]}...")
        prompt = (
            "You are a concise reasoning agent. "
            "Use your tools to find accurate answers. "
            "Always return only the final answer (no explanations).\n\n"
            f"Question: {question}"
        )
        # Deliberate catch-all: one failing question must not abort the run.
        try:
            response = self.agent.run(prompt)
            clean_answer = self.sanitize(response)
            st.write(f"✅ Final Answer: {clean_answer}")
            return clean_answer or "N/A"
        except Exception as e:
            st.error(f"⚠️ Agent failed: {e}")
            return "N/A"


def _download_questions(url: str, dest: str, timeout: float) -> None:
    """Download *url* to *dest*, replacing *dest* only if the payload is valid JSON.

    Raises:
        OSError: on network or file-system failure (includes ``URLError``).
        ValueError: when the downloaded payload is not valid JSON.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = response.read()
    json.loads(payload)  # validate before touching the destination file
    partial = f"{dest}.part"
    with open(partial, "wb") as fh:
        fh.write(payload)
    # Atomic swap so a failed download never leaves a truncated dataset behind.
    os.replace(partial, dest)


def _read_questions(path: str):
    """Return the ``"questions"`` entry of the JSON file at *path*.

    Shows a Streamlit error and returns ``None`` when the file cannot be read,
    is not valid JSON, or has no ``"questions"`` entry.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data["questions"]
    except (OSError, ValueError, KeyError, TypeError) as e:
        st.error(f"Could not read {path}: {e!r}")
        return None


# =======================================
# 2. Streamlit UI and GAIA Leaderboard
# =======================================
st.set_page_config(page_title="GAIA Final Assignment", layout="centered")
st.title("🤖 GAIA Benchmark Final Assignment")

st.markdown(
    """
Welcome to your **Final Assignment** for the Agents course!
This app evaluates your custom agent on a subset of **GAIA Level 1** benchmark questions.

To pass and earn your certificate 🏅, your agent must score **≥ 30% accuracy**.

---

### 🧠 Steps
1. Log in to your **Hugging Face** account.
2. Run your **agent** on the GAIA dataset.
3. Automatically submit your results for scoring.

---
"""
)

# =========================
# 3. Login Section
# =========================
hf_token = st.text_input("🔑 Enter your Hugging Face access token:", type="password")

if st.button("Login to Hugging Face"):
    if not hf_token:
        # Do not call login() without a token supplied through the UI.
        st.warning("Please enter your Hugging Face access token first.")
    else:
        try:
            login(token=hf_token)
            st.success("✅ Logged in successfully!")
        except Exception as e:
            st.error(f"Login failed: {e}")

# =========================
# 4. Load GAIA Questions
# =========================
if st.button("🧩 Load GAIA Dataset"):
    st.info("Fetching 20 GAIA Level 1 questions...")
    try:
        _download_questions(QUESTIONS_URL, QUESTIONS_PATH, DOWNLOAD_TIMEOUT_S)
    except (OSError, ValueError) as e:
        st.error(f"Could not download the GAIA dataset: {e}")
    else:
        st.success("✅ Dataset loaded!")

# =========================
# 5. Run Evaluation
# =========================
if st.button("🚀 Run Evaluation & Submit All Answers"):
    if not os.path.exists(QUESTIONS_PATH):
        st.warning("Please load the GAIA dataset first.")
    else:
        questions = _read_questions(QUESTIONS_PATH)
        if questions is not None:
            agent = BasicAgent()
            results = {}

            for i, q in enumerate(questions):
                st.write(f"### Question {i+1}:")
                st.write(q)
                ans = agent(q)
                results[q] = ans
                time.sleep(1)  # brief pause between questions

            # Save answers
            with open(ANSWERS_PATH, "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2)

            st.success("✅ All questions answered and saved as answers.json")

            # Auto-submit via the smolagents module CLI (if supported).
            # NOTE(review): the existence of `smolagents.eval_gaia` could not be
            # confirmed from this file; a non-zero exit is reported below.
            st.info("📤 Submitting answers to GAIA leaderboard...")
            try:
                completed = subprocess.run(
                    [sys.executable, "-m", "smolagents.eval_gaia", "submit", ANSWERS_PATH],
                    capture_output=True,
                    text=True,
                    check=False,
                )
            except OSError as e:
                st.error(f"Could not start the submission command: {e}")
            else:
                if completed.returncode == 0:
                    st.success("🎉 Submission complete! Check your score on the leaderboard.")
                else:
                    details = (completed.stderr or completed.stdout or "").strip()
                    st.error(
                        f"Submission failed (exit code {completed.returncode}). {details}"
                    )

# =========================
# 6. Notes
# =========================
st.markdown(
    """
---

### ℹ️ Notes
- You can edit the agent logic inside the `BasicAgent` class to boost performance.
- Use more reasoning, examples, or API calls for higher accuracy.
- Make your Space **public** before submitting.

Good luck on the GAIA leaderboard! 🌍
"""
)