Spaces:

agents-course
/

Final_Assignment_Template

Running

File size: 5,024 Bytes

da40f6e
10e9b7d
da40f6e
 
 
 
 
 
e80aab9
da40f6e
 
 
31243f4
 
da40f6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c4371f
da40f6e
 
 
 
 
 
 
 
 
 
 
 
3c4371f
da40f6e
 
 
 
 
 
 
 
 
e80aab9
31243f4
da40f6e
 
 
 
31243f4
da40f6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31243f4
da40f6e
 
 
 
31243f4
da40f6e
 
 
e80aab9
da40f6e
 
 
 
 
e80aab9
da40f6e
 
7d65c66
da40f6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d65c66
da40f6e
 
 
7d65c66
da40f6e

# app.py — Final GAIA Assignment Template (Enhanced)

import streamlit as st
from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel
from huggingface_hub import login
import json
import time
import os

# =========================
# 1. Define the GAIA Agent
# =========================
class BasicAgent:
    """GAIA question-answering agent built on a smolagents ``CodeAgent``.

    The instance is callable: ``agent(question)`` runs the underlying
    ``CodeAgent`` on a prompt wrapping the question and returns a cleaned-up
    answer string suitable for exact-match scoring.
    """

    # Lower-cased lead-ins that models tend to prepend to the actual answer.
    # Matching is case-insensitive, so "FINAL ANSWER:", "Final Answer:",
    # "Answer:" and "answer:" are all covered. Order matters: the longer
    # prefix is tried first so "Final Answer: Answer: x" is fully unwrapped.
    _ANSWER_PREFIXES = ("final answer:", "answer:")

    def __init__(self):
        """Build the model, the tool list and the wrapped ``CodeAgent``."""
        st.write("🔧 Initializing enhanced GAIA Agent...")

        # Core model from Hugging Face
        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")

        # Tools for reasoning and search
        # NOTE(review): confirm that `PythonREPLTool` is exported by the
        # installed smolagents version — verify against the package docs.
        self.tools = [
            DuckDuckGoSearchTool(),
            PythonREPLTool()
        ]

        # Create a CodeAgent instance (capped at 5 reasoning steps per run)
        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            name="GAIA_Level1_Agent",
            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
            max_steps=5
        )

    def sanitize(self, text: str) -> str:
        """Clean and simplify final outputs for benchmark scoring.

        Args:
            text: Raw agent output. Usually a string, but non-string values
                (for example an ``int`` or ``float`` produced by executed
                code) are accepted and converted with ``str()``.

        Returns:
            The answer with known lead-in prefixes removed, one pair of
            enclosing double quotes stripped, and runs of whitespace
            collapsed to single spaces. ``None`` and blank input yield ``""``.
        """
        if text is None:
            return ""
        # The agent may hand back a number or other object; calling .strip()
        # on it would raise and turn a correct answer into "N/A". Coercing
        # here also keeps falsy-but-valid answers such as 0.
        text = str(text).strip()
        if not text:
            return ""
        for prefix in self._ANSWER_PREFIXES:
            # Compare a same-length slice so the cut position is always exact.
            if text[:len(prefix)].lower() == prefix:
                text = text[len(prefix):].strip()
        if text.startswith('"') and text.endswith('"'):
            text = text[1:-1]
        return " ".join(text.split())

    def __call__(self, question: str) -> str:
        """Run the agent on a single GAIA question.

        Args:
            question: The question text; only its first 80 characters are
                echoed to the Streamlit page.

        Returns:
            The sanitized answer, or ``"N/A"`` when the answer is empty or
            the agent raises any exception.
        """
        st.write(f"🤖 Running agent on: {question[:80]}...")
        prompt = (
            "You are a concise reasoning agent. "
            "Use your tools to find accurate answers. "
            "Always return only the final answer (no explanations).\n\n"
            f"Question: {question}"
        )

        # Broad catch is deliberate: one failing question must not abort the
        # whole evaluation run, so the failure is shown and scored as "N/A".
        try:
            response = self.agent.run(prompt)
            clean_answer = self.sanitize(response)
            st.write(f"✅ Final Answer: {clean_answer}")
            return clean_answer or "N/A"
        except Exception as e:
            st.error(f"⚠️ Agent failed: {e}")
            return "N/A"


# =======================================
# 2. Streamlit UI and GAIA Leaderboard
# =======================================
# Introductory markdown shown under the page title. Kept as a named constant
# so the rendering calls below stay short; the text itself is unchanged.
_INTRO_MARKDOWN = """
Welcome to your **Final Assignment** for the Agents course!

This app evaluates your custom agent on a subset of **GAIA Level 1** benchmark questions.
To pass and earn your certificate 🏅, your agent must score **≥ 30% accuracy**.

---

### 🧠 Steps
1. Log in to your **Hugging Face** account.  
2. Run your **agent** on the GAIA dataset.  
3. Automatically submit your results for scoring.

---
"""

# Page setup, then the heading and the intro text, in that order.
st.set_page_config(layout="centered", page_title="GAIA Final Assignment")
st.title("🤖 GAIA Benchmark Final Assignment")
st.markdown(_INTRO_MARKDOWN)

# =========================
# 3. Login Section
# =========================
# Masked input for the access token; the value is only used when the button
# below is pressed.
hf_token = st.text_input("🔑 Enter your Hugging Face access token:", type="password")
if st.button("Login to Hugging Face"):
    # Pasted tokens often carry stray whitespace, and an empty field should
    # produce a clear hint instead of a round trip to the Hub.
    cleaned_token = (hf_token or "").strip()
    if not cleaned_token:
        st.warning("Please enter your Hugging Face access token first.")
    else:
        try:
            login(token=cleaned_token)
        except Exception as e:
            # Boundary handler: surface any login failure on the page.
            st.error(f"Login failed: {e}")
        else:
            st.success("✅ Logged in successfully!")

# =========================
# 4. Load GAIA Questions
# =========================
if st.button("🧩 Load GAIA Dataset"):
    st.info("Fetching 20 GAIA Level 1 questions...")
    # Download into a scratch file first: wget opens the -O target before the
    # transfer, so a failed download written straight to questions.json
    # would leave an empty/partial file that later passes the existence
    # check and breaks JSON parsing.
    partial_path = "questions.json.part"
    exit_status = os.system(
        "wget -q https://huggingface.co/spaces/agents-course/Final_Assignment_Template/resolve/main/questions.json -O "
        + partial_path
    )
    downloaded = (
        exit_status == 0
        and os.path.exists(partial_path)
        and os.path.getsize(partial_path) > 0
    )
    if downloaded:
        # Promote the scratch file only once the download is known good.
        os.replace(partial_path, "questions.json")
        st.success("✅ Dataset loaded!")
    else:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        st.error(
            f"Failed to download the GAIA dataset (wget exit status {exit_status}). "
            "Check that wget is installed and the network is reachable."
        )

# =========================
# 5. Run Evaluation
# =========================
if st.button("🚀 Run Evaluation & Submit All Answers"):
    if not os.path.exists("questions.json"):
        st.warning("Please load the GAIA dataset first.")
    else:
        # Read the question list; a corrupt or unexpected file is reported on
        # the page instead of crashing the script with a traceback.
        questions = None
        try:
            with open("questions.json", "r", encoding="utf-8") as f:
                data = json.load(f)
            questions = data["questions"]
        except (OSError, ValueError, KeyError, TypeError) as e:
            # ValueError covers json.JSONDecodeError and decoding errors;
            # KeyError/TypeError cover a missing or mis-shaped "questions" key.
            st.error(f"Could not read questions.json: {e!r}. Please reload the GAIA dataset.")

        if questions is not None:
            agent = BasicAgent()
            results = {}

            # NOTE(review): each entry is used both as the prompt text and as
            # a dict key, so this assumes plain question strings — confirm
            # against the actual questions.json schema.
            for i, q in enumerate(questions):
                st.write(f"### Question {i+1}:")
                st.write(q)
                ans = agent(q)
                results[q] = ans
                time.sleep(1)  # brief pause between consecutive agent runs

            # Save answers
            with open("answers.json", "w", encoding="utf-8") as f:
                json.dump(results, f, indent=2)
            st.success("✅ All questions answered and saved as answers.json")

            # Auto-submit via the command line (if supported). The exit
            # status is checked so a failed submission is not shown as done.
            st.info("📤 Submitting answers to GAIA leaderboard...")
            exit_status = os.system("python3 -m smolagents.eval_gaia submit answers.json")
            if exit_status == 0:
                st.success("🎉 Submission complete! Check your score on the leaderboard.")
            else:
                st.error(
                    f"Submission command failed (exit status {exit_status}). "
                    "Your answers are still saved in answers.json."
                )

# =========================
# 6. Notes
# =========================
# Closing notes rendered at the bottom of the page; text is unchanged.
_NOTES_MARKDOWN = """
---
### ℹ️ Notes
- You can edit the agent logic inside the `BasicAgent` class to boost performance.  
- Use more reasoning, examples, or API calls for higher accuracy.  
- Make your Space **public** before submitting.

Good luck on the GAIA leaderboard! 🌍
"""

st.markdown(_NOTES_MARKDOWN)