File size: 5,296 Bytes
00dd226
 
 
 
b4f20f1
0dd0640
 
12c1e0f
 
0dd0640
00dd226
 
 
b4f20f1
a2709ae
 
64188e4
00dd226
 
a2709ae
5c082e6
12c1e0f
 
 
 
 
 
 
 
30c59be
12c1e0f
6e9c3e4
12c1e0f
 
0dd0640
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4498d52
 
12c1e0f
 
 
 
 
 
 
 
 
 
 
 
 
0dd0640
12c1e0f
 
 
 
8c360c1
12c1e0f
 
 
 
 
 
00dd226
a2709ae
00dd226
 
 
12c1e0f
b4f20f1
00dd226
a2709ae
 
 
00dd226
12c1e0f
00dd226
12c1e0f
 
fe2fa1d
a2709ae
00dd226
a2709ae
 
00dd226
a2709ae
12c1e0f
 
0dd0640
12c1e0f
 
 
0dd0640
 
 
a2709ae
 
00dd226
 
a2709ae
 
 
 
 
 
 
 
 
 
12c1e0f
00dd226
12c1e0f
00dd226
 
fe2fa1d
 
00dd226
 
 
 
12c1e0f
00dd226
 
12c1e0f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
from langchain import hub
from langchain_openai import ChatOpenAI 
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import AIMessage, HumanMessage
from tools.web_searcher import web_search_tool
from tools.calculator import calculator_tool
from tools.file_reader import read_file_tool
from tools.code_review import code_reviewer
from tools.web_scraper import web_scraper_tool
import time

# Load environment variables from a local .env file (python-dotenv) so that
# settings such as OPENAI_API_KEY can be supplied without exporting them.
load_dotenv()

# Base URL of the scoring service that serves questions and accepts answers.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# === Tools ===
# Every tool the agent is allowed to call; the same list is handed to both
# the agent factory and the executor.
tools = [
    web_search_tool,
    calculator_tool,
    read_file_tool,
    code_reviewer,
    web_scraper_tool,
]

# === Model ===
# Chat model that drives the agent; the API key is read from the environment.
model = ChatOpenAI(
    model="gpt-4o",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# === ReAct Prompt ===
# Prompt for the ReAct agent. It must expose exactly these placeholders:
#   {tools}            - rendered descriptions of the available tools
#   {tool_names}       - comma-separated tool names the model may pick from
#   {input}            - the question being answered
#   {agent_scratchpad} - the running Thought/Action/Observation transcript
# The format section appears once only: a second, incomplete copy (without the
# repeat clause and the "Final Answer:" line) gave the model two conflicting
# output formats.
# NOTE(review): rule 1 refers to tools by short names (file_reader, calculator,
# web_scraper); confirm these match the registered names shown in {tool_names}.
template = '''Answer the following questions as best you can. You have access to the following tools:

{tools}

Strict rules to follow:
1️⃣ Use tools when needed: web_search_tool for external information, file_reader for file data, calculator for math, code_reviewer for code questions, web_scraper for web content.
2️⃣ Combine tools logically for multi-step problems.
3️⃣ Format answers exactly as requested: single name, city, code, or number—no extra text.
4️⃣ If a link is provided, use web_search_tool or web_scraper to extract the information.
5️⃣ Do not guess; if information is unavailable, say: 'No answer found.'
6️⃣ Be precise, factual, and avoid hallucination. Verify using tools.

Use the following format for every question:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}'''

# Compile the raw template string into a LangChain prompt object.
react_prompt = PromptTemplate.from_template(template)

# === ReAct Agent ===
agent = create_react_agent(model, tools, react_prompt)
# handle_parsing_errors=True feeds a malformed Thought/Action step back to the
# model as an observation so it can retry, instead of raising and losing the
# whole question to a single formatting slip.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)

# === Gradio Interface ===
def process_question(profile: gr.OAuthProfile | None) -> str:
    """Run the agent on every evaluation question and submit the answers.

    Fetches the question list from ``DEFAULT_API_URL``, invokes
    ``agent_executor`` once per question, posts all collected answers to the
    ``/submit`` endpoint and reports the outcome.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. The click handler is wired without explicit inputs,
            so this value is expected to be supplied by Gradio based on the
            parameter's type annotation (which must therefore stay as is).

    Returns:
        A human-readable status message: a login prompt, a fetch error, a
        notice that nothing could be submitted, the submission summary, or a
        submission error. The function never raises for network/agent errors.
    """
    if profile is None:
        return "Please log in first."

    username = profile.username
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    # Link to the Space's source; if SPACE_ID is unset the URL contains "None".
    agent_code = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"

    # Broad catch is deliberate: this is a UI callback and any failure
    # (network, HTTP status, invalid JSON) should surface as a status message.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}"

    # Guard against an empty or non-list payload; iterating e.g. a dict would
    # yield its keys and crash on ``item.get`` below.
    if not isinstance(questions_data, list) or not questions_data:
        return "Error fetching questions: the question list is empty or has an unexpected format."

    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue

        # One failing question must not abort the run; its error text is
        # submitted in place of an answer.
        try:
            result = agent_executor.invoke({"input": question_text})
            final_answer = result["output"] if isinstance(result, dict) else result
        except Exception as e:
            final_answer = f"[ERROR: {e}]"

        answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
        time.sleep(1)  # slight delay to avoid rate limits

    if not answers_payload:
        return "No valid questions were found, so nothing was submitted."

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        return f"Submission Failed: {e}"

    return (
        f"Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )

# Gradio UI: a login button, a single "run" button and a read-only text box
# that shows whatever status string process_question returns.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation")
    gr.Markdown("Login, run evaluation, and submit your answers below.")
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )

    # No explicit inputs: the handler's only argument is the OAuth profile.
    run_button.click(
        fn=process_question,
        inputs=None,
        outputs=status_output,
    )

# Start the web app only when executed as a script, not when imported.
if __name__ == "__main__":
    print("Launching GAIA Agent ReAct Evaluation App")
    demo.launch()