Spaces:
Sleeping
Sleeping
File size: 5,296 Bytes
00dd226 b4f20f1 0dd0640 12c1e0f 0dd0640 00dd226 b4f20f1 a2709ae 64188e4 00dd226 a2709ae 5c082e6 12c1e0f 30c59be 12c1e0f 6e9c3e4 12c1e0f 0dd0640 4498d52 12c1e0f 0dd0640 12c1e0f 8c360c1 12c1e0f 00dd226 a2709ae 00dd226 12c1e0f b4f20f1 00dd226 a2709ae 00dd226 12c1e0f 00dd226 12c1e0f fe2fa1d a2709ae 00dd226 a2709ae 00dd226 a2709ae 12c1e0f 0dd0640 12c1e0f 0dd0640 a2709ae 00dd226 a2709ae 12c1e0f 00dd226 12c1e0f 00dd226 fe2fa1d 00dd226 12c1e0f 00dd226 12c1e0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import os
import gradio as gr
import requests
import pandas as pd
from dotenv import load_dotenv
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain.agents import create_react_agent, AgentExecutor
from langchain_core.prompts import PromptTemplate
from langchain_core.messages import AIMessage, HumanMessage
from tools.web_searcher import web_search_tool
from tools.calculator import calculator_tool
from tools.file_reader import read_file_tool
from tools.code_review import code_reviewer
from tools.web_scraper import web_scraper_tool
import time
# Load environment variables (e.g. OPENAI_API_KEY) via python-dotenv before
# anything below reads them.
load_dotenv()

# Base URL of the scoring service; "/questions" and "/submit" are appended to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# === Tools ===
# Every tool the agent is allowed to call, in the order it is listed to the model.
tools = [
    web_search_tool,
    calculator_tool,
    read_file_tool,
    code_reviewer,
    web_scraper_tool,
]

# === Model ===
# Chat model that drives the agent; the key is read from the environment.
model = ChatOpenAI(
    model="gpt-4o",
    api_key=os.getenv("OPENAI_API_KEY"),
)
# === ReAct Prompt ===
# Prompt text for the ReAct agent.
#
# Placeholders: {tools} and {tool_names} describe the available tools,
# {input} is the question, and {agent_scratchpad} carries the running
# Thought/Action/Observation trace.
#
# The Thought/Action/Observation format is spelled out exactly once and ends
# with the "Final Answer:" step, so the model sees a single, complete format.
#
# NOTE(review): rules 1 and 4 mention tools as "file_reader", "calculator" and
# "web_scraper"; confirm these match the names the tool objects register under,
# since {tool_names} is what the model must actually emit after "Action:".
template = '''Answer the following questions as best you can. You have access to the following tools:
{tools}
Strict rules to follow:
1️⃣ Use tools when needed: web_search_tool for external information, file_reader for file data, calculator for math, code_reviewer for code questions, web_scraper for web content.
2️⃣ Combine tools logically for multi-step problems.
3️⃣ Format answers exactly as requested: single name, city, code, or number—no extra text.
4️⃣ If a link is provided, use web_search_tool or web_scraper to extract the information.
5️⃣ Do not guess; if information is unavailable, say: 'No answer found.'
6️⃣ Be precise, factual, and avoid hallucination. Verify using tools.
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Begin!
Question: {input}
Thought:{agent_scratchpad}'''
# Compile the prompt text into a LangChain prompt template.
react_prompt = PromptTemplate.from_template(template)

# === ReAct Agent ===
# Build the ReAct agent from the model, the tool list, and the prompt.
agent = create_react_agent(model, tools, react_prompt)

# handle_parsing_errors=True: when a model reply does not follow the
# Thought/Action/Final Answer format, the parse error is fed back to the model
# as an observation so it can retry, instead of raising and failing the whole
# question.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)
# === Gradio Interface ===
def process_question(profile: gr.OAuthProfile | None) -> str:
    """Run the agent on every evaluation question and submit the answers.

    Fetches the question list from the scoring API, answers each question
    with ``agent_executor``, posts all answers in a single submission, and
    returns a status string for display in the UI.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in. Only ``profile.username`` is read.

    Returns:
        One of: the login prompt, a fetch error, a message that there was
        nothing to run or submit, a submission error, or the score summary
        built from the scoring API's response.
    """
    if profile is None:
        return "Please log in first."

    username = profile.username
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    # SPACE_ID comes from the environment; if it is unset the link will
    # contain the literal text "None".
    agent_code = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"

    # Broad catch is deliberate here: this is the UI boundary, and any failure
    # (network, HTTP status, bad JSON) should become a readable status message.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}"

    # A non-list payload (e.g. an error object) or an empty list leaves
    # nothing to evaluate; stop before looping over it.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format."

    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue

        # A failure on one question must not abort the run: record the error
        # text as that question's answer and move on.
        try:
            result = agent_executor.invoke({"input": question_text})
            # str() keeps the payload JSON-serialisable whatever the agent returns.
            final_answer = str(result["output"] if isinstance(result, dict) else result)
        except Exception as e:
            final_answer = f"[ERROR: {e}]"

        answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
        time.sleep(1)  # slight delay to avoid rate limits

    # Do not send an empty submission when every item was skipped.
    if not answers_payload:
        return "Agent did not produce any answers to submit."

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    # The summary is built inside the try so that an unexpected response shape
    # is also reported as a failed submission rather than raising.
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        return (
            "Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
    except Exception as e:
        return f"Submission Failed: {e}"
# Page layout: heading, instructions, login button, run button, and a
# non-editable status box that shows the result of the run.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation")
    gr.Markdown("Login, run evaluation, and submit your answers below.")
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )

    # The click handler takes no UI inputs; its returned string is written
    # into the status box.
    run_button.click(fn=process_question, outputs=[status_output])
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    print("Launching GAIA Agent ReAct Evaluation App")
    demo.launch()
|