Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
| from langchain import hub | |
| from langchain_openai import ChatOpenAI | |
| from langchain.agents import create_react_agent, AgentExecutor | |
| from langchain_core.prompts import PromptTemplate | |
| from langchain_core.messages import AIMessage, HumanMessage | |
| from tools.web_searcher import web_search_tool | |
| from tools.calculator import calculator_tool | |
| from tools.file_reader import read_file_tool | |
| from tools.code_review import code_reviewer | |
| from tools.web_scraper import web_scraper_tool | |
| import time | |
# Load variables from a .env file (if one exists) into the process environment
# before anything below reads it.
load_dotenv()

# Base URL of the scoring service; the /questions and /submit endpoints used
# by process_question hang off this.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# === Tools ===
# The same list is given to both the agent and its executor.
tools = [
    web_search_tool,
    calculator_tool,
    read_file_tool,
    code_reviewer,
    web_scraper_tool,
]

# === Model ===
# Chat model that drives the ReAct loop; the key is read from the environment.
model = ChatOpenAI(
    model="gpt-4o",
    api_key=os.getenv("OPENAI_API_KEY"),
)
# === ReAct Prompt ===
# Placeholders: {tools} and {tool_names} describe the available tools,
# {input} is the question, and {agent_scratchpad} carries the running
# Thought/Action/Observation trace.
# The format section appears exactly once: an earlier, truncated copy (without
# the "Final Answer" step) was removed so the model sees a single,
# unambiguous output format.
# NOTE(review): the tool names quoted in rules 1 and 4 should match each
# tool's registered name -- verify against the tools package.
template = '''Answer the following questions as best you can. You have access to the following tools:
{tools}
Strict rules to follow:
1️⃣ Use tools when needed: web_search_tool for external information, file_reader for file data, calculator for math, code_reviewer for code questions, web_scraper for web content.
2️⃣ Combine tools logically for multi-step problems.
3️⃣ Format answers exactly as requested: single name, city, code, or number—no extra text.
4️⃣ If a link is provided, use web_search_tool or web_scraper to extract the information.
5️⃣ Do not guess; if information is unavailable, say: 'No answer found.'
6️⃣ Be precise, factual, and avoid hallucination. Verify using tools.
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
(this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question
Begin!
Question: {input}
Thought:{agent_scratchpad}'''
# Compile the prompt text into a template the agent can fill in.
react_prompt = PromptTemplate.from_template(template)

# === ReAct Agent ===
agent = create_react_agent(model, tools, react_prompt)

# handle_parsing_errors=True: when the model's reply does not follow the
# Thought/Action/Action Input format, the parse error is fed back to the model
# so it can retry, instead of raising and losing the whole question.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
)
# === Gradio Interface ===
def process_question(profile: gr.OAuthProfile | None):
    """Run the agent on every evaluation question and submit the answers.

    Fetches the question list from the scoring service, invokes
    ``agent_executor`` once per question, and posts all answers in a single
    submission.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A human-readable status string: the submission summary on success,
        otherwise a message describing what went wrong (not logged in, fetch
        failure, nothing to submit, or submission failure).
    """
    if profile is None:
        return "Please log in first."

    username = profile.username
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"
    # Link to this Space's code, sent along with the answers.
    agent_code = f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}"

    # Guard against an empty or unexpectedly shaped payload before iterating.
    if not isinstance(questions_data, list) or not questions_data:
        return "Fetched questions list is empty or invalid format."

    answers_payload = []
    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue

        try:
            result = agent_executor.invoke({"input": question_text})
            final_answer = result["output"] if isinstance(result, dict) else result
        except Exception as e:
            # Best effort: record the failure as this task's answer and keep
            # going so one bad question does not abort the whole run.
            final_answer = f"[ERROR: {e}]"

        answers_payload.append({"task_id": task_id, "submitted_answer": final_answer})
        time.sleep(1)  # slight delay to avoid rate limits

    # Do not post an empty submission.
    if not answers_payload:
        return "Agent did not produce any answers to submit."

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
    except Exception as e:
        return f"Submission Failed: {e}"

    return (
        "Submission Successful!\n"
        f"User: {result_data.get('username')}\n"
        f"Overall Score: {result_data.get('score', 'N/A')}% "
        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
        f"Message: {result_data.get('message', 'No message received.')}"
    )
# Page layout: heading, login button, a run button, and a read-only status box.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Agent Evaluation")
    gr.Markdown("Login, run evaluation, and submit your answers below.")
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    # The click handler takes no input components; its return value fills the
    # status box.
    run_button.click(fn=process_question, outputs=[status_output])


if __name__ == "__main__":
    print("Launching GAIA Agent ReAct Evaluation App")
    demo.launch()