File size: 4,213 Bytes
10e9b7d eccf8e4 3c4371f a3e6286 10e9b7d e80aab9 3db6293 e80aab9 a3e6286 31243f4 a3e6286 31243f4 a3e6286 3c4371f 7e4a06b a3e6286 7e4a06b a3e6286 3c4371f 7e4a06b 31243f4 e80aab9 a3e6286 31243f4 a3e6286 36ed51a 3c4371f a3e6286 eccf8e4 a3e6286 7d65c66 a3e6286 7d65c66 a3e6286 e80aab9 7d65c66 a3e6286 31243f4 a3e6286 31243f4 a3e6286 31243f4 a3e6286 31243f4 a3e6286 e80aab9 a3e6286 e80aab9 7d65c66 e80aab9 a3e6286 e80aab9 a3e6286 e80aab9 a3e6286 7d65c66 a3e6286 e80aab9 a3e6286 e80aab9 a3e6286 e80aab9 7e4a06b e80aab9 a3e6286 e80aab9 a3e6286 e80aab9 a3e6286 e80aab9 a3e6286 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import os
import gradio as gr
import requests
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# -----------------------------------------------------
# REAL AGENT USING HUGGINGFACE MODEL (NO API KEY NEEDED)
# -----------------------------------------------------
class BasicAgent:
    """Minimal QA agent backed by a locally loaded HuggingFace causal LM.

    Loads Qwen2.5-1.5B-Instruct once at construction time (no API key
    needed) and answers questions by greedy decoding, returning only the
    text produced after the "Final Answer:" marker.
    """

    def __init__(self):
        # NOTE: downloads/loads the model — network and disk I/O, may be slow.
        print("Loading HF model...")
        model_name = "Qwen/Qwen2.5-1.5B-Instruct"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            device_map="auto",
        )

    def __call__(self, question: str) -> str:
        """Return the model's short final answer to *question*."""
        prompt = f"""
You are a strict exam solver.
Give ONLY the final answer. No explanation. No extra words.
Question:
{question}
Final Answer:
"""
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        # Greedy decoding: with do_sample=False, sampling knobs such as
        # `temperature` are ignored (and emit a transformers warning), so
        # the previous temperature=0.1 argument is dropped.
        output_ids = self.model.generate(
            **inputs,
            max_new_tokens=80,
            do_sample=False,
        )
        # Decode only the newly generated tokens instead of the full
        # sequence — avoids relying on string-splitting the echoed prompt.
        prompt_len = inputs["input_ids"].shape[1]
        decoded = self.tokenizer.decode(
            output_ids[0][prompt_len:], skip_special_tokens=True
        )
        # Defensive: if the model repeated the "Final Answer:" marker in
        # its own output, keep only the text after the last occurrence.
        if "Final Answer:" in decoded:
            decoded = decoded.split("Final Answer:")[-1]
        return decoded.strip()
# -----------------------------------------------------
# RUN + SUBMIT ANSWERS
# -----------------------------------------------------
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every evaluation question and submit all answers.

    Args:
        profile: The logged-in HuggingFace OAuth profile injected by
            Gradio's LoginButton, or None when nobody is logged in.

    Returns:
        A ``(status_message, results_dataframe)`` tuple. The dataframe is
        ``None`` when the run aborts before any question is answered.
    """
    # Guard clause: submission requires a logged-in username.
    if not profile:
        return "Please login to HuggingFace.", None
    username = f"{profile.username}"

    space_id = os.getenv("SPACE_ID")  # set by HF Spaces at runtime
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Instantiate Agent (loads the model; can fail on download/OOM).
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # Link to this Space's code, required by the scoring endpoint.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch Questions
    try:
        response = requests.get(questions_url, timeout=20)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    if not questions:
        # Nothing to answer — avoid submitting an empty payload.
        return "No questions returned by the server.", None

    results_log = []
    answers_payload = []
    for q in questions:
        task_id = q.get("task_id")
        text = q.get("question")
        if task_id is None or text is None:
            # Skip malformed entries instead of crashing the whole run
            # (q["task_id"] would raise KeyError and abort everything).
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # Record the failure so the run can still be submitted.
            ans = f"ERROR: {e}"
        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
        results_log.append(
            {"Task ID": task_id, "Question": text, "Submitted Answer": ans}
        )

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    # Submit answers and report the resulting score.
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_msg = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message')}"
        )
        return final_msg, pd.DataFrame(results_log)
    except Exception as e:
        # Still surface the per-question log so work isn't lost.
        return f"Submission Failed: {e}", pd.DataFrame(results_log)
# -----------------------------------------------------
# GRADIO UI
# -----------------------------------------------------
# Build the Gradio UI: login, a run button, a status box, and a results table.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Unit 4 Agent Evaluation Runner")
    gr.Markdown("Login → Run agent → Auto-submit to leaderboard")
    # HuggingFace OAuth login; Gradio injects the resulting OAuthProfile
    # into run_and_submit_all's `profile` parameter automatically.
    gr.LoginButton()
    run_btn = gr.Button("🚀 Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Status", lines=5)
    table = gr.DataFrame(label="Agent Answers")
    # No explicit inputs: the OAuth profile is the only (auto-supplied) input.
    run_btn.click(
        run_and_submit_all,
        outputs=[status_box, table]
    )

if __name__ == "__main__":
    demo.launch()
|