import os
import gradio as gr
import requests
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"


# -----------------------------------------------------
# REAL AGENT USING HUGGINGFACE MODEL (NO API KEY NEEDED)
# -----------------------------------------------------
class BasicAgent:
    def __init__(self):
        print("Loading HF model...")
        model_name = "Qwen/Qwen2.5-1.5B-Instruct"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            device_map="auto",
        )

    def __call__(self, question: str) -> str:
        prompt = f"""
You are a strict exam solver. Give ONLY the final answer.
No explanation. No extra words.

Question: {question}
Final Answer:
"""
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        # Greedy decoding: temperature is ignored when do_sample=False, so it is omitted.
        output_ids = self.model.generate(
            **inputs,
            max_new_tokens=80,
            do_sample=False,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        decoded = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Keep only the text after "Final Answer:"
        if "Final Answer:" in decoded:
            decoded = decoded.split("Final Answer:")[-1].strip()

        return decoded


# -----------------------------------------------------
# RUN + SUBMIT ANSWERS
# -----------------------------------------------------
def run_and_submit_all(profile: gr.OAuthProfile | None):
    space_id = os.getenv("SPACE_ID")

    if profile:
        username = profile.username
    else:
        return "Please login to HuggingFace.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Instantiate the agent
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch questions from the scoring API
    try:
        response = requests.get(questions_url, timeout=20)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    results_log = []
    answers_payload = []

    # Run the agent on every question, logging answers and errors
    for q in questions:
        task_id = q["task_id"]
        text = q["question"]
        try:
            ans = agent(text)
        except Exception as e:
            ans = f"ERROR: {e}"

        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
        results_log.append({"Task ID": task_id, "Question": text, "Submitted Answer": ans})

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    # Submit answers
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_msg = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message')}"
        )
        return final_msg, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission Failed: {e}", pd.DataFrame(results_log)


# -----------------------------------------------------
# GRADIO UI
# -----------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 GAIA Unit 4 Agent Evaluation Runner")
    gr.Markdown("Login → Run agent → Auto-submit to leaderboard")

    gr.LoginButton()

    run_btn = gr.Button("🚀 Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Status", lines=5)
    table = gr.DataFrame(label="Agent Answers")

    # The gr.OAuthProfile argument is injected automatically by Gradio's login flow,
    # so no explicit inputs are wired here.
    run_btn.click(
        run_and_submit_all,
        outputs=[status_box, table],
    )

if __name__ == "__main__":
    demo.launch()