File size: 4,213 Bytes
10e9b7d
 
eccf8e4
3c4371f
a3e6286
 
10e9b7d
e80aab9
3db6293
e80aab9
a3e6286
 
 
31243f4
 
a3e6286
 
 
 
 
 
 
 
 
 
31243f4
a3e6286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c4371f
7e4a06b
a3e6286
7e4a06b
a3e6286
3c4371f
7e4a06b
31243f4
 
e80aab9
a3e6286
31243f4
 
 
 
a3e6286
36ed51a
3c4371f
a3e6286
eccf8e4
a3e6286
7d65c66
a3e6286
7d65c66
a3e6286
e80aab9
7d65c66
 
a3e6286
 
 
 
 
31243f4
a3e6286
31243f4
a3e6286
31243f4
a3e6286
 
31243f4
a3e6286
 
 
 
 
e80aab9
a3e6286
e80aab9
7d65c66
e80aab9
 
a3e6286
 
e80aab9
 
a3e6286
 
 
e80aab9
a3e6286
 
 
7d65c66
a3e6286
e80aab9
 
a3e6286
 
 
e80aab9
a3e6286
 
e80aab9
7e4a06b
e80aab9
a3e6286
e80aab9
a3e6286
 
e80aab9
a3e6286
 
 
e80aab9
 
 
a3e6286
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import gradio as gr
import requests
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Constants ---
# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# URLs used by run_and_submit_all are built from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# -----------------------------------------------------
# REAL AGENT USING HUGGINGFACE MODEL (NO API KEY NEEDED)
# -----------------------------------------------------
class BasicAgent:
    """Question-answering agent backed by a locally loaded Hugging Face causal LM.

    The instance holds a tokenizer/model pair. Calling it with a question
    fills a fixed "exam solver" prompt, generates a completion with sampling
    disabled, and returns only the text the model produced.
    """

    # Checkpoint loaded when the caller does not name another one.
    DEFAULT_MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
    # Upper bound on the number of tokens generated per answer.
    MAX_NEW_TOKENS = 80
    # Marker that ends the prompt; removed from the answer if the model echoes it.
    ANSWER_MARKER = "Final Answer:"

    def __init__(self, model_name: str = DEFAULT_MODEL_NAME):
        """Load the tokenizer and model for ``model_name``.

        Args:
            model_name: Hugging Face model identifier to load. Defaults to
                ``DEFAULT_MODEL_NAME``.
        """
        print("Loading HF model...")

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            device_map="auto",
        )

    def __call__(self, question: str) -> str:
        """Return the model's answer to ``question``.

        Args:
            question: The question text inserted into the prompt.

        Returns:
            The generated completion with surrounding whitespace removed. If
            the completion repeats the "Final Answer:" marker, only the text
            after its last occurrence is returned.
        """
        prompt = f"""
You are a strict exam solver.
Give ONLY the final answer. No explanation. No extra words.

Question:
{question}

Final Answer:
"""

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        # Number of prompt tokens; generate() returns the prompt followed by
        # the completion, so everything past this index is new text.
        prompt_len = inputs["input_ids"].shape[-1]

        # Sampling is disabled, so no temperature is passed: it would be
        # ignored and only trigger a warning about an unused generation flag.
        output_ids = self.model.generate(
            **inputs,
            max_new_tokens=self.MAX_NEW_TOKENS,
            do_sample=False,
        )

        # Decode only the newly generated tokens so the prompt (and the
        # question text inside it) can never leak into the returned answer,
        # even if the tokenizer does not round-trip the prompt exactly.
        completion_ids = output_ids[0][prompt_len:]
        answer = self.tokenizer.decode(completion_ids, skip_special_tokens=True)

        # Take the answer after "Final Answer:" when the model repeats the marker.
        if self.ANSWER_MARKER in answer:
            answer = answer.split(self.ANSWER_MARKER)[-1]

        return answer.strip()


# -----------------------------------------------------
# RUN + SUBMIT ANSWERS
# -----------------------------------------------------
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every evaluation question and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``status_message`` is a
        human-readable summary or error description. ``results`` is a
        DataFrame with one row per answered question (task id, question,
        submitted answer), or ``None`` when the run stopped before any
        question was processed.
    """
    if not profile:
        return "Please login to HuggingFace.", None
    username = str(profile.username).strip()

    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Fetch the questions before loading the model: a network failure is
    # cheap to detect and should not cost a full model load first.
    try:
        response = requests.get(questions_url, timeout=20)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    if not isinstance(questions, list) or not questions:
        return "Error fetching questions: the server returned no questions.", None

    # Instantiate the agent (loads the model).
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    results_log = []
    answers_payload = []

    for q in questions:
        # Skip malformed entries instead of crashing the whole run.
        if not isinstance(q, dict):
            continue
        task_id = q.get("task_id")
        text = q.get("question")
        if not task_id or text is None:
            continue

        # A failure on one question is recorded as its answer so the
        # remaining questions are still attempted.
        try:
            ans = agent(text)
        except Exception as e:
            ans = f"ERROR: {e}"

        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
        results_log.append({"Task ID": task_id, "Question": text, "Submitted Answer": ans})

    results_df = pd.DataFrame(results_log)

    if not answers_payload:
        return "No answers were produced; nothing to submit.", results_df

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    # Submit answers. Any failure here (network, HTTP status, unexpected
    # response body) is reported together with the answers already computed.
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()

        final_msg = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message')}"
        )
    except Exception as e:
        return f"Submission Failed: {e}", results_df

    return final_msg, results_df


# -----------------------------------------------------
# GRADIO UI
# -----------------------------------------------------
with gr.Blocks() as demo:
    # Page header: title followed by a one-line description of the workflow.
    gr.Markdown(value="# 🧠 GAIA Unit 4 Agent Evaluation Runner")
    gr.Markdown(value="Login → Run agent → Auto-submit to leaderboard")

    # Login control, placed above the run button.
    gr.LoginButton()

    # Button that starts the evaluation run.
    run_btn = gr.Button(value="🚀 Run Evaluation & Submit All Answers")

    # Outputs: a status message and a table of the submitted answers.
    status_box = gr.Textbox(lines=5, label="Status")
    table = gr.DataFrame(label="Agent Answers")

    # The handler's two return values fill the status box and the table.
    run_btn.click(fn=run_and_submit_all, outputs=[status_box, table])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()