# tung
# local rating file
# 5fb6c43
import hashlib # <-- added
import os
import tempfile
from datetime import datetime
from pathlib import Path
import datasets
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# ------------------------------------------------------------
# Cloud‑friendly Q/A preference rater for **Hugging Face Spaces**
# ------------------------------------------------------------
# This version swaps local CSV persistence for a tiny remote‑dataset
# workflow that works on Spaces:
# • Ratings are stored in (and loaded from) a lightweight **dataset
# repo** on the Hugging Face Hub – no local file system required.
# • The dataset repo is set via the `RATINGS_REPO` env‑var.
# • You must pass a write‑enabled token (env‑var `HF_TOKEN`) that has
# `write` permission on that dataset.
#
# Quick setup guide
# -----------------
# 1. Create a dataset repository to hold the ratings file, e.g.:
#    https://huggingface.co/datasets/<org>/qa-rater-data
# 2. Inside **Space Settings ▸ Secrets**, add:
#    • `RATINGS_REPO` → <org>/qa-rater-data
#    • `HF_TOKEN` → a token with *Write* access to that repo
# 3. Add `huggingface_hub` to your `requirements.txt` or
#    `pip install huggingface_hub` locally.
# 4. Deploy / push your updated Space – ratings will now persist in
# the dataset repo instead of the Space’s ephemeral storage.
# ------------------------------------------------------------
# -----------------------------------------------------------------------------
# Configuration – constants & styling
# -----------------------------------------------------------------------------
# Path to a JSON sample file. NOTE(review): nothing in the visible part of
# this file reads DATA_PATH – confirm whether it is still needed.
DATA_PATH = "human_judgement/selected_samples.json"
# File name of the ratings CSV. It is used both as the path inside the dataset
# repo and as the local file path when no repo is configured.
RATINGS_FILE = (
    "human_judgement_irish_grammatical_test.csv"  # Name *inside* the dataset repo
)
# Remote persistence is currently switched off: RATINGS_REPO is hard-coded to
# None, so the rating helpers below skip the Hub and read/write RATINGS_FILE
# locally. Restore the env-var lookup on the next line to re-enable it.
# RATINGS_REPO = os.getenv("RATINGS_REPO")  # e.g. "org/qa-rater-data"
RATINGS_REPO = None
HF_TOKEN = os.getenv("HF_TOKEN")  # write token for that repo
MAX_HEIGHT_PX = 400  # Max visible height for answer Markdown blocks
# Hub client; stays None when no token is configured.
api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
# -----------------------------------------------------------------------------
# Helper functions – data I/O
# -----------------------------------------------------------------------------
def user_bucket(user_id: str, buckets: int = 10) -> int:
    """Map ``user_id`` to a stable bucket number in ``1..buckets``.

    The identifier is UTF-8 encoded and hashed with SHA-256. The digest,
    read as one big-endian unsigned integer, is reduced modulo ``buckets``
    and shifted by one so that numbering starts at 1.
    """
    digest = hashlib.sha256(user_id.encode("utf-8")).digest()
    digest_value = int.from_bytes(digest, byteorder="big")
    return digest_value % buckets + 1
def load_data(user_id: str) -> pd.DataFrame:
    """Load the evaluation items as a DataFrame.

    Every caller currently receives the full ``train`` split of the
    ``tktung/irish_grammar_test`` dataset. ``user_id`` is kept in the
    signature for callers but is not consulted: the earlier per-user scheme
    (loading split ``part_<bucket>`` with ``bucket = user_bucket(user_id)``
    and falling back to ``train``) is disabled.

    Args:
        user_id: Identifier of the rater (currently unused).

    Returns:
        A DataFrame with at least the columns ``question``, ``response1``
        and ``response2``.

    Raises:
        ValueError: If any of the required columns is missing.
    """
    ds = datasets.load_dataset("tktung/irish_grammar_test", split="train")
    df = pd.DataFrame(ds)

    # A tuple (not a set) keeps the order of the names in the error message
    # deterministic from run to run.
    required = ("question", "response1", "response2")
    if any(column not in df.columns for column in required):
        raise ValueError(f"Dataset must contain columns: {', '.join(required)}")
    return df
# ---------- Rating persistence helpers ---------------------------------------
def _download_remote_ratings() -> Path | None:
    """Fetch the ratings CSV from the Hub dataset repo.

    Returns the local path of the downloaded file, or ``None`` when no
    ``RATINGS_REPO`` is configured or the download fails for any reason.
    """
    if RATINGS_REPO:
        try:
            downloaded = hf_hub_download(
                repo_id=RATINGS_REPO,
                filename=RATINGS_FILE,
                repo_type="dataset",
                token=HF_TOKEN,
                cache_dir=tempfile.gettempdir(),
            )
        except Exception:
            # The file or repo may not exist yet; the caller then starts
            # from an empty table.
            return None
        return Path(downloaded)
    return None
def load_ratings() -> pd.DataFrame:
    """Return the ratings table from the Hub, a local CSV, or an empty frame.

    Lookup order:
      1. the file returned by ``_download_remote_ratings()``, if any;
      2. ``RATINGS_FILE`` in the current working directory (local/dev runs);
      3. an empty DataFrame with the expected columns.

    ``user_id`` is always read as text. Without this, pandas infers an
    integer column for identifiers such as ``"123"`` or ``"007"``, and the
    ``ratings.user_id == user_id`` comparisons made by callers never match.
    That breaks de-duplication, resume and progress reporting.

    Returns:
        The ratings DataFrame. Columns introduced after the first release
        are added (filled with ``pd.NA``) when an older file lacks them.
    """
    remote = _download_remote_ratings()
    if remote and remote.exists():
        csv_source = remote
    elif RATINGS_FILE and os.path.exists(RATINGS_FILE):
        # Running locally (dev) – load local file if present.
        csv_source = RATINGS_FILE
    else:
        csv_source = None

    if csv_source is not None:
        # Keep identifiers as strings so they compare equal to the textbox
        # value the user typed.
        df = pd.read_csv(csv_source, dtype={"user_id": str})
    else:
        df = pd.DataFrame(
            columns=[
                "user_id",
                "user_bucket",
                "row_index",
                "choice",
                "timestamp",
                "proficiency",
                "is_native",
                "studied_second_level",
                "studied_third_level",
                "uses_for_work",
                "usage_frequency",
            ]
        )

    # Backward compatibility: ensure columns added in later versions exist.
    required_cols = [
        "proficiency",
        "is_native",
        "studied_second_level",
        "studied_third_level",
        "uses_for_work",
        "usage_frequency",
        "user_bucket",
    ]
    for col in required_cols:
        if col not in df.columns:
            df[col] = pd.NA
    return df
def _upload_remote_ratings(df: pd.DataFrame):
    """Persist the ratings table.

    With both ``RATINGS_REPO`` and the Hub client ``api`` available, the
    table is written to a CSV in a temporary directory and uploaded to the
    dataset repo as one commit. Otherwise it is written to ``RATINGS_FILE``
    relative to the current working directory.
    """
    remote_enabled = bool(RATINGS_REPO and api)
    if remote_enabled:
        with tempfile.TemporaryDirectory() as scratch_dir:
            staged_csv = Path(scratch_dir) / RATINGS_FILE
            staged_csv.parent.mkdir(parents=True, exist_ok=True)
            df.to_csv(staged_csv, index=False)
            # Upload while the temporary directory still exists.
            api.upload_file(
                path_or_fileobj=str(staged_csv),
                path_in_repo=RATINGS_FILE,
                repo_id=RATINGS_REPO,
                repo_type="dataset",
                commit_message="Add/Update rating",
            )
    else:
        # Running locally (dev) – keep the CSV on disk for inspection.
        df.to_csv(RATINGS_FILE, index=False)
def save_rating(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    row_index: int,
    choice: int,
):
    """Record one rating together with the rater's demographic answers.

    The current ratings table is loaded; if it already holds a row for this
    ``user_id`` and ``row_index`` nothing is written. Otherwise a new row is
    appended and the whole table is handed to ``_upload_remote_ratings``.
    Demographic answers are stored stripped and lower-cased (``None``
    becomes the empty string).
    """
    existing = load_ratings()
    same_user = existing.user_id == user_id
    same_item = existing.row_index == row_index
    if (same_user & same_item).any():
        return

    def _clean(answer):
        return (answer or "").strip().lower()

    record = {
        "user_id": user_id,
        "user_bucket": user_bucket(user_id),
        "proficiency": _clean(proficiency),
        "is_native": _clean(is_native),
        "studied_second_level": _clean(studied_second_level),
        "studied_third_level": _clean(studied_third_level),
        "uses_for_work": _clean(uses_for_work),
        "usage_frequency": _clean(usage_frequency),
        "row_index": row_index,
        "choice": choice,
        "timestamp": datetime.utcnow().isoformat(),
    }
    updated = pd.concat([existing, pd.DataFrame([record])], ignore_index=True)
    _upload_remote_ratings(updated)
def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
    """Return the first item of ``df`` that ``user_id`` has not rated yet.

    An item counts as rated when its index label appears in the
    ``row_index`` column of a ``ratings`` row belonging to ``user_id``.

    Returns:
        ``(index_label, question, response1, response2)`` for the first
        unrated row, or ``None`` when every row has been rated.
    """
    belongs_to_user = ratings["user_id"] == user_id
    already_rated = ratings.loc[belongs_to_user, "row_index"].tolist()
    pending = df.loc[~df.index.isin(already_rated)]
    if len(pending) == 0:
        return None
    first = pending.iloc[0]
    return first.name, first["question"], first["response1"], first["response2"]
def user_progress(user_id: str, state_df) -> str:
    """Return a human-readable progress string for ``user_id``.

    Args:
        user_id: Identifier whose ratings are counted.
        state_df: The item table for this session, or any non-DataFrame
            value (e.g. ``None``) when no table has been loaded yet.

    Returns:
        ``"Progress: 0 / 0"`` when there is no table or it is empty,
        otherwise ``"Progress: <rated> / <total> (<percent>)"``.
    """
    if not isinstance(state_df, pd.DataFrame):
        return "Progress: 0 / 0"
    total = len(state_df)
    if total == 0:
        # Nothing to rate: answer without loading (possibly downloading)
        # the ratings file.
        return "Progress: 0 / 0"

    ratings = load_ratings()
    rated_rows = ratings.loc[ratings.user_id == user_id, "row_index"].tolist()
    # Count items of this table that are rated, the same membership test
    # get_next_unrated uses. Ratings for rows that are not in the table are
    # ignored, so the figure can never exceed 100%.
    rated = int(state_df.index.isin(rated_rows).sum())
    return f"Progress: {rated} / {total} ({rated/total:.1%})"
# -----------------------------------------------------------------------------
# Gradio callbacks
# -----------------------------------------------------------------------------
def _start_outputs(user_id, state_df, progress, message, record=None):
    """Build the 10-value output tuple of ``start_or_resume``.

    Without ``record`` the evaluation widgets are hidden and the item fields
    are blank; with ``record`` (``(idx, question, answer1, answer2)``) they
    are shown and filled in.
    """
    if record is None:
        show_eval = False
        question = answer1 = answer2 = row_idx = ""
    else:
        idx, raw_question, answer1, answer2 = record
        show_eval = True
        question = "**" + raw_question + "**"
        row_idx = str(idx)
    return (
        gr.update(value=user_id, visible=True),  # identifier textbox
        gr.update(visible=show_eval),  # evaluation column
        gr.update(visible=show_eval),  # submit button
        question,
        answer1,
        answer2,
        row_idx,
        state_df,
        progress,
        message,
    )


def start_or_resume(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    consent: bool,
    state_df,  # may be None before first load
):
    """Validate the sign-in form and show the user's next unrated item.

    The item table is loaded on the first call of a session (when
    ``state_df`` is not yet a DataFrame). The form fields are then checked
    in order: identifier, proficiency, the four yes/no questions, usage
    frequency, consent. The first failing check produces an explanatory
    message and keeps the evaluation widgets hidden.

    Returns:
        A 10-tuple matching the outputs wired to the start button: updates
        for the identifier textbox, the evaluation column and the submit
        button; question markdown; both answers; the row index as a string;
        the item table; the progress string; and an info message.
    """
    # If dataset not yet loaded for this session, load it now.
    if not isinstance(state_df, pd.DataFrame):
        try:
            state_df = load_data(user_id)
        except Exception as e:
            progress = user_progress(user_id, state_df)
            return _start_outputs(
                user_id, state_df, progress, f"Dataset load failed: {e}"
            )

    # Nothing below saves a rating, so this one value stays valid for every
    # return path and the ratings file is not re-read just to rebuild it.
    progress = user_progress(user_id, state_df)

    def reject(message):
        return _start_outputs(user_id, state_df, progress, message)

    if not (user_id or "").strip():
        return reject("Please enter a non-empty identifier to begin.")
    if proficiency not in ("expert", "fluent", "basic"):
        return reject("Please select your language proficiency.")

    # (answer, label) pairs. A dict keyed by the answers would merge
    # questions that share the same answer and lose their order.
    yes_no_answers = (
        (is_native, "Is Native?"),
        (studied_second_level, "Studied Irish At Second Level?"),
        (studied_third_level, "Studied Irish At Third Level?"),
        (uses_for_work, "Use Irish for work?"),
    )
    for answer, label in yes_no_answers:
        if answer not in ("Yes", "No"):
            return reject(f"Please answer: {label}")

    if usage_frequency not in ("daily", "weekly", "monthly", "yearly"):
        return reject("Please select usage frequency.")
    if not consent:
        return reject("Please provide consent to proceed.")

    ratings = load_ratings()
    record = get_next_unrated(state_df, ratings, user_id)
    if record is None:
        return reject("🎉 You have evaluated every item – thank you!")
    return _start_outputs(user_id, state_df, progress, "", record)
def submit_preference(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    row_idx_str: str,
    choice: str,
    state_df,
):
    """Store the user's choice for the current item and show the next one.

    Returns:
        A 6-tuple matching the outputs wired to the submit button: question
        markdown, both answers, the row index as a string, the progress
        string and an info message. When the submission is rejected, the
        three display fields are no-op ``gr.update()`` values and the row
        index is returned unchanged, so the pair on screen stays as it is.
    """
    if choice not in ("Sentence 1", "Sentence 2"):
        # Keep the current pair and its row index. Blanking them here would
        # lose the item and make the next submit fail on int("").
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            row_idx_str,
            user_progress(user_id, state_df),
            "Please choose either Sentence 1 or Sentence 2 before submitting.",
        )

    try:
        row_idx = int(row_idx_str)
    except (TypeError, ValueError):
        # No item is loaded (e.g. the user already finished every item).
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            row_idx_str,
            user_progress(user_id, state_df),
            "No item is currently loaded. Press Start / Resume to continue.",
        )

    save_rating(
        user_id,
        proficiency,
        is_native,
        studied_second_level,
        studied_third_level,
        uses_for_work,
        usage_frequency,
        row_idx,
        1 if choice == "Sentence 1" else 2,
    )

    ratings = load_ratings()
    record = get_next_unrated(state_df, ratings, user_id)
    progress = user_progress(user_id, state_df)
    if record is None:
        return "", "", "", "", progress, "🎉 You have evaluated every item – thank you!"
    idx, q, a1, a2 = record
    return "**" + q + "**", a1, a2, str(idx), progress, ""
# -----------------------------------------------------------------------------
# Build Gradio interface
# -----------------------------------------------------------------------------
def build_demo():
    """Assemble and return the Gradio Blocks app.

    Layout, top to bottom: CSS for the answer boxes, the intro text, the
    sign-in form (identifier, demographic radios, consent), the start
    button, info and progress lines, then the initially hidden evaluation
    column and submit button. The item table is not loaded here;
    ``start_or_resume`` loads it on the first click.
    """
    # CSS to constrain very tall answers.
    answer_css = (
        "\n<style>\n"
        ".answerbox {\n"
        f"max-height: {MAX_HEIGHT_PX}px;\n"
        "overflow-y: auto;\n"
        "white-space: pre-wrap;\n"
        "}\n"
        "</style>\n"
    )
    intro_text = (
        "# Irish Grammatical Test\n"
        "Enter your identifier below to start or resume. "
        "Each sample is a pair of two sentences that varied by a grammatical feature. "
        "You should choose the one that you think is correct. "
        "Your progress is saved automatically so you can return at any time "
        "using the same identifier."
    )

    def yes_no_radio(label, info):
        # Components attach to the enclosing Blocks context when created.
        return gr.Radio(["Yes", "No"], label=label, info=info)

    with gr.Blocks(title="Question/Answer Preference Rater") as demo:
        gr.HTML(answer_css)
        gr.Markdown(intro_text)

        # Per-session state: the item table and the row currently on screen.
        state_df = gr.State(None)
        state_row_idx = gr.State("")

        # Sign-in form
        id_input = gr.Textbox(label="User Identifier", placeholder="e.g. alice")
        proficiency_radio = gr.Radio(
            ["expert", "fluent", "basic"],
            label="Language proficiency",
            info="Select your Irish language proficiency level.",
        )
        is_native_radio = yes_no_radio(
            "Is Native?", "Are you a native Irish speaker?"
        )
        studied_second_radio = yes_no_radio(
            "Studied Irish At Second Level?", "Did you study Irish in school?"
        )
        studied_third_radio = yes_no_radio(
            "Studied Irish At Third Level?",
            "Did you study Irish at university/third level?",
        )
        uses_for_work_radio = yes_no_radio(
            "Use Irish for work?", "Do you use Irish in your job?"
        )
        usage_frequency_radio = gr.Radio(
            ["daily", "weekly", "monthly", "yearly"],
            label="How often do you use Irish?",
        )
        consent_checkbox = gr.Checkbox(
            label="I consent to the use of my responses for research purposes."
        )
        start_btn = gr.Button("Start / Resume")
        info_md = gr.Markdown("")
        progress_md = gr.Markdown("Progress: 0 / 0")

        # Evaluation widgets (hidden until a valid start)
        with gr.Column(visible=False) as eval_col:
            question_md = gr.Markdown("")
            with gr.Row():
                answer1_md = gr.Markdown(label="Sentence A", elem_classes=["answerbox"])
                answer2_md = gr.Markdown(label="Sentence B", elem_classes=["answerbox"])
            choice_radio = gr.Radio(
                ["Sentence 1", "Sentence 2"],
                label="Which sentence is more grammatically correct?",
            )
        submit_btn = gr.Button("Submit Preference", visible=False)

        # Both callbacks take the identifier and demographics first.
        profile_inputs = [
            id_input,
            proficiency_radio,
            is_native_radio,
            studied_second_radio,
            studied_third_radio,
            uses_for_work_radio,
            usage_frequency_radio,
        ]
        item_outputs = [question_md, answer1_md, answer2_md, state_row_idx]

        start_btn.click(
            fn=start_or_resume,
            inputs=[*profile_inputs, consent_checkbox, state_df],
            outputs=[
                id_input,
                eval_col,
                submit_btn,
                *item_outputs,
                state_df,
                progress_md,
                info_md,
            ],
        )
        submit_btn.click(
            fn=submit_preference,
            inputs=[*profile_inputs, state_row_idx, choice_radio, state_df],
            outputs=[*item_outputs, progress_md, info_md],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    app = build_demo()
    app.launch()