Spaces:

tktung
/

irish_grammatical_test

Sleeping

tung

local rating file

5fb6c43 2 months ago

18 kB

	import hashlib # <-- added
	import os
	import tempfile
	from datetime import datetime
	from pathlib import Path

	import datasets
	import gradio as gr
	import pandas as pd
	from huggingface_hub import HfApi, hf_hub_download

	# ------------------------------------------------------------
	# Cloud-friendly Q/A preference rater for Hugging Face Spaces
	# ------------------------------------------------------------
	# This version swaps local CSV persistence for a tiny remote-dataset
	# workflow that works on Spaces:
	#   • Ratings are stored in (and loaded from) a lightweight **dataset
	#     repo** on the Hugging Face Hub – no local file system required.
	#   • The dataset repo is set via the `RATINGS_REPO` env-var.
	#   • You must pass a write-enabled token (env-var `HF_TOKEN`) that has
	#     `write` permission on that dataset.
	#
	# NOTE: in this revision `RATINGS_REPO` is hard-coded to `None` in the
	# configuration section below, so the remote workflow is switched off and
	# ratings are read from / written to the local file named by
	# `RATINGS_FILE`. Restore the `os.getenv("RATINGS_REPO")` assignment to
	# re-enable Hub persistence.
	#
	# Quick setup guide
	# -----------------
	# 1. Create a dataset repository to hold the ratings file, e.g.:
	#        https://huggingface.co/datasets/<org>/qa-rater-data
	# 2. Inside Space Settings ▸ Secrets, add:
	#      • `RATINGS_REPO` → <org>/qa-rater-data
	#      • `HF_TOKEN`     → a token with Write access to that repo
	# 3. Add `huggingface-hub` to your `requirements.txt` or
	#    `pip install huggingface-hub` locally.
	# 4. Deploy / push your updated Space – ratings will now persist in
	#    the dataset repo instead of the Space's ephemeral storage.
	# ------------------------------------------------------------


	# -----------------------------------------------------------------------------
	# Configuration – constants & styling
	# -----------------------------------------------------------------------------
	# NOTE(review): DATA_PATH is not referenced anywhere else in the visible
	# source – possibly a leftover from an earlier version; confirm before removing.
	DATA_PATH = "human_judgement/selected_samples.json"
	# File name of the ratings CSV. It is used both as the path inside the
	# dataset repo and as the local file name when no repo is configured.
	RATINGS_FILE = (
	"human_judgement_irish_grammatical_test.csv" # Name inside the dataset repo
	)
	# Remote persistence is currently switched off: the env-var lookup is
	# commented out and RATINGS_REPO is forced to None, which makes the rating
	# helpers below skip the Hub download and write the CSV locally instead.
	# RATINGS_REPO = os.getenv("RATINGS_REPO") # e.g. "org/qa-rater-data"
	RATINGS_REPO = None
	HF_TOKEN = os.getenv("HF_TOKEN") # write token for that repo
	MAX_HEIGHT_PX = 400 # Max visible height for answer Markdown blocks

	# Hub client; only created when a token is available, otherwise None.
	api = HfApi(token=HF_TOKEN) if HF_TOKEN else None

	# -----------------------------------------------------------------------------
	# Helper functions – data I/O
	# -----------------------------------------------------------------------------


	def user_bucket(user_id: str, buckets: int = 10) -> int:
	    """Assign ``user_id`` a stable bucket number in ``1..buckets``.

	    The identifier is UTF-8 encoded and hashed with SHA-256. The digest,
	    read as a single big-endian integer, is reduced modulo ``buckets`` and
	    shifted by one so that numbering starts at 1.
	    """
	    digest = hashlib.sha256(user_id.encode("utf-8")).digest()
	    return int.from_bytes(digest, "big") % buckets + 1


	def load_data(user_id: str) -> pd.DataFrame:
	    """Load the sentence-pair dataset shown to raters.

	    Every user currently receives the full ``train`` split of
	    ``tktung/irish_grammar_test``. ``user_id`` is accepted so callers keep
	    a stable signature, but it is not used: the earlier per-user lookup
	    (``split=f"part_{user_bucket(user_id)}"`` with a fallback to ``train``)
	    was disabled. It is summarised here rather than kept as a dead string
	    literal inside the function body.

	    Args:
	        user_id: Identifier of the rater (currently unused).

	    Returns:
	        A DataFrame holding at least the columns ``question``,
	        ``response1`` and ``response2``.

	    Raises:
	        ValueError: If any of the required columns is missing.
	    """
	    # A tuple (not a set) keeps the column order in the error message stable.
	    required = ("question", "response1", "response2")
	    ds = datasets.load_dataset("tktung/irish_grammar_test", split="train")
	    df = pd.DataFrame(ds)
	    if not set(required).issubset(df.columns):
	        raise ValueError(f"Dataset must contain columns: {', '.join(required)}")
	    return df


	# ---------- Rating persistence helpers ---------------------------------------


	def _download_remote_ratings() -> Path | None:
	    """Fetch the ratings CSV from the Hub dataset repo, if one is configured.

	    Returns:
	        Path of the downloaded copy (cached under the system temp
	        directory), or ``None`` when ``RATINGS_REPO`` is unset or the
	        download fails for any reason.
	    """
	    if not RATINGS_REPO:
	        return None
	    try:
	        local_copy = hf_hub_download(
	            repo_id=RATINGS_REPO,
	            filename=RATINGS_FILE,
	            repo_type="dataset",
	            token=HF_TOKEN,
	            cache_dir=tempfile.gettempdir(),
	        )
	    except Exception:
	        # Deliberately best-effort: the file or repo may not exist yet, in
	        # which case the caller starts from an empty ratings table.
	        return None
	    return Path(local_copy)


	def load_ratings() -> pd.DataFrame:
	    """Return every stored rating as a DataFrame.

	    Sources are tried in this order:

	    1. the CSV downloaded from the remote dataset repo,
	    2. a local file named ``RATINGS_FILE`` (local / dev runs),
	    3. an empty frame with the expected columns.

	    ``user_id`` is always read as text. Left to type inference, pandas
	    turns the column into integers whenever every stored identifier is
	    numeric (e.g. ``"1234"``). The string comparisons callers perform
	    (``ratings.user_id == user_id``) would then never match, which breaks
	    duplicate detection and resume for those users and drops leading zeros.

	    Returns:
	        The ratings table. Columns introduced after the first release are
	        added (filled with ``pd.NA``) when an older file lacks them.
	    """
	    remote = _download_remote_ratings()
	    if remote and remote.exists():
	        df = pd.read_csv(remote, dtype={"user_id": str})
	    elif RATINGS_FILE and os.path.exists(RATINGS_FILE):
	        # Running locally (dev) – load local file if present.
	        df = pd.read_csv(RATINGS_FILE, dtype={"user_id": str})
	    else:
	        df = pd.DataFrame(
	            columns=[
	                "user_id",
	                "user_bucket",
	                "row_index",
	                "choice",
	                "timestamp",
	                "proficiency",
	                "is_native",
	                "studied_second_level",
	                "studied_third_level",
	                "uses_for_work",
	                "usage_frequency",
	            ]
	        )
	    # Backward compatibility: ensure columns added in later versions exist.
	    required_cols = [
	        "proficiency",
	        "is_native",
	        "studied_second_level",
	        "studied_third_level",
	        "uses_for_work",
	        "usage_frequency",
	        "user_bucket",
	    ]
	    for col in required_cols:
	        if col not in df.columns:
	            df[col] = pd.NA
	    return df


	def _upload_remote_ratings(df: pd.DataFrame):
	    """Persist the full ratings table.

	    When both ``RATINGS_REPO`` and an authenticated ``api`` client are
	    available, the table is written as CSV into a throwaway directory and
	    uploaded to the dataset repo as one commit. Otherwise the CSV is
	    written to ``RATINGS_FILE`` relative to the working directory.
	    """
	    remote_enabled = bool(RATINGS_REPO and api)
	    if not remote_enabled:
	        # Local / dev run: keep the ratings next to the app for inspection.
	        df.to_csv(RATINGS_FILE, index=False)
	        return

	    with tempfile.TemporaryDirectory() as scratch:
	        staged = Path(scratch).joinpath(RATINGS_FILE)
	        # RATINGS_FILE may contain sub-directories; create them first.
	        staged.parent.mkdir(parents=True, exist_ok=True)
	        df.to_csv(staged, index=False)
	        api.upload_file(
	            path_or_fileobj=str(staged),
	            path_in_repo=RATINGS_FILE,
	            repo_id=RATINGS_REPO,
	            repo_type="dataset",
	            commit_message="Add/Update rating",
	        )


	def save_rating(
	    user_id: str,
	    proficiency: str,
	    is_native: str,
	    studied_second_level: str,
	    studied_third_level: str,
	    uses_for_work: str,
	    usage_frequency: str,
	    row_index: int,
	    choice: int,
	):
	    """Record one rating together with the rater's questionnaire answers.

	    Nothing is stored when this user already has a rating for
	    ``row_index``. Otherwise a row is appended to the current ratings
	    table and the whole table is persisted again. Questionnaire answers
	    are stored stripped and lower-cased (missing answers become ``""``).
	    """
	    ratings = load_ratings()
	    same_user = ratings.user_id == user_id
	    same_row = ratings.row_index == row_index
	    if (same_user & same_row).any():
	        return

	    def _clean(answer):
	        return (answer or "").strip().lower()

	    questionnaire = (
	        ("proficiency", proficiency),
	        ("is_native", is_native),
	        ("studied_second_level", studied_second_level),
	        ("studied_third_level", studied_third_level),
	        ("uses_for_work", uses_for_work),
	        ("usage_frequency", usage_frequency),
	    )
	    new_entry = {
	        "user_id": user_id,
	        "user_bucket": user_bucket(user_id),
	        **{key: _clean(answer) for key, answer in questionnaire},
	        "row_index": row_index,
	        "choice": choice,
	        "timestamp": datetime.utcnow().isoformat(),
	    }
	    appended = pd.concat([ratings, pd.DataFrame([new_entry])], ignore_index=True)
	    _upload_remote_ratings(appended)


	def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
	    """Return the first row of ``df`` that ``user_id`` has not rated yet.

	    A row counts as rated when its index label appears in the
	    ``row_index`` column of this user's entries in ``ratings``.

	    Returns:
	        ``(index_label, question, response1, response2)`` for the first
	        pending row, or ``None`` when nothing is left to rate.
	    """
	    by_this_user = ratings["user_id"] == user_id
	    done_labels = ratings.loc[by_this_user, "row_index"].to_list()
	    pending = df.loc[~df.index.isin(done_labels)]
	    if pending.empty:
	        return None
	    first = pending.iloc[0]
	    return first.name, first["question"], first["response1"], first["response2"]


	def user_progress(user_id: str, state_df) -> str:
	    """Format how many dataset rows ``user_id`` has rated so far.

	    ``state_df`` is the per-session dataset. When it is not a DataFrame
	    (nothing loaded yet) or has no rows, the result is the fixed
	    ``"Progress: 0 / 0"`` text. Otherwise the stored ratings are loaded
	    and the number of distinct rated rows is reported with a percentage.
	    """
	    nothing_yet = "Progress: 0 / 0"
	    if not isinstance(state_df, pd.DataFrame):
	        return nothing_yet
	    all_ratings = load_ratings()
	    mine = all_ratings.loc[all_ratings.user_id == user_id, "row_index"]
	    done = mine.nunique()
	    total = len(state_df)
	    if total == 0:
	        return nothing_yet
	    fraction = done / total
	    return f"Progress: {done} / {total} ({fraction:.1%})"


	# -----------------------------------------------------------------------------
	# Gradio callbacks
	# -----------------------------------------------------------------------------


	def _start_blocked(user_id, state_df, progress, message):
	    """Build the ``start_or_resume`` outputs that keep the rating panel hidden."""
	    return (
	        gr.update(value=user_id, visible=True),
	        gr.update(visible=False),
	        gr.update(visible=False),
	        "",
	        "",
	        "",
	        "",
	        state_df,
	        progress,
	        message,
	    )


	def start_or_resume(
	    user_id: str,
	    proficiency: str,
	    is_native: str,
	    studied_second_level: str,
	    studied_third_level: str,
	    uses_for_work: str,
	    usage_frequency: str,
	    consent: bool,
	    state_df,  # may be None before first load
	):
	    """Validate the intake form and show the user's next unrated item.

	    The dataset is loaded into the session on first use. The form is then
	    checked field by field; the first problem found is reported and the
	    rating panel stays hidden. When everything is valid, the first item
	    this user has not rated yet is displayed.

	    Returns:
	        A 10-tuple in the order of the ``start_btn.click`` outputs:
	        identifier textbox update, rating column update, submit button
	        update, question text, sentence 1, sentence 2, row index (as
	        ``str``), session dataset, progress text, info message.
	    """
	    # If dataset not yet loaded for this session, load it now.
	    if not isinstance(state_df, pd.DataFrame):
	        try:
	            state_df = load_data(user_id)
	        except Exception as e:
	            return _start_blocked(
	                user_id,
	                state_df,
	                user_progress(user_id, state_df),
	                f"Dataset load failed: {e}",
	            )

	    # Nothing below writes ratings, so one progress computation serves
	    # every return path (each computation re-reads the ratings store).
	    progress = user_progress(user_id, state_df)

	    if not (user_id or "").strip():
	        return _start_blocked(
	            user_id,
	            state_df,
	            progress,
	            "Please enter a non-empty identifier to begin.",
	        )
	    if proficiency not in {"expert", "fluent", "basic"}:
	        return _start_blocked(
	            user_id, state_df, progress, "Please select your language proficiency."
	        )

	    # (answer, label) pairs rather than a dict keyed by the answer: equal
	    # answers would collapse into one key and the wrong question would be
	    # named in the error message.
	    yes_no_answers = [
	        (is_native, "Is Native?"),
	        (studied_second_level, "Studied Irish At Second Level?"),
	        (studied_third_level, "Studied Irish At Third Level?"),
	        (uses_for_work, "Use Irish for work?"),
	    ]
	    for val, label in yes_no_answers:
	        if val not in {"Yes", "No"}:
	            return _start_blocked(
	                user_id, state_df, progress, f"Please answer: {label}"
	            )

	    if usage_frequency not in {"daily", "weekly", "monthly", "yearly"}:
	        return _start_blocked(
	            user_id, state_df, progress, "Please select usage frequency."
	        )
	    if not consent:
	        return _start_blocked(
	            user_id, state_df, progress, "Please provide consent to proceed."
	        )

	    ratings = load_ratings()
	    record = get_next_unrated(state_df, ratings, user_id)
	    if record is None:
	        return _start_blocked(
	            user_id,
	            state_df,
	            progress,
	            "🎉 You have evaluated every item – thank you!",
	        )

	    idx, q, a1, a2 = record
	    return (
	        gr.update(value=user_id, visible=True),
	        gr.update(visible=True),
	        gr.update(visible=True),
	        "" + q + "",
	        a1,
	        a2,
	        str(idx),
	        state_df,
	        progress,
	        "",
	    )


	def _row_index_or_none(row_idx_str, state_df):
	    """Return ``row_idx_str`` as an index label of ``state_df``, else ``None``."""
	    if not isinstance(state_df, pd.DataFrame):
	        return None
	    try:
	        row_idx = int(row_idx_str)
	    except (TypeError, ValueError):
	        return None
	    return row_idx if row_idx in state_df.index else None


	def submit_preference(
	    user_id: str,
	    proficiency: str,
	    is_native: str,
	    studied_second_level: str,
	    studied_third_level: str,
	    uses_for_work: str,
	    usage_frequency: str,
	    row_idx_str: str,
	    choice: str,
	    state_df,
	):
	    """Store the user's choice for the current item and advance to the next.

	    Returns:
	        A 6-tuple in the order of the ``submit_btn.click`` outputs:
	        question text, sentence 1, sentence 2, row index (as ``str``),
	        progress text, info message.

	    Behaviour on incomplete input:
	        * No choice selected – the current item is re-emitted unchanged
	          together with a reminder. Returning blanks here would wipe the
	          item and the stored row index.
	        * No valid current row (blank index, e.g. after the last item was
	          rated) – nothing is saved; the next pending item or the
	          completion message is shown instead of failing on ``int("")``.
	        * No dataset in the session – the user is asked to start first.
	    """
	    row_idx = _row_index_or_none(row_idx_str, state_df)

	    if choice not in {"Sentence 1", "Sentence 2"}:
	        progress = user_progress(user_id, state_df)
	        message = "Please choose either Sentence 1 or Sentence 2 before submitting."
	        if row_idx is None:
	            return "", "", "", "", progress, message
	        current = state_df.loc[row_idx]
	        return (
	            "" + current.question + "",
	            current.response1,
	            current.response2,
	            str(row_idx),
	            progress,
	            message,
	        )

	    if not isinstance(state_df, pd.DataFrame):
	        return (
	            "",
	            "",
	            "",
	            "",
	            user_progress(user_id, state_df),
	            "Please press Start / Resume to load your items first.",
	        )

	    if row_idx is not None:
	        save_rating(
	            user_id,
	            proficiency,
	            is_native,
	            studied_second_level,
	            studied_third_level,
	            uses_for_work,
	            usage_frequency,
	            row_idx,
	            1 if choice == "Sentence 1" else 2,
	        )

	    ratings = load_ratings()
	    record = get_next_unrated(state_df, ratings, user_id)
	    progress = user_progress(user_id, state_df)
	    if record is None:
	        return "", "", "", "", progress, "🎉 You have evaluated every item – thank you!"
	    idx, q, a1, a2 = record
	    return "" + q + "", a1, a2, str(idx), progress, ""


	# -----------------------------------------------------------------------------
	# Build Gradio interface
	# -----------------------------------------------------------------------------


	def build_demo():
	    """Assemble the Gradio Blocks app and wire its two callbacks.

	    Layout, top to bottom: intro text, the intake form (identifier,
	    questionnaire, consent, Start / Resume button), info and progress
	    lines, then the initially hidden rating panel with the two sentences,
	    the choice radio and the submit button.

	    The dataset is not loaded here; ``start_or_resume`` loads it into the
	    ``state_df`` session state on the first click.

	    Returns:
	        The ``gr.Blocks`` instance (not yet launched).
	    """
	    # CSS to constrain very tall answers. Built from single-line pieces so
	    # the markup does not depend on how this source file is indented.
	    overflow_css = (
	        "<style>\n"
	        ".answerbox {\n"
	        f"max-height: {MAX_HEIGHT_PX}px;\n"
	        "overflow-y: auto;\n"
	        "white-space: pre-wrap;\n"
	        "}\n"
	        "</style>\n"
	    )

	    with gr.Blocks(title="Question/Answer Preference Rater") as demo:
	        gr.HTML(overflow_css)

	        # Explicit "\n" after the heading: leading whitespace on the
	        # paragraph line would change how the Markdown is rendered.
	        gr.Markdown(
	            "# Irish Grammatical Test\n"
	            "Enter your identifier below to start or resume. Each sample is a pair of two sentences that varied by a grammatical feature. You should choose the one that you think is correct. Your progress is saved automatically so you can return at any time using the same identifier."
	        )

	        state_df = gr.State(None)  # filled by start_or_resume on first click
	        state_row_idx = gr.State("")

	        # Intake form
	        id_input = gr.Textbox(label="User Identifier", placeholder="e.g. alice")
	        proficiency_radio = gr.Radio(
	            ["expert", "fluent", "basic"],
	            label="Language proficiency",
	            info="Select your Irish language proficiency level.",
	        )
	        is_native_radio = gr.Radio(
	            ["Yes", "No"], label="Is Native?", info="Are you a native Irish speaker?"
	        )
	        studied_second_radio = gr.Radio(
	            ["Yes", "No"],
	            label="Studied Irish At Second Level?",
	            info="Did you study Irish in school?",
	        )
	        studied_third_radio = gr.Radio(
	            ["Yes", "No"],
	            label="Studied Irish At Third Level?",
	            info="Did you study Irish at university/third level?",
	        )
	        uses_for_work_radio = gr.Radio(
	            ["Yes", "No"],
	            label="Use Irish for work?",
	            info="Do you use Irish in your job?",
	        )
	        usage_frequency_radio = gr.Radio(
	            ["daily", "weekly", "monthly", "yearly"],
	            label="How often do you use Irish?",
	        )
	        consent_checkbox = gr.Checkbox(
	            label="I consent to the use of my responses for research purposes."
	        )
	        start_btn = gr.Button("Start / Resume")

	        info_md = gr.Markdown("")
	        progress_md = gr.Markdown("Progress: 0 / 0")

	        # Evaluation widgets
	        with gr.Column(visible=False) as eval_col:
	            question_md = gr.Markdown("")
	            with gr.Row():
	                # Labels use the same "Sentence 1/2" wording as the choices.
	                answer1_md = gr.Markdown(label="Sentence 1", elem_classes=["answerbox"])
	                answer2_md = gr.Markdown(label="Sentence 2", elem_classes=["answerbox"])
	            choice_radio = gr.Radio(
	                ["Sentence 1", "Sentence 2"],
	                label="Which sentence is more grammatically correct?",
	            )
	            submit_btn = gr.Button("Submit Preference", visible=False)

	        # The questionnaire inputs are shared by both callbacks.
	        questionnaire_inputs = [
	            id_input,
	            proficiency_radio,
	            is_native_radio,
	            studied_second_radio,
	            studied_third_radio,
	            uses_for_work_radio,
	            usage_frequency_radio,
	        ]

	        start_btn.click(
	            fn=start_or_resume,
	            inputs=questionnaire_inputs + [consent_checkbox, state_df],
	            outputs=[
	                id_input,
	                eval_col,
	                submit_btn,
	                question_md,
	                answer1_md,
	                answer2_md,
	                state_row_idx,
	                state_df,
	                progress_md,
	                info_md,
	            ],
	        )

	        submit_event = submit_btn.click(
	            fn=submit_preference,
	            inputs=questionnaire_inputs + [state_row_idx, choice_radio, state_df],
	            outputs=[
	                question_md,
	                answer1_md,
	                answer2_md,
	                state_row_idx,
	                progress_md,
	                info_md,
	            ],
	        )
	        # Clear the selection once a submit has been handled; otherwise the
	        # previous answer stays pre-selected for the next sentence pair.
	        submit_event.then(fn=lambda: None, inputs=None, outputs=choice_radio)
	    return demo


	if __name__ == "__main__":
	    # Script entry point: build the Blocks app, then start the Gradio
	    # server with its default launch settings.
	    demo = build_demo()
	    demo.launch()