import hashlib  # <-- added
import os
import tempfile
from datetime import datetime
from pathlib import Path

import datasets
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# ------------------------------------------------------------
# Cloud-friendly Q/A preference rater for **Hugging Face Spaces**
# ------------------------------------------------------------
# This version swaps local CSV persistence for a small remote-dataset
# workflow that works on Spaces:
#   • Ratings are stored in (and loaded from) a lightweight **dataset
#     repo** on the Hugging Face Hub – no local file system required.
#   • The dataset repo is set via the `RATINGS_REPO` env var.
#   • A token with *write* permission on that dataset must be provided
#     via the `HF_TOKEN` env var.
#
# Quick setup guide
# -----------------
# 1. Create a dataset repository to hold the ratings file, e.g.:
#      https://huggingface.co/datasets/<org>/qa-rater-data
# 2. Inside **Space Settings ▸ Secrets**, add:
#      • `RATINGS_REPO` → <org>/qa-rater-data
#      • `HF_TOKEN`     → a token with *write* access to that repo
# 3. Add `huggingface_hub` to your `requirements.txt` (or
#    `pip install huggingface_hub` locally).
# 4. Deploy / push your updated Space – ratings will now persist in
#    the dataset repo instead of the Space's ephemeral storage.
# ------------------------------------------------------------
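# A minimal sketch of step 1, in case you prefer to create the ratings repo
# programmatically rather than through the website. The repo name below is
# illustrative, not the real one:
#
#   from huggingface_hub import HfApi
#   HfApi(token=os.environ["HF_TOKEN"]).create_repo(
#       "<org>/qa-rater-data", repo_type="dataset", private=True
#   )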
# -----------------------------------------------------------------------------
# Configuration – constants & styling
# -----------------------------------------------------------------------------
DATA_PATH = "human_judgement/selected_samples.json"  # local sample file (unused in this Spaces version)
RATINGS_FILE = (
    "human_judgement_irish_grammatical_test.csv"  # Name *inside* the dataset repo
)
# RATINGS_REPO = os.getenv("RATINGS_REPO")  # e.g. "org/qa-rater-data"
RATINGS_REPO = None  # When None, ratings are written to a local CSV instead of the Hub
HF_TOKEN = os.getenv("HF_TOKEN")  # write token for that repo
MAX_HEIGHT_PX = 400  # Max visible height for answer Markdown blocks

api = HfApi(token=HF_TOKEN) if HF_TOKEN else None

# -----------------------------------------------------------------------------
# Helper functions – data I/O
# -----------------------------------------------------------------------------
def user_bucket(user_id: str, buckets: int = 10) -> int:
    """Deterministically map user_id to 1..buckets."""
    h = hashlib.sha256(user_id.encode("utf-8")).hexdigest()
    return (int(h, 16) % buckets) + 1
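
# Illustrative note (not part of the app logic): the bucket depends only on the
# identifier string, so the same user is mapped to the same bucket on every
# visit, e.g. user_bucket("alice") always returns the same value in 1..10.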


def load_data(user_id: str) -> pd.DataFrame:
    """
    Load the evaluation dataset for this user.

    Per-user split selection (mapping the hashed user_id to one of ten
    splits such as ``part_1`` .. ``part_10``) is currently disabled, so
    every user receives the full ``train`` split.
    """
    # Disabled per-user split loading, kept for reference:
    #     bucket = user_bucket(user_id)
    #     try:
    #         ds = datasets.load_dataset("tktung/irish_grammar_test", split=f"part_{bucket}")
    #     except Exception:
    #         ds = datasets.load_dataset("tktung/irish_grammar_test", split="train")
    ds = datasets.load_dataset("tktung/irish_grammar_test", split="train")
    df = pd.DataFrame(ds)
    required = {"question", "response1", "response2"}
    if not required.issubset(df.columns):
        raise ValueError(f"Dataset must contain columns: {', '.join(required)}")
    return df

# ---------- Rating persistence helpers ---------------------------------------
def _download_remote_ratings() -> Path | None:
    """Try to fetch the current ratings file from the Hub; returns path or None."""
    if not RATINGS_REPO:
        return None
    try:
        return Path(
            hf_hub_download(
                repo_id=RATINGS_REPO,
                filename=RATINGS_FILE,
                repo_type="dataset",
                token=HF_TOKEN,
                cache_dir=tempfile.gettempdir(),
            )
        )
    except Exception:
        # File/repo may not exist yet – caller will create empty DF.
        return None


def load_ratings() -> pd.DataFrame:
    """Return ratings DataFrame from remote repo (or empty if none)."""
    remote = _download_remote_ratings()
    if remote and remote.exists():
        df = pd.read_csv(remote)
    elif RATINGS_FILE and os.path.exists(RATINGS_FILE):
        # Running locally (dev) – load local file if present.
        df = pd.read_csv(RATINGS_FILE)
    else:
        df = pd.DataFrame(
            columns=[
                "user_id",
                "user_bucket",  # added
                "row_index",
                "choice",
                "timestamp",
                "proficiency",
                "is_native",
                "studied_second_level",
                "studied_third_level",
                "uses_for_work",
                "usage_frequency",
            ]
        )
    # Backward compatibility: ensure new columns exist
    required_cols = [
        "proficiency",
        "is_native",
        "studied_second_level",
        "studied_third_level",
        "uses_for_work",
        "usage_frequency",
        "user_bucket",  # added
    ]
    for col in required_cols:
        if col not in df.columns:
            df[col] = pd.NA
    return df


def _upload_remote_ratings(df: pd.DataFrame):
    """Upload the CSV to the dataset repo, one commit per save."""
    if not (RATINGS_REPO and api):
        # Running locally (dev) – save to a local CSV for inspection.
        df.to_csv(RATINGS_FILE, index=False)
        return
    with tempfile.TemporaryDirectory() as tmpdir:
        csv_path = Path(tmpdir) / RATINGS_FILE
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(csv_path, index=False)
        api.upload_file(
            path_or_fileobj=str(csv_path),
            path_in_repo=RATINGS_FILE,
            repo_id=RATINGS_REPO,
            repo_type="dataset",
            commit_message="Add/Update rating",
        )
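
# Design note: one commit per rating is simple but can become slow or
# rate-limited with many concurrent raters. A hedged alternative sketch,
# assuming a recent huggingface_hub version that ships CommitScheduler
# (the local folder name is illustrative):
#
#   from huggingface_hub import CommitScheduler
#   scheduler = CommitScheduler(
#       repo_id=RATINGS_REPO,
#       repo_type="dataset",
#       folder_path="ratings_local",   # local folder synced to the repo
#       every=5,                       # push at most every 5 minutes
#       token=HF_TOKEN,
#   )
#   # Files written under "ratings_local/" would then be committed in batches.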


def save_rating(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    row_index: int,
    choice: int,
):
    """Append a rating (deduplicated) and push to the Hub (stores demographics)."""
    ratings = load_ratings()
    duplicate = (ratings.user_id == user_id) & (ratings.row_index == row_index)
    if duplicate.any():
        return

    norm = lambda x: (x or "").strip().lower()
    bucket = user_bucket(user_id)  # added
    new_entry = {
        "user_id": user_id,
        "user_bucket": bucket,  # added
        "proficiency": norm(proficiency),
        "is_native": norm(is_native),
        "studied_second_level": norm(studied_second_level),
        "studied_third_level": norm(studied_third_level),
        "uses_for_work": norm(uses_for_work),
        "usage_frequency": norm(usage_frequency),
        "row_index": row_index,
        "choice": choice,
        "timestamp": datetime.utcnow().isoformat(),
    }
    ratings = pd.concat([ratings, pd.DataFrame([new_entry])], ignore_index=True)
    _upload_remote_ratings(ratings)


def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
    """Return (index, question, response1, response2) for the next unrated row, or None."""
    rated = ratings.loc[ratings.user_id == user_id, "row_index"].tolist()
    unrated = df[~df.index.isin(rated)]
    if unrated.empty:
        return None
    row = unrated.iloc[0]
    return row.name, row.question, row.response1, row.response2


def user_progress(user_id: str, state_df) -> str:
    """Return progress string for this user."""
    if not isinstance(state_df, pd.DataFrame):
        return "Progress: 0 / 0"
    ratings = load_ratings()
    rated = ratings.loc[ratings.user_id == user_id, "row_index"].nunique()
    total = len(state_df)
    if total == 0:
        return "Progress: 0 / 0"
    return f"Progress: {rated} / {total} ({rated/total:.1%})"


# -----------------------------------------------------------------------------
# Gradio callbacks
# -----------------------------------------------------------------------------
def start_or_resume(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    consent: bool,
    state_df,  # may be None before first load
):
    def form_error(message: str):
        """Keep the intro form visible, hide the evaluation widgets, show a message."""
        return (
            gr.update(value=user_id, visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            "",
            "",
            "",
            "",
            state_df,
            user_progress(user_id, state_df),
            message,
        )

    # If the dataset is not yet loaded for this session, load it now.
    if not isinstance(state_df, pd.DataFrame):
        try:
            state_df = load_data(user_id)
        except Exception as e:
            return form_error(f"Dataset load failed: {e}")

    # Validate identifier, demographics and consent before showing any items.
    if not user_id.strip():
        return form_error("Please enter a non-empty identifier to begin.")
    if proficiency not in {"expert", "fluent", "basic"}:
        return form_error("Please select your language proficiency.")
    required_yes_no = [
        (is_native, "Is Native?"),
        (studied_second_level, "Studied Irish At Second Level?"),
        (studied_third_level, "Studied Irish At Third Level?"),
        (uses_for_work, "Use Irish for work?"),
    ]
    for val, label in required_yes_no:
        if val not in {"Yes", "No"}:
            return form_error(f"Please answer: {label}")
    if usage_frequency not in {"daily", "weekly", "monthly", "yearly"}:
        return form_error("Please select usage frequency.")
    if not consent:
        return form_error("Please provide consent to proceed.")

    ratings = load_ratings()
    record = get_next_unrated(state_df, ratings, user_id)
    progress = user_progress(user_id, state_df)
    if record is None:
        return (
            gr.update(value=user_id, visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            "",
            "",
            "",
            "",
            state_df,
            progress,
            "🎉 You have evaluated every item – thank you!",
        )
    idx, q, a1, a2 = record
    return (
        gr.update(value=user_id, visible=True),
        gr.update(visible=True),
        gr.update(visible=True),
        "**" + q + "**",
        a1,
        a2,
        str(idx),
        state_df,
        progress,
        "",
    )


def submit_preference(
    user_id: str,
    proficiency: str,
    is_native: str,
    studied_second_level: str,
    studied_third_level: str,
    uses_for_work: str,
    usage_frequency: str,
    row_idx_str: str,
    choice: str,
    state_df,
):
    if choice not in {"Sentence 1", "Sentence 2"}:
        progress = user_progress(user_id, state_df)
        return (
            "",
            "",
            "",
            "",
            progress,
            "Please choose either Sentence 1 or Sentence 2 before submitting.",
        )
    if not row_idx_str:
        # No item is currently loaded (e.g. everything already rated) – avoid int("") below.
        progress = user_progress(user_id, state_df)
        return "", "", "", "", progress, "No item is currently loaded – press Start / Resume."
    row_idx = int(row_idx_str)
    save_rating(
        user_id,
        proficiency,
        is_native,
        studied_second_level,
        studied_third_level,
        uses_for_work,
        usage_frequency,
        row_idx,
        1 if choice == "Sentence 1" else 2,
    )
    ratings = load_ratings()
    record = get_next_unrated(state_df, ratings, user_id)
    progress = user_progress(user_id, state_df)
    if record is None:
        return "", "", "", "", progress, "🎉 You have evaluated every item – thank you!"
    idx, q, a1, a2 = record
    return "**" + q + "**", a1, a2, str(idx), progress, ""


# -----------------------------------------------------------------------------
# Build Gradio interface
# -----------------------------------------------------------------------------
def build_demo():
    # Removed upfront dataset load; defer until user supplies ID
    # df = load_data()

    # CSS to constrain very tall answers
    overflow_css = f"""
    <style>
    .answerbox {{
        max-height: {MAX_HEIGHT_PX}px;
        overflow-y: auto;
        white-space: pre-wrap;
    }}
    </style>
    """
    with gr.Blocks(title="Question/Answer Preference Rater") as demo:
        gr.HTML(overflow_css)
        gr.Markdown(
            """# Irish Grammatical Test
Enter your identifier below to start or resume. Each sample is a pair of sentences that differ by a grammatical feature; choose the one you think is correct. Your progress is saved automatically, so you can return at any time using the same identifier."""
        )
        state_df = gr.State(None)  # will be filled after hashing user_id
        state_row_idx = gr.State("")

        # Identifier input
        id_input = gr.Textbox(label="User Identifier", placeholder="e.g. alice")
        proficiency_radio = gr.Radio(
            ["expert", "fluent", "basic"],
            label="Language proficiency",
            info="Select your Irish language proficiency level.",
        )
        is_native_radio = gr.Radio(
            ["Yes", "No"], label="Is Native?", info="Are you a native Irish speaker?"
        )
        studied_second_radio = gr.Radio(
            ["Yes", "No"],
            label="Studied Irish At Second Level?",
            info="Did you study Irish in school?",
        )
        studied_third_radio = gr.Radio(
            ["Yes", "No"],
            label="Studied Irish At Third Level?",
            info="Did you study Irish at university/third level?",
        )
        uses_for_work_radio = gr.Radio(
            ["Yes", "No"],
            label="Use Irish for work?",
            info="Do you use Irish in your job?",
        )
        usage_frequency_radio = gr.Radio(
            ["daily", "weekly", "monthly", "yearly"],
            label="How often do you use Irish?",
        )
        consent_checkbox = gr.Checkbox(  # <-- added
            label="I consent to the use of my responses for research purposes."
        )
        start_btn = gr.Button("Start / Resume")
        info_md = gr.Markdown("")
        progress_md = gr.Markdown("Progress: 0 / 0")  # <-- added
        # Evaluation widgets
        with gr.Column(visible=False) as eval_col:
            question_md = gr.Markdown("")
            with gr.Row():
                answer1_md = gr.Markdown(label="Sentence 1", elem_classes=["answerbox"])
                answer2_md = gr.Markdown(label="Sentence 2", elem_classes=["answerbox"])
            choice_radio = gr.Radio(
                ["Sentence 1", "Sentence 2"],
                label="Which sentence is more grammatically correct?",
            )
            submit_btn = gr.Button("Submit Preference", visible=False)
        # Callbacks wiring (added progress_md in outputs)
        start_btn.click(
            fn=start_or_resume,
            inputs=[
                id_input,
                proficiency_radio,
                is_native_radio,
                studied_second_radio,
                studied_third_radio,
                uses_for_work_radio,
                usage_frequency_radio,
                consent_checkbox,
                state_df,
            ],
            outputs=[
                id_input,
                eval_col,
                submit_btn,
                question_md,
                answer1_md,
                answer2_md,
                state_row_idx,
                state_df,
                progress_md,  # <-- added
                info_md,
            ],
        )
        submit_btn.click(
            fn=submit_preference,
            inputs=[
                id_input,
                proficiency_radio,
                is_native_radio,
                studied_second_radio,
                studied_third_radio,
                uses_for_work_radio,
                usage_frequency_radio,
                state_row_idx,
                choice_radio,
                state_df,
            ],
            outputs=[
                question_md,
                answer1_md,
                answer2_md,
                state_row_idx,
                progress_md,  # <-- added
                info_md,
            ],
        )
    return demo


if __name__ == "__main__":
    build_demo().launch()
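
# Local dev sketch (assumptions: this file is saved as app.py and the
# `RATINGS_REPO = os.getenv(...)` line above is re-enabled; the repo name
# and token value are illustrative):
#
#   export HF_TOKEN=hf_xxx                    # token with write access
#   export RATINGS_REPO=<org>/qa-rater-data
#   python app.py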