tung committed on
Commit
8d295df
Β·
0 Parent(s):

initial commit

Browse files
Files changed (2) hide show
  1. app.py +279 -0
  2. human_judgement/selected_samples.json +3 -0
app.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
+ import tempfile
+ from datetime import datetime, timezone
+ from pathlib import Path
+
+ import gradio as gr
+ import pandas as pd
+ from huggingface_hub import HfApi, hf_hub_download
9
+
10
+ # ------------------------------------------------------------
11
+ # Cloud‑friendly Q/A preference rater for **Hugging Face Spaces**
12
+ # ------------------------------------------------------------
13
+ # This version swaps local CSV persistence for a tiny remote‑dataset
14
+ # workflow that works on Spaces:
15
+ # β€’ Ratings are stored in (and loaded from) a lightweight **dataset
16
+ # repo** on the Hugging Face Hub – no local file system required.
17
+ # β€’ The dataset repo is set via the `RATINGS_REPO` env‑var.
18
+ # β€’ You must pass a write‑enabled token (env‑var `HF_TOKEN`) that has
19
+ # `write` permission on that dataset.
20
+ #
21
+ # Quick setup guide
22
+ # -----------------
23
+ # 1. Create a dataset repository to hold the ratings file, e.g.:
24
+ # https://huggingface.co/datasets/<org>/qa-rater-data
25
+ # 2. Inside **Space Settings β–Έ Secrets**, add:
26
+ # • `RATINGS_REPO` → <org>/qa-rater-data
27
+ # β€’ `HF_TOKEN` β†’ a token with *Write* access to that repo
28
+ # 3. Add `huggingface-hub` to your `requirements.txt` or
29
+ # `pip install huggingface-hub` locally.
30
+ # 4. Deploy / push your updated Space – ratings will now persist in
31
+ # the dataset repo instead of the Space’s ephemeral storage.
32
+ # ------------------------------------------------------------
33
+
34
+
35
+ # -----------------------------------------------------------------------------
36
+ # Configuration – constants & styling
37
+ # -----------------------------------------------------------------------------
38
+ DATA_PATH = "human_judgement/selected_samples.json"
39
+ RATINGS_FILE = "human_judgement/human_judgement.csv" # Name *inside* the dataset repo
40
+ RATINGS_REPO = os.getenv("RATINGS_REPO") # e.g. "org/qa‑rater‑data"
41
+ HF_TOKEN = os.getenv("HF_TOKEN") # write token for that repo
42
+ MAX_HEIGHT_PX = 400 # Max visible height for answer Markdown blocks
43
+
44
+ api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
45
+
46
+ # -----------------------------------------------------------------------------
47
+ # Helper functions – data I/O
48
+ # -----------------------------------------------------------------------------
49
+
50
+
51
def load_data(path: str = DATA_PATH) -> pd.DataFrame:
    """Read the static Q/A samples bundled with the Space repo.

    The file is JSON-lines (one record per line), not CSV — the original
    docstring and error message said "CSV", which contradicted the
    ``pd.read_json(path, lines=True)`` call below.

    Parameters
    ----------
    path : str
        Path to the JSON-lines sample file (defaults to ``DATA_PATH``).

    Returns
    -------
    pd.DataFrame
        Must contain at least the columns ``question``, ``response1``,
        ``response2``.

    Raises
    ------
    FileNotFoundError
        If the file is missing from the repo.
    ValueError
        If any required column is absent.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(
            f"Could not find data file at {path} – did you upload it?"
        )
    df = pd.read_json(path, lines=True)
    required = {"question", "response1", "response2"}
    if not required.issubset(df.columns):
        # sorted() keeps the error message deterministic (set order is not).
        raise ValueError(
            f"Data file must contain columns: {', '.join(sorted(required))}"
        )
    return df
62
+
63
+
64
+ # ---------- Rating persistence helpers ---------------------------------------
65
+
66
+
67
def _download_remote_ratings() -> Path | None:
    """Fetch the current ratings CSV from the Hub dataset repo.

    Returns the local cached path, or None when ``RATINGS_REPO`` is unset
    or the download fails (e.g. the file/repo does not exist yet).
    """
    if not RATINGS_REPO:
        return None
    try:
        local_path = hf_hub_download(
            repo_id=RATINGS_REPO,
            filename=RATINGS_FILE,
            repo_type="dataset",
            token=HF_TOKEN,
            cache_dir=tempfile.gettempdir(),
        )
    except Exception:
        # Repo or file may not exist yet – the caller builds an empty frame.
        return None
    return Path(local_path)
84
+
85
+
86
def load_ratings() -> pd.DataFrame:
    """Load the ratings table from the dataset repo.

    Falls back to an empty frame with the canonical schema when no remote
    ratings file is available yet.
    """
    csv_path = _download_remote_ratings()
    if csv_path is not None and csv_path.exists():
        return pd.read_csv(csv_path)
    return pd.DataFrame(columns=["user_id", "row_index", "choice", "timestamp"])
92
+
93
+
94
def _upload_remote_ratings(df: pd.DataFrame):
    """Persist the full ratings table, one Hub commit per save.

    Without a configured repo/token (local development) the CSV is written
    to ``RATINGS_FILE`` on the local filesystem instead, for inspection.
    """
    if not (RATINGS_REPO and api):
        # Dev fallback: no Hub credentials, keep a local copy.
        df.to_csv(RATINGS_FILE, index=False)
        return

    with tempfile.TemporaryDirectory() as workdir:
        staged_csv = Path(workdir) / RATINGS_FILE
        # RATINGS_FILE contains a subdirectory, so create it first.
        staged_csv.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(staged_csv, index=False)
        api.upload_file(
            path_or_fileobj=str(staged_csv),
            path_in_repo=RATINGS_FILE,
            repo_id=RATINGS_REPO,
            repo_type="dataset",
            commit_message="Add/Update rating",
        )
112
+
113
+
114
def save_rating(user_id: str, row_index: int, choice: int):
    """Append one preference rating and push the table to the Hub.

    Deduplicated per (user_id, row_index): if this user already rated this
    row, the call is a no-op and the first answer is kept.

    Parameters
    ----------
    user_id : str
        Rater identifier as typed in the UI.
    row_index : int
        Index label of the rated row in the samples DataFrame.
    choice : int
        1 for answer1, 2 for answer2.

    NOTE(review): this is a read-modify-write against a shared remote file;
    two concurrent raters saving at the same moment could lose one entry.
    """
    ratings = load_ratings()
    duplicate = (ratings.user_id == user_id) & (ratings.row_index == row_index)
    if duplicate.any():
        return  # already stored

    new_entry = {
        "user_id": user_id,
        "row_index": row_index,
        "choice": choice,
        # Timezone-aware UTC; datetime.utcnow() is deprecated (Python 3.12+)
        # and produced a naive timestamp.
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    ratings = pd.concat([ratings, pd.DataFrame([new_entry])], ignore_index=True)
    _upload_remote_ratings(ratings)
129
+
130
+
131
def get_next_unrated(df: pd.DataFrame, ratings: pd.DataFrame, user_id: str):
    """Find the first sample this user has not yet rated.

    Returns a tuple ``(index, question, response1, response2)`` or None
    when the user has rated every row.
    """
    seen = set(ratings.loc[ratings.user_id == user_id, "row_index"])
    remaining = df[~df.index.isin(seen)]
    if remaining.empty:
        return None
    nxt = remaining.iloc[0]
    return nxt.name, nxt.question, nxt.response1, nxt.response2
138
+
139
+
140
+ # -----------------------------------------------------------------------------
141
+ # Gradio callbacks
142
+ # -----------------------------------------------------------------------------
143
+
144
+
145
def start_or_resume(user_id: str, state_df):
    """Begin or resume a rating session for `user_id`.

    Returns updates for: id_input, eval_col, submit_btn, question_md,
    answer1_md, answer2_md, state_row_idx, info_md (in that order).
    """
    if not user_id.strip():
        # No identifier: keep the evaluation UI hidden and show a hint.
        return (
            gr.update(value=user_id, visible=True),
            gr.update(visible=False),  # eval_col
            gr.update(visible=False),  # submit_btn
            "",
            "",
            "",
            "",
            "Please enter a non-empty identifier to begin.",
        )

    record = get_next_unrated(state_df, load_ratings(), user_id)
    if record is None:
        # Nothing left for this user.
        return (
            gr.update(value=user_id, visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            "",
            "",
            "",
            "",
            "πŸŽ‰ You have evaluated every item – thank you!",
        )

    idx, question, ans1, ans2 = record
    return (
        gr.update(value=user_id, visible=True),
        gr.update(visible=True),  # eval_col
        gr.update(visible=True),  # submit_btn
        f"**{question}**",
        ans1,
        ans2,
        str(idx),
        "",
    )
183
+
184
+
185
def submit_preference(user_id: str, row_idx_str: str, choice: str, state_df):
    """Persist the user's choice for the current row, then show the next one.

    Returns updates for: question_md, answer1_md, answer2_md,
    state_row_idx, info_md (in that order).
    """
    if choice not in ("answer1", "answer2"):
        return (
            "",
            "",
            "",
            "",
            "Please choose either Answer 1 or Answer 2 before submitting.",
        )

    # state_row_idx always holds the str() of a valid index once the
    # evaluation UI is visible, so int() is safe here.
    save_rating(user_id, int(row_idx_str), 1 if choice == "answer1" else 2)

    record = get_next_unrated(state_df, load_ratings(), user_id)
    if record is None:
        return "", "", "", "", "πŸŽ‰ You have evaluated every item – thank you!"

    idx, question, ans1, ans2 = record
    return f"**{question}**", ans1, ans2, str(idx), ""
205
+
206
+
207
+ # -----------------------------------------------------------------------------
208
+ # Build Gradio interface
209
+ # -----------------------------------------------------------------------------
210
+
211
+
212
def build_demo():
    """Assemble the Gradio Blocks UI and wire its callbacks.

    Returns the (unlaunched) ``gr.Blocks`` demo. Raises if the bundled
    sample file is missing (see ``load_data``).
    """
    df = load_data()

    # CSS to constrain very tall answers
    overflow_css = f"""
    <style>
    .answerbox {{
        max-height: {MAX_HEIGHT_PX}px;
        overflow-y: auto;
        white-space: pre-wrap;
    }}
    </style>
    """

    with gr.Blocks(title="Question/Answer Preference Rater") as demo:
        gr.HTML(overflow_css)

        gr.Markdown(
            """# Irish Grammatical Test\nEnter your identifier below to start or resume. Each sample is a pair of two sentences that varied by a grammatical feature. You should choose the one that you think is correct. Your progress is saved automatically so you can return at any time using the same identifier."""
        )

        # Per-session state: the full sample table, and the index (as a
        # string) of the row currently on screen.
        state_df = gr.State(df)
        state_row_idx = gr.State("")

        # Identifier input
        id_input = gr.Textbox(label="User Identifier", placeholder="e.g. alice")
        start_btn = gr.Button("Start / Resume")

        info_md = gr.Markdown("")

        # Evaluation widgets (hidden until a valid identifier is entered)
        with gr.Column(visible=False) as eval_col:
            question_md = gr.Markdown("")
            with gr.Row():
                answer1_md = gr.Markdown(label="Sentence A", elem_classes=["answerbox"])
                answer2_md = gr.Markdown(label="Sentence B", elem_classes=["answerbox"])
            choice_radio = gr.Radio(
                ["answer1", "answer2"], label="Which sentence do you prefer?"
            )
        # NOTE(review): diff formatting lost the nesting here; the button is
        # placed at Blocks level since its visibility is toggled separately
        # from eval_col in the callbacks — confirm against the original layout.
        submit_btn = gr.Button("Submit Preference", visible=False)

        # Callbacks wiring — output order must match the tuples returned by
        # start_or_resume / submit_preference.
        start_btn.click(
            fn=start_or_resume,
            inputs=[id_input, state_df],
            outputs=[
                id_input,
                eval_col,
                submit_btn,
                question_md,
                answer1_md,
                answer2_md,
                state_row_idx,
                info_md,
            ],
        )

        submit_btn.click(
            fn=submit_preference,
            inputs=[id_input, state_row_idx, choice_radio, state_df],
            outputs=[question_md, answer1_md, answer2_md, state_row_idx, info_md],
        )

    return demo
276
+
277
+
278
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    demo = build_demo()
    demo.launch()
human_judgement/selected_samples.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a330651213a408872c7f956545e97a71ca5ba04f6663710d9ccf3138e9f823bb
3
+ size 266536