llm_moderation_testing

Running

App Files Community

Yacine Jernite committed 14 days ago

Commit

9c2c50e

1 Parent(s): 93b1033

can now edit policies

Browse files

Files changed (3) hide show

ui/tab_config.py +1 -1
ui/tab_policy.py +60 -44
utils/constants.py +2 -3

ui/tab_config.py CHANGED Viewed

@@ -91,7 +91,7 @@ def build_config_tab() -> dict:
         gr.Markdown("---")
         with gr.Accordion("Generation Parameters", open=False):
-            max_tokens = gr.Number(label="Max Tokens", value=4096, precision=0)
             temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.1, step=0.1)
             top_p = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)

         gr.Markdown("---")
         with gr.Accordion("Generation Parameters", open=False):
+            max_tokens = gr.Number(label="Max Tokens", value=9192, precision=0)
             temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.1, step=0.1)
             top_p = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)

ui/tab_policy.py CHANGED Viewed

@@ -13,58 +13,74 @@ from utils.helpers import load_policy_from_file, load_preset_policy
 def build_policy_tab(base_dir: str) -> dict:
     """Build the policy definition tab UI."""
     with gr.Tab("📋 Policy Definition"):
-        input_method = gr.Radio(label="Input Method", choices=["Upload Markdown", "Enter Manually", "Select Preset"], value="Select Preset")
-        upload_file = gr.File(label="Upload Markdown File", file_types=[".md"], visible=False)
-        upload_preview = gr.Textbox(label="File Preview", lines=10, interactive=False, visible=False)
-        load_upload_btn = gr.Button("Load Policy", visible=False)
-        manual_text = gr.Textbox(label="Policy Text", placeholder="Enter policy markdown...", lines=20, visible=False)
-        save_manual_btn = gr.Button("Save Policy", visible=False)
-        preset_dropdown = gr.Dropdown(
-            label="Select Preset", choices=["Hate Speech Policy", "Violence Policy", "Toxicity Policy"], value="Hate Speech Policy", visible=True
-        )
-        preset_preview = gr.Markdown(value="*Select a preset to preview*", visible=True)
-        load_preset_btn = gr.Button("Load Preset", visible=True)
-        gr.Markdown("---")
-        gr.Markdown("### Current Policy")
-        current_policy = gr.Markdown(value="*No policy loaded*")
-        clear_policy_btn = gr.Button("Clear Policy", variant="secondary")
         current_policy_state = gr.State(value="")
-        def update_ui(method):
-            return (
-                gr.update(visible=(method == "Upload Markdown")),
-                gr.update(visible=(method == "Upload Markdown")),
-                gr.update(visible=(method == "Upload Markdown")),
-                gr.update(visible=(method == "Enter Manually")),
-                gr.update(visible=(method == "Enter Manually")),
-                gr.update(visible=(method == "Select Preset")),
-                gr.update(visible=(method == "Select Preset")),
-                gr.update(visible=(method == "Select Preset")),
             )
-        input_method.change(update_ui, inputs=input_method, outputs=[upload_file, upload_preview, load_upload_btn, manual_text, save_manual_btn, preset_dropdown, preset_preview, load_preset_btn])
-        # Policy loading handlers
         load_preset_btn.click(
-            lambda name: load_preset_policy(name, base_dir),
             inputs=preset_dropdown,
-            outputs=[current_policy_state, current_policy],
         )
-        load_upload_btn.click(
-            lambda f: load_policy_from_file(f.name) if f else ("", ""),
             inputs=upload_file,
-            outputs=[current_policy_state, current_policy],
         )
-        upload_file.change(lambda f: open(f.name).read() if f else "", inputs=upload_file, outputs=upload_preview)
-        save_manual_btn.click(lambda t: (t, t), inputs=manual_text, outputs=[current_policy_state, current_policy])
-        clear_policy_btn.click(lambda: ("", "*No policy loaded*"), outputs=[current_policy_state, current_policy])
     return {
         "current_policy_state": current_policy_state,
-        "current_policy": current_policy,
     }

 def build_policy_tab(base_dir: str) -> dict:
     """Build the policy definition tab UI."""
     with gr.Tab("📋 Policy Definition"):
         current_policy_state = gr.State(value="")
+        # Existing Policy Accordion
+        with gr.Accordion("📥 Load Existing Policy", open=False):
+            with gr.Row():
+                with gr.Column():
+                    preset_dropdown = gr.Dropdown(
+                        label="Select Preset",
+                        choices=["Hate Speech Policy", "Violence Policy", "Toxicity Policy"],
+                        value=None
+                    )
+                    load_preset_btn = gr.Button("Load Preset")
+                with gr.Column():
+                    gr.Markdown("Upload a markdown file:")
+                    upload_file = gr.File(label="Upload Markdown File", file_types=[".md"])
+        # Manual Edition Accordion
+        with gr.Accordion("✏️ Manual Edition", open=True):
+            manual_text = gr.Textbox(
+                label="Policy Text",
+                placeholder="Enter or edit policy markdown...",
+                lines=20
             )
+            policy_preview = gr.Markdown(value="*No policy loaded*")
+        # Clear button
+        clear_policy_btn = gr.Button("Clear Policy", variant="secondary")
+        # Handlers
+        def load_preset_handler(name):
+            if not name:
+                return "", "*No policy loaded*", ""
+            policy_text, _ = load_preset_policy(name, base_dir)
+            return policy_text, policy_text, policy_text
         load_preset_btn.click(
+            load_preset_handler,
             inputs=preset_dropdown,
+            outputs=[current_policy_state, manual_text, policy_preview],
         )
+        def load_upload_handler(f):
+            if f:
+                policy_text, _ = load_policy_from_file(f.name)
+                return policy_text, policy_text, policy_text
+            return "", "", "*No policy loaded*"
+        upload_file.change(
+            load_upload_handler,
             inputs=upload_file,
+            outputs=[current_policy_state, manual_text, policy_preview],
+        )
+        def update_preview(text):
+            return text if text else "*No policy loaded*"
+        manual_text.change(
+            lambda t: (t, update_preview(t)),
+            inputs=manual_text,
+            outputs=[current_policy_state, policy_preview],
+        )
+        clear_policy_btn.click(
+            lambda: ("", "", "*No policy loaded*"),
+            outputs=[current_policy_state, manual_text, policy_preview],
         )
     return {
         "current_policy_state": current_policy_state,
     }

utils/constants.py CHANGED Viewed

@@ -77,15 +77,14 @@ LM_PROMPT_INSTRUCT = """
 """
 # Response format for GPT-OSS
-RESPONSE_FORMAT = """
-## Response Format
 Given a content item, please provide a JSON-formatted response with the following fields:
 - `label`: INT - 1 for a policy violation, 0 for no violation
 - `categories`: LIST[DCT] - a list of categories from the disallowed and allowed categories that the content item falls into along with the reasoning for each category. Each dict has the following keys:
   - `category`: STR - the category from the policy
   - `reasoning`: STR - the reasoning for why the content item falls into this category
-  - `policy_source`: STR - the specific text from the policy that best supports the reasoning. If you need more than one, use [...] to concatenate them.
 """
 # Test examples from notebook

 """
 # Response format for GPT-OSS
+RESPONSE_FORMAT = """## Response Format
 Given a content item, please provide a JSON-formatted response with the following fields:
 - `label`: INT - 1 for a policy violation, 0 for no violation
 - `categories`: LIST[DCT] - a list of categories from the disallowed and allowed categories that the content item falls into along with the reasoning for each category. Each dict has the following keys:
   - `category`: STR - the category from the policy
   - `reasoning`: STR - the reasoning for why the content item falls into this category
+  - `policy_source`: STR - specific text from the policy that best supports the reasoning. Use [...] to concatenate multi-part citations. Make sure to quote the policy text exactly and include all relevant passages.
 """
 # Test examples from notebook