Spaces:

MarioBarbeque
/

FixedRecall

Sleeping

App Files Files Community

John Graham Reynolds committed on Nov 6, 2024

Commit

a1adbdd

1 Parent(s): a51a7fa

add Space module

Browse files

Files changed (1) hide show

app.py +82 -3

app.py CHANGED Viewed

@@ -1,8 +1,87 @@
 import gradio as gr
-def greet(name):
-    return 'Hello, ' + name + '!'
-gr.Interface(fn=greet, inputs='text', outputs='text').launch()

+import sys
 import gradio as gr
+import pandas as pd
+import evaluate
+from evaluate.utils import infer_gradio_input_types, json_to_string_type, parse_readme, parse_test_cases
+# from evaluate.utils import launch_gradio_widget  # using this directly is erroneous - let's fix this
+from fixed_recall import FixedRecall
+from pathlib import Path
+# Extra Markdown text concatenated onto `metric.info.description` and passed as the
+# Interface `description`. It links to the combined-metrics Space and the upstream
+# issue, and shows how to call `FixedRecall` with an `average` argument.
+added_description = """
+See the 🤗 Space showing off how to combine various metrics:
+    [MarioBarbeque/CombinedEvaluationMetrics🪲](https://huggingface.co/spaces/MarioBarbeque/CombinedEvaluationMetrics). This collected fix thereby circumnavigates the
+    original, longstanding issue found [here](https://github.com/huggingface/evaluate/issues/234). We look forward to fixing this in a PR soon.
+In the specific use case of the `FixedRecall` metric, one writes the following:\n
+```python
+recall = FixedRecall(average=...)
+recall.add_batch(predictions=..., references=...)
+recall.compute()
+```\n
+where the `average` parameter can be chosen to configure the way recall scores across labels are averaged. Acceptable values include `[None, 'micro', 'macro', 'weighted']` (
+or `binary` if there exist only two labels). \n
+"""
+# Instantiate the metric once at import time; its features, name and description drive the UI below.
+metric = FixedRecall()
+# When `features` is a list, only its first entry is used to derive the input columns.
+feature_spec = metric.features[0] if isinstance(metric.features, list) else metric.features
+feature_names, feature_types = zip(*feature_spec.items())
+gradio_input_types = infer_gradio_input_types(feature_types)
+# Base directory used to resolve README.md for the Interface article.
+local_path = Path(sys.path[0])
+# TODO: configure these randomly using a randint generator and the feature names?
+test_case_1 = [{"predictions": [1, 2, 3, 4, 5], "references": [1, 2, 5, 4, 3]}]
+test_case_2 = [{"predictions": [9, 8, 7, 6, 5], "references": [7, 8, 9, 6, 5]}]
+# TODO: configure this based on the input type, etc., for launch_gradio_widget
+def compute(input_df: pd.DataFrame, method: str):
+    """Score the table entered in the UI with `FixedRecall`.
+
+    Args:
+        input_df: Table whose first column holds the predictions and whose
+            second column holds the references; every cell is converted
+            with `int()`.
+        method: Averaging mode forwarded to `FixedRecall(average=...)`. The
+            string "None" is translated to Python's `None`.
+
+    Returns:
+        A message string embedding whatever `FixedRecall.compute()` returned.
+    """
+    average = None if method == "None" else method
+    scorer = FixedRecall(average=average)
+    column_names = list(input_df.columns)
+    # Columns are addressed by position, not by name.
+    predicted = list(map(int, input_df[column_names[0]].to_list()))
+    references = list(map(int, input_df[column_names[1]].to_list()))
+    scorer.add_batch(predictions=predicted, references=references)
+    outputs = scorer.compute()
+    return f"The recall score for these predictions is: \n {outputs}"
+# Input table with one column per metric feature.
+input_table = gr.Dataframe(
+    headers=feature_names,
+    col_count=len(feature_names),
+    row_count=5,
+    datatype=json_to_string_type(gradio_input_types),
+)
+# The choice "None" is a string here; `compute` maps it to Python's None.
+average_selector = gr.Radio(
+    ["weighted", "micro", "macro", "None", "binary"],
+    label="Averaging Method",
+    info="Method for averaging the recall score across labels. \n `binary` only works if you are evaluating a binary classification model.",
+)
+result_box = gr.Textbox(label=metric.name)
+full_description = metric.info.description + added_description
+readme_article = parse_readme(local_path / "README.md")
+# Only the first parsed case (index 0) of each test-case list is used as the table example;
+# keep this unpacking in mind for when launch_gradio_widget gets fixed.
+example_inputs = [
+    [parse_test_cases(test_case_1, feature_names, gradio_input_types)[0], "weighted"],
+    [parse_test_cases(test_case_2, feature_names, gradio_input_types)[0], "micro"],
+]
+space = gr.Interface(
+    fn=compute,
+    inputs=[input_table, average_selector],
+    outputs=result_box,
+    description=full_description,
+    # TODO: think about how to generalize the title with launch_gradio_widget - it seems fine as is.
+    title="FixedRecall Metric",
+    article=readme_article,
+    examples=example_inputs,
+    cache_examples=False,
+)
+space.launch()