Spaces:
Runtime error
Runtime error
Update load_data for own dataset sync
Browse files
- load_data.py +12 -9
load_data.py
CHANGED
|
@@ -10,7 +10,7 @@ from datasets import load_dataset, concatenate_datasets
|
|
| 10 |
from argilla.listeners import listener
|
| 11 |
|
| 12 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 13 |
-
HUB_DATASET_NAME =
|
| 14 |
|
| 15 |
@listener(
|
| 16 |
dataset="somos-alpaca-es",
|
|
@@ -44,20 +44,23 @@ class LoadDatasets:
|
|
| 44 |
print(e)
|
| 45 |
old_ds = None
|
| 46 |
|
| 47 |
-
dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
| 48 |
|
| 49 |
|
| 50 |
-
if old_ds:
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
|
| 56 |
-
dataset = dataset.remove_columns("metrics")
|
|
|
|
|
|
|
|
|
|
| 57 |
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
| 58 |
|
| 59 |
settings = rg.TextClassificationSettings(
|
| 60 |
-
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD"]
|
| 61 |
)
|
| 62 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
| 63 |
|
|
|
|
| 10 |
from argilla.listeners import listener
|
| 11 |
|
| 12 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 13 |
+
HUB_DATASET_NAME = "mserras/alpaca-es-hackaton"
|
| 14 |
|
| 15 |
@listener(
|
| 16 |
dataset="somos-alpaca-es",
|
|
|
|
| 44 |
print(e)
|
| 45 |
old_ds = None
|
| 46 |
|
| 47 |
+
# dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
| 48 |
|
| 49 |
|
| 50 |
+
# if old_ds:
|
| 51 |
+
# print("Concatenating datasets")
|
| 52 |
+
# dataset = concatenate_datasets([dataset, old_ds])
|
| 53 |
+
# print("Concatenated dataset is:")
|
| 54 |
+
# print(dataset)
|
| 55 |
|
| 56 |
+
# dataset = dataset.remove_columns("metrics")
|
| 57 |
+
if not old_ds:
|
| 58 |
+
return
|
| 59 |
+
dataset = old_ds
|
| 60 |
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
| 61 |
|
| 62 |
settings = rg.TextClassificationSettings(
|
| 63 |
+
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
|
| 64 |
)
|
| 65 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
| 66 |
|