Spaces:
Runtime error
Runtime error
Updated for logging messages
Browse files
- load_data.py +9 -7
load_data.py
CHANGED
|
@@ -38,11 +38,11 @@ class LoadDatasets:
|
|
| 38 |
# Leer el dataset del Hub
|
| 39 |
try:
|
| 40 |
print(f"Trying to sync with {HUB_DATASET_NAME}")
|
| 41 |
-
|
| 42 |
except Exception as e:
|
| 43 |
print(f"Not possible to sync with {HUB_DATASET_NAME}")
|
| 44 |
print(e)
|
| 45 |
-
|
| 46 |
|
| 47 |
# dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
| 48 |
|
|
@@ -53,17 +53,19 @@ class LoadDatasets:
|
|
| 53 |
# print("Concatenated dataset is:")
|
| 54 |
# print(dataset)
|
| 55 |
|
| 56 |
-
|
| 57 |
-
if not
|
|
|
|
| 58 |
return
|
| 59 |
-
dataset = old_ds
|
| 60 |
-
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
| 61 |
|
|
|
|
|
|
|
| 62 |
settings = rg.TextClassificationSettings(
|
| 63 |
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
|
| 64 |
)
|
| 65 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
| 66 |
-
|
|
|
|
| 67 |
# Log the dataset
|
| 68 |
rg.log(
|
| 69 |
records,
|
|
|
|
| 38 |
# Leer el dataset del Hub
|
| 39 |
try:
|
| 40 |
print(f"Trying to sync with {HUB_DATASET_NAME}")
|
| 41 |
+
dataset = load_dataset(HUB_DATASET_NAME, split="train")
|
| 42 |
except Exception as e:
|
| 43 |
print(f"Not possible to sync with {HUB_DATASET_NAME}")
|
| 44 |
print(e)
|
| 45 |
+
dataset = None
|
| 46 |
|
| 47 |
# dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
| 48 |
|
|
|
|
| 53 |
# print("Concatenated dataset is:")
|
| 54 |
# print(dataset)
|
| 55 |
|
| 56 |
+
dataset = dataset.remove_columns("metrics")
|
| 57 |
+
if not dataset:
|
| 58 |
+
print(f"There is no DATASET - Skipping!")
|
| 59 |
return
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
print(f"Generating records from the dataset")
|
| 62 |
+
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
| 63 |
settings = rg.TextClassificationSettings(
|
| 64 |
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
|
| 65 |
)
|
| 66 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
| 67 |
+
|
| 68 |
+
print("Logging the dataset!")
|
| 69 |
# Log the dataset
|
| 70 |
rg.log(
|
| 71 |
records,
|