Spaces:
Runtime error
Runtime error
Update load_data for own dataset sync
Browse files - load_data.py (+12 -9)
load_data.py
CHANGED
@@ -10,7 +10,7 @@ from datasets import load_dataset, concatenate_datasets
|
|
10 |
from argilla.listeners import listener
|
11 |
|
12 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
13 |
-
HUB_DATASET_NAME =
|
14 |
|
15 |
@listener(
|
16 |
dataset="somos-alpaca-es",
|
@@ -44,20 +44,23 @@ class LoadDatasets:
|
|
44 |
print(e)
|
45 |
old_ds = None
|
46 |
|
47 |
-
dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
48 |
|
49 |
|
50 |
-
if old_ds:
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
|
56 |
-
dataset = dataset.remove_columns("metrics")
|
|
|
|
|
|
|
57 |
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
58 |
|
59 |
settings = rg.TextClassificationSettings(
|
60 |
-
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD"]
|
61 |
)
|
62 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
63 |
|
|
|
10 |
from argilla.listeners import listener
|
11 |
|
12 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
13 |
+
HUB_DATASET_NAME = "mserras/alpaca-es-hackaton"
|
14 |
|
15 |
@listener(
|
16 |
dataset="somos-alpaca-es",
|
|
|
44 |
print(e)
|
45 |
old_ds = None
|
46 |
|
47 |
+
# dataset = load_dataset("somosnlp/somos-clean-alpaca-es", split="train")
|
48 |
|
49 |
|
50 |
+
# if old_ds:
|
51 |
+
# print("Concatenating datasets")
|
52 |
+
# dataset = concatenate_datasets([dataset, old_ds])
|
53 |
+
# print("Concatenated dataset is:")
|
54 |
+
# print(dataset)
|
55 |
|
56 |
+
# dataset = dataset.remove_columns("metrics")
|
57 |
+
if not old_ds:
|
58 |
+
return
|
59 |
+
dataset = old_ds
|
60 |
records = rg.DatasetForTextClassification.from_datasets(dataset)
|
61 |
|
62 |
settings = rg.TextClassificationSettings(
|
63 |
+
label_schema=["BAD INSTRUCTION", "BAD INPUT", "BAD OUTPUT", "INAPPROPRIATE", "BIASED", "ALL GOOD", "HALLUCINATION", "UNPROCESSABLE"]
|
64 |
)
|
65 |
rg.configure_dataset(name="somos-alpaca-es", settings=settings, workspace="team")
|
66 |
|