Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ https://huggingface.co/datasets/datacomp
|
|
17 |
# https://huggingface.co/datasets/ILSVRC/imagenet-1k
|
18 |
NUM_EXAMPLES = 1281167
|
19 |
# Arbitrary small number, only used when developing (uncomment #.take(DEV_AMOUNT) below to use it).
|
|
|
20 |
DEV_AMOUNT = 10
|
21 |
GATED_IMAGENET = os.environ.get("GATED_IMAGENET")
|
22 |
|
@@ -33,8 +34,12 @@ def main(percentage=10):
|
|
33 |
|
34 |
# Load the dataset from the HF hub. Use streaming so as not to load the entire dataset at once.
|
35 |
# Use the .take(DEV_AMOUNT) to only grab a small chunk of instances to develop with.
|
36 |
-
|
|
|
37 |
trust_remote_code=True, token=GATED_IMAGENET).take(DEV_AMOUNT)
|
|
|
|
|
|
|
38 |
|
39 |
# Create a set of indices that are randomly chosen, to change their labels.
|
40 |
# Specifically, randomly choose NUM_EXAMPLES/percentage indices.
|
@@ -46,8 +51,13 @@ def main(percentage=10):
|
|
46 |
features=dataset.features, batched=True)
|
47 |
|
48 |
# Upload the new version of the dataset (this will take a while)
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
end = time.time()
|
53 |
print("That took %d seconds" % (end - start))
|
|
|
17 |
# https://huggingface.co/datasets/ILSVRC/imagenet-1k
|
18 |
NUM_EXAMPLES = 1281167
|
19 |
# Arbitrary small number, only used when developing (uncomment #.take(DEV_AMOUNT) below to use it).
|
20 |
+
DEV = False
|
21 |
DEV_AMOUNT = 10
|
22 |
GATED_IMAGENET = os.environ.get("GATED_IMAGENET")
|
23 |
|
|
|
34 |
|
35 |
# Load the dataset from the HF hub. Use streaming so as not to load the entire dataset at once.
|
36 |
# Use the .take(DEV_AMOUNT) to only grab a small chunk of instances to develop with.
|
37 |
+
if DEV:
|
38 |
+
dataset = load_dataset("ILSVRC/imagenet-1k", split="train", streaming=True,
|
39 |
trust_remote_code=True, token=GATED_IMAGENET).take(DEV_AMOUNT)
|
40 |
+
else:
|
41 |
+
dataset = load_dataset("ILSVRC/imagenet-1k", split="train", streaming=True,
|
42 |
+
trust_remote_code=True, token=GATED_IMAGENET)
|
43 |
|
44 |
# Create a set of indices that are randomly chosen, to change their labels.
|
45 |
# Specifically, randomly choose NUM_EXAMPLES/percentage indices.
|
|
|
51 |
features=dataset.features, batched=True)
|
52 |
|
53 |
# Upload the new version of the dataset (this will take a while)
|
54 |
+
if DEV:
|
55 |
+
Dataset.from_generator(updated_dataset.__iter__).push_to_hub(
|
56 |
+
"datacomp/imagenet-1k-random-debug" + str(percentage), token=GATED_IMAGENET)
|
57 |
+
else:
|
58 |
+
Dataset.from_generator(updated_dataset.__iter__).push_to_hub(
|
59 |
+
"datacomp/imagenet-1k-random" + str(percentage), token=GATED_IMAGENET)
|
60 |
+
|
61 |
|
62 |
end = time.time()
|
63 |
print("That took %d seconds" % (end - start))
|