meg HF Staff committed on
Commit
3096e07
·
verified ·
1 Parent(s): 6a538fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -17,6 +17,7 @@ https://huggingface.co/datasets/datacomp
17
  # https://huggingface.co/datasets/ILSVRC/imagenet-1k
18
  NUM_EXAMPLES = 1281167
19
  # Arbitrary small number, only used while developing (uncomment #.take(DEV_AMOUNT) below to use it).
 
20
  DEV_AMOUNT = 10
21
  GATED_IMAGENET = os.environ.get("GATED_IMAGENET")
22
 
@@ -33,8 +34,12 @@ def main(percentage=10):
33
 
34
  # Load the dataset from the HF hub. Use streaming so as not to load the entire dataset at once.
35
  # Use the .take(DEV_AMOUNT) to only grab a small chunk of instances to develop with.
36
- dataset = load_dataset("ILSVRC/imagenet-1k", split="train", streaming=True,
 
37
  trust_remote_code=True, token=GATED_IMAGENET).take(DEV_AMOUNT)
 
 
 
38
 
39
  # Create a set of indices that are randomly chosen, to change their labels.
40
  # Specifically, randomly choose NUM_EXAMPLES/percentage indices.
@@ -46,8 +51,13 @@ def main(percentage=10):
46
  features=dataset.features, batched=True)
47
 
48
  # Upload the new version of the dataset (this will take a while)
49
- Dataset.from_generator(updated_dataset.__iter__).push_to_hub(
50
- "datacomp/imagenet-1k-random-debug" + str(percentage), token=GATED_IMAGENET)
 
 
 
 
 
51
 
52
  end = time.time()
53
  print("That took %d seconds" % (end - start))
 
17
  # https://huggingface.co/datasets/ILSVRC/imagenet-1k
18
  NUM_EXAMPLES = 1281167
19
  # Arbitrary small number, only used while developing (set DEV = True to limit the run to DEV_AMOUNT examples).
20
+ DEV = False
21
  DEV_AMOUNT = 10
22
  GATED_IMAGENET = os.environ.get("GATED_IMAGENET")
23
 
 
34
 
35
  # Load the dataset from the HF hub. Use streaming so as not to load the entire dataset at once.
36
  # When DEV is set, use .take(DEV_AMOUNT) to grab only a small chunk of instances to develop with.
37
+ if DEV:
38
+ dataset = load_dataset("ILSVRC/imagenet-1k", split="train", streaming=True,
39
  trust_remote_code=True, token=GATED_IMAGENET).take(DEV_AMOUNT)
40
+ else:
41
+ dataset = load_dataset("ILSVRC/imagenet-1k", split="train", streaming=True,
42
+ trust_remote_code=True, token=GATED_IMAGENET)
43
 
44
  # Create a set of indices that are randomly chosen, to change their labels.
45
  # Specifically, randomly choose NUM_EXAMPLES/percentage indices.
 
51
  features=dataset.features, batched=True)
52
 
53
  # Upload the new version of the dataset (this will take a while)
54
+ if DEV:
55
+ Dataset.from_generator(updated_dataset.__iter__).push_to_hub(
56
+ "datacomp/imagenet-1k-random-debug" + str(percentage), token=GATED_IMAGENET)
57
+ else:
58
+ Dataset.from_generator(updated_dataset.__iter__).push_to_hub(
59
+ "datacomp/imagenet-1k-random" + str(percentage), token=GATED_IMAGENET)
60
+
61
 
62
  end = time.time()
63
  print("That took %d seconds" % (end - start))