Spaces:

XAI
/

Cleaning-ImageNet-Hard

Paused

App Files Files Community

taesiri committed on Apr 27, 2023

Commit

e859cf6

1 Parent(s): fbfb369

backup

Browse files

Files changed (1) hide show

app.py +27 -16

app.py CHANGED Viewed

@@ -41,9 +41,8 @@ bad_items = open("./ex2.txt", "r").read().split("\n")
 bad_items = [x.split(".")[0] for x in bad_items]
 bad_items = [int(x) for x in bad_items if x != ""]
-NUMBER_OF_IMAGES = 100  # len(bad_items)
-# download and extract folders
 gdown.cached_download(
     url="https://huggingface.co/datasets/taesiri/imagenet_hard_review_samples/resolve/main/data.zip",
@@ -81,8 +80,6 @@ def update_snapshot(username):
         with open(file) as f:
             data = json.load(f)
             tdf = [data[x] for x in columns]
-            # add filename as a column
             rows.append(tdf)
     df = pd.DataFrame(rows, columns=columns)
@@ -98,15 +95,15 @@ def generate_dataset(username):
     all_images = set(bad_items)
     answered = set(df.id)
     remaining = list(all_images - answered)
-    if len(remaining) < NUMBER_OF_IMAGES and len(remaining) > 0:
-        NUMBER_OF_IMAGES = len(remaining)
-        random_indices = list(remaining)
-    elif len(remaining) == 0:
         return []
-    else:
-        random_indices = np.random.choice(remaining, NUMBER_OF_IMAGES, replace=False)
     random_images = [imagenet_hard[int(i)]["image"] for i in random_indices]
     random_gt_ids = [imagenet_hard[int(i)]["label"] for i in random_indices]
     random_gt_labels = [imagenet_hard[int(x)]["english_label"] for x in random_indices]
@@ -147,8 +144,6 @@ qid_to_sample = {
     int(x.split("/")[-1].split(".")[0].split("_")[0]): x for x in all_samples
 }
-# user-e3z5b
 def get_training_samples(qid):
     labels_id = imagenet_hard[int(qid)]["label"]
@@ -197,7 +192,9 @@ def preprocessing(data, current_index, history, username):
 def update_app(decision, data, current_index, history, username):
     global NUMBER_OF_IMAGES
     if current_index == -1:
-        return
     if current_index == NUMBER_OF_IMAGES - 1:
         time_stamp = int(time.time())
@@ -290,18 +287,32 @@ newcss = """
 }
 """
-with gr.Blocks(css=newcss) as demo:
     data_gr = gr.State({})
     current_index = gr.State(-1)
     history = gr.State({})
-    gr.Markdown("# Cleaning ImageNet-Hard!")
     random_str = "".join(
         random.choice(string.ascii_lowercase + string.digits) for _ in range(5)
     )
-    with gr.Row():
         username = gr.Textbox(label="Username", value=f"user-{random_str}")
         prepare_btn = gr.Button(value="Load Samples")

 bad_items = [x.split(".")[0] for x in bad_items]
 bad_items = [int(x) for x in bad_items if x != ""]
+NUMBER_OF_IMAGES = len(bad_items)
 gdown.cached_download(
     url="https://huggingface.co/datasets/taesiri/imagenet_hard_review_samples/resolve/main/data.zip",
         with open(file) as f:
             data = json.load(f)
             tdf = [data[x] for x in columns]
             rows.append(tdf)
     df = pd.DataFrame(rows, columns=columns)
     all_images = set(bad_items)
     answered = set(df.id)
     remaining = list(all_images - answered)
+    # shuffle remaining
+    random.shuffle(remaining)
+    NUMBER_OF_IMAGES = len(bad_items)
+    if NUMBER_OF_IMAGES == 0:
         return []
+    random_indices = remaining
     random_images = [imagenet_hard[int(i)]["image"] for i in random_indices]
     random_gt_ids = [imagenet_hard[int(i)]["label"] for i in random_indices]
     random_gt_labels = [imagenet_hard[int(x)]["english_label"] for x in random_indices]
     int(x.split("/")[-1].split(".")[0].split("_")[0]): x for x in all_samples
 }
 def get_training_samples(qid):
     labels_id = imagenet_hard[int(qid)]["label"]
 def update_app(decision, data, current_index, history, username):
     global NUMBER_OF_IMAGES
     if current_index == -1:
+        fake_plot = string_to_image("Please Enter your username and load samples")
+        empty_image = Image.new("RGB", (224, 224))
+        return empty_image, fake_plot, current_index, history, data, None
     if current_index == NUMBER_OF_IMAGES - 1:
         time_stamp = int(time.time())
 }
 """
+with gr.Blocks(css=newcss, theme=gr.themes.Soft()) as demo:
     data_gr = gr.State({})
     current_index = gr.State(-1)
     history = gr.State({})
+    gr.Markdown("# Help Us to Clean `ImageNet-Hard`!")
+    gr.Markdown("## Instructions")
+    gr.Markdown(
+        "Please enter your username and press `Load Samples`. The loading process might take up to a minute. Once the loading is done, you can start reviewing the samples."
+    )
+    gr.Markdown(
+        """For each image, please select one of the following options: `Accept`, `Not Sure!`, `Reject`.
+        - If you think any of the labels are correct, please select `Accept`.
+        - If you think none of the labels matching the image, please select `Reject`.
+        - If you are not sure about the label, please select `Not Sure!`.
+        You can refer to `Training samples` if you are not sure about the target label.
+        """
+    )
     random_str = "".join(
         random.choice(string.ascii_lowercase + string.digits) for _ in range(5)
     )
+    with gr.Column():
         username = gr.Textbox(label="Username", value=f"user-{random_str}")
         prepare_btn = gr.Button(value="Load Samples")