Spaces:
Paused
Paused
backup
Browse files
app.py
CHANGED
@@ -41,9 +41,8 @@ bad_items = open("./ex2.txt", "r").read().split("\n")
|
|
41 |
bad_items = [x.split(".")[0] for x in bad_items]
|
42 |
bad_items = [int(x) for x in bad_items if x != ""]
|
43 |
|
44 |
-
NUMBER_OF_IMAGES =
|
45 |
|
46 |
-
# download and extract folders
|
47 |
|
48 |
gdown.cached_download(
|
49 |
url="https://huggingface.co/datasets/taesiri/imagenet_hard_review_samples/resolve/main/data.zip",
|
@@ -81,8 +80,6 @@ def update_snapshot(username):
|
|
81 |
with open(file) as f:
|
82 |
data = json.load(f)
|
83 |
tdf = [data[x] for x in columns]
|
84 |
-
|
85 |
-
# add filename as a column
|
86 |
rows.append(tdf)
|
87 |
|
88 |
df = pd.DataFrame(rows, columns=columns)
|
@@ -98,15 +95,15 @@ def generate_dataset(username):
|
|
98 |
all_images = set(bad_items)
|
99 |
answered = set(df.id)
|
100 |
remaining = list(all_images - answered)
|
|
|
|
|
|
|
|
|
101 |
|
102 |
-
if
|
103 |
-
NUMBER_OF_IMAGES = len(remaining)
|
104 |
-
random_indices = list(remaining)
|
105 |
-
elif len(remaining) == 0:
|
106 |
return []
|
107 |
-
else:
|
108 |
-
random_indices = np.random.choice(remaining, NUMBER_OF_IMAGES, replace=False)
|
109 |
|
|
|
110 |
random_images = [imagenet_hard[int(i)]["image"] for i in random_indices]
|
111 |
random_gt_ids = [imagenet_hard[int(i)]["label"] for i in random_indices]
|
112 |
random_gt_labels = [imagenet_hard[int(x)]["english_label"] for x in random_indices]
|
@@ -147,8 +144,6 @@ qid_to_sample = {
|
|
147 |
int(x.split("/")[-1].split(".")[0].split("_")[0]): x for x in all_samples
|
148 |
}
|
149 |
|
150 |
-
# user-e3z5b
|
151 |
-
|
152 |
|
153 |
def get_training_samples(qid):
|
154 |
labels_id = imagenet_hard[int(qid)]["label"]
|
@@ -197,7 +192,9 @@ def preprocessing(data, current_index, history, username):
|
|
197 |
def update_app(decision, data, current_index, history, username):
|
198 |
global NUMBER_OF_IMAGES
|
199 |
if current_index == -1:
|
200 |
-
|
|
|
|
|
201 |
|
202 |
if current_index == NUMBER_OF_IMAGES - 1:
|
203 |
time_stamp = int(time.time())
|
@@ -290,18 +287,32 @@ newcss = """
|
|
290 |
}
|
291 |
"""
|
292 |
|
293 |
-
with gr.Blocks(css=newcss) as demo:
|
294 |
data_gr = gr.State({})
|
295 |
current_index = gr.State(-1)
|
296 |
history = gr.State({})
|
297 |
|
298 |
-
gr.Markdown("#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
|
300 |
random_str = "".join(
|
301 |
random.choice(string.ascii_lowercase + string.digits) for _ in range(5)
|
302 |
)
|
303 |
|
304 |
-
with gr.
|
305 |
username = gr.Textbox(label="Username", value=f"user-{random_str}")
|
306 |
prepare_btn = gr.Button(value="Load Samples")
|
307 |
|
|
|
41 |
bad_items = [x.split(".")[0] for x in bad_items]
|
42 |
bad_items = [int(x) for x in bad_items if x != ""]
|
43 |
|
44 |
+
NUMBER_OF_IMAGES = len(bad_items)
|
45 |
|
|
|
46 |
|
47 |
gdown.cached_download(
|
48 |
url="https://huggingface.co/datasets/taesiri/imagenet_hard_review_samples/resolve/main/data.zip",
|
|
|
80 |
with open(file) as f:
|
81 |
data = json.load(f)
|
82 |
tdf = [data[x] for x in columns]
|
|
|
|
|
83 |
rows.append(tdf)
|
84 |
|
85 |
df = pd.DataFrame(rows, columns=columns)
|
|
|
95 |
all_images = set(bad_items)
|
96 |
answered = set(df.id)
|
97 |
remaining = list(all_images - answered)
|
98 |
+
# shuffle remaining
|
99 |
+
random.shuffle(remaining)
|
100 |
+
|
101 |
+
NUMBER_OF_IMAGES = len(bad_items)
|
102 |
|
103 |
+
if NUMBER_OF_IMAGES == 0:
|
|
|
|
|
|
|
104 |
return []
|
|
|
|
|
105 |
|
106 |
+
random_indices = remaining
|
107 |
random_images = [imagenet_hard[int(i)]["image"] for i in random_indices]
|
108 |
random_gt_ids = [imagenet_hard[int(i)]["label"] for i in random_indices]
|
109 |
random_gt_labels = [imagenet_hard[int(x)]["english_label"] for x in random_indices]
|
|
|
144 |
int(x.split("/")[-1].split(".")[0].split("_")[0]): x for x in all_samples
|
145 |
}
|
146 |
|
|
|
|
|
147 |
|
148 |
def get_training_samples(qid):
|
149 |
labels_id = imagenet_hard[int(qid)]["label"]
|
|
|
192 |
def update_app(decision, data, current_index, history, username):
|
193 |
global NUMBER_OF_IMAGES
|
194 |
if current_index == -1:
|
195 |
+
fake_plot = string_to_image("Please Enter your username and load samples")
|
196 |
+
empty_image = Image.new("RGB", (224, 224))
|
197 |
+
return empty_image, fake_plot, current_index, history, data, None
|
198 |
|
199 |
if current_index == NUMBER_OF_IMAGES - 1:
|
200 |
time_stamp = int(time.time())
|
|
|
287 |
}
|
288 |
"""
|
289 |
|
290 |
+
with gr.Blocks(css=newcss, theme=gr.themes.Soft()) as demo:
|
291 |
data_gr = gr.State({})
|
292 |
current_index = gr.State(-1)
|
293 |
history = gr.State({})
|
294 |
|
295 |
+
gr.Markdown("# Help Us to Clean `ImageNet-Hard`!")
|
296 |
+
|
297 |
+
gr.Markdown("## Instructions")
|
298 |
+
gr.Markdown(
|
299 |
+
"Please enter your username and press `Load Samples`. The loading process might take up to a minute. Once the loading is done, you can start reviewing the samples."
|
300 |
+
)
|
301 |
+
gr.Markdown(
|
302 |
+
"""For each image, please select one of the following options: `Accept`, `Not Sure!`, `Reject`.
|
303 |
+
- If you think any of the labels are correct, please select `Accept`.
|
304 |
+
- If you think none of the labels matching the image, please select `Reject`.
|
305 |
+
- If you are not sure about the label, please select `Not Sure!`.
|
306 |
+
|
307 |
+
You can refer to `Training samples` if you are not sure about the target label.
|
308 |
+
"""
|
309 |
+
)
|
310 |
|
311 |
random_str = "".join(
|
312 |
random.choice(string.ascii_lowercase + string.digits) for _ in range(5)
|
313 |
)
|
314 |
|
315 |
+
with gr.Column():
|
316 |
username = gr.Textbox(label="Username", value=f"user-{random_str}")
|
317 |
prepare_btn = gr.Button(value="Load Samples")
|
318 |
|