taesiri committed on
Commit
43fb491
·
1 Parent(s): 17c1f31
Files changed (1) hide show
  1. app.py +891 -0
app.py ADDED
@@ -0,0 +1,891 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import base64
3
+ import json
4
+ import os
5
+ import shutil
6
+ import uuid
7
+ import glob
8
+ from huggingface_hub import CommitScheduler, HfApi, snapshot_download
9
+ from pathlib import Path
10
+ import git
11
+ from datasets import Dataset, Features, Value, Sequence, Image as ImageFeature
12
+ import threading
13
+ import time
14
+ from utils import process_and_push_dataset
15
+
16
# Dataset repository on the Hugging Face Hub used by this app.
DATASET_REPO = "taesiri/BugsBunny-ManualEval-IntermediateSet"

# Hub client authenticated with the HF_TOKEN environment variable; a missing
# variable raises KeyError as soon as the module is imported.
api = HfApi(token=os.environ["HF_TOKEN"])
18
+
19
+
20
+ # Download existing data from hub
21
def sync_with_hub():
    """
    Synchronize local data with the hub by cloning the dataset repo.

    Steps:
      1. If ``./data`` exists, replace ``./data_backup`` with a copy of it.
      2. Clone the dataset repo into ``hub_data`` (or pull when a checkout is
         already present).
      3. Copy every directory under ``hub_data/data`` that does not yet exist
         in ``./data``; directories that already exist locally are left alone.
      4. Remove the ``hub_data`` checkout.

    The checkout is removed even when the pull/clone or the merge raises, so a
    failed run cannot leave a partial clone behind for the next start to open.
    Errors from git or the filesystem still propagate to the caller.
    """
    print("Starting sync with hub...")
    data_dir = Path("./data")
    if data_dir.exists():
        # Backup existing data (the previous backup is discarded)
        backup_dir = Path("./data_backup")
        if backup_dir.exists():
            shutil.rmtree(backup_dir)
        shutil.copytree(data_dir, backup_dir)

    repo_url = f"https://huggingface.co/datasets/{DATASET_REPO}"
    hub_data_dir = Path("hub_data")

    try:
        if hub_data_dir.exists():
            # A checkout is already on disk: update it in place
            print("Pulling latest changes...")
            git.Repo(hub_data_dir).remotes.origin.pull()
        else:
            print("Cloning repository...")
            git.Repo.clone_from(repo_url, hub_data_dir)

        # Merge hub data with local data
        hub_data_source = hub_data_dir / "data"
        if hub_data_source.exists():
            data_dir.mkdir(exist_ok=True)
            for item in hub_data_source.glob("*"):
                if not item.is_dir():
                    continue
                dest = data_dir / item.name
                if not dest.exists():  # Only copy if doesn't exist locally
                    shutil.copytree(item, dest)
    finally:
        # Clean up cloned repo, whether or not the sync succeeded
        if hub_data_dir.exists():
            shutil.rmtree(hub_data_dir)
    print("Finished syncing with hub!")
66
+
67
+
68
# Background scheduler that commits the contents of ./data to the "data"
# folder of the dataset repo; `every` is the interval in minutes.
scheduler = CommitScheduler(
    folder_path="./data",
    path_in_repo="data",
    repo_id=DATASET_REPO,
    repo_type="dataset",
    every=1,
)
75
+
76
+
77
def load_existing_questions():
    """
    Load all existing questions from the data directory.

    Every sub-directory of ``./data`` that contains a ``question.json`` file
    contributes one entry. The directory name is the question id; the preview
    is the question text, cut to 100 characters plus ``...`` when longer.

    Question files that cannot be read, are not valid JSON or lack a usable
    ``question`` value are reported on stdout and skipped.

    Returns:
        A list of ``(question_id, "<question_id>: <preview>")`` tuples sorted
        by the second element. Empty when ``./data`` does not exist.
    """
    data_dir = "./data"
    if not os.path.exists(data_dir):
        return []

    questions = []
    for question_dir in glob.glob(os.path.join(data_dir, "*")):
        if not os.path.isdir(question_dir):
            continue
        json_path = os.path.join(question_dir, "question.json")
        if not os.path.exists(json_path):
            continue
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            text = data["question"]
            preview = f"{text[:100]}..." if len(text) > 100 else text
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"Skipping unreadable question file {json_path}: {exc}")
            continue
        question_id = os.path.basename(question_dir)
        questions.append((question_id, f"{question_id}: {preview}"))

    return sorted(questions, key=lambda entry: entry[1])
105
+
106
+
107
def load_question_data(question_id):
    """
    Load a specific question's data.

    Args:
        question_id: A question folder name, or a dropdown value of the form
            ``"<id>: <preview>"`` (the part before the first colon is used).

    Returns:
        A list of 25 values, in this order: author name, email and
        institution; the categories (a list is joined with commas); the five
        subquestion text/answer pairs (``"N/A"`` when absent); question,
        final answer, rationale text and image attribution; four question
        image paths and two rationale image paths (``None`` for empty slots
        or files that are not on disk); and finally the question id.

        When no id is given, the question file is missing or loading fails,
        all 25 values are ``None``. The length is the same on every path so
        that it always matches the number of values of the success path.
    """
    empty_result = [None] * 25

    if not question_id:
        return empty_result

    # Extract the ID part before the colon from the dropdown selection
    if ":" in question_id:
        question_id = question_id.split(":")[0].strip()

    question_folder = os.path.join("./data", question_id)
    json_path = os.path.join(question_folder, "question.json")
    if not os.path.exists(json_path):
        print(f"Question file not found: {json_path}")
        return empty_result

    def load_image(image_path):
        # Stored paths are relative ("./name.png"); only the file name is
        # trusted and it is resolved inside the question's own folder.
        if not image_path:
            return None
        full_path = os.path.join(question_folder, os.path.basename(image_path))
        return full_path if os.path.exists(full_path) else None

    def image_at(images, index):
        return load_image(images[index] if len(images) > index else None)

    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.loads(f.read().strip())

        author = data["author_info"]
        categories = data["question_categories"]
        if isinstance(categories, list):
            categories = ",".join(categories)

        subquestions = []
        for number in range(1, 6):
            subquestions.append(data.get(f"subquestions_{number}_text", "N/A"))
            subquestions.append(data.get(f"subquestions_{number}_answer", "N/A"))

        question_images = data.get("question_images", [])
        rationale_images = data.get("rationale_images", [])

        return [
            author["name"],
            author["email_address"],
            author["institution"],
            categories,
            *subquestions,
            data["question"],
            data["final_answer"],
            data.get("rationale_text", ""),
            data["image_attribution"],
            *[image_at(question_images, slot) for slot in range(4)],
            *[image_at(rationale_images, slot) for slot in range(2)],
            question_id,
        ]
    except Exception as e:
        # Boundary of a UI callback: report the problem and hand back an
        # empty form instead of propagating.
        print(f"Error loading question {question_id}: {str(e)}")
        return empty_result
175
+
176
+
177
def generate_json_files(
    name,
    email_address,
    institution,
    question_categories,
    subquestion_1_text,
    subquestion_1_answer,
    subquestion_2_text,
    subquestion_2_answer,
    subquestion_3_text,
    subquestion_3_answer,
    subquestion_4_text,
    subquestion_4_answer,
    subquestion_5_text,
    subquestion_5_answer,
    question,
    final_answer,
    rationale_text,
    image_attribution,
    image1,
    image2,
    image3,
    image4,
    rationale_image1,
    rationale_image2,
    existing_id=None,  # New parameter for updating existing questions
):
    """
    For each request:
      1) Create a unique folder under ./data/ (or use existing if updating)
      2) Copy uploaded images (question + rationale) into that folder
      3) Produce JSON file with question data
      4) Return path to the JSON file

    Args:
        name, email_address, institution, image_attribution, question,
        final_answer, rationale_text, subquestion_*: Text fields; ``None`` is
            stored as an empty string.
        question_categories: Comma-separated string, stored as a list of
            stripped items (empty list when blank).
        image1..image4, rationale_image1, rationale_image2: A file path or
            ``None``. Each image is stored as ``<slot>.png`` in the request
            folder, e.g. ``question_image_1.png``.
        existing_id: Folder name of a question to update; a new UUID is used
            when omitted.

    When updating, a slot without a usable new upload keeps its stored image
    if the previous ``question.json`` referenced it and the file is still in
    the request folder. Slots are matched by file name, so gaps between slots
    do not shift images around.

    Returns:
        Path of the written ``question.json``.
    """

    def safe_str(val):
        return val if val is not None else ""

    # Use existing ID if updating, otherwise generate new one
    request_id = existing_id if existing_id else str(uuid.uuid4())

    parent_data_folder = "./data"
    request_folder = os.path.join(parent_data_folder, request_id)
    os.makedirs(request_folder, exist_ok=True)
    json_path = os.path.join(request_folder, "question.json")

    # File names of the images the previous version of this question
    # referenced; read before anything in the folder is overwritten.
    previous_images = set()
    if existing_id and os.path.exists(json_path):
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                existing_data = json.load(f)
            for key in ("question_images", "rationale_images"):
                previous_images.update(
                    os.path.basename(path) for path in existing_data.get(key, [])
                )
        except (OSError, ValueError, AttributeError, TypeError) as exc:
            # Best effort: an unreadable previous record only means that no
            # stored image is carried over.
            print(f"Could not read existing question {json_path}: {exc}")

    all_images = [
        ("question_image_1", image1),
        ("question_image_2", image2),
        ("question_image_3", image3),
        ("question_image_4", image4),
        ("rationale_image_1", rationale_image1),
        ("rationale_image_2", rationale_image2),
    ]

    files_list = []
    for img_label, img_obj in all_images:
        file_name = f"{img_label}.png"
        target_path = os.path.join(request_folder, file_name)

        if isinstance(img_obj, str):
            # A path that does not exist counts as "no upload"
            has_upload = os.path.exists(img_obj)
        else:
            has_upload = img_obj is not None

        if has_upload:
            if isinstance(img_obj, str):
                # copy2 overwrites a stored image; skip the copy when the
                # upload already is the stored file.
                if os.path.abspath(img_obj) != os.path.abspath(target_path):
                    shutil.copy2(img_obj, target_path)
            else:
                # NOTE(review): only reached for non-path images (e.g. numpy
                # arrays); confirm gr.processing_utils.save_image accepts
                # (image, path) in the installed Gradio version.
                gr.processing_utils.save_image(img_obj, target_path)
            files_list.append((img_label, target_path))
        elif file_name in previous_images and os.path.exists(target_path):
            # Keep existing image if no new one provided
            files_list.append((img_label, target_path))

    def stored_paths(kind):
        # Images are referenced relative to the question folder ("./x.png")
        return [
            os.path.join(".", os.path.basename(path))
            for label, path in files_list
            if kind in label
        ]

    # Key order below is the order written to the JSON file.
    item_urls = {
        "custom_id": f"question___{request_id}",
        # Metadata at top level
        "author_info": {
            "name": safe_str(name),
            "email_address": safe_str(email_address),
            "institution": safe_str(institution),
        },
        "question_categories": (
            [cat.strip() for cat in safe_str(question_categories).split(",")]
            if question_categories
            else []
        ),
        "image_attribution": safe_str(image_attribution),
        "question": safe_str(question),
        "question_images": stored_paths("question_image"),
        "final_answer": safe_str(final_answer),
        "rationale_text": safe_str(rationale_text),
        "rationale_images": stored_paths("rationale_image"),
    }
    subquestions = [
        (subquestion_1_text, subquestion_1_answer),
        (subquestion_2_text, subquestion_2_answer),
        (subquestion_3_text, subquestion_3_answer),
        (subquestion_4_text, subquestion_4_answer),
        (subquestion_5_text, subquestion_5_answer),
    ]
    for number, (text, answer) in enumerate(subquestions, start=1):
        item_urls[f"subquestions_{number}_text"] = safe_str(text)
        item_urls[f"subquestions_{number}_answer"] = safe_str(answer)

    # Write the record as a single JSON line
    with open(json_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(item_urls, ensure_ascii=False) + "\n")

    return json_path
443
+
444
+
445
+ # Build the Gradio app
446
# Build the Gradio app.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a flattened listing; confirm rows/tabs/accordions against the real file.
with gr.Blocks() as demo:
    gr.Markdown("# BugsBunny Eval Builder")
    # Id of the question loaded for editing; None while composing a new one
    loaded_question_id = gr.State()

    with gr.Accordion("Instructions", open=True):
        gr.HTML(
            """
            <h3>Instructions:</h3>
            <p>Welcome to the Hugging Face space for collecting questions for the BugsBunny benchmark.</p>
            TBA
            """
        )
    gr.Markdown("## Author Information")
    with gr.Row():
        name_input = gr.Textbox(label="Name", lines=1)
        email_address_input = gr.Textbox(label="Email Address", lines=1)
        institution_input = gr.Textbox(
            label="Institution or 'Independent'",
            lines=1,
            placeholder="e.g. MIT, Google, Independent, etc.",
        )

    gr.Markdown("## Question Information")

    # image
    gr.Markdown("### Images Attribution")
    image_attribution_input = gr.Textbox(
        label="Images Attribution",
        lines=1,
        placeholder="Include attribution information for the images used in this question (or 'Own' if you created/took them)",
    )

    # Question Images - Individual Tabs
    with gr.Tabs():
        with gr.Tab("Image 1"):
            image1 = gr.Image(label="Question Image 1", type="filepath")
        with gr.Tab("Image 2 (Optional)"):
            image2 = gr.Image(label="Question Image 2", type="filepath")
        with gr.Tab("Image 3 (Optional)"):
            image3 = gr.Image(label="Question Image 3", type="filepath")
        with gr.Tab("Image 4 (Optional)"):
            image4 = gr.Image(label="Question Image 4", type="filepath")

    question_input = gr.Textbox(
        label="Question", lines=15, placeholder="Type your question here..."
    )

    question_categories_input = gr.Textbox(
        label="Question Categories",
        lines=1,
        placeholder="Comma-separated tags, e.g. math, geometry",
    )

    # Answer Section
    gr.Markdown("## Answer ")

    final_answer_input = gr.Textbox(
        label="Final Answer",
        lines=1,
        placeholder="Enter the short/concise final answer...",
    )

    rationale_text_input = gr.Textbox(
        label="Rationale Text",
        lines=5,
        placeholder="Enter the reasoning or explanation for the answer...",
    )

    # Rationale Images - Individual Tabs
    with gr.Tabs():
        with gr.Tab("Rationale 1 (Optional)"):
            rationale_image1 = gr.Image(label="Rationale Image 1", type="filepath")
        with gr.Tab("Rationale 2 (Optional)"):
            rationale_image2 = gr.Image(label="Rationale Image 2", type="filepath")

    # Subquestions Section: five identical rows (text + answer), prefilled
    # with "N/A" so the required-field check passes for unused subquestions.
    gr.Markdown("## Subquestions")
    _SUBQUESTION_ORDINALS = ("First", "Second", "Third", "Fourth", "Fifth")
    _subquestion_inputs = []
    for _number, _ordinal in enumerate(_SUBQUESTION_ORDINALS, start=1):
        with gr.Row():
            _subquestion_inputs.append(
                gr.Textbox(
                    label=f"Subquestion {_number} Text",
                    lines=2,
                    placeholder=f"{_ordinal} sub-question...",
                    value="N/A",
                )
            )
            _subquestion_inputs.append(
                gr.Textbox(
                    label=f"Subquestion {_number} Answer",
                    lines=2,
                    placeholder=f"Answer to sub-question {_number}...",
                    value="N/A",
                )
            )
    (
        subquestion_1_text_input,
        subquestion_1_answer_input,
        subquestion_2_text_input,
        subquestion_2_answer_input,
        subquestion_3_text_input,
        subquestion_3_answer_input,
        subquestion_4_text_input,
        subquestion_4_answer_input,
        subquestion_5_text_input,
        subquestion_5_answer_input,
    ) = _subquestion_inputs

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear Form")

    with gr.Row():
        output_file_urls = gr.File(
            label="Download URLs JSON", interactive=False, visible=False
        )
        output_file_base64 = gr.File(
            label="Download Base64 JSON", interactive=False, visible=False
        )

    with gr.Accordion("Load Existing Question", open=False):
        gr.Markdown("## Load Existing Question")

        with gr.Row():
            existing_questions = gr.Dropdown(
                label="Load Existing Question",
                choices=load_existing_questions(),
                type="value",
                allow_custom_value=False,
            )
            refresh_button = gr.Button("🔄 Refresh")
            load_button = gr.Button("Load Selected Question")

    # The 24 form components in the positional order shared by
    # load_question_data's return value, validate_and_generate's parameters
    # and clear_form_fields' return value. Keeping one list prevents the
    # handlers from drifting out of sync.
    form_components = [
        name_input,
        email_address_input,
        institution_input,
        question_categories_input,
        *_subquestion_inputs,
        question_input,
        final_answer_input,
        rationale_text_input,
        image_attribution_input,
        image1,
        image2,
        image3,
        image4,
        rationale_image1,
        rationale_image2,
    ]

    def refresh_questions():
        """Re-read ./data and return a dropdown update with the current choices."""
        return gr.Dropdown(choices=load_existing_questions())

    refresh_button.click(fn=refresh_questions, inputs=[], outputs=[existing_questions])

    # Load button: fill the form and remember which question is being edited
    load_button.click(
        fn=load_question_data,
        inputs=[existing_questions],
        outputs=form_components + [loaded_question_id],
    )

    def validate_and_generate(
        nm,
        em,
        inst,
        qcats,
        sq1t,
        sq1a,
        sq2t,
        sq2a,
        sq3t,
        sq3a,
        sq4t,
        sq4a,
        sq5t,
        sq5a,
        q,
        fa,
        rt,
        ia,
        i1,
        i2,
        i3,
        i4,
        ri1,
        ri2,
        stored_question_id,
    ):
        """
        Validate the form and, when complete, write the question to disk.

        Parameters follow the order of ``form_components`` plus the stored
        question id. Returns updates for the submit button and the dropdown
        of existing questions: on missing fields a warning is shown and the
        button stays enabled; on success the button is disabled until the
        form is cleared.
        """

        def is_blank(text):
            return not text or not text.strip()

        missing_fields = [
            label
            for value, label in (
                (nm, "Name"),
                (em, "Email Address"),
                (inst, "Institution"),
                (q, "Question"),
                (fa, "Final Answer"),
            )
            if is_blank(value)
        ]
        if not i1:
            missing_fields.append("First Question Image")
        if is_blank(ia):
            missing_fields.append("Image Attribution")
        subquestion_pairs = (
            (sq1t, sq1a),
            (sq2t, sq2a),
            (sq3t, sq3a),
            (sq4t, sq4a),
            (sq5t, sq5a),
        )
        for ordinal, (text, answer) in zip(_SUBQUESTION_ORDINALS, subquestion_pairs):
            if is_blank(text) or is_blank(answer):
                missing_fields.append(f"{ordinal} Sub-question and Answer")

        if missing_fields:
            warning_msg = f"Required fields missing: {', '.join(missing_fields)} ⛔️"
            gr.Warning(warning_msg, duration=5)
            return gr.Button(interactive=True), gr.Dropdown(
                choices=load_existing_questions()
            )

        # Use the stored ID instead of extracting from dropdown
        existing_id = stored_question_id if stored_question_id else None

        generate_json_files(
            nm,
            em,
            inst,
            qcats,
            sq1t,
            sq1a,
            sq2t,
            sq2a,
            sq3t,
            sq3a,
            sq4t,
            sq4a,
            sq5t,
            sq5a,
            q,
            fa,
            rt,
            ia,
            i1,
            i2,
            i3,
            i4,
            ri1,
            ri2,
            existing_id,
        )

        action = "updated" if existing_id else "created"
        gr.Info(
            f"Dataset item {action} successfully! 🎉 Clear the form to submit a new one"
        )

        return gr.update(interactive=False), gr.Dropdown(
            choices=load_existing_questions()
        )

    submit_button.click(
        fn=validate_and_generate,
        inputs=form_components + [loaded_question_id],
        outputs=[submit_button, existing_questions],
    )

    def clear_form_fields(name, email, inst, *args):
        """
        Reset the form for a new submission, keeping the author fields.

        Returns 28 values: the 24 ``form_components`` (author fields
        unchanged, subquestions back to "N/A", other text empty, images
        cleared), then the URLs file output (cleared), the submit button
        (re-enabled), the refreshed dropdown and the stored question id
        (reset to None).
        """
        outputs = [name, email, inst]  # Preserve author information
        outputs.append(gr.update(value=""))  # Clear question categories
        outputs.extend(gr.update(value="N/A") for _ in range(10))  # Subquestions
        # Clear question, final answer, rationale text, image attribution
        outputs.extend(gr.update(value="") for _ in range(4))
        outputs.extend([None] * 6)  # Clear question and rationale images
        outputs.append(None)  # Clear output file urls
        outputs.append(gr.Button(interactive=True))  # Re-enable submit button
        outputs.append(gr.update(choices=load_existing_questions()))  # Update dropdown
        outputs.append(None)  # Forget the loaded question id
        gr.Info("Form cleared! Ready for new submission 🔄")
        return outputs

    clear_button.click(
        fn=clear_form_fields,
        inputs=[
            name_input,
            email_address_input,
            institution_input,
        ],
        outputs=form_components
        + [
            output_file_urls,
            submit_button,
            existing_questions,
            loaded_question_id,
        ],
    )
866
+
867
+
868
def process_thread():
    """
    Background worker loop; never returns.

    Calls ``process_and_push_dataset`` on ``./data`` for the
    ``taesiri/BugsBunny-ManualEvaluationSet`` repo, then sleeps two minutes,
    forever. Any exception from a run is printed and the loop carries on.
    """
    pause_seconds = 120  # Sleep for 2 minutes
    while True:
        try:
            process_and_push_dataset(
                "./data",
                "taesiri/BugsBunny-ManualEvaluationSet",
                token=os.environ["HF_TOKEN"],
                private=True,
            )
        except Exception as e:
            print(f"Error in process thread: {e}")
        time.sleep(pause_seconds)
880
+
881
+
882
if __name__ == "__main__":
    print("Initializing app...")
    # Bring ./data up to date with the hub before the UI reads it
    sync_with_hub()
    print("Starting Gradio interface...")

    # Daemon thread, so the worker loop does not keep the process alive
    # once the main thread exits.
    processing_thread = threading.Thread(daemon=True, target=process_thread)
    processing_thread.start()

    demo.launch()