throaway2854 committed on
Commit
b1c343c
·
verified ·
1 Parent(s): 9e78451

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -366
app.py CHANGED
@@ -1,393 +1,91 @@
 
1
  import gradio as gr
 
2
  import os
3
  import zipfile
4
- import json
5
- from io import BytesIO
6
- import base64
7
- from PIL import Image
8
- import uuid
9
- import tempfile
10
- import numpy as np
11
-
12
def save_dataset_to_zip(dataset_name, dataset):
    """Pack a dataset into an in-memory zip archive.

    Archive layout::

        <dataset_name>/annotations.jsonl
        <dataset_name>/images/<random hex>.png

    Every image is decoded from its data URL and re-encoded as PNG. The
    archive is assembled directly in memory, so no temporary directory is
    created (the previous implementation left one behind on every call).

    Args:
        dataset_name: Name used as the top-level folder inside the archive.
        dataset: Iterable of dicts holding an ``'image'`` data URL of the
            form ``data:<mime>;base64,<payload>`` and a ``'prompt'`` string.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the zip archive.
    """
    annotation_lines = []
    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for entry in dataset:
            # The payload is everything after the "data:<mime>;base64," prefix.
            encoded = entry['image'].split(",")[1]
            image = Image.open(BytesIO(base64.b64decode(encoded)))
            png_bytes = BytesIO()
            image.save(png_bytes, format="PNG")

            # Forward slashes keep the manifest portable across platforms.
            relative_name = f"images/{uuid.uuid4().hex}.png"
            zipf.writestr(f"{dataset_name}/{relative_name}", png_bytes.getvalue())
            annotation_lines.append(
                json.dumps({'file_name': relative_name, 'text': entry['prompt']}) + '\n'
            )

        zipf.writestr(f"{dataset_name}/annotations.jsonl", ''.join(annotation_lines))

    zip_buffer.seek(0)
    return zip_buffer
54
-
55
def load_dataset_from_zip(zip_file_path):
    """Load a dataset from a zip archive.

    The archive is expected to contain ``annotations.jsonl`` (one JSON object
    per line with ``file_name`` and ``text`` keys) plus the referenced image
    files, either inside a top-level folder or directly at the archive root.

    Args:
        zip_file_path: Path of the zip file on disk.

    Returns:
        ``(dataset_name, dataset)`` where ``dataset`` is a list of dicts with
        an ``'image'`` data URL and a ``'prompt'`` string, or ``(None, [])``
        when the archive cannot be read or has no ``annotations.jsonl``.
    """
    import mimetypes

    try:
        # Everything is read into memory before returning, so the extraction
        # directory can be removed as soon as the function exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
                zip_ref.extractall(temp_dir)

            # First guess: the top-level folder is named after the zip file.
            dataset_name = os.path.splitext(os.path.basename(zip_file_path))[0]
            dataset_path = os.path.join(temp_dir, dataset_name)
            if not os.path.exists(dataset_path):
                subdirs = sorted(
                    entry for entry in os.listdir(temp_dir)
                    if os.path.isdir(os.path.join(temp_dir, entry))
                )
                if subdirs:
                    # Sorted so the choice does not depend on listing order.
                    dataset_name = subdirs[0]
                    dataset_path = os.path.join(temp_dir, dataset_name)
                else:
                    # Files sit directly at the archive root.
                    dataset_path = temp_dir

            annotations_path = os.path.join(dataset_path, 'annotations.jsonl')
            if not os.path.exists(annotations_path):
                return None, []

            dataset_root = os.path.realpath(dataset_path)
            dataset = []
            with open(annotations_path, 'r', encoding='utf-8') as f:
                for line in f:
                    if not line.strip():
                        continue  # tolerate blank / trailing lines
                    ann = json.loads(line)
                    image_path = os.path.realpath(
                        os.path.join(dataset_root, ann['file_name'])
                    )
                    # The manifest comes from an uploaded archive: never read
                    # a file that resolves outside the dataset folder.
                    if os.path.commonpath([dataset_root, image_path]) != dataset_root:
                        raise ValueError(
                            f"annotation points outside the dataset: {ann['file_name']!r}"
                        )

                    with open(image_path, 'rb') as img_f:
                        encoded = base64.b64encode(img_f.read()).decode()
                    mime_type = mimetypes.guess_type(image_path)[0] or ""
                    if not mime_type.startswith("image/"):
                        mime_type = "image/png"

                    dataset.append({
                        'image': f"data:{mime_type};base64,{encoded}",
                        'prompt': ann['text']
                    })

            return dataset_name, dataset
    except Exception as e:
        # UI boundary: any failure is reported to the caller as "no dataset".
        print(f"Error loading dataset: {e}")
        return None, []
108
-
109
def display_dataset_html(dataset, page_number=0, items_per_page=2):
    """Render one page of the dataset as a horizontal strip of HTML cards.

    Prompts and image sources come from users or uploaded archives, so both
    are HTML-escaped before being placed in the markup.

    Args:
        dataset: List of dicts with an ``'image'`` data URL and a ``'prompt'``.
        page_number: Zero-based page to render.
        items_per_page: Number of entries shown per page.

    Returns:
        An HTML string. A placeholder ``<div>`` is returned when the dataset
        is empty or ``None``.
    """
    import html

    if not dataset:
        return "<div>No entries in dataset.</div>"

    start_idx = page_number * items_per_page
    page_entries = dataset[start_idx:start_idx + items_per_page]

    cards = []
    for idx, entry in enumerate(page_entries, start=start_idx):
        image_src = html.escape(str(entry['image']), quote=True)
        prompt = html.escape(str(entry['prompt']))
        cards.append(f"""
        <div style="display: flex; flex-direction: column; align-items: center; margin-right: 20px;">
            <div style="margin-bottom: 5px;">{idx}</div>
            <img src="{image_src}" alt="Image {idx}" style="max-height: 150px;"/>
            <div style="max-width: 150px; word-wrap: break-word; text-align: center;">{prompt}</div>
        </div>
        """)

    container_open = '''
    <div style="display: flex; overflow-x: auto; padding: 10px; border: 1px solid #ccc;">
    '''
    return container_open + ''.join(cards) + '</div>'
132
-
133
- #Interface
134
# Number of entries shown per viewer page. The renderer and the pager must
# agree on this value, otherwise "Next Page" stops before the last entries.
ITEMS_PER_PAGE = 2


def _entry_options(dataset):
    """Build the "<index>: <prompt prefix>" labels shown in the entry selector."""
    return [f"{idx}: {entry['prompt'][:30]}" for idx, entry in enumerate(dataset)]


def _entry_index(entry_option, dataset):
    """Return the index encoded in a selector label, or None if it is stale."""
    try:
        index = int(entry_option.split(":")[0])
    except (AttributeError, ValueError):
        return None
    return index if 0 <= index < len(dataset) else None


with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1px;'>Dataset Creator</h1>")
    gr.Markdown("You must create/upload a dataset before selecting one")

    # Per-session state: all datasets by name, the selected one, the viewer page.
    datasets = gr.State({})
    current_dataset_name = gr.State("")
    current_page_number = gr.State(0)

    # Top-level components
    with gr.Column():
        dataset_selector = gr.Dropdown(label="Select Dataset", interactive=True)
        message_box = gr.Textbox(interactive=False, label="Message")

    # Dataset viewer and pagination controls
    with gr.Column():
        gr.Markdown("### Dataset Viewer")
        dataset_html = gr.HTML()
        with gr.Row():
            prev_button = gr.Button("Previous Page")
            next_button = gr.Button("Next Page")

    # Tabs
    with gr.Tabs():
        with gr.TabItem("Create / Upload Dataset"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Create a New Dataset")
                    dataset_name_input = gr.Textbox(label="New Dataset Name")
                    create_button = gr.Button("Create Dataset")
                with gr.Column():
                    gr.Markdown("### Upload Existing Dataset")
                    upload_input = gr.File(label="Upload Dataset Zip", type="filepath", file_types=['.zip'])
                    upload_button = gr.Button("Upload Dataset")

            def create_dataset(name, datasets):
                """Register an empty dataset and select it in the dropdown."""
                if not name:
                    return gr.update(), "Please enter a dataset name."
                if name in datasets:
                    return gr.update(), f"Dataset '{name}' already exists."
                datasets[name] = []
                return gr.update(choices=list(datasets.keys()), value=name), f"Dataset '{name}' created."

            create_button.click(
                create_dataset,
                inputs=[dataset_name_input, datasets],
                outputs=[dataset_selector, message_box]
            )

            def upload_dataset(zip_file_path, datasets):
                """Load a dataset from an uploaded zip and select it."""
                if not zip_file_path:
                    return gr.update(), "Please upload a zip file."
                dataset_name, dataset = load_dataset_from_zip(zip_file_path)
                if dataset_name is None:
                    return gr.update(), "Failed to load dataset from zip file."
                if dataset_name in datasets:
                    return gr.update(), f"Dataset '{dataset_name}' already exists."
                datasets[dataset_name] = dataset
                return gr.update(choices=list(datasets.keys()), value=dataset_name), f"Dataset '{dataset_name}' uploaded."

            upload_button.click(
                upload_dataset,
                inputs=[upload_input, datasets],
                outputs=[dataset_selector, message_box]
            )

        with gr.TabItem("Add Entry"):
            with gr.Row():
                image_input = gr.Image(label="Upload Image", type="numpy")
                prompt_input = gr.Textbox(label="Prompt")
            add_button = gr.Button("Add Entry")

            def add_entry(image_data, prompt, current_dataset_name, datasets):
                """Append an image/prompt pair and show the first viewer page."""
                if not current_dataset_name:
                    return datasets, gr.update(), gr.update(), "No dataset selected."
                if image_data is None or not prompt:
                    return datasets, gr.update(), gr.update(), "Please provide both an image and a prompt."
                # Entries are stored as PNG data URLs.
                image = Image.fromarray(image_data.astype('uint8'))
                buffered = BytesIO()
                image.save(buffered, format="PNG")
                img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
                dataset = datasets[current_dataset_name]
                dataset.append({'image': f"data:image/png;base64,{img_str}", 'prompt': prompt})
                # Reset to the first page and refresh the viewer.
                page_number = 0
                html_content = display_dataset_html(
                    dataset, page_number=page_number, items_per_page=ITEMS_PER_PAGE
                )
                return datasets, page_number, gr.update(value=html_content), f"Entry added to dataset '{current_dataset_name}'."

            add_event = add_button.click(
                add_entry,
                inputs=[image_input, prompt_input, current_dataset_name, datasets],
                outputs=[datasets, current_page_number, dataset_html, message_box]
            )

        with gr.TabItem("Edit / Delete Entry"):
            with gr.Column():
                selected_image = gr.Image(label="Selected Image", interactive=False, type="numpy")
                selected_prompt = gr.Textbox(label="Current Prompt", interactive=False)
                entry_selector = gr.Dropdown(label="Select Entry to Edit/Delete")
                new_prompt_input = gr.Textbox(label="New Prompt (for Edit)")
                with gr.Row():
                    edit_button = gr.Button("Edit Entry")
                    delete_button = gr.Button("Delete Entry")

            def update_selected_entry(entry_option, current_dataset_name, datasets):
                """Preview the image and prompt of the entry chosen in the selector."""
                if not current_dataset_name or not entry_option:
                    return gr.update(), gr.update()
                dataset = datasets.get(current_dataset_name, [])
                index = _entry_index(entry_option, dataset)
                if index is None:
                    # Stale label (entry deleted or another dataset selected).
                    return gr.update(), gr.update()
                entry = dataset[index]
                # Decode the base64 data URL into a numpy array for gr.Image.
                image_bytes = base64.b64decode(entry['image'].split(",")[1])
                image_array = np.array(Image.open(BytesIO(image_bytes)))
                return gr.update(value=image_array), gr.update(value=entry['prompt'])

            entry_selector.change(
                update_selected_entry,
                inputs=[entry_selector, current_dataset_name, datasets],
                outputs=[selected_image, selected_prompt]
            )

            def edit_entry(entry_option, new_prompt, current_dataset_name, datasets, current_page_number):
                """Replace the prompt of the selected entry."""
                unchanged = (datasets, gr.update(), gr.update(), gr.update())
                if not current_dataset_name:
                    return (*unchanged, "No dataset selected.")
                if not entry_option or not new_prompt.strip():
                    return (*unchanged, "Please select an entry and provide a new prompt.")
                dataset = datasets[current_dataset_name]
                index = _entry_index(entry_option, dataset)
                if index is None:
                    return (*unchanged, "The selected entry no longer exists.")
                dataset[index]['prompt'] = new_prompt
                html_content = display_dataset_html(
                    dataset, page_number=current_page_number, items_per_page=ITEMS_PER_PAGE
                )
                # Labels embed the prompt, so re-point the selection at the
                # edited entry's new label.
                entry_options = _entry_options(dataset)
                return (
                    datasets,
                    gr.update(value=html_content),
                    gr.update(choices=entry_options, value=entry_options[index]),
                    gr.update(value=""),
                    f"Entry {index} updated.",
                )

            edit_button.click(
                edit_entry,
                inputs=[entry_selector, new_prompt_input, current_dataset_name, datasets, current_page_number],
                outputs=[datasets, dataset_html, entry_selector, new_prompt_input, message_box]
            )

            def delete_entry(entry_option, current_dataset_name, datasets, current_page_number):
                """Remove the selected entry from the current dataset."""
                # Exactly one value per output component (five in total).
                unchanged = (datasets, gr.update(), gr.update(), gr.update())
                if not current_dataset_name:
                    return (*unchanged, "No dataset selected.")
                if not entry_option:
                    return (*unchanged, "Please select an entry to delete.")
                dataset = datasets[current_dataset_name]
                index = _entry_index(entry_option, dataset)
                if index is None:
                    return (*unchanged, "The selected entry no longer exists.")
                del dataset[index]
                html_content = display_dataset_html(
                    dataset, page_number=current_page_number, items_per_page=ITEMS_PER_PAGE
                )
                # Indices shift after a deletion, so clear the selection.
                return (
                    datasets,
                    gr.update(value=html_content),
                    gr.update(choices=_entry_options(dataset), value=None),
                    gr.update(value=None),
                    f"Entry {index} deleted.",
                )

            delete_button.click(
                delete_entry,
                inputs=[entry_selector, current_dataset_name, datasets, current_page_number],
                outputs=[datasets, dataset_html, entry_selector, selected_image, message_box]
            )

            def update_entry_selector(current_dataset_name, datasets):
                """Refresh the selector choices for the given dataset."""
                if current_dataset_name in datasets:
                    return gr.update(choices=_entry_options(datasets[current_dataset_name]))
                return gr.update(choices=[])

            def reset_entry_selector(dataset_name, datasets):
                """Load the choices of a newly selected dataset and clear the selection."""
                if dataset_name in datasets:
                    return gr.update(choices=_entry_options(datasets[dataset_name]), value=None)
                return gr.update(choices=[], value=None)

            # Read the dropdown value directly: the ``current_dataset_name``
            # state is updated by a sibling handler on this same event and
            # may still hold the previous name when this handler runs.
            dataset_selector.change(
                reset_entry_selector,
                inputs=[dataset_selector, datasets],
                outputs=[entry_selector]
            )

            # Refresh only after ``add_entry`` has finished mutating the dataset.
            add_event.then(
                update_entry_selector,
                inputs=[current_dataset_name, datasets],
                outputs=[entry_selector]
            )

        with gr.TabItem("Download Dataset"):
            download_button = gr.Button("Download Dataset")
            download_output = gr.File(label="Download Zip", interactive=False)

            def download_dataset(current_dataset_name, datasets):
                """Write the current dataset to a zip file and offer it for download."""
                if not current_dataset_name:
                    return None, "No dataset selected."
                dataset = datasets.get(current_dataset_name)
                if not dataset:
                    return None, "Dataset is empty."
                zip_buffer = save_dataset_to_zip(current_dataset_name, dataset)
                # gr.File needs a path on disk, so spill the buffer to a temp file.
                temp_dir = tempfile.mkdtemp()
                zip_path = os.path.join(temp_dir, f"{current_dataset_name}.zip")
                with open(zip_path, 'wb') as f:
                    f.write(zip_buffer.getvalue())
                return zip_path, f"Dataset '{current_dataset_name}' is ready for download."

            download_button.click(
                download_dataset,
                inputs=[current_dataset_name, datasets],
                outputs=[download_output, message_box]
            )

    def select_dataset(dataset_name, datasets):
        """Make the chosen dataset current and show its first page."""
        if dataset_name in datasets:
            html_content = display_dataset_html(
                datasets[dataset_name], page_number=0, items_per_page=ITEMS_PER_PAGE
            )
            return dataset_name, 0, gr.update(value=html_content), f"Dataset '{dataset_name}' selected."
        return "", 0, gr.update(value="<div>Select a dataset.</div>"), ""

    dataset_selector.change(
        select_dataset,
        inputs=[dataset_selector, datasets],
        outputs=[current_dataset_name, current_page_number, dataset_html, message_box]
    )

    def change_page(action, current_page_number, datasets, current_dataset_name):
        """Move the viewer one page forward ("next") or back ("prev")."""
        if not current_dataset_name:
            return current_page_number, gr.update(), "No dataset selected."
        dataset = datasets.get(current_dataset_name, [])
        total_pages = (len(dataset) - 1) // ITEMS_PER_PAGE + 1
        if action == "next":
            if current_page_number + 1 < total_pages:
                current_page_number += 1
        elif action == "prev":
            if current_page_number > 0:
                current_page_number -= 1
        html_content = display_dataset_html(
            dataset, page_number=current_page_number, items_per_page=ITEMS_PER_PAGE
        )
        return current_page_number, gr.update(value=html_content), ""

    prev_button.click(
        fn=lambda current_page_number, datasets, current_dataset_name: change_page("prev", current_page_number, datasets, current_dataset_name),
        inputs=[current_page_number, datasets, current_dataset_name],
        outputs=[current_page_number, dataset_html, message_box]
    )

    next_button.click(
        fn=lambda current_page_number, datasets, current_dataset_name: change_page("next", current_page_number, datasets, current_dataset_name),
        inputs=[current_page_number, datasets, current_dataset_name],
        outputs=[current_page_number, dataset_html, message_box]
    )

    def initialize_components(datasets):
        """Populate the dataset dropdown when the page loads."""
        return gr.update(choices=list(datasets.keys()))

    demo.load(
        initialize_components,
        inputs=[datasets],
        outputs=[dataset_selector]
    )

demo.launch()
 
1
+ # Import necessary libraries
2
  import gradio as gr
3
+ import json
4
  import os
5
  import zipfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # Define helper functions
 
 
 
 
8
 
9
def create_dataset(dataset_name):
    """Create an empty dataset archive named ``<dataset_name>.zip``.

    The archive is created in the current working directory with an empty
    ``images/`` folder and an empty ``data.jsonl`` manifest. An archive that
    already exists is left untouched.

    Args:
        dataset_name: Plain dataset name without directory components.

    Returns:
        The path of the archive.

    Raises:
        ValueError: If the name is empty, is not a string, or contains path
            separators. The name comes from a UI text box, so it must not be
            able to point outside the working directory.
    """
    if not isinstance(dataset_name, str) or not dataset_name.strip():
        raise ValueError('A dataset name is required.')
    if '/' in dataset_name or '\\' in dataset_name or dataset_name.strip() in ('.', '..'):
        raise ValueError(f'Invalid dataset name: {dataset_name!r}')

    dataset_path = f'{dataset_name}.zip'
    if not os.path.exists(dataset_path):
        with zipfile.ZipFile(dataset_path, 'w') as zip_file:
            zip_file.writestr('images/', '')
            zip_file.writestr('data.jsonl', '')

    return dataset_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
 
 
 
 
 
18
 
19
# Name of the manifest member inside a dataset archive (one JSON object per line).
_MANIFEST = 'data.jsonl'


def _load_records(dataset_path, missing_ok=False):
    """Return every manifest record stored in the archive.

    Archives written by earlier versions may hold several ``data.jsonl``
    members (one per upload) or a single JSON array; all of them are read, in
    archive order, so no record is lost.
    """
    if missing_ok and not os.path.exists(dataset_path):
        return []
    records = []
    with zipfile.ZipFile(dataset_path, 'r') as archive:
        for info in archive.infolist():
            if info.filename != _MANIFEST:
                continue
            text = archive.read(info).decode('utf-8').strip()
            if not text:
                continue
            if text.startswith('['):
                records.extend(json.loads(text))
            else:
                records.extend(json.loads(line) for line in text.splitlines() if line.strip())
    return records


def _rewrite_archive(dataset_path, records, added=None, removed=()):
    """Rebuild the archive with a single manifest holding ``records``.

    A zip member cannot be modified in place, so every other member is copied
    into a temporary archive, which then atomically replaces the original.
    ``added`` maps member names to bytes to store; ``removed`` lists member
    names to drop.
    """
    import tempfile

    added = dict(added or {})
    skipped = {_MANIFEST, *removed, *added}
    directory = os.path.dirname(os.path.abspath(dataset_path))
    handle, tmp_path = tempfile.mkstemp(suffix='.zip', dir=directory)
    os.close(handle)
    try:
        with zipfile.ZipFile(tmp_path, 'w') as target:
            if os.path.exists(dataset_path):
                with zipfile.ZipFile(dataset_path, 'r') as source:
                    # Keep only the newest member for each (possibly duplicated) name.
                    latest = {info.filename: info for info in source.infolist()}
                    for name, info in latest.items():
                        if name not in skipped:
                            target.writestr(name, source.read(info))
            for name, payload in added.items():
                target.writestr(name, payload)
            target.writestr(_MANIFEST, ''.join(json.dumps(record) + '\n' for record in records))
        os.replace(tmp_path, dataset_path)
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def upload_pair(dataset_path, image, prompt):
    """Add an image and its prompt to the dataset archive.

    Args:
        dataset_path: Path of the dataset zip. It is created when missing.
        image: A filesystem path, or a file-like object exposing ``name``
            and, optionally, ``read()``.
        prompt: Text stored with the image.

    Raises:
        ValueError: If no file name can be determined for ``image``.

    The image is stored as ``images/<base name>``. Uploading another image
    with the same base name replaces the earlier image and its record.
    """
    source = image if isinstance(image, (str, os.PathLike)) else getattr(image, 'name', None)
    if not source:
        raise ValueError('An image file is required.')
    # Upload handlers usually expose a full temporary path; keep the base name only.
    member = f'images/{os.path.basename(os.fspath(source))}'

    reader = getattr(image, 'read', None)
    if callable(reader):
        payload = reader()
    else:
        with open(source, 'rb') as handle:
            payload = handle.read()

    records = [
        record for record in _load_records(dataset_path, missing_ok=True)
        if record.get('image') != member
    ]
    records.append({'image': member, 'prompt': prompt})
    _rewrite_archive(dataset_path, records, added={member: payload})


def edit_prompt(dataset_path, image_path, new_prompt):
    """Replace the prompt of the first record whose image is ``image_path``.

    The images in the archive are preserved. Nothing is written when no
    record matches.
    """
    records = _load_records(dataset_path)
    for record in records:
        if record.get('image') == image_path:
            record['prompt'] = new_prompt
            _rewrite_archive(dataset_path, records)
            return


def delete_pair(dataset_path, image_path):
    """Remove the record for ``image_path`` and its image from the archive.

    All other images and records are preserved.
    """
    records = [
        record for record in _load_records(dataset_path)
        if record.get('image') != image_path
    ]
    _rewrite_archive(dataset_path, records, removed=(image_path,))
 
 
 
46
 
 
 
 
 
 
 
 
 
47
 
48
def download_dataset(dataset_path):
    """Return the path of the dataset archive to offer for download.

    Callers may pass either the archive path itself or the bare dataset name;
    archives are stored as ``<name>.zip``, so a name whose ``.zip`` file
    exists is resolved to that file. Any other value is returned unchanged.

    Args:
        dataset_path: Archive path or dataset name.

    Returns:
        The resolved archive path, or ``dataset_path`` as given.
    """
    if dataset_path and not os.path.exists(dataset_path):
        candidate = f'{dataset_path}.zip'
        if os.path.exists(candidate):
            return candidate
    return dataset_path
 
 
 
 
50
 
51
+ # Define Gradio application
 
 
 
 
 
52
 
53
def _archive_path(name):
    """Map the dataset name typed in the UI to its ``<name>.zip`` archive path."""
    if not name or not name.strip():
        raise gr.Error('Enter a dataset name first.')
    return f'{name}.zip'


def _on_create(name):
    """Create the archive for ``name`` and report where it lives."""
    _archive_path(name)  # reject an empty name with a UI error
    return f'Dataset ready: {create_dataset(name)}'


def _on_upload_pair(name, image, prompt):
    """Store an uploaded image together with its prompt."""
    if image is None:
        raise gr.Error('Choose an image to upload.')
    archive = _archive_path(name)
    if isinstance(image, str):
        # Some Gradio versions hand over a file path rather than a file object.
        with open(image, 'rb') as handle:
            upload_pair(archive, handle, prompt)
    else:
        upload_pair(archive, image, prompt)
    return f'Added a pair to {archive}.'


def _on_edit(name, image_path, new_prompt):
    """Change the prompt stored for ``image_path``."""
    edit_prompt(_archive_path(name), image_path, new_prompt)
    return f'Updated the prompt for {image_path}.'


def _on_delete(name, image_path):
    """Remove the pair stored for ``image_path``."""
    delete_pair(_archive_path(name), image_path)
    return f'Deleted {image_path}.'


def _on_download(name):
    """Return the archive path so the File component can serve it."""
    archive = download_dataset(_archive_path(name))
    if not os.path.exists(archive):
        raise gr.Error(f'Dataset "{name}" does not exist yet.')
    return archive


def _on_upload_dataset(uploaded):
    """Copy an uploaded zip next to the app and select it as the current dataset."""
    import shutil

    if uploaded is None:
        raise gr.Error('Choose a dataset zip to upload.')
    source = uploaded if isinstance(uploaded, str) else uploaded.name
    if not zipfile.is_zipfile(source):
        raise gr.Error('The uploaded file is not a zip archive.')
    name = os.path.splitext(os.path.basename(source))[0]
    target = f'{name}.zip'
    if os.path.exists(target):
        # Never overwrite an existing dataset silently.
        raise gr.Error(f'Dataset "{name}" already exists.')
    shutil.copyfile(source, target)
    return name, f'Dataset "{name}" uploaded.'


demo = gr.Blocks()

with demo:
    status = gr.Textbox(label='Status', interactive=False)

    # Create dataset
    dataset_name = gr.Textbox(label='Dataset Name')
    create_button = gr.Button('Create Dataset')
    create_button.click(_on_create, inputs=[dataset_name], outputs=[status])

    # Upload pair
    image_upload = gr.File(label='Image')
    prompt = gr.Textbox(label='Prompt')
    upload_button = gr.Button('Upload Pair')
    upload_button.click(_on_upload_pair, inputs=[dataset_name, image_upload, prompt], outputs=[status])

    # Edit prompt
    image_path = gr.Textbox(label='Image Path')
    new_prompt = gr.Textbox(label='New Prompt')
    edit_button = gr.Button('Edit Prompt')
    edit_button.click(_on_edit, inputs=[dataset_name, image_path, new_prompt], outputs=[status])

    # Delete pair
    delete_button = gr.Button('Delete Pair')
    delete_button.click(_on_delete, inputs=[dataset_name, image_path], outputs=[status])

    # Download dataset: the returned path needs a File output to be served.
    download_button = gr.Button('Download Dataset')
    download_file = gr.File(label='Download', interactive=False)
    download_button.click(_on_download, inputs=[dataset_name], outputs=[download_file])

    # Upload dataset
    dataset_upload = gr.File(label='Dataset')
    upload_dataset_button = gr.Button('Upload Dataset')
    upload_dataset_button.click(_on_upload_dataset, inputs=[dataset_upload], outputs=[dataset_name, status])

    # Horizontal gallery. A component created inside the ``with demo:``
    # context is attached to the layout automatically; Blocks has no
    # ``append`` method.
    gallery = gr.Gallery(label='Dataset Gallery')

# Launch Gradio application
demo.launch()