throaway2854 committed on
Commit
e500026
·
verified ·
1 Parent(s): 242dcd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -14
app.py CHANGED
@@ -10,16 +10,87 @@ import tempfile
10
  import numpy as np
11
 
12
  def save_dataset_to_zip(dataset_name, dataset):
13
- # Function implementation remains the same
14
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def load_dataset_from_zip(zip_file):
17
- # Function implementation remains the same
18
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def display_dataset_html(dataset):
21
- # Function implementation remains the same
22
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  with gr.Blocks() as demo:
25
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 20px;'>Dataset Builder</h1>")
@@ -31,12 +102,23 @@ with gr.Blocks() as demo:
31
  message_box = gr.Textbox(interactive=False, label="Message")
32
 
33
  with gr.Tab("Create / Upload Dataset"):
34
- # Create / Upload Dataset components and functions
35
- # ...
 
 
 
 
 
 
 
36
 
37
  def create_dataset(name, datasets):
38
- # Function implementation remains the same
39
- # ...
 
 
 
 
40
 
41
  create_button.click(
42
  create_dataset,
@@ -45,8 +127,13 @@ with gr.Blocks() as demo:
45
  )
46
 
47
  def upload_dataset(zip_file, datasets):
48
- # Function implementation remains the same
49
- # ...
 
 
 
 
 
50
 
51
  upload_button.click(
52
  upload_dataset,
@@ -71,8 +158,10 @@ with gr.Blocks() as demo:
71
  )
72
 
73
  with gr.Tab("Add Entry"):
74
- # Add Entry components and functions
75
- # ...
 
 
76
 
77
  def add_entry(image_data, prompt, current_dataset_name, datasets):
78
  if not current_dataset_name:
 
10
  import numpy as np
11
 
12
  def save_dataset_to_zip(dataset_name, dataset):
13
+ # Create a temporary directory
14
+ temp_dir = tempfile.mkdtemp()
15
+ dataset_path = os.path.join(temp_dir, dataset_name)
16
+ os.makedirs(dataset_path, exist_ok=True)
17
+ images_dir = os.path.join(dataset_path, 'images')
18
+ os.makedirs(images_dir, exist_ok=True)
19
+ annotations = []
20
+ for idx, entry in enumerate(dataset):
21
+ image_data = entry['image']
22
+ prompt = entry['prompt']
23
+ # Save image to images directory
24
+ image_filename = f"{uuid.uuid4().hex}.png"
25
+ image_path = os.path.join(images_dir, image_filename)
26
+ # Decode the base64 image data
27
+ image = Image.open(BytesIO(base64.b64decode(image_data.split(",")[1])))
28
+ image.save(image_path)
29
+ # Add annotation
30
+ annotations.append({
31
+ 'file_name': os.path.join('images', image_filename),
32
+ 'text': prompt
33
+ })
34
+ # Save annotations to JSONL file
35
+ annotations_path = os.path.join(dataset_path, 'annotations.jsonl')
36
+ with open(annotations_path, 'w') as f:
37
+ for ann in annotations:
38
+ f.write(json.dumps(ann) + '\n')
39
+ # Create a zip file
40
+ zip_buffer = BytesIO()
41
+ with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
42
+ for root, dirs, files in os.walk(dataset_path):
43
+ for file in files:
44
+ abs_file = os.path.join(root, file)
45
+ rel_file = os.path.relpath(abs_file, dataset_path)
46
+ zipf.write(abs_file, rel_file)
47
+ zip_buffer.seek(0)
48
+ return zip_buffer
49
 
50
  def load_dataset_from_zip(zip_file):
51
+ temp_dir = tempfile.mkdtemp()
52
+ with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
53
+ zip_ref.extractall(temp_dir)
54
+ dataset_name = os.listdir(temp_dir)[0]
55
+ dataset_path = os.path.join(temp_dir, dataset_name)
56
+ dataset = []
57
+ images_dir = os.path.join(dataset_path, 'images')
58
+ annotations_path = os.path.join(dataset_path, 'annotations.jsonl')
59
+ if os.path.exists(annotations_path):
60
+ with open(annotations_path, 'r') as f:
61
+ for line in f:
62
+ ann = json.loads(line)
63
+ file_name = ann['file_name']
64
+ prompt = ann['text']
65
+ image_path = os.path.join(dataset_path, file_name)
66
+ # Read image and convert to base64
67
+ with open(image_path, 'rb') as img_f:
68
+ image_bytes = img_f.read()
69
+ encoded = base64.b64encode(image_bytes).decode()
70
+ mime_type = "image/png"
71
+ image_data = f"data:{mime_type};base64,{encoded}"
72
+ dataset.append({
73
+ 'image': image_data,
74
+ 'prompt': prompt
75
+ })
76
+ return dataset_name, dataset
77
 
78
  def display_dataset_html(dataset):
79
+ if dataset:
80
+ html_content = ""
81
+ for idx, entry in enumerate(dataset):
82
+ image_data = entry['image']
83
+ prompt = entry['prompt']
84
+ html_content += f"""
85
+ <div style="display: flex; align-items: center; margin-bottom: 10px;">
86
+ <div style="width: 50px;">{idx}</div>
87
+ <img src="{image_data}" alt="Image {idx}" style="max-height: 100px; margin-right: 10px;"/>
88
+ <div>{prompt}</div>
89
+ </div>
90
+ """
91
+ return html_content
92
+ else:
93
+ return "<div>No entries in dataset.</div>"
94
 
95
  with gr.Blocks() as demo:
96
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 20px;'>Dataset Builder</h1>")
 
102
  message_box = gr.Textbox(interactive=False, label="Message")
103
 
104
  with gr.Tab("Create / Upload Dataset"):
105
+ with gr.Row():
106
+ with gr.Column():
107
+ gr.Markdown("### Create a New Dataset")
108
+ dataset_name_input = gr.Textbox(label="New Dataset Name")
109
+ create_button = gr.Button("Create Dataset")
110
+ with gr.Column():
111
+ gr.Markdown("### Upload Existing Dataset")
112
+ upload_input = gr.File(label="Upload Dataset Zip", file_types=['.zip'])
113
+ upload_button = gr.Button("Upload Dataset")
114
 
115
  def create_dataset(name, datasets):
116
+ if not name:
117
+ return gr.update(), "Please enter a dataset name."
118
+ if name in datasets:
119
+ return gr.update(), f"Dataset '{name}' already exists."
120
+ datasets[name] = []
121
+ return gr.update(choices=list(datasets.keys()), value=name), f"Dataset '{name}' created."
122
 
123
  create_button.click(
124
  create_dataset,
 
127
  )
128
 
129
  def upload_dataset(zip_file, datasets):
130
+ if zip_file is None:
131
+ return gr.update(), "Please upload a zip file."
132
+ dataset_name, dataset = load_dataset_from_zip(zip_file)
133
+ if dataset_name in datasets:
134
+ return gr.update(), f"Dataset '{dataset_name}' already exists."
135
+ datasets[dataset_name] = dataset
136
+ return gr.update(choices=list(datasets.keys()), value=dataset_name), f"Dataset '{dataset_name}' uploaded."
137
 
138
  upload_button.click(
139
  upload_dataset,
 
158
  )
159
 
160
  with gr.Tab("Add Entry"):
161
+ with gr.Row():
162
+ image_input = gr.Image(label="Upload Image")
163
+ prompt_input = gr.Textbox(label="Prompt")
164
+ add_button = gr.Button("Add Entry")
165
 
166
  def add_entry(image_data, prompt, current_dataset_name, datasets):
167
  if not current_dataset_name: