throaway2854 committed
Commit c80f72f · verified · 1 Parent(s): abf11ff

Update app.py

Files changed (1)
  1. app.py +64 -29
app.py CHANGED
@@ -16,64 +16,97 @@ def save_dataset_to_zip(dataset_name, dataset):
     os.makedirs(dataset_path, exist_ok=True)
     images_dir = os.path.join(dataset_path, 'images')
     os.makedirs(images_dir, exist_ok=True)
+
     annotations = []
     for idx, entry in enumerate(dataset):
         image_data = entry['image']
         prompt = entry['prompt']
+
         # Save image to images directory
         image_filename = f"{uuid.uuid4().hex}.png"
         image_path = os.path.join(images_dir, image_filename)
         # Decode the base64 image data
         image = Image.open(BytesIO(base64.b64decode(image_data.split(",")[1])))
         image.save(image_path)
+
         # Add annotation
         annotations.append({
             'file_name': os.path.join('images', image_filename),
             'text': prompt
         })
+
     # Save annotations to JSONL file
     annotations_path = os.path.join(dataset_path, 'annotations.jsonl')
     with open(annotations_path, 'w') as f:
         for ann in annotations:
             f.write(json.dumps(ann) + '\n')
-    # Create a zip file
+
+    # Create a zip file with the dataset_name as the top-level folder
     zip_buffer = BytesIO()
     with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zipf:
         for root, dirs, files in os.walk(dataset_path):
             for file in files:
                 abs_file = os.path.join(root, file)
-                rel_file = os.path.relpath(abs_file, dataset_path)
+                rel_file = os.path.relpath(abs_file, temp_dir)
                 zipf.write(abs_file, rel_file)
+
     zip_buffer.seek(0)
     return zip_buffer
 
 def load_dataset_from_zip(zip_file):
     temp_dir = tempfile.mkdtemp()
-    with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
-        zip_ref.extractall(temp_dir)
-    dataset_name = os.listdir(temp_dir)[0]
-    dataset_path = os.path.join(temp_dir, dataset_name)
-    dataset = []
-    images_dir = os.path.join(dataset_path, 'images')
-    annotations_path = os.path.join(dataset_path, 'annotations.jsonl')
-    if os.path.exists(annotations_path):
-        with open(annotations_path, 'r') as f:
-            for line in f:
-                ann = json.loads(line)
-                file_name = ann['file_name']
-                prompt = ann['text']
-                image_path = os.path.join(dataset_path, file_name)
-                # Read image and convert to base64
-                with open(image_path, 'rb') as img_f:
-                    image_bytes = img_f.read()
-                encoded = base64.b64encode(image_bytes).decode()
-                mime_type = "image/png"
-                image_data = f"data:{mime_type};base64,{encoded}"
-                dataset.append({
-                    'image': image_data,
-                    'prompt': prompt
-                })
-    return dataset_name, dataset
+    try:
+        with zipfile.ZipFile(zip_file.name, 'r') as zip_ref:
+            zip_ref.extractall(temp_dir)
+
+        # Get dataset name from zip file name
+        dataset_name_guess = os.path.splitext(os.path.basename(zip_file.name))[0]
+        dataset_path = os.path.join(temp_dir, dataset_name_guess)
+
+        if os.path.exists(dataset_path):
+            dataset_name = dataset_name_guess
+        else:
+            # If the dataset_name directory doesn't exist, try to find the top-level directory
+            entries = [entry for entry in os.listdir(temp_dir) if os.path.isdir(os.path.join(temp_dir, entry))]
+            if entries:
+                dataset_name = entries[0]
+                dataset_path = os.path.join(temp_dir, dataset_name)
+            else:
+                # Files are directly in temp_dir
+                dataset_name = dataset_name_guess
+                dataset_path = temp_dir
+
+        images_dir = os.path.join(dataset_path, 'images')
+        annotations_path = os.path.join(dataset_path, 'annotations.jsonl')
+        dataset = []
+
+        if os.path.exists(annotations_path):
+            with open(annotations_path, 'r') as f:
+                for line in f:
+                    ann = json.loads(line)
+                    file_name = ann['file_name']
+                    prompt = ann['text']
+                    image_path = os.path.join(dataset_path, file_name)
+
+                    # Read image and convert to base64
+                    with open(image_path, 'rb') as img_f:
+                        image_bytes = img_f.read()
+                    encoded = base64.b64encode(image_bytes).decode()
+                    mime_type = "image/png"
+                    image_data = f"data:{mime_type};base64,{encoded}"
+
+                    dataset.append({
+                        'image': image_data,
+                        'prompt': prompt
+                    })
+        else:
+            # If annotations file not found
+            return None, []
+
+        return dataset_name, dataset
+    except Exception as e:
+        print(f"Error loading dataset: {e}")
+        return None, []
 
 def display_dataset_html(dataset):
     if dataset:
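The one-line relpath change in save_dataset_to_zip is what the new comment describes: assuming dataset_path sits directly under temp_dir (both are defined just above the hunk, outside this diff), switching the relpath base from dataset_path to temp_dir prepends the dataset name to every archived path. A minimal sketch with illustrative paths (POSIX separators assumed):

import os

temp_dir = "/tmp/build"  # illustrative, not the app's actual temp dir
dataset_path = os.path.join(temp_dir, "my_dataset")
abs_file = os.path.join(dataset_path, "images", "abc.png")

print(os.path.relpath(abs_file, dataset_path))  # images/abc.png -> files at the zip root (old)
print(os.path.relpath(abs_file, temp_dir))      # my_dataset/images/abc.png -> named top-level folder (new)

This named top-level folder is also why the rewritten loader first guesses the directory from the zip file name and then falls back to scanning temp_dir.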
@@ -97,7 +130,7 @@ with gr.Blocks() as demo:
     datasets = gr.State({})
     current_dataset_name = gr.State("")
     dataset_selector = gr.Dropdown(label="Select Dataset", interactive=True)
-    entry_selector = gr.Dropdown(label="Select Entry to Edit/Delete") # Moved outside
+    entry_selector = gr.Dropdown(label="Select Entry to Edit/Delete")
     dataset_html = gr.HTML()
     message_box = gr.Textbox(interactive=False, label="Message")
 
@@ -109,7 +142,7 @@ with gr.Blocks() as demo:
             create_button = gr.Button("Create Dataset")
         with gr.Column():
             gr.Markdown("### Upload Existing Dataset")
-            upload_input = gr.File(label="Upload Dataset Zip", file_types=['.zip'])
+            upload_input = gr.File(label="Upload Dataset Zip", type="file")
             upload_button = gr.Button("Upload Dataset")
 
     def create_dataset(name, datasets):
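For context on the gr.File change: with Gradio 3.x and type="file", the upload handler receives a tempfile wrapper whose .name attribute is the on-disk path of the upload, which is exactly what load_dataset_from_zip opens via zip_file.name. A minimal sketch of that wiring, assuming Gradio 3.x (the handler name and labels are illustrative):

import gradio as gr

def handle_upload(zip_file):
    if zip_file is None:
        return "No file uploaded."
    # type="file" passes a tempfile wrapper; .name is the on-disk path
    return f"Received: {zip_file.name}"

with gr.Blocks() as demo:
    upload = gr.File(label="Upload Dataset Zip", type="file")
    status = gr.Textbox(interactive=False)
    upload.upload(handle_upload, inputs=upload, outputs=status)

demo.launch()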
@@ -130,6 +163,8 @@ with gr.Blocks() as demo:
         if zip_file is None:
            return gr.update(), "Please upload a zip file."
         dataset_name, dataset = load_dataset_from_zip(zip_file)
+        if dataset_name is None:
+            return gr.update(), "Failed to load dataset from zip file."
         if dataset_name in datasets:
             return gr.update(), f"Dataset '{dataset_name}' already exists."
         datasets[dataset_name] = dataset
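The (None, []) sentinel added to load_dataset_from_zip is what the new guard above checks. A hedged round-trip test of that contract, runnable alongside app.py's functions (make_test_zip and the 'demo' name are illustrative; the PNG bytes are placeholders, since the loader base64-encodes the file without decoding it):

import json
import os
import tempfile
import zipfile
from types import SimpleNamespace

def make_test_zip(path):
    # Lay files out the way save_dataset_to_zip now does: a named top-level folder.
    with zipfile.ZipFile(path, "w") as zf:
        ann = {"file_name": "images/x.png", "text": "a prompt"}
        zf.writestr("demo/annotations.jsonl", json.dumps(ann) + "\n")
        zf.writestr("demo/images/x.png", b"\x89PNG\r\n\x1a\n")  # placeholder bytes

zip_path = os.path.join(tempfile.mkdtemp(), "demo.zip")
make_test_zip(zip_path)

# load_dataset_from_zip only touches zip_file.name, so any object with that attribute works:
name, data = load_dataset_from_zip(SimpleNamespace(name=zip_path))
print(name, len(data))  # expected: demo 1; any failure instead yields (None, [])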
 
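Finally, both functions share one convention worth making explicit: each entry's 'image' field is a base64 data URL, decoded with split(",")[1] on save and rebuilt with a data:image/png;base64, prefix on load. A self-contained sketch of that round-trip (the 1×1 red image is illustrative):

import base64
from io import BytesIO
from PIL import Image

# Encode: PNG bytes -> data URL (the format stored in each dataset entry)
buf = BytesIO()
Image.new("RGB", (1, 1), "red").save(buf, format="PNG")
data_url = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

# Decode: the same split(",")[1] app.py uses to recover the raw PNG bytes
raw = base64.b64decode(data_url.split(",")[1])
assert Image.open(BytesIO(raw)).size == (1, 1)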