aashraychegu commited on
Commit
998873a
·
1 Parent(s): e55761e

Upload 2 files

Browse files
filter.ipynb CHANGED
@@ -39,15 +39,15 @@
39
  "source": [
40
  "# This is the first step of the process. Once you have the images and csvs organized in folders with their names, you need to create the offset file that contains the offset. This code creates the offset file if it doesn't exist\n",
41
  "\n",
42
- "testset = os.listdir(\"secondleg\")[8] # This is for listing out the contents of the folder\n",
43
  "print(testset)\n",
44
  "tiff = Image.open(pl.Path(\n",
45
- " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff')) # opens the tiff file\n",
46
  "csv = pd.read_csv(pl.Path(\n",
47
- " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv')) # opens the csv file\n",
48
  "with open(pl.Path( \n",
49
- " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt'),\"+x\") as f: # opens the offset file and creates it if it doesn't exist\n",
50
- " offset = f.read() # reads the offset file \n",
51
  " if offset != '':\n",
52
  " offset = int(offset)\n",
53
  " else:\n",
@@ -106,7 +106,7 @@
106
  "csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n",
107
  "# the CSV is backwards, so I am accounting for this and getting the first mask data point\n",
108
  "line = csv.iloc[-1] # gets last row of csv file\n",
109
- "print(csv.head()) # prints first 5 rows of csv file\n",
110
  "\n",
111
  "# creates the image masks and shows the image for calibration\n",
112
  "draw = ImageDraw.Draw(img)\n",
@@ -129,26 +129,21 @@
129
  " # Print the name of the current file\n",
130
  " print(testset)\n",
131
  "\n",
132
- " # Open the .tiff image file from the specified path\n",
133
  " tiff = Image.open(pl.Path(\n",
134
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff'))\n",
135
  "\n",
136
- " # Read the .csv file from the specified path\n",
137
  " csv = pd.read_csv(pl.Path(\n",
138
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv'))\n",
139
  "\n",
140
- " # Open and read the offset.txt file from the specified path\n",
141
  " with open(pl.Path(\n",
142
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt')) as f:\n",
143
  " offset = f.read()\n",
144
- " # If the offset is empty, set it to 0\n",
145
  " if offset == \"\":\n",
146
  " offset = 0\n",
147
- " # Otherwise, convert the offset to an integer\n",
148
  " else:\n",
149
  " offset = int(offset)\n",
150
  "\n",
151
- " # Make a copy of the image and crop it\n",
152
  " img = tiff.copy()\n",
153
  " img = img.crop((0, 430, img.size[0], 1790))\n",
154
  "\n",
@@ -160,28 +155,22 @@
160
  " img.save(pl.Path(\n",
161
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n",
162
  "\n",
163
- " # Print the mode of the image\n",
164
- " print(img.mode)\n",
165
- "\n",
166
  " # Add the offset to the specified columns of the csv file and reverse the order\n",
167
  " csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n",
168
  " csv = csv[::-1].reset_index(drop=True)\n",
169
  "\n",
170
- " # Create new dataframes for the top and bottom of the image\n",
171
  " top = pd.DataFrame(\n",
172
  " {\"x_surface\": 0, \"y_surface\": csv.iloc[0][\"y_surface\"], \"x_bed\": 0, \"y_bed\": csv.iloc[0][\"y_bed\"]}, index=[0])\n",
173
  " bottom = pd.DataFrame({\"x_surface\": tiff.size[0], \"y_surface\": csv.iloc[-1]\n",
174
  " [\"y_surface\"], \"x_bed\": tiff.size[0], \"y_bed\": csv.iloc[-1][\"y_bed\"]}, index=[0])\n",
175
- "\n",
176
- " # Concatenate the top, csv, and bottom dataframes\n",
177
  " csv = pd.concat([top, csv, bottom], ignore_index=True)\n",
178
  "\n",
179
- " # Create a draw object for the image\n",
180
  " draw = ImageDraw.Draw(img)\n",
181
  "\n",
182
  " # Loop over the rows of the csv file\n",
183
  " for i in range(len(csv)-1):\n",
184
- " # Get the current and next row\n",
185
  " crow = csv.iloc[i]\n",
186
  " nrow = csv.iloc[i+1]\n",
187
  "\n",
@@ -212,19 +201,9 @@
212
  "\n",
213
  " # Save the image with the drawn polygons to the specified path\n",
214
  " img.save(pl.Path(\n",
215
- " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))\n",
216
- "\n",
217
- " # Print the mode of the image\n",
218
- " print(img.mode)\n"
219
  ]
220
  },
221
- {
222
- "cell_type": "code",
223
- "execution_count": null,
224
- "metadata": {},
225
- "outputs": [],
226
- "source": []
227
- },
228
  {
229
  "cell_type": "code",
230
  "execution_count": null,
@@ -235,14 +214,10 @@
235
  "\n",
236
  "# Loop over all the files in the \"secondleg\" directory\n",
237
  "for testset in os.listdir(\"secondleg\"):\n",
238
- " # Print the name of the current file\n",
239
- " print(testset)\n",
240
  "\n",
241
- " # Open the cropped image file from the specified path\n",
242
  " cimg = Image.open(pl.Path(\n",
243
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n",
244
  "\n",
245
- " # Open the image mask file from the specified path\n",
246
  " mask = Image.open(pl.Path(\n",
247
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))\n",
248
  "\n",
@@ -251,18 +226,14 @@
251
  "\n",
252
  " # Try to create directories for the cropped images and masks\n",
253
  " try:\n",
254
- " # Create a directory for the cropped images\n",
255
  " os.mkdir(pl.Path(\n",
256
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_images'))\n",
257
  "\n",
258
- " # Create a directory for the cropped masks\n",
259
  " os.mkdir(pl.Path(\n",
260
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_masks'))\n",
261
- " # If the directories already exist, pass\n",
262
  " except:\n",
263
  " pass\n",
264
  "\n",
265
- " # Loop over the sections to crop the image into\n",
266
  " for i in cropsection:\n",
267
  " # Crop the image to the current section, resize it to 400x400, and save it to the specified path\n",
268
  " cimg.crop((i[0], 0, i[1], cimg.size[1])).resize((400, 400)).save(pl.Path(\n",
@@ -275,41 +246,29 @@
275
  "metadata": {},
276
  "outputs": [],
277
  "source": [
278
- "# Import the notebook_login function from the huggingface_hub module\n",
279
  "from huggingface_hub import notebook_login\n",
280
  "\n",
281
- "# Import the Dataset, DatasetDict, and Image classes from the datasets module\n",
282
  "from datasets import Dataset, DatasetDict, Image\n",
283
  "\n",
284
- "# Import the glob function from the glob module\n",
285
  "from glob import glob\n",
286
  "\n",
287
- "# Use the glob function to get a list of all .png image file paths in the \"secondleg/*/cropped_images/\" directory\n",
288
  "images = glob(\"secondleg/*/cropped_images/*.png\")\n",
289
  "\n",
290
- "# Use the glob function to get a list of all .png mask file paths in the \"secondleg/*/cropped_masks/\" directory\n",
291
  "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n",
292
  "\n",
293
  "# Define a function to create a dataset from image and label paths\n",
294
- "\n",
295
- "\n",
296
  "def create_dataset(image_paths, label_paths):\n",
297
  " # Create a Dataset object from a dictionary of image and label paths\n",
298
  " dataset = Dataset.from_dict({\"image\": sorted(image_paths),\n",
299
  " \"label\": sorted(label_paths)})\n",
300
- " # Cast the \"image\" column of the dataset to the Image class\n",
301
  " dataset = dataset.cast_column(\"image\", Image())\n",
302
- " # Cast the \"label\" column of the dataset to the Image class\n",
303
  " dataset = dataset.cast_column(\"label\", Image())\n",
304
  "\n",
305
- " # Return the dataset\n",
306
  " return dataset\n",
307
  "\n",
308
  "\n",
309
- "# Create a Dataset object using the create_dataset function and the image and mask file paths\n",
310
  "dataset = create_dataset(images, masks)\n",
311
  "\n",
312
- "# Call the notebook_login function to log in to Hugging Face\n",
313
  "notebook_login()\n"
314
  ]
315
  },
@@ -322,36 +281,6 @@
322
  "# Call the push_to_hub method on the dataset object, specifying the repository name and setting it to private\n",
323
  "dataset.push_to_hub(\"aashraychegu/glacier_scopes\", private=True)\n"
324
  ]
325
- },
326
- {
327
- "cell_type": "code",
328
- "execution_count": 1,
329
- "metadata": {},
330
- "outputs": [
331
- {
332
- "data": {
333
- "text/plain": [
334
- "8456"
335
- ]
336
- },
337
- "execution_count": 1,
338
- "metadata": {},
339
- "output_type": "execute_result"
340
- }
341
- ],
342
- "source": [
343
- "# Import the glob function from the glob module\n",
344
- "from glob import glob\n",
345
- "\n",
346
- "# Use the glob function to get a list of all .png image file paths in the \"secondleg/*/cropped_images/\" directory\n",
347
- "images = glob(\"secondleg/*/cropped_images/*.png\")\n",
348
- "\n",
349
- "# Use the glob function to get a list of all .png mask file paths in the \"secondleg/*/cropped_masks/\" directory\n",
350
- "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n",
351
- "\n",
352
- "# Print the length of the images list, which represents the total number of image files found\n",
353
- "len(images)\n"
354
- ]
355
  }
356
  ],
357
  "metadata": {
 
39
  "source": [
40
  "# This is the first step of the process. Once you have the images and csvs organized in folders with their names, you need to create the offset file that contains the offset. This code creates the offset file if it doesn't exist\n",
41
  "\n",
42
+ "testset = os.listdir(\"secondleg\")[8]\n",
43
  "print(testset)\n",
44
  "tiff = Image.open(pl.Path(\n",
45
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff')) \n",
46
  "csv = pd.read_csv(pl.Path(\n",
47
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv')) \n",
48
  "with open(pl.Path( \n",
49
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt'),\"+x\") as f: \n",
50
+ " offset = f.read() \n",
51
  " if offset != '':\n",
52
  " offset = int(offset)\n",
53
  " else:\n",
 
106
  "csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n",
107
  "# the CSV is backwards, so I am accounting for this and getting the first mask data point\n",
108
  "line = csv.iloc[-1] # gets last row of csv file\n",
109
+ "print(csv.head()) # prints first 5 rows of csv file to make sure that the offset was applied properly\n",
110
  "\n",
111
  "# creates the image masks and shows the image for calibration\n",
112
  "draw = ImageDraw.Draw(img)\n",
 
129
  " # Print the name of the current file\n",
130
  " print(testset)\n",
131
  "\n",
 
132
  " tiff = Image.open(pl.Path(\n",
133
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff'))\n",
134
  "\n",
 
135
  " csv = pd.read_csv(pl.Path(\n",
136
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv'))\n",
137
  "\n",
 
138
  " with open(pl.Path(\n",
139
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt')) as f:\n",
140
  " offset = f.read()\n",
 
141
  " if offset == \"\":\n",
142
  " offset = 0\n",
 
143
  " else:\n",
144
  " offset = int(offset)\n",
145
  "\n",
146
+ " # Make a copy of the image and crop it to remove the unneeded parts\n",
147
  " img = tiff.copy()\n",
148
  " img = img.crop((0, 430, img.size[0], 1790))\n",
149
  "\n",
 
155
  " img.save(pl.Path(\n",
156
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n",
157
  "\n",
 
 
 
158
  " # Add the offset to the specified columns of the csv file and reverse the order\n",
159
  " csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n",
160
  " csv = csv[::-1].reset_index(drop=True)\n",
161
  "\n",
162
+ " # Create new dataframes for the top and bottom of the image and concatenate them to the previous dataframe\n",
163
  " top = pd.DataFrame(\n",
164
  " {\"x_surface\": 0, \"y_surface\": csv.iloc[0][\"y_surface\"], \"x_bed\": 0, \"y_bed\": csv.iloc[0][\"y_bed\"]}, index=[0])\n",
165
  " bottom = pd.DataFrame({\"x_surface\": tiff.size[0], \"y_surface\": csv.iloc[-1]\n",
166
  " [\"y_surface\"], \"x_bed\": tiff.size[0], \"y_bed\": csv.iloc[-1][\"y_bed\"]}, index=[0])\n",
 
 
167
  " csv = pd.concat([top, csv, bottom], ignore_index=True)\n",
168
  "\n",
169
+ " # Create a draw object for the image for drawing the polygons\n",
170
  " draw = ImageDraw.Draw(img)\n",
171
  "\n",
172
  " # Loop over the rows of the csv file\n",
173
  " for i in range(len(csv)-1):\n",
 
174
  " crow = csv.iloc[i]\n",
175
  " nrow = csv.iloc[i+1]\n",
176
  "\n",
 
201
  "\n",
202
  " # Save the image with the drawn polygons to the specified path\n",
203
  " img.save(pl.Path(\n",
204
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))"
 
 
 
205
  ]
206
  },
 
 
 
 
 
 
 
207
  {
208
  "cell_type": "code",
209
  "execution_count": null,
 
214
  "\n",
215
  "# Loop over all the files in the \"secondleg\" directory\n",
216
  "for testset in os.listdir(\"secondleg\"):\n",
 
 
217
  "\n",
 
218
  " cimg = Image.open(pl.Path(\n",
219
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n",
220
  "\n",
 
221
  " mask = Image.open(pl.Path(\n",
222
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))\n",
223
  "\n",
 
226
  "\n",
227
  " # Try to create directories for the cropped images and masks\n",
228
  " try:\n",
 
229
  " os.mkdir(pl.Path(\n",
230
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_images'))\n",
231
  "\n",
 
232
  " os.mkdir(pl.Path(\n",
233
  " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_masks'))\n",
 
234
  " except:\n",
235
  " pass\n",
236
  "\n",
 
237
  " for i in cropsection:\n",
238
  " # Crop the image to the current section, resize it to 400x400, and save it to the specified path\n",
239
  " cimg.crop((i[0], 0, i[1], cimg.size[1])).resize((400, 400)).save(pl.Path(\n",
 
246
  "metadata": {},
247
  "outputs": [],
248
  "source": [
 
249
  "from huggingface_hub import notebook_login\n",
250
  "\n",
 
251
  "from datasets import Dataset, DatasetDict, Image\n",
252
  "\n",
 
253
  "from glob import glob\n",
254
  "\n",
 
255
  "images = glob(\"secondleg/*/cropped_images/*.png\")\n",
256
  "\n",
 
257
  "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n",
258
  "\n",
259
  "# Define a function to create a dataset from image and label paths\n",
 
 
260
  "def create_dataset(image_paths, label_paths):\n",
261
  " # Create a Dataset object from a dictionary of image and label paths\n",
262
  " dataset = Dataset.from_dict({\"image\": sorted(image_paths),\n",
263
  " \"label\": sorted(label_paths)})\n",
 
264
  " dataset = dataset.cast_column(\"image\", Image())\n",
 
265
  " dataset = dataset.cast_column(\"label\", Image())\n",
266
  "\n",
 
267
  " return dataset\n",
268
  "\n",
269
  "\n",
 
270
  "dataset = create_dataset(images, masks)\n",
271
  "\n",
 
272
  "notebook_login()\n"
273
  ]
274
  },
 
281
  "# Call the push_to_hub method on the dataset object, specifying the repository name and setting it to private\n",
282
  "dataset.push_to_hub(\"aashraychegu/glacier_scopes\", private=True)\n"
283
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  }
285
  ],
286
  "metadata": {
semanticallysegmentdeezglaciers.ipynb CHANGED
The diff for this file is too large to render. See raw diff