aashraychegu committed on
Commit
f31484b
·
1 Parent(s): a3aa0c0

Upload 2 files

Browse files
filter.ipynb ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# Importing all required libraries\n",
10
+ "\n",
11
+ "# these are needed for path processing \n",
12
+ "import os\n",
13
+ "import pathlib as pl\n",
14
+ "\n",
15
+ "#image processing and display\n",
16
+ "import numpy as np\n",
17
+ "import PIL\n",
18
+ "import PIL.Image as Image\n",
19
+ "import PIL.ImageDraw as ImageDraw\n",
20
+ "import matplotlib.pyplot as plt\n",
21
+ "\n",
22
+ "#these are needed for data processing\n",
23
+ "import pandas as pd"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": 17,
29
+ "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "name": "stderr",
33
+ "output_type": "stream",
34
+ "text": [
35
+ "UsageError: Line magic function `%%script` not found.\n"
36
+ ]
37
+ }
38
+ ],
39
+ "source": [
40
+ "if not create_offset_files:\n",
41
+ " %%script echo skipping\n",
42
+ "testset = os.listdir(\"secondleg\")[8] # This is for listing out the contents of the folder\n",
43
+ "print(testset)\n",
44
+ "tiff = Image.open(pl.Path(\n",
45
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff')) # opens the tiff file\n",
46
+ "csv = pd.read_csv(pl.Path(\n",
47
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv')) # opens the csv file\n",
48
+ "with open(pl.Path( \n",
49
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt'),\"+x\") as f: # opens the offset file and creates it if it doesn't exist\n",
50
+ " offset = f.read() # reads the offset file \n",
51
+ " if offset != '':\n",
52
+ " offset = int(offset)\n",
53
+ " else:\n",
54
+ " offset = 0\n"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": null,
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "# Helper for chopping a wide glacial scope image into chunks: returns [start, stop) index pairs for windows of width length; note that overlap is used as the step between successive window starts, not the number of shared pixels\n",
64
+ "def window_with_remainder(length, overlap, input_size):\n",
65
+ " testarray = np.arange(0, input_size)\n",
66
+ " return np.vstack((testarray[0:length], np.lib.stride_tricks.sliding_window_view(testarray[len(testarray) % length:], length)[::overlap]))[:, [0, -1]] + [0, 1]"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": null,
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "# This code draws a rectangle from (40,0) to (100, y_surface) in green, and from (40, y_surface) to (100, y_bed) in white.\n",
76
+ "# The y_surface and y_bed variables are read from the csv file, and the csv file is read in as a pandas dataframe.\n",
77
+ "# The first 5 rows of the csv file are also printed.\n",
78
+ "# This is done to help calibrate the offsets.\n",
79
+ "\n",
80
+ "testset = os.listdir(\"secondleg\")[10]\n",
81
+ "print(testset)\n",
82
+ "\n",
83
+ "tiff = Image.open(pl.Path(\n",
84
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff'))\n",
85
+ "csv = pd.read_csv(pl.Path(\n",
86
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv'))\n",
87
+ "with open(pl.Path(\n",
88
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt')) as f:\n",
89
+ " offset = f.read()\n",
90
+ " if offset == \"\":\n",
91
+ " offset = 0\n",
92
+ " else:\n",
93
+ " offset = int(offset)\n",
94
+ "print(offset)\n",
95
+ "img = tiff.copy()\n",
96
+ "img = img.crop((0,430,img.size[0],1790)) \n",
97
+ "print(csv.head()) # prints first 5 rows of csv file\n",
98
+ "csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n",
99
+ "line = csv.iloc[-1] # gets last row of csv file\n",
100
+ "print(csv.head()) # prints first 5 rows of csv file\n",
101
+ "\n",
102
+ "\n",
103
+ "draw = ImageDraw.Draw(img)\n",
104
+ "draw.rectangle([(40, 0), (100, line[\"y_surface\"])], fill=\"green\") # draws rectangle from (40,0) to (100, y_surface) in green\n",
105
+ "draw.rectangle([(40, line[\"y_surface\"]),\n",
106
+ " (100, line[\"y_bed\"])], fill=\"white\") # draws rectangle from (40, y_surface) to (100, y_bed) in white\n"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": null,
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "# This code draws the segmentation masks for each scope from the csv file and saves them\n",
116
+ "\n",
117
+ "# Loop over all the files in the \"secondleg\" directory\n",
118
+ "for testset in os.listdir(\"secondleg\"):\n",
119
+ " # Print the name of the current file\n",
120
+ " print(testset)\n",
121
+ "\n",
122
+ " # Open the .tiff image file from the specified path\n",
123
+ " tiff = Image.open(pl.Path(\n",
124
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff'))\n",
125
+ "\n",
126
+ " # Read the .csv file from the specified path\n",
127
+ " csv = pd.read_csv(pl.Path(\n",
128
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv'))\n",
129
+ "\n",
130
+ " # Open and read the offset.txt file from the specified path\n",
131
+ " with open(pl.Path(\n",
132
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt')) as f:\n",
133
+ " offset = f.read()\n",
134
+ " # If the offset is empty, set it to 0\n",
135
+ " if offset == \"\":\n",
136
+ " offset = 0\n",
137
+ " # Otherwise, convert the offset to an integer\n",
138
+ " else:\n",
139
+ " offset = int(offset)\n",
140
+ "\n",
141
+ " # Make a copy of the image and crop it\n",
142
+ " img = tiff.copy()\n",
143
+ " img = img.crop((0, 430, img.size[0], 1790))\n",
144
+ "\n",
145
+ " # Divide the pixel values by 255 (giving a float image) and then convert to mode L grayscale\n",
146
+ " img_float = Image.fromarray(np.divide(np.array(img), 2**8-1))\n",
147
+ " img = img_float.convert(\"L\")\n",
148
+ "\n",
149
+ " # Save the cropped and converted image to the specified path\n",
150
+ " img.save(pl.Path(\n",
151
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n",
152
+ "\n",
153
+ " # Print the mode of the image\n",
154
+ " print(img.mode)\n",
155
+ "\n",
156
+ " # Add the offset to all four selected columns (x and y alike) and reverse the row order\n",
157
+ " csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n",
158
+ " csv = csv[::-1].reset_index(drop=True)\n",
159
+ "\n",
160
+ " # Create one-row dataframes that extend the first and last rows to x = 0 and x = the tiff width (the left and right edges of the image)\n",
161
+ " top = pd.DataFrame(\n",
162
+ " {\"x_surface\": 0, \"y_surface\": csv.iloc[0][\"y_surface\"], \"x_bed\": 0, \"y_bed\": csv.iloc[0][\"y_bed\"]}, index=[0])\n",
163
+ " bottom = pd.DataFrame({\"x_surface\": tiff.size[0], \"y_surface\": csv.iloc[-1]\n",
164
+ " [\"y_surface\"], \"x_bed\": tiff.size[0], \"y_bed\": csv.iloc[-1][\"y_bed\"]}, index=[0])\n",
165
+ "\n",
166
+ " # Concatenate the top, csv, and bottom dataframes\n",
167
+ " csv = pd.concat([top, csv, bottom], ignore_index=True)\n",
168
+ "\n",
169
+ " # Create a draw object for the image\n",
170
+ " draw = ImageDraw.Draw(img)\n",
171
+ "\n",
172
+ " # Loop over the rows of the csv file\n",
173
+ " for i in range(len(csv)-1):\n",
174
+ " # Get the current and next row\n",
175
+ " crow = csv.iloc[i]\n",
176
+ " nrow = csv.iloc[i+1]\n",
177
+ "\n",
178
+ " # Define the coordinates for the sky (above the surface line), bed (between the surface and bed lines), and bottom (below the bed line) polygons\n",
179
+ " skycooords = [\n",
180
+ " (crow[\"x_surface\"], 0),\n",
181
+ " (nrow[\"x_surface\"], 0),\n",
182
+ " (nrow[\"x_surface\"], nrow[\"y_surface\"]),\n",
183
+ " (crow[\"x_surface\"], crow[\"y_surface\"])\n",
184
+ " ]\n",
185
+ " bedcoords = [\n",
186
+ " (crow[\"x_surface\"], crow[\"y_surface\"]),\n",
187
+ " (nrow[\"x_surface\"], nrow[\"y_surface\"]),\n",
188
+ " (nrow[\"x_bed\"], nrow[\"y_bed\"]),\n",
189
+ " (crow[\"x_bed\"], crow[\"y_bed\"])\n",
190
+ " ]\n",
191
+ " btmcoords = [\n",
192
+ " (crow[\"x_bed\"], crow[\"y_bed\"]),\n",
193
+ " (nrow[\"x_bed\"], nrow[\"y_bed\"]),\n",
194
+ " (nrow[\"x_bed\"], tiff.size[1]),\n",
195
+ " (crow[\"x_bed\"], tiff.size[1])\n",
196
+ " ]\n",
197
+ "\n",
198
+ " # Draw the polygons on the image\n",
199
+ " draw.polygon(skycooords, fill=\"#000000\")\n",
200
+ " draw.polygon(bedcoords, fill=\"#010101\")\n",
201
+ " draw.polygon(btmcoords, fill=\"#020202\")\n",
202
+ "\n",
203
+ " # Save the image with the drawn polygons to the specified path\n",
204
+ " img.save(pl.Path(\n",
205
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))\n",
206
+ "\n",
207
+ " # Print the mode of the image\n",
208
+ " print(img.mode)\n"
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "execution_count": null,
214
+ "metadata": {},
215
+ "outputs": [],
216
+ "source": []
217
+ },
218
+ {
219
+ "cell_type": "code",
220
+ "execution_count": null,
221
+ "metadata": {},
222
+ "outputs": [],
223
+ "source": [
224
+ "# This code splits each cropped image in the second leg data set into windows and resizes each window to 400x400. The mask file is opened but no mask tiles are written in this cell.\n",
225
+ "\n",
226
+ "# Loop over all the files in the \"secondleg\" directory\n",
227
+ "for testset in os.listdir(\"secondleg\"):\n",
228
+ " # Print the name of the current file\n",
229
+ " print(testset)\n",
230
+ "\n",
231
+ " # Open the cropped image file from the specified path\n",
232
+ " cimg = Image.open(pl.Path(\n",
233
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n",
234
+ "\n",
235
+ " # Open the image mask file from the specified path\n",
236
+ " mask = Image.open(pl.Path(\n",
237
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))\n",
238
+ "\n",
239
+ " # Calculate the sections to crop the image into, with each section being 400 pixels wide and successive sections starting 80 pixels apart\n",
240
+ " cropsection = window_with_remainder(400, 80, cimg.size[0])\n",
241
+ "\n",
242
+ " # Try to create directories for the cropped images and masks\n",
243
+ " try:\n",
244
+ " # Create a directory for the cropped images\n",
245
+ " os.mkdir(pl.Path(\n",
246
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_images'))\n",
247
+ "\n",
248
+ " # Create a directory for the cropped masks\n",
249
+ " os.mkdir(pl.Path(\n",
250
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_masks'))\n",
251
+ " # A bare except: any error from mkdir is ignored, not only the case where the directories already exist\n",
252
+ " except:\n",
253
+ " pass\n",
254
+ "\n",
255
+ " # Loop over the sections to crop the image into\n",
256
+ " for i in cropsection:\n",
257
+ " # Crop the image to the current section, resize it to 400x400, and save it to the specified path\n",
258
+ " cimg.crop((i[0], 0, i[1], cimg.size[1])).resize((400, 400)).save(pl.Path(\n",
259
+ " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_images\\cimg-{testset}_{i[0]}_{i[1]}.png'))\n"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": null,
265
+ "metadata": {},
266
+ "outputs": [],
267
+ "source": [
268
+ "# Import the notebook_login function from the huggingface_hub module\n",
269
+ "from huggingface_hub import notebook_login\n",
270
+ "\n",
271
+ "# Import the Dataset, DatasetDict, and Image classes from the datasets module\n",
272
+ "from datasets import Dataset, DatasetDict, Image\n",
273
+ "\n",
274
+ "# Import the glob function from the glob module\n",
275
+ "from glob import glob\n",
276
+ "\n",
277
+ "# Use the glob function to get a list of all .png image file paths in the \"secondleg/*/cropped_images/\" directory\n",
278
+ "images = glob(\"secondleg/*/cropped_images/*.png\")\n",
279
+ "\n",
280
+ "# Use the glob function to get a list of all .png mask file paths in the \"secondleg/*/cropped_masks/\" directory\n",
281
+ "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n",
282
+ "\n",
283
+ "# Define a function to create a dataset from image and label paths\n",
284
+ "\n",
285
+ "\n",
286
+ "def create_dataset(image_paths, label_paths):\n",
287
+ " # Create a Dataset object from a dictionary of image and label paths\n",
288
+ " dataset = Dataset.from_dict({\"image\": sorted(image_paths),\n",
289
+ " \"label\": sorted(label_paths)})\n",
290
+ " # Cast the \"image\" column of the dataset to the Image class\n",
291
+ " dataset = dataset.cast_column(\"image\", Image())\n",
292
+ " # Cast the \"label\" column of the dataset to the Image class\n",
293
+ " dataset = dataset.cast_column(\"label\", Image())\n",
294
+ "\n",
295
+ " # Return the dataset\n",
296
+ " return dataset\n",
297
+ "\n",
298
+ "\n",
299
+ "# Create a Dataset object using the create_dataset function and the image and mask file paths\n",
300
+ "dataset = create_dataset(images, masks)\n",
301
+ "\n",
302
+ "# Call the notebook_login function to log in to Hugging Face\n",
303
+ "notebook_login()\n"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": null,
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "# Call the push_to_hub method on the dataset object, specifying the repository name and setting it to private\n",
313
+ "dataset.push_to_hub(\"aashraychegu/glacier_scopes\", private=True)\n"
314
+ ]
315
+ },
316
+ {
317
+ "cell_type": "code",
318
+ "execution_count": 1,
319
+ "metadata": {},
320
+ "outputs": [
321
+ {
322
+ "data": {
323
+ "text/plain": [
324
+ "8456"
325
+ ]
326
+ },
327
+ "execution_count": 1,
328
+ "metadata": {},
329
+ "output_type": "execute_result"
330
+ }
331
+ ],
332
+ "source": [
333
+ "# Import the glob function from the glob module\n",
334
+ "from glob import glob\n",
335
+ "\n",
336
+ "# Use the glob function to get a list of all .png image file paths in the \"secondleg/*/cropped_images/\" directory\n",
337
+ "images = glob(\"secondleg/*/cropped_images/*.png\")\n",
338
+ "\n",
339
+ "# Use the glob function to get a list of all .png mask file paths in the \"secondleg/*/cropped_masks/\" directory\n",
340
+ "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n",
341
+ "\n",
342
+ "# Evaluate the length of the images list so the notebook displays the total number of image files found\n",
343
+ "len(images)\n"
344
+ ]
345
+ }
346
+ ],
347
+ "metadata": {
348
+ "kernelspec": {
349
+ "display_name": "Python 3",
350
+ "language": "python",
351
+ "name": "python3"
352
+ },
353
+ "language_info": {
354
+ "codemirror_mode": {
355
+ "name": "ipython",
356
+ "version": 3
357
+ },
358
+ "file_extension": ".py",
359
+ "mimetype": "text/x-python",
360
+ "name": "python",
361
+ "nbconvert_exporter": "python",
362
+ "pygments_lexer": "ipython3",
363
+ "version": "3.10.7"
364
+ },
365
+ "orig_nbformat": 4
366
+ },
367
+ "nbformat": 4,
368
+ "nbformat_minor": 2
369
+ }
semanticallysegmentdeezglaciers.ipynb ADDED
The diff for this file is too large to render. See raw diff