{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Importing all required libraries\n", "\n", "# these are needed for path processing \n", "import os\n", "import pathlib as pl\n", "\n", "#image processing and display\n", "import numpy as np\n", "import PIL\n", "import PIL.Image as Image\n", "import PIL.ImageDraw as ImageDraw\n", "import matplotlib.pyplot as plt\n", "\n", "#these are needed for data processing\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "UsageError: Line magic function `%%script` not found.\n" ] } ], "source": [ "if not create_offset_files:\n", " %%script echo skipping\n", "testset = os.listdir(\"secondleg\")[8] # This is for listing out the contents of the folder\n", "print(testset)\n", "tiff = Image.open(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff')) # opens the tiff file\n", "csv = pd.read_csv(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv')) # opens the csv file\n", "with open(pl.Path( \n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt'),\"+x\") as f: # opens the offset file and creates it if it doesn't exist\n", " offset = f.read() # reads the offset file \n", " if offset != '':\n", " offset = int(offset)\n", " else:\n", " offset = 0\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This is a helper method for chopping up a large glacial scope image into smaller chunks with a width of parameter length and a certain amount of overlap\n", "def window_with_remainder(length, overlap, input_size):\n", " testarray = np.arange(0, input_size)\n", " return np.vstack((testarray[0:length], np.lib.stride_tricks.sliding_window_view(testarray[len(testarray) % length:], length)[::overlap]))[:, [0, -1]] + [0, 1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This code draws a rectangle from (40,0) to (100, y_surface) in green, and from (40, y_surface) to (100, y_bed) in white.\n", "# The y_surface and y_bed variables are read from the csv file, and the csv file is read in as a pandas dataframe.\n", "# The first 5 rows of the csv file are also printed.\n", "# this is done to help calibrate the offsets \n", "\n", "testset = os.listdir(\"secondleg\")[10]\n", "print(testset)\n", "\n", "tiff = Image.open(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff'))\n", "csv = pd.read_csv(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv'))\n", "with open(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt')) as f:\n", " offset = f.read()\n", " if offset == \"\":\n", " offset = 0\n", " else:\n", " offset = int(offset)\n", "print(offset)\n", "img = tiff.copy()\n", "img = img.crop((0,430,img.size[0],1790)) \n", "print(csv.head()) # prints first 5 rows of csv file\n", "csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n", "line = csv.iloc[-1] # gets last row of csv file\n", "print(csv.head()) # prints first 5 rows of csv file\n", "\n", "\n", "draw = ImageDraw.Draw(img)\n", "draw.rectangle([(40, 0), (100, line[\"y_surface\"])], fill=\"green\") # draws rectangle from (40,0) to (100, y_surface) in green\n", "draw.rectangle([(40, line[\"y_surface\"]),\n", " (100, line[\"y_bed\"])], fill=\"white\") # draws rectangle from (40, y_surface) to (100, y_bed) in white\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This code draws the segmentation masks for each scope from the csv file and saves them\n", "\n", "# Loop over all the files in the \"secondleg\" directory\n", "for testset in os.listdir(\"secondleg\"):\n", " # Print the name of the current file\n", " print(testset)\n", "\n", " # Open the .tiff image file from the specified path\n", " tiff = Image.open(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.tiff'))\n", "\n", " # Read the .csv file from the specified path\n", " csv = pd.read_csv(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\{testset}.csv'))\n", "\n", " # Open and read the offset.txt file from the specified path\n", " with open(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\offset.txt')) as f:\n", " offset = f.read()\n", " # If the offset is empty, set it to 0\n", " if offset == \"\":\n", " offset = 0\n", " # Otherwise, convert the offset to an integer\n", " else:\n", " offset = int(offset)\n", "\n", " # Make a copy of the image and crop it\n", " img = tiff.copy()\n", " img = img.crop((0, 430, img.size[0], 1790))\n", "\n", " # Convert the image to float and then to grayscale\n", " img_float = Image.fromarray(np.divide(np.array(img), 2**8-1))\n", " img = img_float.convert(\"L\")\n", "\n", " # Save the cropped and converted image to the specified path\n", " img.save(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n", "\n", " # Print the mode of the image\n", " print(img.mode)\n", "\n", " # Add the offset to the specified columns of the csv file and reverse the order\n", " csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]]+offset\n", " csv = csv[::-1].reset_index(drop=True)\n", "\n", " # Create new dataframes for the top and bottom of the image\n", " top = pd.DataFrame(\n", " {\"x_surface\": 0, \"y_surface\": csv.iloc[0][\"y_surface\"], \"x_bed\": 0, \"y_bed\": csv.iloc[0][\"y_bed\"]}, index=[0])\n", " bottom = pd.DataFrame({\"x_surface\": tiff.size[0], \"y_surface\": csv.iloc[-1]\n", " [\"y_surface\"], \"x_bed\": tiff.size[0], \"y_bed\": csv.iloc[-1][\"y_bed\"]}, index=[0])\n", "\n", " # Concatenate the top, csv, and bottom dataframes\n", " csv = pd.concat([top, csv, bottom], ignore_index=True)\n", "\n", " # Create a draw object for the image\n", " draw = ImageDraw.Draw(img)\n", "\n", " # Loop over the rows of the csv file\n", " for i in range(len(csv)-1):\n", " # Get the current and next row\n", " crow = csv.iloc[i]\n", " nrow = csv.iloc[i+1]\n", "\n", " # Define the coordinates for the sky, bed, and bottom polygons\n", " skycooords = [\n", " (crow[\"x_surface\"], 0),\n", " (nrow[\"x_surface\"], 0),\n", " (nrow[\"x_surface\"], nrow[\"y_surface\"]),\n", " (crow[\"x_surface\"], crow[\"y_surface\"])\n", " ]\n", " bedcoords = [\n", " (crow[\"x_surface\"], crow[\"y_surface\"]),\n", " (nrow[\"x_surface\"], nrow[\"y_surface\"]),\n", " (nrow[\"x_bed\"], nrow[\"y_bed\"]),\n", " (crow[\"x_bed\"], crow[\"y_bed\"])\n", " ]\n", " btmcoords = [\n", " (crow[\"x_bed\"], crow[\"y_bed\"]),\n", " (nrow[\"x_bed\"], nrow[\"y_bed\"]),\n", " (nrow[\"x_bed\"], tiff.size[1]),\n", " (crow[\"x_bed\"], tiff.size[1])\n", " ]\n", "\n", " # Draw the polygons on the image\n", " draw.polygon(skycooords, fill=\"#000000\")\n", " draw.polygon(bedcoords, fill=\"#010101\")\n", " draw.polygon(btmcoords, fill=\"#020202\")\n", "\n", " # Save the image with the drawn polygons to the specified path\n", " img.save(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))\n", "\n", " # Print the mode of the image\n", " print(img.mode)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This code is used to crop the images and masks in the second leg data set into 400x400 images.\n", "\n", "# Loop over all the files in the \"secondleg\" directory\n", "for testset in os.listdir(\"secondleg\"):\n", " # Print the name of the current file\n", " print(testset)\n", "\n", " # Open the cropped image file from the specified path\n", " cimg = Image.open(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_img_{testset}.png'))\n", "\n", " # Open the image mask file from the specified path\n", " mask = Image.open(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\img_mask_{testset}.png'))\n", "\n", " # Calculate the sections to crop the image into, with each section being 400 pixels wide and an overlap of 80 pixels\n", " cropsection = window_with_remainder(400, 80, cimg.size[0])\n", "\n", " # Try to create directories for the cropped images and masks\n", " try:\n", " # Create a directory for the cropped images\n", " os.mkdir(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_images'))\n", "\n", " # Create a directory for the cropped masks\n", " os.mkdir(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_masks'))\n", " # If the directories already exist, pass\n", " except:\n", " pass\n", "\n", " # Loop over the sections to crop the image into\n", " for i in cropsection:\n", " # Crop the image to the current section, resize it to 400x400, and save it to the specified path\n", " cimg.crop((i[0], 0, i[1], cimg.size[1])).resize((400, 400)).save(pl.Path(\n", " rf'C:\\Users\\aashr\\Desktop\\research\\glaciers\\secondleg\\{testset}\\cropped_images\\cimg-{testset}_{i[0]}_{i[1]}.png'))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Import the notebook_login function from the huggingface_hub module\n", "from huggingface_hub import notebook_login\n", "\n", "# Import the Dataset, DatasetDict, and Image classes from the datasets module\n", "from datasets import Dataset, DatasetDict, Image\n", "\n", "# Import the glob function from the glob module\n", "from glob import glob\n", "\n", "# Use the glob function to get a list of all .png image file paths in the \"secondleg/*/cropped_images/\" directory\n", "images = glob(\"secondleg/*/cropped_images/*.png\")\n", "\n", "# Use the glob function to get a list of all .png mask file paths in the \"secondleg/*/cropped_masks/\" directory\n", "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n", "\n", "# Define a function to create a dataset from image and label paths\n", "\n", "\n", "def create_dataset(image_paths, label_paths):\n", " # Create a Dataset object from a dictionary of image and label paths\n", " dataset = Dataset.from_dict({\"image\": sorted(image_paths),\n", " \"label\": sorted(label_paths)})\n", " # Cast the \"image\" column of the dataset to the Image class\n", " dataset = dataset.cast_column(\"image\", Image())\n", " # Cast the \"label\" column of the dataset to the Image class\n", " dataset = dataset.cast_column(\"label\", Image())\n", "\n", " # Return the dataset\n", " return dataset\n", "\n", "\n", "# Create a Dataset object using the create_dataset function and the image and mask file paths\n", "dataset = create_dataset(images, masks)\n", "\n", "# Call the notebook_login function to log in to Hugging Face\n", "notebook_login()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Call the push_to_hub method on the dataset object, specifying the repository name and setting it to private\n", "dataset.push_to_hub(\"aashraychegu/glacier_scopes\", private=True)\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8456" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Import the glob function from the glob module\n", "from glob import glob\n", "\n", "# Use the glob function to get a list of all .png image file paths in the \"secondleg/*/cropped_images/\" directory\n", "images = glob(\"secondleg/*/cropped_images/*.png\")\n", "\n", "# Use the glob function to get a list of all .png mask file paths in the \"secondleg/*/cropped_masks/\" directory\n", "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n", "\n", "# Print the length of the images list, which represents the total number of image files found\n", "len(images)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.7" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }