File size: 14,722 Bytes

f31484b

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Importing all required libraries\n",
    "\n",
    "# these are needed for path processing \n",
    "import os\n",
    "import pathlib as pl\n",
    "\n",
    "#image processing and display\n",
    "import numpy as np\n",
    "import PIL\n",
    "import PIL.Image as Image\n",
    "import PIL.ImageDraw as ImageDraw\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "#these are needed for data processing\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional step: load one scope, make sure it has an offset.txt, and read the offset.\n",
    "# Define `create_offset_files = False` in an earlier cell to skip this step.\n",
    "# `%%script` is a cell magic and is only valid as the first line of a cell, so the skip\n",
    "# is implemented with a plain Python guard instead of a magic inside the `if`.\n",
    "create_offset_files = globals().get(\"create_offset_files\", True)\n",
    "\n",
    "DATA_DIR = pl.Path(\"secondleg\")  # resolved against the notebook's working directory\n",
    "\n",
    "if not create_offset_files:\n",
    "    print(\"skipping\")\n",
    "else:\n",
    "    testset = os.listdir(DATA_DIR)[8]  # name of the scope folder to work on\n",
    "    print(testset)\n",
    "    scope_dir = DATA_DIR / testset\n",
    "    tiff = Image.open(scope_dir / f\"{testset}.tiff\")  # opens the tiff file\n",
    "    csv = pd.read_csv(scope_dir / f\"{testset}.csv\")  # opens the csv file\n",
    "    # \"a+\" creates offset.txt when it is missing and leaves an existing file untouched;\n",
    "    # the previous exclusive-create mode raised FileExistsError whenever the file existed,\n",
    "    # so a stored offset could never be read back.\n",
    "    with open(scope_dir / \"offset.txt\", \"a+\") as f:\n",
    "        f.seek(0)  # append mode positions the stream at the end of the file\n",
    "        offset_text = f.read().strip()\n",
    "    # an empty (or whitespace-only) file means that no offset has been calibrated yet\n",
    "    offset = int(offset_text) if offset_text else 0\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper for chopping a wide scope image into fixed-width column windows.\n",
    "def window_with_remainder(length, overlap, input_size):\n",
    "    \"\"\"Return the [start, stop) column bounds of the crop windows for an image.\n",
    "\n",
    "    The first window always starts at column 0. The remaining windows start at\n",
    "    column ``input_size % length`` and then every ``overlap`` columns, as long as a\n",
    "    full window still fits. When ``length`` is a multiple of ``overlap`` the last\n",
    "    window therefore ends exactly at ``input_size``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    length : int\n",
    "        Width of every window, in pixels.\n",
    "    overlap : int\n",
    "        Distance between the starts of consecutive windows. NOTE: despite its name\n",
    "        this is the stride, so neighbouring windows share ``length - overlap`` columns.\n",
    "    input_size : int\n",
    "        Width of the image being windowed; must be at least ``length``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Integer array of shape (n_windows, 2). Each row is ``[start, stop]`` with\n",
    "        ``stop`` exclusive, i.e. ``stop - start == length``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``length`` or ``overlap`` is not positive, or if ``input_size < length``.\n",
    "    \"\"\"\n",
    "    if length < 1 or overlap < 1:\n",
    "        raise ValueError(\"length and overlap must both be positive integers\")\n",
    "    if input_size < length:\n",
    "        raise ValueError(\n",
    "            f\"input_size ({input_size}) is smaller than the window length ({length})\")\n",
    "\n",
    "    remainder = input_size % length\n",
    "    # starts of the regularly spaced windows that still fit completely into the image\n",
    "    starts = np.arange(remainder, input_size - length + 1, overlap)\n",
    "    if remainder:\n",
    "        # The leading window covers the columns in front of the regular grid. Without a\n",
    "        # remainder the grid already starts at 0, and adding it again would emit the\n",
    "        # window [0, length) twice.\n",
    "        starts = np.concatenate(([0], starts))\n",
    "    return np.column_stack((starts, starts + length))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calibration preview for a single scope, used to tune the value stored in offset.txt.\n",
    "# Using the last row of the csv file, a green bar is drawn from (40, 0) to\n",
    "# (100, y_surface) and a white bar from (40, y_surface) to (100, y_bed) on the cropped image.\n",
    "# The first 5 rows of the csv file are printed before and after the offset is applied.\n",
    "\n",
    "DATA_DIR = pl.Path(\"secondleg\")  # resolved against the notebook's working directory\n",
    "\n",
    "testset = os.listdir(DATA_DIR)[10]\n",
    "print(testset)\n",
    "scope_dir = DATA_DIR / testset\n",
    "\n",
    "tiff = Image.open(scope_dir / f\"{testset}.tiff\")\n",
    "csv = pd.read_csv(scope_dir / f\"{testset}.csv\")\n",
    "with open(scope_dir / \"offset.txt\") as f:\n",
    "    # strip() so that a trailing newline or a whitespace-only file does not break int()\n",
    "    offset_text = f.read().strip()\n",
    "offset = int(offset_text) if offset_text else 0  # empty file: no offset calibrated yet\n",
    "print(offset)\n",
    "\n",
    "img = tiff.copy()\n",
    "img = img.crop((0, 430, img.size[0], 1790))  # full width, rows 430 up to (not including) 1790\n",
    "print(csv.head())  # first 5 rows as read from disk\n",
    "# NOTE(review): the offset is added to the x columns as well as the y columns - confirm this is intended.\n",
    "csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]] + offset\n",
    "line = csv.iloc[-1]  # last row of the csv file\n",
    "print(csv.head())  # first 5 rows after the offset has been applied\n",
    "\n",
    "draw = ImageDraw.Draw(img)\n",
    "draw.rectangle([(40, 0), (100, line[\"y_surface\"])], fill=\"green\")\n",
    "draw.rectangle([(40, line[\"y_surface\"]), (100, line[\"y_bed\"])], fill=\"white\")\n",
    "\n",
    "# Show the annotated image as the cell output; previously the bars were drawn but never displayed.\n",
    "img\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every scope folder: save the cropped 8-bit grayscale image, then paint the\n",
    "# segmentation mask on top of it from the csv picks and save that as well.\n",
    "# The mask polygons are filled with #000000 (above the surface), #010101 (between\n",
    "# surface and bed) and #020202 (below the bed), i.e. gray levels 0, 1 and 2 in mode \"L\".\n",
    "\n",
    "DATA_DIR = pl.Path(\"secondleg\")  # resolved against the notebook's working directory\n",
    "\n",
    "for testset in os.listdir(DATA_DIR):\n",
    "    scope_dir = DATA_DIR / testset\n",
    "    if not scope_dir.is_dir():\n",
    "        continue  # ignore stray files that sit next to the scope folders\n",
    "    print(testset)\n",
    "\n",
    "    tiff = Image.open(scope_dir / f\"{testset}.tiff\")\n",
    "    csv = pd.read_csv(scope_dir / f\"{testset}.csv\")\n",
    "\n",
    "    with open(scope_dir / \"offset.txt\") as f:\n",
    "        # strip() so that a trailing newline or a whitespace-only file does not break int()\n",
    "        offset_text = f.read().strip()\n",
    "    offset = int(offset_text) if offset_text else 0  # empty file: no offset calibrated\n",
    "\n",
    "    # Same crop window as the calibration cell: full width, rows 430 up to 1790.\n",
    "    img = tiff.copy()\n",
    "    img = img.crop((0, 430, img.size[0], 1790))\n",
    "\n",
    "    # Divide the pixel values by 255 and convert the float image to 8-bit grayscale.\n",
    "    # NOTE(review): presumably the source tiff is 16-bit - confirm the scale factor.\n",
    "    img_float = Image.fromarray(np.divide(np.array(img), 2**8-1))\n",
    "    img = img_float.convert(\"L\")\n",
    "\n",
    "    img.save(scope_dir / f\"cropped_img_{testset}.png\")\n",
    "    print(img.mode)\n",
    "\n",
    "    # Apply the offset to the pick coordinates and reverse the row order.\n",
    "    # NOTE(review): the offset is added to the x columns as well - confirm this is intended.\n",
    "    csv = csv[[\"x_surface\", \"y_surface\", \"x_bed\", \"y_bed\"]] + offset\n",
    "    csv = csv[::-1].reset_index(drop=True)\n",
    "\n",
    "    # Pad the picks so that they reach from x = 0 to x = image width: the first and\n",
    "    # last rows are repeated with their x coordinates moved to the image borders.\n",
    "    first_row = csv.iloc[0]\n",
    "    last_row = csv.iloc[-1]\n",
    "    top = pd.DataFrame(\n",
    "        {\"x_surface\": 0, \"y_surface\": first_row[\"y_surface\"],\n",
    "         \"x_bed\": 0, \"y_bed\": first_row[\"y_bed\"]}, index=[0])\n",
    "    bottom = pd.DataFrame(\n",
    "        {\"x_surface\": tiff.size[0], \"y_surface\": last_row[\"y_surface\"],\n",
    "         \"x_bed\": tiff.size[0], \"y_bed\": last_row[\"y_bed\"]}, index=[0])\n",
    "    csv = pd.concat([top, csv, bottom], ignore_index=True)\n",
    "\n",
    "    # The mask is painted directly onto the grayscale image; pixels that no polygon\n",
    "    # covers keep their image value.\n",
    "    draw = ImageDraw.Draw(img)\n",
    "\n",
    "    # One quadrilateral per class between each pair of consecutive rows.\n",
    "    for i in range(len(csv) - 1):\n",
    "        crow = csv.iloc[i]\n",
    "        nrow = csv.iloc[i + 1]\n",
    "\n",
    "        skycoords = [\n",
    "            (crow[\"x_surface\"], 0),\n",
    "            (nrow[\"x_surface\"], 0),\n",
    "            (nrow[\"x_surface\"], nrow[\"y_surface\"]),\n",
    "            (crow[\"x_surface\"], crow[\"y_surface\"])\n",
    "        ]\n",
    "        bedcoords = [\n",
    "            (crow[\"x_surface\"], crow[\"y_surface\"]),\n",
    "            (nrow[\"x_surface\"], nrow[\"y_surface\"]),\n",
    "            (nrow[\"x_bed\"], nrow[\"y_bed\"]),\n",
    "            (crow[\"x_bed\"], crow[\"y_bed\"])\n",
    "        ]\n",
    "        # extends down to the height of the uncropped tiff, i.e. past the cropped image's lower edge\n",
    "        btmcoords = [\n",
    "            (crow[\"x_bed\"], crow[\"y_bed\"]),\n",
    "            (nrow[\"x_bed\"], nrow[\"y_bed\"]),\n",
    "            (nrow[\"x_bed\"], tiff.size[1]),\n",
    "            (crow[\"x_bed\"], tiff.size[1])\n",
    "        ]\n",
    "\n",
    "        draw.polygon(skycoords, fill=\"#000000\")\n",
    "        draw.polygon(bedcoords, fill=\"#010101\")\n",
    "        draw.polygon(btmcoords, fill=\"#020202\")\n",
    "\n",
    "    img.save(scope_dir / f\"img_mask_{testset}.png\")\n",
    "    print(img.mode)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cut every scope's cropped image and its mask into matching tiles and save each tile\n",
    "# resized to 400x400 into the scope's cropped_images / cropped_masks folders.\n",
    "\n",
    "DATA_DIR = pl.Path(\"secondleg\")  # resolved against the notebook's working directory\n",
    "\n",
    "for testset in os.listdir(DATA_DIR):\n",
    "    scope_dir = DATA_DIR / testset\n",
    "    if not scope_dir.is_dir():\n",
    "        continue  # ignore stray files that sit next to the scope folders\n",
    "    print(testset)\n",
    "\n",
    "    cimg = Image.open(scope_dir / f\"cropped_img_{testset}.png\")\n",
    "    mask = Image.open(scope_dir / f\"img_mask_{testset}.png\")\n",
    "\n",
    "    # 400-pixel-wide windows whose starts are 80 pixels apart\n",
    "    cropsection = window_with_remainder(400, 80, cimg.size[0])\n",
    "\n",
    "    image_dir = scope_dir / \"cropped_images\"\n",
    "    mask_dir = scope_dir / \"cropped_masks\"\n",
    "    # exist_ok keeps re-runs working. The previous bare try/except around both mkdir\n",
    "    # calls skipped the second folder whenever the first one already existed, and hid\n",
    "    # every other error (e.g. missing permissions) as well.\n",
    "    image_dir.mkdir(exist_ok=True)\n",
    "    mask_dir.mkdir(exist_ok=True)\n",
    "\n",
    "    for i in cropsection:\n",
    "        box = (i[0], 0, i[1], cimg.size[1])  # full-height column window [i[0], i[1])\n",
    "        tile_name = f\"{testset}_{i[0]}_{i[1]}.png\"\n",
    "\n",
    "        cimg.crop(box).resize((400, 400)).save(image_dir / f\"cimg-{tile_name}\")\n",
    "\n",
    "        # The mask tile was previously never written, which left cropped_masks empty.\n",
    "        # Nearest-neighbour resampling is used so that resizing cannot blend the mask's\n",
    "        # gray levels into values that do not correspond to any class.\n",
    "        # The shared name suffix keeps sorted image and mask paths in the same order.\n",
    "        mask.crop(box).resize((400, 400), resample=Image.NEAREST).save(\n",
    "            mask_dir / f\"mask-{tile_name}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a Hugging Face dataset of (image, label) pairs from the tiles on disk and log in to the Hub.\n",
    "from glob import glob\n",
    "\n",
    "from huggingface_hub import notebook_login\n",
    "\n",
    "# datasets.Image is imported under an alias so that it does not replace PIL's `Image`\n",
    "# module, which the image-processing cells rely on (Image.open, Image.fromarray).\n",
    "from datasets import Dataset, DatasetDict, Image as DatasetImage\n",
    "\n",
    "# All image tiles and all mask tiles, across every scope folder\n",
    "images = glob(\"secondleg/*/cropped_images/*.png\")\n",
    "masks = glob(\"secondleg/*/cropped_masks/*.png\")\n",
    "\n",
    "\n",
    "def create_dataset(image_paths, label_paths):\n",
    "    \"\"\"Create a Dataset with an \"image\" and a \"label\" image column.\n",
    "\n",
    "    Both path lists are sorted independently and then paired by position, so the\n",
    "    file naming must make an image and its mask sort into the same position.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    image_paths : list of str\n",
    "        Paths of the image files.\n",
    "    label_paths : list of str\n",
    "        Paths of the mask files, one per image.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    datasets.Dataset\n",
    "        Dataset whose \"image\" and \"label\" columns are cast to the Image feature.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the number of images and the number of masks differ.\n",
    "    \"\"\"\n",
    "    image_paths = sorted(image_paths)\n",
    "    label_paths = sorted(label_paths)\n",
    "    if len(image_paths) != len(label_paths):\n",
    "        raise ValueError(\n",
    "            f\"found {len(image_paths)} images but {len(label_paths)} masks; \"\n",
    "            \"every image tile needs exactly one mask tile\")\n",
    "\n",
    "    dataset = Dataset.from_dict({\"image\": image_paths, \"label\": label_paths})\n",
    "    dataset = dataset.cast_column(\"image\", DatasetImage())\n",
    "    dataset = dataset.cast_column(\"label\", DatasetImage())\n",
    "    return dataset\n",
    "\n",
    "\n",
    "dataset = create_dataset(images, masks)\n",
    "\n",
    "# Log in to Hugging Face so that the dataset can be pushed afterwards\n",
    "notebook_login()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the dataset built above to the Hugging Face Hub as a private repository\n",
    "hub_repo = \"aashraychegu/glacier_scopes\"\n",
    "dataset.push_to_hub(hub_repo, private=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8456"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: collect the tile paths of every scope and report how many image tiles exist\n",
    "from glob import glob\n",
    "\n",
    "images, masks = (\n",
    "    glob(\"secondleg/*/cropped_images/*.png\"),  # image tiles\n",
    "    glob(\"secondleg/*/cropped_masks/*.png\"),  # mask tiles\n",
    ")\n",
    "\n",
    "# The cell output is the total number of image tiles found\n",
    "len(images)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}