{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1107981a-6197-4184-8327-03ed05b31a5e",
   "metadata": {},
   "source": [
    "# II. BACKGROUND SUBSTRACTION NOTEBOOK"
   ]
  },
  {
   "cell_type": "raw",
   "id": "a0c470f4-c306-4b84-8aa5-d8b3daf9c810",
   "metadata": {},
   "source": [
    "10/01/24\n",
    "Modifications by Zoé Gerber\n",
    "from an original code from Marilyne Labrie"
   ]
  },
  {
   "cell_type": "raw",
   "id": "d53c8eb3-f434-4f16-bbd4-4f757747b501",
   "metadata": {},
   "source": [
    "II.1. PACKAGES IMPORT\n",
    "II.2. DIRECTORIES\n",
    "II.3. FILES\n",
    "    II.3.1. METADATA\n",
    "    II.3.2. NOT_INTENSITIES\n",
    "    II.3.3. FULL_TO_SHORT_COLUMN_NAMES\n",
    "    II.3.4. SHORT_TO_FULL_COLUMN_NAMES\n",
    "    II.3.5. SAMPLES COLORS\n",
    "    II.3.6. CHANNELS COLORS\n",
    "    II.3.7. ROUNDS COLORS\n",
    "    II.3.8. DATA\n",
    "II.4. FILTERING\n",
    "II.5. CELL TYPES COLORS\n",
    "II.6. CELL SUBTYPES COLORS\n",
    "\n",
    "II.7. BACKGROUND SUBSTRACTION\n",
    "II.8. SAVE"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "125cf03e-b740-4daa-9b16-21057959faee",
   "metadata": {},
   "source": [
    "## II.1. PACKAGES IMPORT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "b4faaea6-5510-44e5-9e8d-b9160dc4b3b5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "        <script type=\"text/javascript\">\n",
       "        window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
       "        if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
       "        if (typeof require !== 'undefined') {\n",
       "        require.undef(\"plotly\");\n",
       "        requirejs.config({\n",
       "            paths: {\n",
       "                'plotly': ['https://cdn.plot.ly/plotly-2.29.1.min']\n",
       "            }\n",
       "        });\n",
       "        require(['plotly'], function(Plotly) {\n",
       "            window._Plotly = Plotly;\n",
       "        });\n",
       "        }\n",
       "        </script>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import os\n",
    "import random\n",
    "import re\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sb\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.colors as mplc\n",
    "import subprocess\n",
    "import warnings\n",
    "\n",
    "from scipy import signal\n",
    "\n",
    "import plotly.figure_factory as ff\n",
    "import plotly\n",
    "import plotly.graph_objs as go\n",
    "from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot \n",
    "import plotly.express as px\n",
    "init_notebook_mode(connected = True)\n",
    "\n",
    "from my_modules import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "41c1e319-4dfb-43a2-b38b-83b789498988",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Silence FutureWarnings & UserWarnings\n",
    "warnings.filterwarnings('ignore', category= FutureWarning)\n",
    "warnings.filterwarnings('ignore', category= UserWarning)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7946a507-14f5-4dd0-b2f7-a8b1ced9c3df",
   "metadata": {},
   "source": [
    "## II.2. *DIRECTORIES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "108f1f6d-4cd5-495f-91b7-b826f3d1f772",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set base directory\n",
    "\n",
    "##### MAC WORKSTATION #####\n",
    "#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'\n",
    "###########################\n",
    "\n",
    "##### WINDOWS WORKSTATION #####\n",
    "#base_dir = r'C:\\Users\\LaboLabrie\\gerz2701\\cyCIF-pipeline\\Set_B'\n",
    "###############################\n",
    "\n",
    "##### LOCAL WORKSTATION #####\n",
    "base_dir = r'/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'\n",
    "#############################\n",
    "\n",
    "#set_name = 'Set_A'\n",
    "set_name = 'test'"
   ]
  },
  {
   "cell_type": "raw",
   "id": "aa48c54f-12f6-4f27-bb71-edcae686bb2b",
   "metadata": {},
   "source": [
    "The project is organized as :\n",
    "main dir \n",
    "    code\n",
    "    proj_data             > all csv files\n",
    "    proj_metadata         > exposure time csv file, images dir,...\n",
    "    proj_qc_eda           > csv after the QC/EDA step\n",
    "    proj_bs               > csv after the BS step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "a64af03a-7a84-4121-8eaa-7f08e6b3b21e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/ directory already exists !\n",
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda directory already exists !\n",
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs directory already exists !\n",
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/images directory already exists !\n",
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata directory already exists !\n",
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/images directory already exists !\n"
     ]
    }
   ],
   "source": [
    "project_name = set_name               # Project name\n",
    "step_suffix = 'bs'                    # Curent part (here part II)\n",
    "previous_step_suffix_long = \"_qc_eda\" # Previous part (here QC/EDA NOTEBOOK)\n",
    "\n",
    "# Initial input data directory\n",
    "input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long) \n",
    "\n",
    "# BS output directories\n",
    "output_data_dir = os.path.join(base_dir, project_name + \"_\" + step_suffix)\n",
    "# BS images subdirectory\n",
    "output_images_dir = os.path.join(output_data_dir,\"images\")\n",
    "\n",
    "# Data and Metadata directories\n",
    "# Metadata directories\n",
    "metadata_dir = os.path.join(base_dir, project_name + \"_metadata\")\n",
    "# images subdirectory\n",
    "metadata_images_dir = os.path.join(metadata_dir,\"images\")\n",
    "\n",
    "# Create directories if they don't already exist\n",
    "for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:\n",
    "    if not os.path.exists(d):\n",
    "        print(\"Creation of the\" , d, \"directory...\")\n",
    "        os.makedirs(d)\n",
    "    else :\n",
    "        print(\"The\", d, \"directory already exists !\")\n",
    "\n",
    "os.chdir(input_data_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "3396590c-e964-4053-be52-ef079e2d8e46",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "base_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/\n",
      "input_data_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_qc_eda\n",
      "output_data_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs\n",
      "output_images_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/images\n",
      "metadata_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata\n",
      "metadata_images_dir : /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/images\n"
     ]
    }
   ],
   "source": [
    "# Verify paths\n",
    "print('base_dir :', base_dir)\n",
    "print('input_data_dir :', input_data_dir)\n",
    "print('output_data_dir :', output_data_dir)\n",
    "print('output_images_dir :', output_images_dir)\n",
    "print('metadata_dir :', metadata_dir)\n",
    "print('metadata_images_dir :', metadata_images_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8fce1113-2492-49a1-bb79-2553cb4a4fcd",
   "metadata": {},
   "source": [
    "## II.3. FILES"
   ]
  },
  {
   "cell_type": "raw",
   "id": "41524daf-bcaa-4407-96aa-7a11a2dff993",
   "metadata": {},
   "source": [
    "Don't forget to put your data in the projname_data directory !"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3d6665ba-cb34-4e75-bb77-085888c8af8b",
   "metadata": {},
   "source": [
    "### II.3.1. METADATA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "558e4ac8-3fd8-45fb-acdc-803baaf8a8a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/marker_intensity_metadata.csv file was imported for further analysis!\n",
      "WARNING: 'Marker metadata file' has the following unexpected item(s): \n",
      "['Exp']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Round</th>\n",
       "      <th>Target</th>\n",
       "      <th>Exp</th>\n",
       "      <th>Channel</th>\n",
       "      <th>target_lower</th>\n",
       "      <th>full_column</th>\n",
       "      <th>marker</th>\n",
       "      <th>localisation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF488</td>\n",
       "      <td>300</td>\n",
       "      <td>c2</td>\n",
       "      <td>af488</td>\n",
       "      <td>AF488_Cell_Intensity_Average</td>\n",
       "      <td>AF488</td>\n",
       "      <td>cell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF488</td>\n",
       "      <td>300</td>\n",
       "      <td>c2</td>\n",
       "      <td>af488</td>\n",
       "      <td>AF488_Cytoplasm_Intensity_Average</td>\n",
       "      <td>AF488</td>\n",
       "      <td>cytoplasm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF488</td>\n",
       "      <td>300</td>\n",
       "      <td>c2</td>\n",
       "      <td>af488</td>\n",
       "      <td>AF488_Nucleus_Intensity_Average</td>\n",
       "      <td>AF488</td>\n",
       "      <td>nucleus</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF555</td>\n",
       "      <td>1500</td>\n",
       "      <td>c3</td>\n",
       "      <td>af555</td>\n",
       "      <td>AF555_Cell_Intensity_Average</td>\n",
       "      <td>AF555</td>\n",
       "      <td>cell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF555</td>\n",
       "      <td>1500</td>\n",
       "      <td>c3</td>\n",
       "      <td>af555</td>\n",
       "      <td>AF555_Cytoplasm_Intensity_Average</td>\n",
       "      <td>AF555</td>\n",
       "      <td>cytoplasm</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Round Target   Exp Channel target_lower                        full_column  \\\n",
       "0    R0  AF488   300      c2        af488       AF488_Cell_Intensity_Average   \n",
       "1    R0  AF488   300      c2        af488  AF488_Cytoplasm_Intensity_Average   \n",
       "2    R0  AF488   300      c2        af488    AF488_Nucleus_Intensity_Average   \n",
       "3    R0  AF555  1500      c3        af555       AF555_Cell_Intensity_Average   \n",
       "4    R0  AF555  1500      c3        af555  AF555_Cytoplasm_Intensity_Average   \n",
       "\n",
       "  marker localisation  \n",
       "0  AF488         cell  \n",
       "1  AF488    cytoplasm  \n",
       "2  AF488      nucleus  \n",
       "3  AF555         cell  \n",
       "4  AF555    cytoplasm  "
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Import all metadata we need from the QC/EDA chapter\n",
    "\n",
    "# METADATA\n",
    "filename = \"marker_intensity_metadata.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "\n",
    "# Check file exists\n",
    "if not os.path.exists(filename):\n",
    "    print(\"WARNING: Could not find desired file: \"+filename)\n",
    "else :\n",
    "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
    "    \n",
    "# Open, read in information\n",
    "metadata = pd.read_csv(filename)\n",
    "\n",
    "# Verify size with verify_line_no() function in my_modules.py\n",
    "#verify_line_no(filename, metadata.shape[0] + 1)\n",
    "\n",
    "# Verify headers\n",
    "exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']\n",
    "compare_headers(exp_cols, metadata.columns.values, \"Marker metadata file\")\n",
    "\n",
    "metadata = metadata.dropna()\n",
    "metadata.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "85e0a921-eeb1-4ece-8252-1df8325bc883",
   "metadata": {},
   "source": [
    "### II.3.2. NOT_INTENSITIES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "f5965d04-1254-45d8-be20-f207068d25c4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/not_intensities.csv file was imported for further analysis!\n",
      "Verifying data read from file is the correct length...\n",
      "\n",
      "not_intensities =\n",
      " ['Cytoplasm_Size', 'Nuc_X', 'Primary_chem(1)_vs_surg(0)', 'cluster', 'immune_checkpoint', 'Sample_ID', 'Nucleus_Roundness', 'Unique_ROI_index', 'Nuc_Y', 'Nuc_X_Inv', 'Cell_ID', 'cell_subtype', 'ID', 'Nuc_Y_Inv', 'Patient', 'replicate_ID', 'cell_type', 'ROI_index', 'Cell_Size', 'Nucleus_Size']\n"
     ]
    }
   ],
   "source": [
    "# NOT_INTENSITIES\n",
    "filename = \"not_intensities.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "\n",
    "# Check file exists\n",
    "if not os.path.exists(filename):\n",
    "    print(\"WARNING: Could not find desired file: \"+filename)\n",
    "else :\n",
    "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
    "\n",
    "# Open, read in information\n",
    "not_intensities = []\n",
    "with open(filename, 'r') as fh:\n",
    "    not_intensities = fh.read().strip().split(\"\\n\")\n",
    "    # take str, strip whitespace, split on new line character\n",
    "\n",
    "# Verify size\n",
    "print(\"Verifying data read from file is the correct length...\\n\")\n",
    "#verify_line_no(filename, len(not_intensities))\n",
    "\n",
    "# Print to console\n",
    "print(\"not_intensities =\\n\", not_intensities)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4568f560-eca8-4010-8d61-ef585d0bc5b3",
   "metadata": {},
   "source": [
    "### II.3.3. FULL_TO_SHORT_COLUMN_NAMES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "4fccb7b8-27e0-47e3-8b86-392ac2dfed8d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/full_to_short_column_names.csv file was imported for further analysis!\n",
      "Verifying data read from file is the correct length...\n",
      "\n",
      "full_to_short_names =\n",
      " {'AF488_Cell_Intensity_Average': 'AF488_Cell', 'AF488_Cytoplasm_Intensity_Average': 'AF488_Cytoplasm', 'AF488_Nucleus_Intensity_Average': 'AF488_Nucleus', 'AF555_Cell_Intensity_Average': 'AF555_Cell', 'AF555_Cytoplasm_Intensity_Average': 'AF555_Cytoplasm', 'AF555_Nucleus_Intensity_Average': 'AF555_Nucleus', 'AF647_Cell_Intensity_Average': 'AF647_Cell', 'AF647_Cytoplasm_Intensity_Average': 'AF647_Cytoplasm', 'AF647_Nucleus_Intensity_Average': 'AF647_Nucleus', 'AF750_Cell_Intensity_Average': 'AF750_Cell', 'AF750_Cytoplasm_Intensity_Average': 'AF750_Cytoplasm', 'AF750_Nucleus_Intensity_Average': 'AF750_Nucleus', 'aSMA_Cell_Intensity_Average': 'aSMA_Cell', 'aSMA_Cytoplasm_Intensity_Average': 'aSMA_Cytoplasm', 'aSMA_Nucleus_Intensity_Average': 'aSMA_Nucleus', 'AXL_Cell_Intensity_Average': 'AXL_Cell', 'AXL_Cytoplasm_Intensity_Average': 'AXL_Cytoplasm', 'AXL_Nucleus_Intensity_Average': 'AXL_Nucleus', 'B7H4_Cell_Intensity_Average': 'B7H4_Cell', 'B7H4_Cytoplasm_Intensity_Average': 'B7H4_Cytoplasm', 'B7H4_Nucleus_Intensity_Average': 'B7H4_Nucleus', 'CA9_Cell_Intensity_Average': 'CA9_Cell', 'CA9_Cytoplasm_Intensity_Average': 'CA9_Cytoplasm', 'CA9_Nucleus_Intensity_Average': 'CA9_Nucleus', 'CD4_Cell_Intensity_Average': 'CD4_Cell', 'CD4_Cytoplasm_Intensity_Average': 'CD4_Cytoplasm', 'CD4_Nucleus_Intensity_Average': 'CD4_Nucleus', 'CD8_Cell_Intensity_Average': 'CD8_Cell', 'CD8_Cytoplasm_Intensity_Average': 'CD8_Cytoplasm', 'CD8_Nucleus_Intensity_Average': 'CD8_Nucleus', 'CD11b_Cell_Intensity_Average': 'CD11b_Cell', 'CD11b_Cytoplasm_Intensity_Average': 'CD11b_Cytoplasm', 'CD11b_Nucleus_Intensity_Average': 'CD11b_Nucleus', 'CD11c_Cell_Intensity_Average': 'CD11c_Cell', 'CD11c_Cytoplasm_Intensity_Average': 'CD11c_Cytoplasm', 'CD11c_Nucleus_Intensity_Average': 'CD11c_Nucleus', 'CD20_Cell_Intensity_Average': 'CD20_Cell', 'CD20_Cytoplasm_Intensity_Average': 'CD20_Cytoplasm', 'CD20_Nucleus_Intensity_Average': 'CD20_Nucleus', 'CD31_Cell_Intensity_Average': 'CD31_Cell', 
'CD31_Cytoplasm_Intensity_Average': 'CD31_Cytoplasm', 'CD31_Nucleus_Intensity_Average': 'CD31_Nucleus', 'CD44_Cell_Intensity_Average': 'CD44_Cell', 'CD44_Cytoplasm_Intensity_Average': 'CD44_Cytoplasm', 'CD44_Nucleus_Intensity_Average': 'CD44_Nucleus', 'CD45_Cell_Intensity_Average': 'CD45_Cell', 'CD45_Cytoplasm_Intensity_Average': 'CD45_Cytoplasm', 'CD45_Nucleus_Intensity_Average': 'CD45_Nucleus', 'CD68_Cell_Intensity_Average': 'CD68_Cell', 'CD68_Cytoplasm_Intensity_Average': 'CD68_Cytoplasm', 'CD68_Nucleus_Intensity_Average': 'CD68_Nucleus', 'CD163_Cell_Intensity_Average': 'CD163_Cell', 'CD163_Cytoplasm_Intensity_Average': 'CD163_Cytoplasm', 'CD163_Nucleus_Intensity_Average': 'CD163_Nucleus', 'CKs_Cell_Intensity_Average': 'CKs_Cell', 'CKs_Cytoplasm_Intensity_Average': 'CKs_Cytoplasm', 'CKs_Nucleus_Intensity_Average': 'CKs_Nucleus', 'ColVI_Cell_Intensity_Average': 'ColVI_Cell', 'ColVI_Cytoplasm_Intensity_Average': 'ColVI_Cytoplasm', 'ColVI_Nucleus_Intensity_Average': 'ColVI_Nucleus', 'Desmin_Cell_Intensity_Average': 'Desmin_Cell', 'Desmin_Cytoplasm_Intensity_Average': 'Desmin_Cytoplasm', 'Desmin_Nucleus_Intensity_Average': 'Desmin_Nucleus', 'Ecad_Cell_Intensity_Average': 'Ecad_Cell', 'Ecad_Cytoplasm_Intensity_Average': 'Ecad_Cytoplasm', 'Ecad_Nucleus_Intensity_Average': 'Ecad_Nucleus', 'Fibronectin_Cell_Intensity_Average': 'Fibronectin_Cell', 'Fibronectin_Cytoplasm_Intensity_Average': 'Fibronectin_Cytoplasm', 'Fibronectin_Nucleus_Intensity_Average': 'Fibronectin_Nucleus', 'FOXP3_Cell_Intensity_Average': 'FOXP3_Cell', 'FOXP3_Cytoplasm_Intensity_Average': 'FOXP3_Cytoplasm', 'FOXP3_Nucleus_Intensity_Average': 'FOXP3_Nucleus', 'GATA3_Cell_Intensity_Average': 'GATA3_Cell', 'GATA3_Cytoplasm_Intensity_Average': 'GATA3_Cytoplasm', 'GATA3_Nucleus_Intensity_Average': 'GATA3_Nucleus', 'HLA_Cell_Intensity_Average': 'HLA_Cell', 'HLA_Cytoplasm_Intensity_Average': 'HLA_Cytoplasm', 'HLA_Nucleus_Intensity_Average': 'HLA_Nucleus', 'Ki67_Cell_Intensity_Average': 'Ki67_Cell', 
'Ki67_Cytoplasm_Intensity_Average': 'Ki67_Cytoplasm', 'Ki67_Nucleus_Intensity_Average': 'Ki67_Nucleus', 'MMP9_Cell_Intensity_Average': 'MMP9_Cell', 'MMP9_Cytoplasm_Intensity_Average': 'MMP9_Cytoplasm', 'MMP9_Nucleus_Intensity_Average': 'MMP9_Nucleus', 'PD1_Cell_Intensity_Average': 'PD1_Cell', 'PD1_Cytoplasm_Intensity_Average': 'PD1_Cytoplasm', 'PD1_Nucleus_Intensity_Average': 'PD1_Nucleus', 'PDGFR_Cell_Intensity_Average': 'PDGFR_Cell', 'PDGFR_Cytoplasm_Intensity_Average': 'PDGFR_Cytoplasm', 'PDGFR_Nucleus_Intensity_Average': 'PDGFR_Nucleus', 'PDL1_Cell_Intensity_Average': 'PDL1_Cell', 'PDL1_Cytoplasm_Intensity_Average': 'PDL1_Cytoplasm', 'PDL1_Nucleus_Intensity_Average': 'PDL1_Nucleus', 'r5c2_Cell_Intensity_Average': 'r5c2_Cell', 'r5c2_Cytoplasm_Intensity_Average': 'r5c2_Cytoplasm', 'r5c2_Nucleus_Intensity_Average': 'r5c2_Nucleus', 'r7c2_Cell_Intensity_Average': 'r7c2_Cell', 'r7c2_Cytoplasm_Intensity_Average': 'r7c2_Cytoplasm', 'r7c2_Nucleus_Intensity_Average': 'r7c2_Nucleus', 'r8c2_Cell_Intensity_Average': 'r8c2_Cell', 'r8c2_Cytoplasm_Intensity_Average': 'r8c2_Cytoplasm', 'r8c2_Nucleus_Intensity_Average': 'r8c2_Nucleus', 'Sting_Cell_Intensity_Average': 'Sting_Cell', 'Sting_Cytoplasm_Intensity_Average': 'Sting_Cytoplasm', 'Sting_Nucleus_Intensity_Average': 'Sting_Nucleus', 'Vimentin_Cell_Intensity_Average': 'Vimentin_Cell', 'Vimentin_Cytoplasm_Intensity_Average': 'Vimentin_Cytoplasm', 'Vimentin_Nucleus_Intensity_Average': 'Vimentin_Nucleus'}\n"
     ]
    }
   ],
   "source": [
    "# FULL_TO_SHORT_COLUMN_NAMES\n",
    "filename = \"full_to_short_column_names.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "\n",
    "# Check file exists\n",
    "if not os.path.exists(filename):\n",
    "    print(\"WARNING: Could not find desired file: \" + filename)\n",
    "else :\n",
    "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
    "    \n",
    "# Open, read in information\n",
    "df = pd.read_csv(filename, header = 0)\n",
    "\n",
    "# Verify size\n",
    "print(\"Verifying data read from file is the correct length...\\n\")\n",
    "#verify_line_no(filename, df.shape[0] + 1)\n",
    "\n",
    "# Turn into dictionary\n",
    "full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]\n",
    "\n",
    "# Print information\n",
    "print('full_to_short_names =\\n',full_to_short_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f5858f81-2f63-44b7-abee-9c00ed6c7aba",
   "metadata": {},
   "source": [
    "### II.3.4. SHORT_TO_FULL_COLUMN_NAMES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "b47edf92-9401-4d16-b532-0156fb493c0c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/short_to_full_column_names.csv file was imported for further analysis!\n",
      "Verifying data read from file is the correct length...\n",
      "\n",
      "short_to_full_names =\n",
      " {'AF488_Cell': 'AF488_Cell_Intensity_Average', 'AF488_Cytoplasm': 'AF488_Cytoplasm_Intensity_Average', 'AF488_Nucleus': 'AF488_Nucleus_Intensity_Average', 'AF555_Cell': 'AF555_Cell_Intensity_Average', 'AF555_Cytoplasm': 'AF555_Cytoplasm_Intensity_Average', 'AF555_Nucleus': 'AF555_Nucleus_Intensity_Average', 'AF647_Cell': 'AF647_Cell_Intensity_Average', 'AF647_Cytoplasm': 'AF647_Cytoplasm_Intensity_Average', 'AF647_Nucleus': 'AF647_Nucleus_Intensity_Average', 'AF750_Cell': 'AF750_Cell_Intensity_Average', 'AF750_Cytoplasm': 'AF750_Cytoplasm_Intensity_Average', 'AF750_Nucleus': 'AF750_Nucleus_Intensity_Average', 'aSMA_Cell': 'aSMA_Cell_Intensity_Average', 'aSMA_Cytoplasm': 'aSMA_Cytoplasm_Intensity_Average', 'aSMA_Nucleus': 'aSMA_Nucleus_Intensity_Average', 'AXL_Cell': 'AXL_Cell_Intensity_Average', 'AXL_Cytoplasm': 'AXL_Cytoplasm_Intensity_Average', 'AXL_Nucleus': 'AXL_Nucleus_Intensity_Average', 'B7H4_Cell': 'B7H4_Cell_Intensity_Average', 'B7H4_Cytoplasm': 'B7H4_Cytoplasm_Intensity_Average', 'B7H4_Nucleus': 'B7H4_Nucleus_Intensity_Average', 'CA9_Cell': 'CA9_Cell_Intensity_Average', 'CA9_Cytoplasm': 'CA9_Cytoplasm_Intensity_Average', 'CA9_Nucleus': 'CA9_Nucleus_Intensity_Average', 'CD4_Cell': 'CD4_Cell_Intensity_Average', 'CD4_Cytoplasm': 'CD4_Cytoplasm_Intensity_Average', 'CD4_Nucleus': 'CD4_Nucleus_Intensity_Average', 'CD8_Cell': 'CD8_Cell_Intensity_Average', 'CD8_Cytoplasm': 'CD8_Cytoplasm_Intensity_Average', 'CD8_Nucleus': 'CD8_Nucleus_Intensity_Average', 'CD11b_Cell': 'CD11b_Cell_Intensity_Average', 'CD11b_Cytoplasm': 'CD11b_Cytoplasm_Intensity_Average', 'CD11b_Nucleus': 'CD11b_Nucleus_Intensity_Average', 'CD11c_Cell': 'CD11c_Cell_Intensity_Average', 'CD11c_Cytoplasm': 'CD11c_Cytoplasm_Intensity_Average', 'CD11c_Nucleus': 'CD11c_Nucleus_Intensity_Average', 'CD20_Cell': 'CD20_Cell_Intensity_Average', 'CD20_Cytoplasm': 'CD20_Cytoplasm_Intensity_Average', 'CD20_Nucleus': 'CD20_Nucleus_Intensity_Average', 'CD31_Cell': 'CD31_Cell_Intensity_Average', 
'CD31_Cytoplasm': 'CD31_Cytoplasm_Intensity_Average', 'CD31_Nucleus': 'CD31_Nucleus_Intensity_Average', 'CD44_Cell': 'CD44_Cell_Intensity_Average', 'CD44_Cytoplasm': 'CD44_Cytoplasm_Intensity_Average', 'CD44_Nucleus': 'CD44_Nucleus_Intensity_Average', 'CD45_Cell': 'CD45_Cell_Intensity_Average', 'CD45_Cytoplasm': 'CD45_Cytoplasm_Intensity_Average', 'CD45_Nucleus': 'CD45_Nucleus_Intensity_Average', 'CD68_Cell': 'CD68_Cell_Intensity_Average', 'CD68_Cytoplasm': 'CD68_Cytoplasm_Intensity_Average', 'CD68_Nucleus': 'CD68_Nucleus_Intensity_Average', 'CD163_Cell': 'CD163_Cell_Intensity_Average', 'CD163_Cytoplasm': 'CD163_Cytoplasm_Intensity_Average', 'CD163_Nucleus': 'CD163_Nucleus_Intensity_Average', 'CKs_Cell': 'CKs_Cell_Intensity_Average', 'CKs_Cytoplasm': 'CKs_Cytoplasm_Intensity_Average', 'CKs_Nucleus': 'CKs_Nucleus_Intensity_Average', 'ColVI_Cell': 'ColVI_Cell_Intensity_Average', 'ColVI_Cytoplasm': 'ColVI_Cytoplasm_Intensity_Average', 'ColVI_Nucleus': 'ColVI_Nucleus_Intensity_Average', 'Desmin_Cell': 'Desmin_Cell_Intensity_Average', 'Desmin_Cytoplasm': 'Desmin_Cytoplasm_Intensity_Average', 'Desmin_Nucleus': 'Desmin_Nucleus_Intensity_Average', 'Ecad_Cell': 'Ecad_Cell_Intensity_Average', 'Ecad_Cytoplasm': 'Ecad_Cytoplasm_Intensity_Average', 'Ecad_Nucleus': 'Ecad_Nucleus_Intensity_Average', 'Fibronectin_Cell': 'Fibronectin_Cell_Intensity_Average', 'Fibronectin_Cytoplasm': 'Fibronectin_Cytoplasm_Intensity_Average', 'Fibronectin_Nucleus': 'Fibronectin_Nucleus_Intensity_Average', 'FOXP3_Cell': 'FOXP3_Cell_Intensity_Average', 'FOXP3_Cytoplasm': 'FOXP3_Cytoplasm_Intensity_Average', 'FOXP3_Nucleus': 'FOXP3_Nucleus_Intensity_Average', 'GATA3_Cell': 'GATA3_Cell_Intensity_Average', 'GATA3_Cytoplasm': 'GATA3_Cytoplasm_Intensity_Average', 'GATA3_Nucleus': 'GATA3_Nucleus_Intensity_Average', 'HLA_Cell': 'HLA_Cell_Intensity_Average', 'HLA_Cytoplasm': 'HLA_Cytoplasm_Intensity_Average', 'HLA_Nucleus': 'HLA_Nucleus_Intensity_Average', 'Ki67_Cell': 'Ki67_Cell_Intensity_Average', 
'Ki67_Cytoplasm': 'Ki67_Cytoplasm_Intensity_Average', 'Ki67_Nucleus': 'Ki67_Nucleus_Intensity_Average', 'MMP9_Cell': 'MMP9_Cell_Intensity_Average', 'MMP9_Cytoplasm': 'MMP9_Cytoplasm_Intensity_Average', 'MMP9_Nucleus': 'MMP9_Nucleus_Intensity_Average', 'PD1_Cell': 'PD1_Cell_Intensity_Average', 'PD1_Cytoplasm': 'PD1_Cytoplasm_Intensity_Average', 'PD1_Nucleus': 'PD1_Nucleus_Intensity_Average', 'PDGFR_Cell': 'PDGFR_Cell_Intensity_Average', 'PDGFR_Cytoplasm': 'PDGFR_Cytoplasm_Intensity_Average', 'PDGFR_Nucleus': 'PDGFR_Nucleus_Intensity_Average', 'PDL1_Cell': 'PDL1_Cell_Intensity_Average', 'PDL1_Cytoplasm': 'PDL1_Cytoplasm_Intensity_Average', 'PDL1_Nucleus': 'PDL1_Nucleus_Intensity_Average', 'r5c2_Cell': 'r5c2_Cell_Intensity_Average', 'r5c2_Cytoplasm': 'r5c2_Cytoplasm_Intensity_Average', 'r5c2_Nucleus': 'r5c2_Nucleus_Intensity_Average', 'r7c2_Cell': 'r7c2_Cell_Intensity_Average', 'r7c2_Cytoplasm': 'r7c2_Cytoplasm_Intensity_Average', 'r7c2_Nucleus': 'r7c2_Nucleus_Intensity_Average', 'r8c2_Cell': 'r8c2_Cell_Intensity_Average', 'r8c2_Cytoplasm': 'r8c2_Cytoplasm_Intensity_Average', 'r8c2_Nucleus': 'r8c2_Nucleus_Intensity_Average', 'Sting_Cell': 'Sting_Cell_Intensity_Average', 'Sting_Cytoplasm': 'Sting_Cytoplasm_Intensity_Average', 'Sting_Nucleus': 'Sting_Nucleus_Intensity_Average', 'Vimentin_Cell': 'Vimentin_Cell_Intensity_Average', 'Vimentin_Cytoplasm': 'Vimentin_Cytoplasm_Intensity_Average', 'Vimentin_Nucleus': 'Vimentin_Nucleus_Intensity_Average'}\n"
     ]
    }
   ],
   "source": [
    "# SHORT_TO_FULL_COLUMN_NAMES\n",
    "filename = \"short_to_full_column_names.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "\n",
    "# Check file exists\n",
    "if not os.path.exists(filename):\n",
    "    print(\"WARNING: Could not find desired file: \" + filename)\n",
    "else :\n",
    "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
    "\n",
    "# Open, read in information\n",
    "df = pd.read_csv(filename, header = 0)\n",
    "\n",
    "# Verify size\n",
    "print(\"Verifying data read from file is the correct length...\\n\")\n",
    "#verify_line_no(filename, df.shape[0] + 1)\n",
    "\n",
    "# Turn into dictionary\n",
    "short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]\n",
    "\n",
    "# Print information\n",
    "print('short_to_full_names =\\n',short_to_full_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "450d6f35-64de-47da-a359-7125a677cfd6",
   "metadata": {},
   "source": [
    "### II.3.5. SAMPLES COLORS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "4a72b8c0-bdc2-4d2a-8549-2b24cefba020",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/sample_color_data.csv file was imported for further analysis!\n",
      "Verifying data read from file is the correct length...\n",
      "\n",
      "sample_color_dict =\n",
      " {'DD3S1.csv': (0.9677975592919913, 0.44127456009157356, 0.5358103155058701), 'DD3S2.csv': (0.5920891529639701, 0.6418467016378244, 0.1935069134991043), 'DD3S3.csv': (0.21044753832183283, 0.6773105080456748, 0.6433941168468681), 'TMA.csv': (0.5019607843137255, 0.5019607843137255, 0.5019607843137255)}\n"
     ]
    }
   ],
   "source": [
    "# COLORS INFORMATION\n",
    "filename = \"sample_color_data.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "\n",
    "# Check file exists\n",
    "if not os.path.exists(filename):\n",
    "    print(\"WARNING: Could not find desired file: \" + filename)\n",
    "else :\n",
    "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
    "    \n",
    "# Open, read in information\n",
    "df = pd.read_csv(filename, header = 0)\n",
    "df = df.drop(columns = ['hex'])\n",
    "\n",
    "# our tuple of float values for rgb, (r, g, b) was read in \n",
    "# as a string '(r, g, b)'. We need to extract the r-, g-, and b-\n",
    "# substrings and convert them back into floats\n",
    "df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)\n",
    "\n",
    "# Verify size\n",
    "print(\"Verifying data read from file is the correct length...\\n\")\n",
    "#verify_line_no(filename, df.shape[0] + 1)\n",
    "\n",
    "# Turn into dictionary\n",
    "sample_color_dict = df.set_index('Sample_ID').T.to_dict('rgb')[0]\n",
    "\n",
    "# Print information\n",
    "print('sample_color_dict =\\n',sample_color_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f6249335-5aef-4d35-9c80-163d68ae1432",
   "metadata": {},
   "source": [
    "### II.3.6. CHANNELS COLORS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "e460924e-82b4-4542-b297-399cd8fd6e68",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/channel_color_data.csv file was imported for further analysis!\n",
      "Verifying data read from file is the correct length...\n",
      "\n",
      "channel_color_dict =\n",
      " {'c2': (0.00784313725490196, 0.24313725490196078, 1.0), 'c3': (1.0, 0.48627450980392156, 0.0), 'c4': (0.10196078431372549, 0.788235294117647, 0.2196078431372549), 'c5': (0.9098039215686274, 0.0, 0.043137254901960784)}\n"
     ]
    }
   ],
   "source": [
    "# CHANNELS\n",
    "filename = \"channel_color_data.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "\n",
    "# Check file exists\n",
    "if not os.path.exists(filename):\n",
    "    print(\"WARNING: Could not find desired file: \"+filename)\n",
    "else :\n",
    "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
    "\n",
    "# Open, read in information\n",
    "df = pd.read_csv(filename, header = 0)\n",
    "df = df.drop(columns = ['hex'])\n",
    "\n",
    "# our tuple of float values for rgb, (r, g, b) was read in \n",
    "# as a string '(r, g, b)'. We need to extract the r-, g-, and b-\n",
    "# substrings and convert them back into floats\n",
    "df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)\n",
    "\n",
    "# Verify size\n",
    "print(\"Verifying data read from file is the correct length...\\n\")\n",
    "#verify_line_no(filename, df.shape[0] + 1)\n",
    "\n",
    "# Turn into dictionary\n",
    "channel_color_dict = df.set_index('Channel').T.to_dict('rgb')[0]\n",
    "\n",
    "# Print information\n",
    "print('channel_color_dict =\\n',channel_color_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "051455b8-048b-4392-b5b5-9145d03391a8",
   "metadata": {},
   "source": [
    "### II.3.7. ROUNDS COLORS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "636ec133-d4e8-4f61-9893-f369d1538c83",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/round_color_data.csv file was imported for further analysis!\n",
      "Verifying data read from file is the correct length...\n",
      "\n",
      "round_color_dict =\n",
      " {'R0': (0.28685356234627135, 0.13009829239513535, 0.23110332132624437), 'R1': (0.36541462435986094, 0.2025447048359916, 0.37693310021636883), 'R2': (0.40867533458903105, 0.2940761173840091, 0.5166711878800253), 'R3': (0.42890613750051265, 0.4082290173220481, 0.6335348887063806), 'R4': (0.4444462906865238, 0.5264664993764805, 0.7056321892616532), 'R5': (0.47707206309601013, 0.6427061780374552, 0.7418477948908153), 'R6': (0.5414454866716836, 0.7466759172596551, 0.7572905778378964), 'R7': (0.6414710091647722, 0.8321551072276492, 0.7746773027952071), 'R8': (0.7684256891219349, 0.8992667116749021, 0.8171383269422353)}\n"
     ]
    }
   ],
   "source": [
    "# ROUND\n",
    "filename = \"round_color_data.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "\n",
    "# Check file exists\n",
    "if not os.path.exists(filename):\n",
    "    print(\"WARNING: Could not find desired file: \"+filename)\n",
    "else :\n",
    "    print(\"The\",filename,\"file was imported for further analysis!\")\n",
    "    \n",
    "# Open, read in information\n",
    "df = pd.read_csv(filename, header = 0)\n",
    "df = df.drop(columns = ['hex'])\n",
    "\n",
    "# our tuple of float values for rgb, (r, g, b) was read in \n",
    "# as a string '(r, g, b)'. We need to extract the r-, g-, and b-\n",
    "# substrings and convert them back into floats\n",
    "df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)\n",
    "\n",
    "# Verify size\n",
    "print(\"Verifying data read from file is the correct length...\\n\")\n",
    "#verify_line_no(filename, df.shape[0] + 1)\n",
    "\n",
    "# Turn into dictionary\n",
    "round_color_dict = df.set_index('Round').T.to_dict('rgb')[0]\n",
    "\n",
    "# Print information\n",
    "print('round_color_dict =\\n',round_color_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "05ad70c3-4525-4e98-ac85-2398bde990ab",
   "metadata": {},
   "source": [
    "### II.3.8. DATA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "eb2392ef-dd07-44cb-b94e-0a044abd0a47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The following CSV files were detected:\n",
      "['DD3S3_qc_eda.csv', 'TMA_qc_eda.csv', 'DD3S1_qc_eda.csv', 'DD3S2_qc_eda.csv']\n"
     ]
    }
   ],
   "source": [
    "# DATA\n",
    "# List files in the directory\n",
    "# Check if the directory exists\n",
    "if os.path.exists(input_data_dir):\n",
    "    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith(\"_qc_eda.csv\")]\n",
    "\n",
    "    print(\"The following CSV files were detected:\")\n",
    "    print([sample for sample in ls_samples])\n",
    "else:\n",
    "    print(f\"The directory {input_data_dir} does not exist.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "99e5b1af-1b1a-432e-8e75-e31e0c70e7c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Nuc_X' 'Sample_ID' 'Nucleus_Roundness' 'Nuc_Y_Inv' 'ROI_index'\n",
      " 'Cell_Size' 'Nucleus_Size' 'AF488_Cell_Intensity_Average'\n",
      " 'AF488_Cytoplasm_Intensity_Average' 'AF488_Nucleus_Intensity_Average'\n",
      " 'AF555_Cell_Intensity_Average' 'AF555_Cytoplasm_Intensity_Average'\n",
      " 'AF555_Nucleus_Intensity_Average' 'AF647_Cell_Intensity_Average'\n",
      " 'AF647_Cytoplasm_Intensity_Average' 'AF647_Nucleus_Intensity_Average'\n",
      " 'AF750_Cell_Intensity_Average' 'AF750_Cytoplasm_Intensity_Average'\n",
      " 'AF750_Nucleus_Intensity_Average' 'aSMA_Cell_Intensity_Average'\n",
      " 'aSMA_Cytoplasm_Intensity_Average' 'aSMA_Nucleus_Intensity_Average'\n",
      " 'AXL_Cell_Intensity_Average' 'AXL_Cytoplasm_Intensity_Average'\n",
      " 'AXL_Nucleus_Intensity_Average' 'B7H4_Cell_Intensity_Average'\n",
      " 'B7H4_Cytoplasm_Intensity_Average' 'B7H4_Nucleus_Intensity_Average'\n",
      " 'CA9_Cell_Intensity_Average' 'CA9_Cytoplasm_Intensity_Average'\n",
      " 'CA9_Nucleus_Intensity_Average' 'CD4_Cell_Intensity_Average'\n",
      " 'CD4_Cytoplasm_Intensity_Average' 'CD4_Nucleus_Intensity_Average'\n",
      " 'CD8_Cell_Intensity_Average' 'CD8_Cytoplasm_Intensity_Average'\n",
      " 'CD8_Nucleus_Intensity_Average' 'CD11b_Cell_Intensity_Average'\n",
      " 'CD11b_Cytoplasm_Intensity_Average' 'CD11b_Nucleus_Intensity_Average'\n",
      " 'CD11c_Cell_Intensity_Average' 'CD11c_Cytoplasm_Intensity_Average'\n",
      " 'CD11c_Nucleus_Intensity_Average' 'CD20_Cell_Intensity_Average'\n",
      " 'CD20_Cytoplasm_Intensity_Average' 'CD20_Nucleus_Intensity_Average'\n",
      " 'CD31_Cell_Intensity_Average' 'CD31_Cytoplasm_Intensity_Average'\n",
      " 'CD31_Nucleus_Intensity_Average' 'CD44_Cell_Intensity_Average'\n",
      " 'CD44_Cytoplasm_Intensity_Average' 'CD44_Nucleus_Intensity_Average'\n",
      " 'CD45_Cell_Intensity_Average' 'CD45_Cytoplasm_Intensity_Average'\n",
      " 'CD45_Nucleus_Intensity_Average' 'CD68_Cell_Intensity_Average'\n",
      " 'CD68_Cytoplasm_Intensity_Average' 'CD68_Nucleus_Intensity_Average'\n",
      " 'CD163_Cell_Intensity_Average' 'CD163_Cytoplasm_Intensity_Average'\n",
      " 'CD163_Nucleus_Intensity_Average' 'CKs_Cell_Intensity_Average'\n",
      " 'CKs_Cytoplasm_Intensity_Average' 'CKs_Nucleus_Intensity_Average'\n",
      " 'ColVI_Cell_Intensity_Average' 'ColVI_Cytoplasm_Intensity_Average'\n",
      " 'ColVI_Nucleus_Intensity_Average' 'Desmin_Cell_Intensity_Average'\n",
      " 'Desmin_Cytoplasm_Intensity_Average' 'Desmin_Nucleus_Intensity_Average'\n",
      " 'Ecad_Cell_Intensity_Average' 'Ecad_Cytoplasm_Intensity_Average'\n",
      " 'Ecad_Nucleus_Intensity_Average' 'Fibronectin_Cell_Intensity_Average'\n",
      " 'Fibronectin_Cytoplasm_Intensity_Average'\n",
      " 'Fibronectin_Nucleus_Intensity_Average' 'FOXP3_Cell_Intensity_Average'\n",
      " 'FOXP3_Cytoplasm_Intensity_Average' 'FOXP3_Nucleus_Intensity_Average'\n",
      " 'GATA3_Cell_Intensity_Average' 'GATA3_Cytoplasm_Intensity_Average'\n",
      " 'GATA3_Nucleus_Intensity_Average' 'HLA_Cell_Intensity_Average'\n",
      " 'HLA_Cytoplasm_Intensity_Average' 'HLA_Nucleus_Intensity_Average'\n",
      " 'Ki67_Cell_Intensity_Average' 'Ki67_Cytoplasm_Intensity_Average'\n",
      " 'Ki67_Nucleus_Intensity_Average' 'MMP9_Cell_Intensity_Average'\n",
      " 'MMP9_Cytoplasm_Intensity_Average' 'MMP9_Nucleus_Intensity_Average'\n",
      " 'PD1_Cell_Intensity_Average' 'PD1_Cytoplasm_Intensity_Average'\n",
      " 'PD1_Nucleus_Intensity_Average' 'PDGFR_Cell_Intensity_Average'\n",
      " 'PDGFR_Cytoplasm_Intensity_Average' 'PDGFR_Nucleus_Intensity_Average'\n",
      " 'PDL1_Cell_Intensity_Average' 'PDL1_Cytoplasm_Intensity_Average'\n",
      " 'PDL1_Nucleus_Intensity_Average' 'r5c2_Cell_Intensity_Average'\n",
      " 'r5c2_Cytoplasm_Intensity_Average' 'r5c2_Nucleus_Intensity_Average'\n",
      " 'r7c2_Cell_Intensity_Average' 'r7c2_Cytoplasm_Intensity_Average'\n",
      " 'r7c2_Nucleus_Intensity_Average' 'r8c2_Cell_Intensity_Average'\n",
      " 'r8c2_Cytoplasm_Intensity_Average' 'r8c2_Nucleus_Intensity_Average'\n",
      " 'Sting_Cell_Intensity_Average' 'Sting_Cytoplasm_Intensity_Average'\n",
      " 'Sting_Nucleus_Intensity_Average' 'Vimentin_Cell_Intensity_Average'\n",
      " 'Vimentin_Cytoplasm_Intensity_Average'\n",
      " 'Vimentin_Nucleus_Intensity_Average']\n",
      "DD3S3_qc_eda.csv file is processed !\n",
      "\n",
      "TMA_qc_eda.csv file is processed !\n",
      "\n",
      "DD3S1_qc_eda.csv file is processed !\n",
      "\n",
      "DD3S2_qc_eda.csv file is processed !\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Import all the others files\n",
    "dfs = {}\n",
    "\n",
    "# Set variable to hold default header values\n",
    "# First gather information on expected headers using first file in ls_samples\n",
    "# Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples\n",
    "df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]) , index_col = 0, nrows = 1)\n",
    "expected_headers = df.columns.values\n",
    "print(expected_headers)\n",
    "\n",
    "###############################\n",
    "# !! This may take a while !! #\n",
    "###############################\n",
    "for sample in ls_samples:\n",
    "    file_path = os.path.join(input_data_dir,sample)\n",
    "   \n",
    "    try:\n",
    "        # Read the CSV file\n",
    "        df = pd.read_csv(file_path, index_col=0)\n",
    "        # Check if the DataFrame is empty, if so, don't continue trying to process df and remove it\n",
    "        \n",
    "        if not df.empty:\n",
    "            # Reorder the columns to match the expected headers list\n",
    "            df = df.reindex(columns=expected_headers)\n",
    "            print(sample, \"file is processed !\\n\")\n",
    "            #print(df) \n",
    "   \n",
    "    except pd.errors.EmptyDataError:\n",
    "        print(f'\\nEmpty data error in {sample} file. Removing from analysis...')\n",
    "        ls_samples.remove(sample)      \n",
    "    \n",
    "    # Add df to dfs \n",
    "    dfs[sample] = df\n",
    "\n",
    "#print(dfs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "2e2def98-8ea5-46ee-bad3-10d19feb56db",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                   Nuc_X  Sample_ID  Nucleus_Roundness     Nuc_Y_Inv  \\\n",
      "ID                                                                     \n",
      "DD3S3_Cell_0  823.567871  DD3S3.csv           0.835324  15699.382812   \n",
      "DD3S3_Cell_1  852.840027  DD3S3.csv           0.523421  15690.533203   \n",
      "DD3S3_Cell_2  868.272705  DD3S3.csv           0.686147  15682.994141   \n",
      "DD3S3_Cell_3  842.131592  DD3S3.csv           0.881136  15688.894531   \n",
      "DD3S3_Cell_4  704.337280  DD3S3.csv           0.757623  15683.059570   \n",
      "\n",
      "              ROI_index  Cell_Size  Nucleus_Size  \\\n",
      "ID                                                 \n",
      "DD3S3_Cell_0          0        281            81   \n",
      "DD3S3_Cell_1          0        200            75   \n",
      "DD3S3_Cell_2          0        425           165   \n",
      "DD3S3_Cell_3          0        114            38   \n",
      "DD3S3_Cell_4          0        418           169   \n",
      "\n",
      "              AF488_Cell_Intensity_Average  AF488_Cytoplasm_Intensity_Average  \\\n",
      "ID                                                                              \n",
      "DD3S3_Cell_0                   1996.348755                        1883.150024   \n",
      "DD3S3_Cell_1                   2523.199951                        2450.087891   \n",
      "DD3S3_Cell_2                   2864.345947                        2765.046143   \n",
      "DD3S3_Cell_3                   2328.473633                        2267.855225   \n",
      "DD3S3_Cell_4                   3349.023926                        3018.883545   \n",
      "\n",
      "              AF488_Nucleus_Intensity_Average  ...  \\\n",
      "ID                                             ...   \n",
      "DD3S3_Cell_0                      2275.851807  ...   \n",
      "DD3S3_Cell_1                      2645.053223  ...   \n",
      "DD3S3_Cell_2                      3020.818115  ...   \n",
      "DD3S3_Cell_3                      2449.710449  ...   \n",
      "DD3S3_Cell_4                      3835.443848  ...   \n",
      "\n",
      "              r7c2_Nucleus_Intensity_Average  r8c2_Cell_Intensity_Average  \\\n",
      "ID                                                                          \n",
      "DD3S3_Cell_0                      252.555557                   268.523132   \n",
      "DD3S3_Cell_1                      271.226654                   310.339996   \n",
      "DD3S3_Cell_2                      333.078796                   349.395294   \n",
      "DD3S3_Cell_3                      263.500000                   291.289459   \n",
      "DD3S3_Cell_4                      375.662720                   384.011963   \n",
      "\n",
      "              r8c2_Cytoplasm_Intensity_Average  \\\n",
      "ID                                               \n",
      "DD3S3_Cell_0                        257.945007   \n",
      "DD3S3_Cell_1                        301.056000   \n",
      "DD3S3_Cell_2                        339.603851   \n",
      "DD3S3_Cell_3                        282.789459   \n",
      "DD3S3_Cell_4                        350.662659   \n",
      "\n",
      "              r8c2_Nucleus_Intensity_Average  Sting_Cell_Intensity_Average  \\\n",
      "ID                                                                           \n",
      "DD3S3_Cell_0                      294.641968                   1358.562256   \n",
      "DD3S3_Cell_1                      325.813324                   1856.005005   \n",
      "DD3S3_Cell_2                      364.824249                   1969.552979   \n",
      "DD3S3_Cell_3                      308.289459                   1319.236816   \n",
      "DD3S3_Cell_4                      433.147919                   1398.476074   \n",
      "\n",
      "              Sting_Cytoplasm_Intensity_Average  \\\n",
      "ID                                                \n",
      "DD3S3_Cell_0                        1318.834961   \n",
      "DD3S3_Cell_1                        1954.375977   \n",
      "DD3S3_Cell_2                        2171.303955   \n",
      "DD3S3_Cell_3                        1249.684204   \n",
      "DD3S3_Cell_4                        1301.931763   \n",
      "\n",
      "              Sting_Nucleus_Intensity_Average  \\\n",
      "ID                                              \n",
      "DD3S3_Cell_0                      1456.654297   \n",
      "DD3S3_Cell_1                      1692.053345   \n",
      "DD3S3_Cell_2                      1651.642456   \n",
      "DD3S3_Cell_3                      1458.342163   \n",
      "DD3S3_Cell_4                      1540.721924   \n",
      "\n",
      "              Vimentin_Cell_Intensity_Average  \\\n",
      "ID                                              \n",
      "DD3S3_Cell_0                      2862.252686   \n",
      "DD3S3_Cell_1                      3007.534912   \n",
      "DD3S3_Cell_2                      1955.188232   \n",
      "DD3S3_Cell_3                      3467.263184   \n",
      "DD3S3_Cell_4                      1861.600464   \n",
      "\n",
      "              Vimentin_Cytoplasm_Intensity_Average  \\\n",
      "ID                                                   \n",
      "DD3S3_Cell_0                           2464.334961   \n",
      "DD3S3_Cell_1                           3135.496094   \n",
      "DD3S3_Cell_2                           2099.076904   \n",
      "DD3S3_Cell_3                           3773.605225   \n",
      "DD3S3_Cell_4                           1678.666626   \n",
      "\n",
      "              Vimentin_Nucleus_Intensity_Average  \n",
      "ID                                                \n",
      "DD3S3_Cell_0                         3844.765381  \n",
      "DD3S3_Cell_1                         2794.266602  \n",
      "DD3S3_Cell_2                         1728.454590  \n",
      "DD3S3_Cell_3                         2854.578857  \n",
      "DD3S3_Cell_4                         2131.130127  \n",
      "\n",
      "[5 rows x 115 columns]\n"
     ]
    }
   ],
   "source": [
    "# Merge dfs into one df\n",
    "df = pd.concat(dfs.values(), ignore_index=False , sort = False)\n",
    "del dfs\n",
    "\n",
    "print(df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "bbe27b0f-c7a0-489a-b845-74c42289f7bf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(433976, 115)"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "949edaa7-22ec-4544-a250-4705759ca1bd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check for NaN entries (should not be any unless columns do not align)\n",
    "# False means no NaN entries \n",
    "# True means NaN entries \n",
    "df.isnull().any().any()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "657ba08b-cb75-437a-9557-97e8acb46dcb",
   "metadata": {},
   "source": [
    "## II.4. *FILTERING"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "db040ebb-6447-4e0a-a904-21e62eb3e1c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of cells before filtering : 433976\n"
     ]
    }
   ],
   "source": [
    "print(\"Number of cells before filtering :\", df.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "d571eb73-5e76-4c16-b7ba-37a55553d454",
   "metadata": {},
   "outputs": [],
   "source": [
    "#print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "0b66a96e-32e2-4682-ae20-ebd6fc41a9c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of cells after filtering on nucleus size: 389617\n",
      "Number of cells after filtering on AF555A ___ intensity: 350554\n"
     ]
    }
   ],
   "source": [
    "# Delete small cells and objects w/high AF555 Signal (RBCs) \n",
    "# We usually use the 95th percentile calculated during QC_EDA\n",
    "df = df.loc[(df['Nucleus_Size'] > 42 )]\n",
    "df = df.loc[(df['Nucleus_Size'] < 216)]\n",
    "print(\"Number of cells after filtering on nucleus size:\", df.shape[0])\n",
    "\n",
    "df = df.loc[(df['AF555_Cell_Intensity_Average'] < 2000)]\n",
    "print(\"Number of cells after filtering on AF555A ___ intensity:\", df.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "3b3ecc8d-66ad-4153-8289-6e4935cf7fa1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assign cell type\n",
    "# Assign tumor cells at each row at first (random assigning here just for development purposes)\n",
    "# Generate random values for cell_type column\n",
    "random_values = np.random.randint(0, 10, size=len(df))\n",
    "\n",
    "# Assign cell type based on random values\n",
    "def assign_cell_type(n):\n",
    "    return 'none'\n",
    "\n",
    "df['cell_type'] = np.vectorize(assign_cell_type)(random_values)\n",
    "df['cell_subtype'] = df['cell_type'].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "15dd4c6e-87d3-419e-b824-0a39fce7faa9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Nuc_X</th>\n",
       "      <th>Sample_ID</th>\n",
       "      <th>Nucleus_Roundness</th>\n",
       "      <th>Nuc_Y_Inv</th>\n",
       "      <th>ROI_index</th>\n",
       "      <th>Cell_Size</th>\n",
       "      <th>Nucleus_Size</th>\n",
       "      <th>AF488_Cell_Intensity_Average</th>\n",
       "      <th>AF488_Cytoplasm_Intensity_Average</th>\n",
       "      <th>AF488_Nucleus_Intensity_Average</th>\n",
       "      <th>...</th>\n",
       "      <th>r8c2_Cytoplasm_Intensity_Average</th>\n",
       "      <th>r8c2_Nucleus_Intensity_Average</th>\n",
       "      <th>Sting_Cell_Intensity_Average</th>\n",
       "      <th>Sting_Cytoplasm_Intensity_Average</th>\n",
       "      <th>Sting_Nucleus_Intensity_Average</th>\n",
       "      <th>Vimentin_Cell_Intensity_Average</th>\n",
       "      <th>Vimentin_Cytoplasm_Intensity_Average</th>\n",
       "      <th>Vimentin_Nucleus_Intensity_Average</th>\n",
       "      <th>cell_type</th>\n",
       "      <th>cell_subtype</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_0</th>\n",
       "      <td>823.567871</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.835324</td>\n",
       "      <td>15699.382812</td>\n",
       "      <td>0</td>\n",
       "      <td>281</td>\n",
       "      <td>81</td>\n",
       "      <td>1996.348755</td>\n",
       "      <td>1883.150024</td>\n",
       "      <td>2275.851807</td>\n",
       "      <td>...</td>\n",
       "      <td>257.945007</td>\n",
       "      <td>294.641968</td>\n",
       "      <td>1358.562256</td>\n",
       "      <td>1318.834961</td>\n",
       "      <td>1456.654297</td>\n",
       "      <td>2862.252686</td>\n",
       "      <td>2464.334961</td>\n",
       "      <td>3844.765381</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_1</th>\n",
       "      <td>852.840027</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.523421</td>\n",
       "      <td>15690.533203</td>\n",
       "      <td>0</td>\n",
       "      <td>200</td>\n",
       "      <td>75</td>\n",
       "      <td>2523.199951</td>\n",
       "      <td>2450.087891</td>\n",
       "      <td>2645.053223</td>\n",
       "      <td>...</td>\n",
       "      <td>301.056000</td>\n",
       "      <td>325.813324</td>\n",
       "      <td>1856.005005</td>\n",
       "      <td>1954.375977</td>\n",
       "      <td>1692.053345</td>\n",
       "      <td>3007.534912</td>\n",
       "      <td>3135.496094</td>\n",
       "      <td>2794.266602</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_2</th>\n",
       "      <td>868.272705</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.686147</td>\n",
       "      <td>15682.994141</td>\n",
       "      <td>0</td>\n",
       "      <td>425</td>\n",
       "      <td>165</td>\n",
       "      <td>2864.345947</td>\n",
       "      <td>2765.046143</td>\n",
       "      <td>3020.818115</td>\n",
       "      <td>...</td>\n",
       "      <td>339.603851</td>\n",
       "      <td>364.824249</td>\n",
       "      <td>1969.552979</td>\n",
       "      <td>2171.303955</td>\n",
       "      <td>1651.642456</td>\n",
       "      <td>1955.188232</td>\n",
       "      <td>2099.076904</td>\n",
       "      <td>1728.454590</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_4</th>\n",
       "      <td>704.337280</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.757623</td>\n",
       "      <td>15683.059570</td>\n",
       "      <td>0</td>\n",
       "      <td>418</td>\n",
       "      <td>169</td>\n",
       "      <td>3349.023926</td>\n",
       "      <td>3018.883545</td>\n",
       "      <td>3835.443848</td>\n",
       "      <td>...</td>\n",
       "      <td>350.662659</td>\n",
       "      <td>433.147919</td>\n",
       "      <td>1398.476074</td>\n",
       "      <td>1301.931763</td>\n",
       "      <td>1540.721924</td>\n",
       "      <td>1861.600464</td>\n",
       "      <td>1678.666626</td>\n",
       "      <td>2131.130127</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_5</th>\n",
       "      <td>852.893799</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.714651</td>\n",
       "      <td>15683.017578</td>\n",
       "      <td>0</td>\n",
       "      <td>201</td>\n",
       "      <td>113</td>\n",
       "      <td>2366.134277</td>\n",
       "      <td>2088.715820</td>\n",
       "      <td>2582.177002</td>\n",
       "      <td>...</td>\n",
       "      <td>276.897736</td>\n",
       "      <td>326.592926</td>\n",
       "      <td>1328.602051</td>\n",
       "      <td>1144.681763</td>\n",
       "      <td>1471.831909</td>\n",
       "      <td>1457.726318</td>\n",
       "      <td>1205.852295</td>\n",
       "      <td>1653.876099</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 117 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                   Nuc_X  Sample_ID  Nucleus_Roundness     Nuc_Y_Inv  \\\n",
       "ID                                                                     \n",
       "DD3S3_Cell_0  823.567871  DD3S3.csv           0.835324  15699.382812   \n",
       "DD3S3_Cell_1  852.840027  DD3S3.csv           0.523421  15690.533203   \n",
       "DD3S3_Cell_2  868.272705  DD3S3.csv           0.686147  15682.994141   \n",
       "DD3S3_Cell_4  704.337280  DD3S3.csv           0.757623  15683.059570   \n",
       "DD3S3_Cell_5  852.893799  DD3S3.csv           0.714651  15683.017578   \n",
       "\n",
       "              ROI_index  Cell_Size  Nucleus_Size  \\\n",
       "ID                                                 \n",
       "DD3S3_Cell_0          0        281            81   \n",
       "DD3S3_Cell_1          0        200            75   \n",
       "DD3S3_Cell_2          0        425           165   \n",
       "DD3S3_Cell_4          0        418           169   \n",
       "DD3S3_Cell_5          0        201           113   \n",
       "\n",
       "              AF488_Cell_Intensity_Average  AF488_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                                              \n",
       "DD3S3_Cell_0                   1996.348755                        1883.150024   \n",
       "DD3S3_Cell_1                   2523.199951                        2450.087891   \n",
       "DD3S3_Cell_2                   2864.345947                        2765.046143   \n",
       "DD3S3_Cell_4                   3349.023926                        3018.883545   \n",
       "DD3S3_Cell_5                   2366.134277                        2088.715820   \n",
       "\n",
       "              AF488_Nucleus_Intensity_Average  ...  \\\n",
       "ID                                             ...   \n",
       "DD3S3_Cell_0                      2275.851807  ...   \n",
       "DD3S3_Cell_1                      2645.053223  ...   \n",
       "DD3S3_Cell_2                      3020.818115  ...   \n",
       "DD3S3_Cell_4                      3835.443848  ...   \n",
       "DD3S3_Cell_5                      2582.177002  ...   \n",
       "\n",
       "              r8c2_Cytoplasm_Intensity_Average  \\\n",
       "ID                                               \n",
       "DD3S3_Cell_0                        257.945007   \n",
       "DD3S3_Cell_1                        301.056000   \n",
       "DD3S3_Cell_2                        339.603851   \n",
       "DD3S3_Cell_4                        350.662659   \n",
       "DD3S3_Cell_5                        276.897736   \n",
       "\n",
       "              r8c2_Nucleus_Intensity_Average  Sting_Cell_Intensity_Average  \\\n",
       "ID                                                                           \n",
       "DD3S3_Cell_0                      294.641968                   1358.562256   \n",
       "DD3S3_Cell_1                      325.813324                   1856.005005   \n",
       "DD3S3_Cell_2                      364.824249                   1969.552979   \n",
       "DD3S3_Cell_4                      433.147919                   1398.476074   \n",
       "DD3S3_Cell_5                      326.592926                   1328.602051   \n",
       "\n",
       "              Sting_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                \n",
       "DD3S3_Cell_0                        1318.834961   \n",
       "DD3S3_Cell_1                        1954.375977   \n",
       "DD3S3_Cell_2                        2171.303955   \n",
       "DD3S3_Cell_4                        1301.931763   \n",
       "DD3S3_Cell_5                        1144.681763   \n",
       "\n",
       "              Sting_Nucleus_Intensity_Average  \\\n",
       "ID                                              \n",
       "DD3S3_Cell_0                      1456.654297   \n",
       "DD3S3_Cell_1                      1692.053345   \n",
       "DD3S3_Cell_2                      1651.642456   \n",
       "DD3S3_Cell_4                      1540.721924   \n",
       "DD3S3_Cell_5                      1471.831909   \n",
       "\n",
       "              Vimentin_Cell_Intensity_Average  \\\n",
       "ID                                              \n",
       "DD3S3_Cell_0                      2862.252686   \n",
       "DD3S3_Cell_1                      3007.534912   \n",
       "DD3S3_Cell_2                      1955.188232   \n",
       "DD3S3_Cell_4                      1861.600464   \n",
       "DD3S3_Cell_5                      1457.726318   \n",
       "\n",
       "              Vimentin_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                   \n",
       "DD3S3_Cell_0                           2464.334961   \n",
       "DD3S3_Cell_1                           3135.496094   \n",
       "DD3S3_Cell_2                           2099.076904   \n",
       "DD3S3_Cell_4                           1678.666626   \n",
       "DD3S3_Cell_5                           1205.852295   \n",
       "\n",
       "              Vimentin_Nucleus_Intensity_Average  cell_type  cell_subtype  \n",
       "ID                                                                         \n",
       "DD3S3_Cell_0                         3844.765381       none          none  \n",
       "DD3S3_Cell_1                         2794.266602       none          none  \n",
       "DD3S3_Cell_2                         1728.454590       none          none  \n",
       "DD3S3_Cell_4                         2131.130127       none          none  \n",
       "DD3S3_Cell_5                         1653.876099       none          none  \n",
       "\n",
       "[5 rows x 117 columns]"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "de4deb80-bdbe-430b-9991-019406d99ae2",
   "metadata": {},
   "source": [
    "##  II.5. CELL TYPES COLORS"
   ]
  },
  {
   "cell_type": "raw",
   "id": "9214d297-2e4b-45eb-b664-43f748c4bd6d",
   "metadata": {},
   "source": [
    "# Establish colors to use throughout workflow\n",
    "\n",
    "# we want colors that are categorical, since Cell Type is a non-ordered category. \n",
    "# A categorical color palette will have dissimilar colors.\n",
    "# Get those unique colors\n",
    "cell_types = ['STROMA','CANCER','IMMUNE','ENDOTHELIAL']\n",
    "color_values = sb.color_palette(\"hls\", n_colors = len(cell_types))\n",
    "# each color value is a tuple of three values: (R, G, B)\n",
    "\n",
    "print(\"Unique cell types are:\",df.cell_type.unique())\n",
    "# Display those unique colors\n",
    "sb.palplot(sb.color_palette(color_values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "72a0ac93-964a-4541-91e0-5e21c46154f1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUoAAABlCAYAAAArpKpSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAACpUlEQVR4nO3ZMW4aURiF0R9kGRpAcomg8kq8q0gpXKa0vINsLC48EguAxrhgUkRJqnAHHDQe+5z6FVcP6dPMMGrbti0A/mnc9wCA904oAQKhBAiEEiAQSoBAKAECoQQIrrocOhwOtdlsajab1Wg0uvQmgItr27Z2u10tl8saj48/M3YK5WazqfV6/V/GAbwnTdPUarU6eqZTKGezWVVVrb+uazz1tt7V3Y+7vicM0uPj974nDM63xZe+JwzOvvb1UA9/+nZMp1D+ft0eT8dCeYLr6+u+JwzSfN73guGZ1rTvCYPV5XOi6gEEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEQgkQCCVAIJQAgVACBEIJEAglQCCUAIFQAgRCCRAIJUAglACBUAIEV10OtW1bVVWHl8NFx3w0r6+vfU8YpO227wXD81IvfU8YnH3tq+pv344ZtR1OPT091e3t7duXAbwzTdPUarU6eqbTE+XNzU1VVT0/P9disXj7sk9iu93Wer2upmlqPp/3PWcQ3Nl53Nvp2rat3W5Xy+Uynu0UyvH416fMxWLhRzjDfD53bydyZ+dxb6fp+uDnzxyAQCgBgk6hnEwmdX9/X5PJ5NJ7PhT3djp3dh73dlmd/vUG+My8egMEQgkQCCVAIJQAgVACBEIJEAglQCCUAMFPFNNia56tTbkAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 400x100 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Define your custom colors for each cell type\n",
    "custom_colors = {\n",
    "    'CANCER': (0.1333, 0.5451, 0.1333),\n",
    "    'STROMA': (0.4, 0.4, 0.4),\n",
    "    'IMMUNE': (1, 1, 0),\n",
    "    'ENDOTHELIAL': (0.502, 0, 0.502)\n",
    "}\n",
    "\n",
    "# Retrieve the list of cell types\n",
    "cell_types = list(custom_colors.keys())\n",
    "\n",
    "# Extract the corresponding colors from the dictionary\n",
    "color_values = [custom_colors[cell] for cell in cell_types]\n",
    "\n",
    "# Display the colors\n",
    "sb.palplot(sb.color_palette(color_values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "704674b9-be53-414b-a03a-4ad1465cfb23",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'CANCER': (0.1333, 0.5451, 0.1333),\n",
       " 'STROMA': (0.4, 0.4, 0.4),\n",
       " 'IMMUNE': (1, 1, 0),\n",
       " 'ENDOTHELIAL': (0.502, 0, 0.502)}"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Store in a dctionnary\n",
    "celltype_color_dict = dict(zip(cell_types, color_values))\n",
    "celltype_color_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "eeeb576b-aea8-4c37-ada1-7f324ddde99a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/celltype_color_data.csv was created!\n"
     ]
    }
   ],
   "source": [
    "# Save color information (mapping and legend) to metadata directory\n",
    "# Create dataframe\n",
    "celltype_color_df = color_dict_to_df(celltype_color_dict, \"cell_type\")\n",
    "celltype_color_df.head()\n",
    "\n",
    "# Save to file in metadatadirectory\n",
    "filename = \"celltype_color_data.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "celltype_color_df.to_csv(filename, index = False)\n",
    "print(\"File\" + filename + \" was created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "e5ba634f-bbc8-4662-9c8b-48bf3c649cc8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAK4AAACHCAYAAACPr2IQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAVpklEQVR4nO2de1CU1f/H33thubisCIGIrdxvgqJUmmgkBYPklBphMlIgaqkIKVLqV+TiSJhUWzPlpWkFnUJNRbtQlNqud1BT0ESlGhGVJc1aCFkE4fz+YHh+PO0Cu+yGPbvnNfPMuOd8zuecZ+ft4TzPns/58AghBBQKx+A/7AFQKAOBCpfCSahwKZyECpfCSahwKZyECpfCSahwKZyECpfCSahwKZyECpfCSahwKZyECpfCSahwKZyECpfCSYSGNujo6EB7e/u/MRaKBSASicDnGz9f6i1cQggaGhqgVquN7pRiufD5fHh6ekIkEhnlh6fvRnKVSgW1Wg0XFxfY2dmBx+MZ1THF8ujs7ER9fT2srKwwatQoozSk14zb0dHBiNbJyWnAnVEozs7OqK+vx4MHD2BlZTVgP3otNrrXtHZ2dgPuiEIBwCwROjo6jPJj0CqZLg8oxmIqDdHXYRROQoVL4SRUuP2Qk5ODcePGMZ+TkpIwc+bMhzYeShdmLdyGhgakpqbCy8sL1tbWkEqleP7553H48OF/rc+ioiI4ODj8a/4pXRj8yxlXqK2txeTJk+Hg4ICCggKMGTMG7e3t+P7775GSkoIrV6487CFSjMBsZ9wlS5aAx+Ph9OnTiI2NhZ+fH4KCgpCeno7y8nLGTq1WY8GCBXB2doZEIsEzzzyDqqqqAfWpVCoxb948NDY2gsfjgcfjIScnB+vWrUNwcLCW/bhx47B27VoA/78Eyc3NZcayaNEitLW1MfadnZ3Iz8+Hp6cnbG1tERISgr179w5orFzHLGfcP//8E2VlZcjLy8OQIUO06nv+KY+Li4OtrS2+++47DB06FFu3bsWzzz6LmpoaODo6GtRvWFgYPvjgA2RlZeHq1asAALFYDLVajdzcXJw5cwZPPPEEAOD8+fO4cOECSkpKmPaHDx+GjY0NlEolamtrMW/ePDg5OSEvLw8AkJ+fj88++wxbtmyBr68vjh49ioSEBDg7O+Ppp5829GviNkQPNBoNqa6uJhqNRh/zh05FRQUBQEpKSvq0O3bsGJFIJKS1tZVV7u3tTbZu3UoIISQ7O5uEhIQwdYmJiWTGjBm9+iwsLCRDhw7VKo+JiSGLFy9mPqemppKpU6ey/Do6OpJ79+4xZZs3byZisZh0dHSQ1tZWYmdnR06ePMnyO3/+fBIfH9/nff6XMJWWzHLGJXqe41dVVYXm5matn7E1Gg1+++03k45p4cKFSE5Oxvvvvw8+n4/i4mLIZDKWTUhICOvXyUmTJqG5uRk3btxAc3MzWlpaEBUVxWrT1taG8ePHm3SsXMAshevr6wsej9fvA1hzczNGjBgBpVKpVWfqNwPPP/88rK2tsX//fohEIrS3t+Oll17Su31zczMAoLS0FCNHjmTVWVtbm3SsXMAshevo6Ijo6Gh8/PHHSEtL01rnqtVqODg4IDQ0FA0NDRAKhfDw8DBJ3yKRSOfv8EKhEImJiSgsLIRIJMKcOXNga2vLsqmqqoJGo2HKy8vLIRaLIZVK4ejoCGtra9TV1VneelYHZilcAPj4448xefJkTJgwAevWrcPYsWPx4MEDHDx4EJs3b8bly5cRGRmJSZMmYebMmdi4cSP8/PxQX1+P0tJSzJo1C48//rjB/Xp4eKC5uRmHDx9m/vR3//lfsGABAgMDAQAnTpzQatvW1ob58+cjMzMTtbW1yM7OxtKlS8Hn82Fvb4+MjAwsX74cnZ2dmDJlChobG3HixAlIJBIkJiYa94VxjcFcUA829fX1JCUlhbi7uxORSERGjhxJXnjhBaJQKBibpqYmkpqa
Stzc3IiVlRWRSqVk7ty5pK6ujhBi+MMZIYQsWrSIODk5EQAkOzubVffUU0+RoKAgrTbdfrOysoiTkxMRi8Vk4cKFrAfHzs5O8sEHHxB/f39iZWVFnJ2dSXR0NDly5IjB383DwlRa0msjeWtrK65duwZPT0/Y2NgMwn8n84QQAl9fXyxZsgTp6emsuqSkJKjVahw4cODhDG6QMJWWzHap8F/jzp072LVrFxoaGjBv3ryHPRzOQ4U7SLi4uOCRRx7BJ598gmHDhj3s4XAeKtxBor8VWVFR0eAMxEww270KFPOGCpfCSahwKZyECpfCSahwKZyECpfCSYx+Heb9rrcpxqEXv2UMbKthQ0MD8vLyUFpailu3bsHFxQXjxo3DsmXL8OyzzzJ2+fn5yMzMxIYNG/Dmm2+yfBQVFWHevHmIjo5GWVkZU65WqzFs2DAoFApMnTqVKVcoFCgoKEBFRQU0Gg08PDwQExOD9PR0jBw5EkqlEhERETrHq1Kp4OrqipycHOTm5gLoOnPLzc0NMTEx2LBhg8Gb3M0Ns59xa2tr8dhjj+HHH39EQUEBLl68iLKyMkRERCAlJYVlu23bNrz11lvYtm2bTl9CoRCHDh2CQqHos8+tW7ciMjISrq6u2LdvH6qrq7FlyxY0NjbivffeY9levXoVKpWKdbm4uDD1QUFBUKlUqKurQ2FhIcrKyrB48eIBfhvmg9n/ANEz9qzn9sagoCAkJyczn48cOQKNRoN169Zhx44dOHnyJMLCwli+hgwZgtmzZ2PVqlWoqKjQ2d/NmzeRlpaGtLQ01kZxDw8PhIeHa5126eLi0ufeX6FQCFdXVwDAyJEjERcXh8LCQn1v32wx6xm3O/YsJSWl39gzuVyO+Ph4WFlZIT4+HnK5XKfPnJwcXLx4sdcgxT179qCtrQ1vvfWWznpjNqjX1tbi+++/N/qITnPArIX766+/ghCCgICAPu2ampqwd+9eJCQkAAASEhLwxRdfMFEHPXFzc8Mbb7yBNWvW4MGDB1r1v/zyCyQSCUaMGKHXGB999FGIxWLmCgoKYtVfvHgRYrEYtra28PT0xKVLl7By5Uq9fJszZi1cPXZsAgB27twJb29vhISEAOgKG3d3d8fu3bt12q9cuRJ37tzRuRYmhBh0sNuxY8dQWVnJXN9++y2r3t/fH5WVlThz5gxWrlyJ6OhopKam6u3fXDFr4eobeyaXy3Hp0iUIhULmqq6u7vUhzcHBAatXr0Zubi5aWlpYdX5+fmhsbIRKpdJrjJ6envDx8WEud3d3Vr1IJIKPjw+Cg4OxYcMGCAQC5k2DJWPWwu0Ze3bv3j2terVajYsXL+Ls2bNQKpWsmU+pVOLUqVO9ij41NRV8Ph8ffvghq/yll16CSCTCxo0bdbYzNhVBZmYm3n33XdTX1xvlh+uY/VuF/mLPoqOjMWHCBISHh2u1feKJJyCXy1FQUKBVZ2Njg9zcXK1XalKpFDKZDEuXLkVTUxNeffVVeHh44ObNm9ixYwfEYjHrldjt27fR2trK8uHk5NTrad2TJk3C2LFj8fbbb+Ojjz4ayFdiFpj1jAsAXl5eOHfuHCIiIrBixQoEBwcjKioKhw8fxocffojPPvsMsbGxOtvGxsZix44dvWYZSkxMhJeXl1b5kiVL8MMPP+DWrVuYNWsWAgICsGDBAkgkEmRkZLBs/f39MWLECNb1008/9XlPy5cvx6effoobN27o+S2YHzTmjDKomEpLZj/jUswTKlwKJ6HCpXASKlwKJ6HCpXASKlwKJ6HCpXASKlwKJ6HCpXASKlwKJzF6k83rr79uinHoxdatWw1uc+fOHWRlZaG0tBS///47hg0bhpCQEISHhyMzM7PPtgqFgsl+A3QlUB4+fDjCw8NRUFCAUaNGsewvXbqE3NxcKBQKNDU1wd3dHXPmzMGqVatYuR08PDxw/fp17Ny5E3PmzGH5CAoKQnV1NQoLC5GUlMSq6yuY09Iw+xk3NjYW58+f
x/bt21FTU4OvvvoKU6dOxZgxY1gBirNnz8a0adNYZd0xZxKJBCqVCrdu3cK+fftw9epVxMXFsfopLy/HxIkT0dbWhtLSUtTU1CAvLw9FRUWIiopi5SsDunaR/TN2rLy8HA0NDTrDjID+gzktCbPe1qhWq3Hs2DEolUomb4K7uzsmTJigZWtra4v79+8zgYk94fF4TPmIESMwf/58pKWloampCRKJBIQQzJ8/H4GBgSgpKQGfz2f68vPzw/jx4yGTyVghN3PnzoVMJsONGzcglUoBdAlz7ty52LFjh9YY9AnmtCTMesbtjuM6cOAA7t+/bxKft2/fxv79+yEQCCAQCAAAlZWVqK6uRnp6OiPabkJCQhAZGYmdO3eyyocPH47o6Ghs374dANDS0oLdu3ezIo97om8wp6Vg1sIVCoUoKirC9u3b4eDggMmTJ+N///sfLly4YJCfxsZGiMViDBkyBMOHD4dCoWBFDtfU1AAAk5jknwQGBjI2PUlOTkZRUREIIdi7dy+8vb1Zmdq7MSSY01Iwa+ECXWvc+vp6fPXVV5g2bRqUSiVCQ0MNOkjZ3t4elZWVOHv2LN577z2EhoYyaUp7om9wZjfTp09Hc3Mzjh49im3btvU62xoazGkJmL1wga4wm6ioKKxduxYnT55EUlISsrOz9W7P5/Ph4+ODwMBApKen48knn2SdJuPn5wcAuHz5ss72ly9fZmx6IhQK8corryA7OxsVFRWYO3euzvaGBnNaAhYh3H8yevRoncGT+rJq1Srs3r0b586dA9A1AwYEBEAmk6Gzs5NlW1VVhUOHDiE+Pl6nr+TkZBw5cgQzZszQmRtioMGc5o5Zv1W4e/cu4uLikJycjLFjx8Le3h5nz57Fxo0bMWPGjAH7lUqlmDVrFrKysvDNN9+Ax+NBLpcjKioKsbGxWL16NVxdXVFRUYEVK1Zg0qRJWLZsmU5fgYGB+OOPP1jveXsil8sHFMxp7pj1jCsWizFx4kTIZDKEh4cjODgYa9euxcKFC42OkF2+fDlKS0tx+vRpAEBYWBjKy8shEAgQExMDHx8frF69GomJiTh48GCf+XadnJy00qMCXZkmjQnmNGdosCRlUKHBkhSLhgqXwkmocCmchAqXwkmocCmchAqXwkmocCmchAqXwkmocCmchAqXwklMIFzeIF6Gk5SUhJkzZzL/5vF4WLRokZZdSkoKeDweK0DRUPupU6fq3ExTVFTEShOVk5Oj029lZSV4PB5qa2sBdKWH4vF4Oq/y8nK9vwNzxOJmXKlUil27dkGj0TBlra2tKC4u1oraHYi9vtjY2EAul+OXX37p1/bQoUNa2Scfe+yxAfdtDliccENDQyGVSlFSUsKUlZSUYNSoURg/frzR9vri7++PiIgIrFmzpl9bJycnuLq6sq7eckRYChYnXKBr83bP0PBt27YxZyeYwl5fNmzYgH379uHs2bNG+7I0LFK4CQkJOH78OK5fv47r16/jxIkTTCCiKez1JTQ0FLNnz+43U2RYWBgr+6RYLDa6b65j1hEQveHs7Izp06czEbbTp0/HI488YjJ7Q1i/fj0CAwPxww8/sLKm92T37t29RhBbKhYpXKDrz//SpUsBdOVCM4W9RCJBY2OjVrlarcbQoUN1tvH29sbChQuxatWqXs9KkEql8PHx6XeMloRFLhUAYNq0aWhra0N7ezuio6NNYu/v788EUPbk3LlzOqN8u8nKykJNTQ127dql/w1YOBY74woEAiacvPtEGmPtFy9ejI8++ghpaWlYsGABrK2tUVpaip07d+Lrr7/u1ffw4cORnp7ea9Dj3bt30dDQwCpzcHCw6DAqi51xga4/7RKJxGT2Xl5eOHr0KK5cuYLIyEhMnDgRX3zxBfbs2YNp06b16TsjI6PXh67IyEit7JMHDhzQe9zmCA2WpAwqNFiSYtFQ4VI4CRUuhZNQ4VI4CRUuhZNQ4VI4CRUuhZNQ4VI4CRUuhZNQ4VI4idGbbHJ5
uaYYh15kE/3zNnSTlJTEpGTqSXR0NMrKypgsj6dOncKTTz7J1C9btow5sh7oCnDMze26V4FAAAcHB4wePRovvvgiFi9erHVwc39ZJpVKJSIiIvoce3dmy2XLlkGtVmvV83g87N+/nwkG5fF0B5R2Z7Ds7vOvv/5iBW92k5OTgwMHDqCyspJVfvPmTXh5ecHPzw8///xzv+MYDCxixv1nxkiVSsXKO2ZjY9NvFALQla5UpVKhrq4OCoUCcXFxyM/PR1hYGP7++2/GTp8sk2FhYXpntjSEwsJCrXs1VlBFRUWYPXs2mpqaUFFRYZQvU2ER2xqtra11Zozs5rXXXsOWLVvw7bff4rnnnuvVTigUMn7c3NwwZswYREVFISQkBO+88w7Wr19vUJbJnmPqK7OlITg4OBjtoyeEEBQWFmLTpk149NFHIZfLMXHiRJP5HygWMeP2h6enJxYtWoTVq1drZc3pj4CAAMTExDBRwAPJMvlfRqFQoKWlBZGRkUhISMCuXbuMylhkKixCuN98841WsOHbb7/NssnMzMS1a9fw+eefG+w/ICCAOcRjoFkm+6I7s6U+AZPx8fFadnV1dQb11xO5XI45c+ZAIBAgODgYXl5e2LNnz4D9mQqLWCpERERg8+bNrDJHR0fWZ2dnZ2RkZCArKwsvv/yyQf4JIVoPRoZmmewLe3t7nSFBvr6+WmUymQyRkZGsMjc3twH1q1arUVJSguPHjzNlCQkJkMvlrBN8HgYWIdwhQ4boFWyYnp6OTZs2YdOmTQb5v3z5Mjw9PQGws0zqOjCktyyTfdGd2VIfXF1dTRZYWVxcjNbWVtaalhCCzs5O1NTUGHwfpsQilgr6IhaLsXbtWuTl5bHeEvTFlStXUFZWxuQiMybL5H8NuVyOFStWsDJaVlVV4amnnnro6VgtYsa9f/++VrChUCjUeTbCa6+9BplMhuLiYq2n5wcPHqChoQGdnZ24e/culEol1q9fj3HjxuHNN98EAKOyTJoCtVqtda/29vZMpnegK82qvb0985nH4zEJrruprKzEuXPn8PnnnyMgIIBVFx8fj3Xr1mH9+vUQCrskdO3aNa33v76+vqx+TQrRA41GQ6qrq4lGo9HH/D9FYmIiAaB1+fv7E0IIcXd3JzKZjNWmuLiYACBPP/00U5adnc20FQgExNHRkUyZMoXIZDLS2tqq1e+FCxdIbGwscXR0JFZWVsTb25tkZmaSe/fu9TrOGTNmaJUXFhaSoUOH6mwDgOzfv5/1WdeVn59PCCFEoVDorBcIBMw9hoSEEEIIWbp0KRk9erTOflUqFeHz+eTLL7/ss99jx45ptTWVlmiwJGVQocGSFIuGCpfCSahwKZyECpfCSQwSrh7PcRRKn5hKQ3oJt/vY9paWFpN0SrFc2traAOh30GBf6PUDRPfG6du3bwMA7Ozset20TKH0RmdnJ+7cuQM7Ozvmh4uBonfr7j2e3eKlUAYCn8/HqFGjjJ749PoBoicdHR1ob283qlOK5SISibT2KQ8Eg4VLofwXoK/DKJyECpfCSahwKZyECpfCSahwKZyECpfCSahwKZzk/wASffYcfzO+MQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 100x100 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Legend of cell type info only\n",
    "g  = plt.figure(figsize = (1,1)).add_subplot(111)\n",
    "g.axis('off')\n",
    "handles = []\n",
    "for item in celltype_color_dict.keys():\n",
    "        h = g.bar(0,0, color = celltype_color_dict[item],\n",
    "                  label = item, linewidth =0)\n",
    "        handles.append(h)\n",
    "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cell type'),\n",
    "\n",
    "\n",
    "filename = \"Celltype_legend.png\"\n",
    "filename = os.path.join(metadata_images_dir, filename)\n",
    "plt.savefig(filename, bbox_inches = 'tight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "4b3305bb-688b-4540-bcf8-32c7c0ff69fe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Round</th>\n",
       "      <th>Target</th>\n",
       "      <th>Exp</th>\n",
       "      <th>Channel</th>\n",
       "      <th>target_lower</th>\n",
       "      <th>full_column</th>\n",
       "      <th>marker</th>\n",
       "      <th>localisation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF488</td>\n",
       "      <td>300</td>\n",
       "      <td>c2</td>\n",
       "      <td>af488</td>\n",
       "      <td>AF488_Cell_Intensity_Average</td>\n",
       "      <td>AF488</td>\n",
       "      <td>cell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF488</td>\n",
       "      <td>300</td>\n",
       "      <td>c2</td>\n",
       "      <td>af488</td>\n",
       "      <td>AF488_Cytoplasm_Intensity_Average</td>\n",
       "      <td>AF488</td>\n",
       "      <td>cytoplasm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF488</td>\n",
       "      <td>300</td>\n",
       "      <td>c2</td>\n",
       "      <td>af488</td>\n",
       "      <td>AF488_Nucleus_Intensity_Average</td>\n",
       "      <td>AF488</td>\n",
       "      <td>nucleus</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF555</td>\n",
       "      <td>1500</td>\n",
       "      <td>c3</td>\n",
       "      <td>af555</td>\n",
       "      <td>AF555_Cell_Intensity_Average</td>\n",
       "      <td>AF555</td>\n",
       "      <td>cell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>R0</td>\n",
       "      <td>AF555</td>\n",
       "      <td>1500</td>\n",
       "      <td>c3</td>\n",
       "      <td>af555</td>\n",
       "      <td>AF555_Cytoplasm_Intensity_Average</td>\n",
       "      <td>AF555</td>\n",
       "      <td>cytoplasm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>R8</td>\n",
       "      <td>Sting</td>\n",
       "      <td>1000</td>\n",
       "      <td>c4</td>\n",
       "      <td>sting</td>\n",
       "      <td>Sting_Cytoplasm_Intensity_Average</td>\n",
       "      <td>Sting</td>\n",
       "      <td>cytoplasm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>104</th>\n",
       "      <td>R8</td>\n",
       "      <td>Sting</td>\n",
       "      <td>1000</td>\n",
       "      <td>c4</td>\n",
       "      <td>sting</td>\n",
       "      <td>Sting_Nucleus_Intensity_Average</td>\n",
       "      <td>Sting</td>\n",
       "      <td>nucleus</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105</th>\n",
       "      <td>R8</td>\n",
       "      <td>CD11b</td>\n",
       "      <td>1500</td>\n",
       "      <td>c5</td>\n",
       "      <td>cd11b</td>\n",
       "      <td>CD11b_Cell_Intensity_Average</td>\n",
       "      <td>CD11b</td>\n",
       "      <td>cell</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106</th>\n",
       "      <td>R8</td>\n",
       "      <td>CD11b</td>\n",
       "      <td>1500</td>\n",
       "      <td>c5</td>\n",
       "      <td>cd11b</td>\n",
       "      <td>CD11b_Cytoplasm_Intensity_Average</td>\n",
       "      <td>CD11b</td>\n",
       "      <td>cytoplasm</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>R8</td>\n",
       "      <td>CD11b</td>\n",
       "      <td>1500</td>\n",
       "      <td>c5</td>\n",
       "      <td>cd11b</td>\n",
       "      <td>CD11b_Nucleus_Intensity_Average</td>\n",
       "      <td>CD11b</td>\n",
       "      <td>nucleus</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>108 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    Round Target   Exp Channel target_lower  \\\n",
       "0      R0  AF488   300      c2        af488   \n",
       "1      R0  AF488   300      c2        af488   \n",
       "2      R0  AF488   300      c2        af488   \n",
       "3      R0  AF555  1500      c3        af555   \n",
       "4      R0  AF555  1500      c3        af555   \n",
       "..    ...    ...   ...     ...          ...   \n",
       "103    R8  Sting  1000      c4        sting   \n",
       "104    R8  Sting  1000      c4        sting   \n",
       "105    R8  CD11b  1500      c5        cd11b   \n",
       "106    R8  CD11b  1500      c5        cd11b   \n",
       "107    R8  CD11b  1500      c5        cd11b   \n",
       "\n",
       "                           full_column marker localisation  \n",
       "0         AF488_Cell_Intensity_Average  AF488         cell  \n",
       "1    AF488_Cytoplasm_Intensity_Average  AF488    cytoplasm  \n",
       "2      AF488_Nucleus_Intensity_Average  AF488      nucleus  \n",
       "3         AF555_Cell_Intensity_Average  AF555         cell  \n",
       "4    AF555_Cytoplasm_Intensity_Average  AF555    cytoplasm  \n",
       "..                                 ...    ...          ...  \n",
       "103  Sting_Cytoplasm_Intensity_Average  Sting    cytoplasm  \n",
       "104    Sting_Nucleus_Intensity_Average  Sting      nucleus  \n",
       "105       CD11b_Cell_Intensity_Average  CD11b         cell  \n",
       "106  CD11b_Cytoplasm_Intensity_Average  CD11b    cytoplasm  \n",
       "107    CD11b_Nucleus_Intensity_Average  CD11b      nucleus  \n",
       "\n",
       "[108 rows x 8 columns]"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "a54e644d-00a8-409a-9f62-12e6cd30e669",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Nuc_X', 'Sample_ID', 'Nucleus_Roundness', 'Nuc_Y_Inv',\n",
       "       'ROI_index', 'Cell_Size', 'Nucleus_Size',\n",
       "       'AF488_Cell_Intensity_Average',\n",
       "       'AF488_Cytoplasm_Intensity_Average',\n",
       "       'AF488_Nucleus_Intensity_Average', 'AF555_Cell_Intensity_Average',\n",
       "       'AF555_Cytoplasm_Intensity_Average',\n",
       "       'AF555_Nucleus_Intensity_Average', 'AF647_Cell_Intensity_Average',\n",
       "       'AF647_Cytoplasm_Intensity_Average',\n",
       "       'AF647_Nucleus_Intensity_Average', 'AF750_Cell_Intensity_Average',\n",
       "       'AF750_Cytoplasm_Intensity_Average',\n",
       "       'AF750_Nucleus_Intensity_Average', 'aSMA_Cell_Intensity_Average',\n",
       "       'aSMA_Cytoplasm_Intensity_Average',\n",
       "       'aSMA_Nucleus_Intensity_Average', 'AXL_Cell_Intensity_Average',\n",
       "       'AXL_Cytoplasm_Intensity_Average', 'AXL_Nucleus_Intensity_Average',\n",
       "       'B7H4_Cell_Intensity_Average', 'B7H4_Cytoplasm_Intensity_Average',\n",
       "       'B7H4_Nucleus_Intensity_Average', 'CA9_Cell_Intensity_Average',\n",
       "       'CA9_Cytoplasm_Intensity_Average', 'CA9_Nucleus_Intensity_Average',\n",
       "       'CD4_Cell_Intensity_Average', 'CD4_Cytoplasm_Intensity_Average',\n",
       "       'CD4_Nucleus_Intensity_Average', 'CD8_Cell_Intensity_Average',\n",
       "       'CD8_Cytoplasm_Intensity_Average', 'CD8_Nucleus_Intensity_Average',\n",
       "       'CD11b_Cell_Intensity_Average',\n",
       "       'CD11b_Cytoplasm_Intensity_Average',\n",
       "       'CD11b_Nucleus_Intensity_Average', 'CD11c_Cell_Intensity_Average',\n",
       "       'CD11c_Cytoplasm_Intensity_Average',\n",
       "       'CD11c_Nucleus_Intensity_Average', 'CD20_Cell_Intensity_Average',\n",
       "       'CD20_Cytoplasm_Intensity_Average',\n",
       "       'CD20_Nucleus_Intensity_Average', 'CD31_Cell_Intensity_Average',\n",
       "       'CD31_Cytoplasm_Intensity_Average',\n",
       "       'CD31_Nucleus_Intensity_Average', 'CD44_Cell_Intensity_Average',\n",
       "       'CD44_Cytoplasm_Intensity_Average',\n",
       "       'CD44_Nucleus_Intensity_Average', 'CD45_Cell_Intensity_Average',\n",
       "       'CD45_Cytoplasm_Intensity_Average',\n",
       "       'CD45_Nucleus_Intensity_Average', 'CD68_Cell_Intensity_Average',\n",
       "       'CD68_Cytoplasm_Intensity_Average',\n",
       "       'CD68_Nucleus_Intensity_Average', 'CD163_Cell_Intensity_Average',\n",
       "       'CD163_Cytoplasm_Intensity_Average',\n",
       "       'CD163_Nucleus_Intensity_Average', 'CKs_Cell_Intensity_Average',\n",
       "       'CKs_Cytoplasm_Intensity_Average', 'CKs_Nucleus_Intensity_Average',\n",
       "       'ColVI_Cell_Intensity_Average',\n",
       "       'ColVI_Cytoplasm_Intensity_Average',\n",
       "       'ColVI_Nucleus_Intensity_Average', 'Desmin_Cell_Intensity_Average',\n",
       "       'Desmin_Cytoplasm_Intensity_Average',\n",
       "       'Desmin_Nucleus_Intensity_Average', 'Ecad_Cell_Intensity_Average',\n",
       "       'Ecad_Cytoplasm_Intensity_Average',\n",
       "       'Ecad_Nucleus_Intensity_Average',\n",
       "       'Fibronectin_Cell_Intensity_Average',\n",
       "       'Fibronectin_Cytoplasm_Intensity_Average',\n",
       "       'Fibronectin_Nucleus_Intensity_Average',\n",
       "       'FOXP3_Cell_Intensity_Average',\n",
       "       'FOXP3_Cytoplasm_Intensity_Average',\n",
       "       'FOXP3_Nucleus_Intensity_Average', 'GATA3_Cell_Intensity_Average',\n",
       "       'GATA3_Cytoplasm_Intensity_Average',\n",
       "       'GATA3_Nucleus_Intensity_Average', 'HLA_Cell_Intensity_Average',\n",
       "       'HLA_Cytoplasm_Intensity_Average', 'HLA_Nucleus_Intensity_Average',\n",
       "       'Ki67_Cell_Intensity_Average', 'Ki67_Cytoplasm_Intensity_Average',\n",
       "       'Ki67_Nucleus_Intensity_Average', 'MMP9_Cell_Intensity_Average',\n",
       "       'MMP9_Cytoplasm_Intensity_Average',\n",
       "       'MMP9_Nucleus_Intensity_Average', 'PD1_Cell_Intensity_Average',\n",
       "       'PD1_Cytoplasm_Intensity_Average', 'PD1_Nucleus_Intensity_Average',\n",
       "       'PDGFR_Cell_Intensity_Average',\n",
       "       'PDGFR_Cytoplasm_Intensity_Average',\n",
       "       'PDGFR_Nucleus_Intensity_Average', 'PDL1_Cell_Intensity_Average',\n",
       "       'PDL1_Cytoplasm_Intensity_Average',\n",
       "       'PDL1_Nucleus_Intensity_Average', 'r5c2_Cell_Intensity_Average',\n",
       "       'r5c2_Cytoplasm_Intensity_Average',\n",
       "       'r5c2_Nucleus_Intensity_Average', 'r7c2_Cell_Intensity_Average',\n",
       "       'r7c2_Cytoplasm_Intensity_Average',\n",
       "       'r7c2_Nucleus_Intensity_Average', 'r8c2_Cell_Intensity_Average',\n",
       "       'r8c2_Cytoplasm_Intensity_Average',\n",
       "       'r8c2_Nucleus_Intensity_Average', 'Sting_Cell_Intensity_Average',\n",
       "       'Sting_Cytoplasm_Intensity_Average',\n",
       "       'Sting_Nucleus_Intensity_Average',\n",
       "       'Vimentin_Cell_Intensity_Average',\n",
       "       'Vimentin_Cytoplasm_Intensity_Average',\n",
       "       'Vimentin_Nucleus_Intensity_Average', 'cell_type', 'cell_subtype'],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "23fc3f34-ae00-4e5b-9003-bbdb69442dc6",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(350554, 117)"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "f0434951-c5b4-4bc3-bbb8-329ed86603bb",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(108, 8)"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metadata.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "8fdee906-95a9-4e44-8990-5ee8ab25458a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Divide each marker (and its localisation) by the right exposure setting for each group of samples\n",
    "df.loc[:, ~df.columns.isin(not_intensities)] = \\\n",
    "    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: divide_exp_time(column, 'Exp', metadata), axis = 0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "23ac4db9-d5e4-47ca-a8d8-3250ee3a9a03",
   "metadata": {},
   "source": [
    "##  II.6. *CELL SUBTYPES COLORS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "8f77d2ca-4282-4ffd-987d-9ab547b59b96",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unique cell types are: ['none']\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7YAAABlCAYAAAB5uH+EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAEpElEQVR4nO3cv25bdRzG4V+cgMkf21I2LHvKDXAl3APqwGVETKAihqpCYmCGEbEACxsIqRIbA1OGWLLEarcOTYkPQxUmlJxzQvTjbZ5nPoq+ejPEH9nOTtM0TQEAAIBQg9oHAAAAwF0IWwAAAKIJWwAAAKIJWwAAAKIJWwAAAKIJWwAAAKIJWwAAAKLttXlou92W5XJZRqNR2dnZue+bAAAAeOCapinr9bpMp9MyGNz8nmyrsF0ul2U+n/8nxwEAAEBbi8WizGazG59pFbaj0aiUUsrT75+V/cOju1/2QHz0zW+1T4j06Qe/1z4hzic/f137hEg/Xn5Y+4Q4f3z8uPYJkd798v3aJ8QZ/PRF7RMi/fres9onxPnqyS+1T4j02Q9N7RPifPfo89onxNm8uiqPvj37p0dv0ipsrz9+vH94VA6Obv+hvDYYHtQ+IdLB6J3aJ8TZ3d+tfUKk8e5+7RPibG75GBD/bnw4rH1CnIHJejk68AZEV2/v+VvQx3gsbLs6eMvrtb7afB3WKxQAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACiCVsAAACi7bV5qGmaUkopFy+e3+sxb5rty03tEyJt1n/WPiHO1cVV7RMirS4vap8QZ73d1j4h0urFy9onxBmYrJfnG6/Vurr8y9+CPlarpvYJcTavvF7r6nqz6x69yU7T4qmzs7NycnJy98sAAACgg8ViUWaz2Y3PtHrH9vj4uJRSyvn5eZlMJne/7IFYrVZlPp+XxWJRxuNx7XMi2Kwfu3Vns37s1p3N+rFbdzbrx27d2awfu3XXNE1Zr9dlOp3e+myrsB0MXn8VdzKZ+CX0MB6P7daRzfqxW3c268du3dmsH7t1Z7N+7NadzfqxWzdt31j1z6MAAACIJmwBAACI1ipsh8NhOT09LcPh8L7veaPYrTub9WO37mzWj926s1k/duvOZv3YrTub9WO3+9XqvyIDAADA/5WPIgMAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABBN2AIAABDtbxF10D5wSqeJAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 1200x100 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Establish the colors used for cell subtypes throughout the workflow.\n",
    "#\n",
    "# Cell subtype is a non-ordered category, so a categorical palette is used:\n",
    "# its colors are dissimilar from one another. One color is requested per\n",
    "# entry of `cell_subtypes`, in the order listed here.\n",
    "cell_subtypes = [\n",
    "    'DC', 'B', 'TCD4', 'TCD8', 'M1', 'M2', 'Treg',\n",
    "    'IMMUNE_OTHER', 'CANCER', 'αSMA_myCAF',\n",
    "    'STROMA_OTHER', 'ENDOTHELIAL',\n",
    "]\n",
    "# each color value is a tuple of three values: (R, G, B)\n",
    "color_values = sb.color_palette(\"Paired\", n_colors=len(cell_subtypes))\n",
    "\n",
    "# Report which subtypes are actually present in the data\n",
    "print(\"Unique cell subtypes are:\", df.cell_subtype.unique())\n",
    "# Display the palette, in the same order as `cell_subtypes`\n",
    "sb.palplot(sb.color_palette(color_values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "bf75f91b-2bc2-4d4f-bc94-861c6ccf27da",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'DC': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),\n",
       " 'B': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),\n",
       " 'TCD4': (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),\n",
       " 'TCD8': (0.2, 0.6274509803921569, 0.17254901960784313),\n",
       " 'M1': (0.984313725490196, 0.6039215686274509, 0.6),\n",
       " 'M2': (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),\n",
       " 'Treg': (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),\n",
       " 'IMMUNE_OTHER': (1.0, 0.4980392156862745, 0.0),\n",
       " 'CANCER': (0.792156862745098, 0.6980392156862745, 0.8392156862745098),\n",
       " 'αSMA_myCAF': (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),\n",
       " 'STROMA_OTHER': (1.0, 1.0, 0.6),\n",
       " 'ENDOTHELIAL': (0.6941176470588235, 0.34901960784313724, 0.1568627450980392)}"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Store in a dictionary: each cell subtype name is paired, by position,\n",
    "# with its (R, G, B) color\n",
    "cellsubtype_color_dict = {\n",
    "    subtype: rgb for subtype, rgb in zip(cell_subtypes, color_values)\n",
    "}\n",
    "cellsubtype_color_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "b24374ef-ea1c-4910-a343-364fa7567497",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/cellsubtype_color_data.csv was created!\n"
     ]
    }
   ],
   "source": [
    "# Save color information (mapping and legend) to metadata directory\n",
    "# Create dataframe from the subtype -> color mapping\n",
    "# (color_dict_to_df is a helper defined outside this cell)\n",
    "cellsubtype_color_df = color_dict_to_df(cellsubtype_color_dict, \"cell_subtype\")\n",
    "\n",
    "# Save to file in metadata directory, without writing the index column\n",
    "filename = \"cellsubtype_color_data.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "cellsubtype_color_df.to_csv(filename, index = False)\n",
    "print(\"File \" + filename + \" was created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "90df1f96-7934-42d9-948f-69fdbbc284df",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAL8AAAEvCAYAAAAU+xDxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvH0lEQVR4nO3de1yM6f8/8NdUmnRQSURS0UEpOdbGB0Ukh8VKRLaQXeQQi5JDsR3Qxzcf381pdbKUU4UlOZZziHIqsj7FYlrHEEmZ6/eHX/e3MVPNNFNTc1/Px2Mej537vu77vu7ZV7f7nrnf98UhhBBQFAspybsDFCUvNPwUa9HwU6xFw0+xFg0/xVo0/BRr0fBTrEXDT7EWDT/FWjT8FGvR8FOsRcNPsRYNP8VaNPwUa6lIusCXL19QUVHREH2hKLGoqqpCSUn647bY4SeEoLi4GCUlJVJvlKKkoaSkBFNTU6iqqkq1Ho64xSw8Hg8lJSVo27Yt1NXVweFwpNowRdUHn8/Hs2fP0KJFC3Tq1EmqHIp15P/y5QsTfD09vXpvjKJkQV9fH8+ePUNlZSVatGhR7/WIdeJUdY6vrq5e7w1RlKxUne58+fJFqvVIdNVAT3WopkBWOaRfdVKsRcP/jZCQEPTo0YN57+Pjg7Fjxzba9qjGo1DhLy4uxrx589C5c2dwuVwYGRlh9OjROH36tLy7JpWG/gNkK4l/5GqqioqK0L9/f+jo6CAyMhK2traoqKjA8ePH4efnh3v37sm7i1QTozBH/jlz5oDD4eDq1asYP348LCws0K1bNyxatAhZWVlMu5KSEvj6+kJfXx+tWrXC4MGDcfPmzXpv99GjRxg9ejR0dXWhoaGBbt26IS0tDQAQHx8PHR0dgfYHDx4UecG2bds2GBkZQV1dHR4eHnj79i2Ar6dFCQkJOHToEDgcDjgcDjIzMzF48GDMnTtXYB0vXryAqqoq8y+diYkJfv31V3h6ekJDQwOGhoaIjo4WWEbWn0dzohDhf/36NdLT0+Hn5wcNDQ2h+dUDOGHCBDx//hzHjh3D9evX0atXLwwZMgSvX7+u17b9/PxQXl6Oc+fO4fbt21i3bh00NTUlWsdff/2Fffv24c8//0R6ejpycnIwZ84cAMDixYvh4eGB4cOHg8fjgcfjoV+/fvD19UViYiLKy8uZ9ezatQuGhoYYPHgwMy0yMhJ2dnbIyclBYGAgFixYgJMnTzbY59GsEDGUlZWRvLw8UlZWJk7zRnflyhUCgKSkpNTa7vz586RVq1bk06dPAtO7dOlCtm3bRgghJDg4mNjZ2THzvL29yZgxY2pcp62tLQkJCRE5Ly4ujmhrawtMS01NJdU/9uDgYKKsrEyePHnCTDt27BhRUlIiPB6vxj6UlZURXV1dsnfvXmZa9+7dBfpibGxMhg8fLrDcxIkTiZubGyFEvM+jKZJVHhXiyE/EfNzozZs3UVpaCj09PWhqajKvwsJCPHz4sF7bnj9/PkJDQ9G/f38EBwfj1q1bEq+jU6dOMDQ0ZN47OjqCz+fj/v37NS6jpqaGqVOnIjY2FgBw48YN3LlzBz4+PgLtHB0dhd7n5+cDaJjPozlRiAtec3NzcDicOi9qS0tL0b59e2RmZgrN+/bcXFy+vr5wdXXF0aNHceLECURERGDDhg2YN28elJSUhP4wZXlHrK+vL3r06IEnT54gLi4OgwcPhrGxsdjLN8Tn0ZwoxJG/devWcHV1RXR0ND58+CA0v+pO1F69eqG4uBgqKiowMzMTeLVp06be2zcyMsKsWbOQkpKCX375Bb///juAr/egvH//XqBPubm5Qss/fvwYz549Y95nZWVBSUkJlpaWAL7+nC/qp3xbW1v06dMHv//+OxITEzF9+nShNtUv9qveW1lZAWi4z6O5UIjwA0B0dDS+fPkCe3t7JCcn48GDB8jPz8emTZuYf/pdXFzg6OiIsWPH4sSJEygqKsKlS5ew
fPlyZGdn12u7/v7+OH78OAoLC3Hjxg1kZGQw4XJwcIC6ujqCgoLw8OFDJCYmIj4+Xmgdampq8Pb2xs2bN3H+/HnMnz8fHh4eMDAwAPD1W5tbt27h/v37ePnypcC/Hr6+vli7di0IIRg3bpzQui9evIj169ejoKAA0dHR2L9/PxYsWNBgn0ez0pgXGA3t2bNnxM/PjxgbGxNVVVViaGhIvv/+e5KRkcG0effuHZk3bx7p0KEDadGiBTEyMiJTpkwhjx8/JoRIfsE7d+5c0qVLF8Llcom+vj6ZOnUqefnyJTM/NTWVmJmZkZYtW5JRo0aR7du3C13w2tnZkc2bN5MOHToQNTU14u7uTl6/fs20ef78ORk6dCjR1NQkAAT25/3790RdXZ3MmTNHqG/GxsZk9erVZMKECURdXZ0YGBiQ//znPwJt6vo8miJZ5VGs+/k/ffqEwsJCmJqaQk1NreH/IimxFRUVoUuXLrh27Rp69eolMM/ExAT+/v7w9/eXT+caiKzyqBAXvGxUUVGBV69eYcWKFfjuu++Egk/VTWHO+dnm4sWLaN++Pa5du4atW7fKuzvNEj3yN1NOTk51/r5RVFTUOJ1ppuiRn2ItGn6KtWj4Kdai4adYi4afYi0afoq1aPgp1pL6e/6knCey6IdYPHt2lHgZHx8fJCQkAABUVFTQunVrdO/eHZ6envDx8RF44GlOTg7Cw8Nx7tw5vH37FkZGRnBycsKSJUtgYWEhs/2gmgZWHPmrSgCLiopw7NgxODs7Y8GCBRg1ahQqKysBAEeOHMF3332H8vJy7N69G/n5+di1axe0tbWxcuVKOe8B1RBY8Qsvl8tlbg82NDREr1698N1332HIkCGIj4/H5MmTMW3aNIwYMQKpqanMcqampnBwcKBPplZQrAi/KIMHD4adnR1SUlKgp6eHly9fYunSpSLbsqGqiY1YcdpTk65du6KoqAgPHjxg3lPswerwE0LA4XDELoCnFAurw5+fnw9TU1Pmmxz6VDd2YW34z5w5g9u3b2P8+PEYNmwY2rRpg/Xr14tsSy94FRMrLnjLy8tRXFyML1++4J9//kF6ejoiIiIwatQo/Pjjj1BWVsaOHTswYcIEfP/995g/fz7MzMzw8uVL7Nu3D48fP8aePXvkvRuUjLEi/Onp6Wjfvj1UVFSgq6sLOzs7bNq0Cd7e3syPXGPGjMGlS5cQERGByZMn4927dzAyMsLgwYMRGhoq5z2gGgItYKeaHVnlkbXn/BRFw0+xFg0/xVo0/BRr0fBTrEXDT7EWDT/FWjT8FGvR8FOsRcNPsZbU9/aYLDsqi36IpShipMTLVC9gB74OYdS3b1+sX78e3bt3l2X3qGaGFUf+6mPYnj59GioqKhg1apS8u0XJGSvCX1XAbmBggB49eiAwMBB///03Xrx4Ie+uUXLEivBXV1pail27dsHMzAx6enry7g4lR6y4n//IkSPQ1NQEAHz48AHt27fHkSNHBB5YRbEPK/7vOzs7Izc3F7m5ubh69SpcXV3h5uaGR48eybtrlByxIvwaGhrM4Mp9+/bFjh078OHDB2awaIqdWBH+b3E4HCgpKaGsrEzeXaHkiBXn/FUF7ADw5s0b/PbbbygtLcXo0aPl3DNKnlgR/qoCdgDQ0tJC165dsX//fjg5Ocm3Y5Rc0QJ2qtmhBewUJSUafoq1aPgp1qLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtaS+t+fPokhZ9EMso02WiN2Ww+HUOj84OBghISF1jrpeVFQEU1NTZjlNTU106tQJTk5O8Pf3h7m5ucj1X7x4EYMGDYKNjQ1yc3PF7jfVeBT2yF9VsM7j8bBx40a0atVKYNrixYslGnX91KlT4PF4uHnzJsLDw5Gfnw87OzucPn1aaNslJSX48ccfMWTIkMbaXaoeFPauzqoR1wFAW1sbHA5HYNrHjx8lGnVdT0+PWb5z584YPXo0hgwZghkzZuDhw4dQVlZm
2s6aNQuTJ0+GsrIyDh482DA7SElNYY/8dTl+/LhUo64rKSlhwYIFePToEa5fv85Mj4uLw3//+18EBwfLsrtUA2Bt+GUx6nrVskVFRcw6AwMDsWvXLqioKOw/qgqDteGXxajrVevgcDj48uULJk+ejNWrVzODWlNNG2sPT9VHXXd0dKzXOvLz8wF8vU54//49srOzkZOTg7lz5wIA+Hw+CCFQUVHBiRMnMHjwYNl0npIJ1oa/+qjr1S94q5SUlNR63s/n87Fp0yaYmpqiZ8+e4HA4uH37tkCbzZs348yZMzhw4IDA16VU08Da8GtoaEg06vqrV69QXFyMjx8/4s6dO9i4cSOuXr2Ko0ePMt/02NjYCGyjbdu2UFNTE5pONQ2sDT8g2ajrLi4uAAB1dXUYGxvD2dkZ27dvh5mZmTy6TskALWCnmh1awE5RUqLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtWj4Kdai4adYS+p7e/ok2MqiH2LJ9r5dd6P/T14F7Lt378b69evx4MEDaGtrw83NDZGRkXTY0yZIYY/88ihgv3jxIn788UfMmDEDd+/exf79+3H16lXMnDmzsXefEoPC3tUpjwL2y5cvw8TEBPPnz2fW9fPPP2PdunUNuKdUfSnskb8uDVHA7ujoiL///htpaWkghOCff/7BgQMHMGLECFl3n5IB1oa/IQrY+/fvj927d2PixIlQVVWFgYEBtLW1ER0dLXV/KdljbfhlXcAOAHl5eViwYAFWrVqF69evIz09HUVFRZg1a5bU26JkT2HP+esi6wJ2AIiIiED//v2xZMnXxyp2794dGhoaGDBgAEJDQ5nhUKmmgbVH/uoF7KJ8e8H7rW8L2IGvF9FKSoIfaVV9ryz+paFki7VH/oYoYB89ejRmzpyJLVu2wNXVFTweD/7+/rC3t0eHDh3ktatUTYgYysrKSF5eHikrKxOneZMTFxdHtLW1Rc67du0a+eGHH4i+vj7hcrnEzMyM/PTTT+TBgweEEEIKCwsJAOalrq5OrKysyJw5c5g21W3atIlYW1uTli1bkvbt25MpU6aQJ0+eNOTusY6s8kgL2KlmhxawU5SUaPgp1qLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtWj4KdaS+sa2yt+3y6IfYlGZ+ZPEy/j4+CAhIQE///wztm7dKjDPz88Pmzdvhre3N+Lj43Hu3DlERkbi+vXr4PF4SE1NxdixY2XUe6qpYcWR38jICHv27EFZWRkz7dOnT0hMTESnTp2YaR8+fICdnR2tvGIJVtzS3KtXLzx8+BApKSmYMmUKACAlJQWdOnUSeCyJm5sb3Nzc5NVNqpGx4sgPANOnT0dcXBzzPjY2FtOmTZNjjyh5Y034vby8cOHCBTx69AiPHj3CxYsX4eXlJe9uUXLEitMeANDX18fIkSMRHx8PQghGjhyJNm3ayLtblByxJvzA11OfqtHR6UUtxarwDx8+HJ8/fwaHw4Grq6u8u0PJGavCr6yszDxupKrovLrS0lL89ddfzPvCwkLk5uaidevWAl+JUopB6vDX54cneWrVqlWN87Kzs+Hs7My8X7RoEQAwP4JRioUWsFPNDi1gpygp0fBTrEXDT7EWDT/FWjT8FGvR8FOsRcNPsRYNP8VaNPwUa9HwU6wl9b09TwyNZNEPsXR8+rfEy0hSwB4REYGUlBTcu3cPLVu2RL9+/bBu3TpYWlrKaheoJoQVR35xC9jPnj0LPz8/ZGVl4eTJk6ioqMCwYcPw4cMHeXSbamCsuKVZ3AL29PR0geXi4+PRtm1bXL9+HQMHDmzUPlMNjxVHfqB+Bexv374FALRu3bpB+0bJB2vCL2kBO5/Ph7+/P/r37w8bG5tG7CnVWFhx2gNIXsDu5+eHO3fu4MKFC43YS6oxsSb8gPgF7HPnzsWRI0dw7tw5dOzYsbG6RzUyVoW/rgJ2QgjmzZuH1NRUZGZmClwMU4qHVeGvq4Dd
z88PiYmJOHToELS0tFBcXAwA0NbWRsuWLRu1r1TDkzr89fnhSZ5qK2DfsmULAMDJyUlgelxcHHx8fBqwV5Q80AJ2qtmhBewUJSUafoq1aPgp1qLhp1iLhp9iLRp+irVo+CnWouGnWIuGn2ItGn6KtaS+t4ef6S+DbohHyWmj2G05HE6t84ODgxESEiJdh6hmTWHv6uTxeMx/7927F6tWrcL9+/eZaZqamsx/E0Lw5csXqKgo7MdBiaCwpz0GBgbMS1tbGxwOh3l/7949aGlp4dixY+jduze4XC4uXLgAPp+PiIgImJqaomXLlrCzs8OBAwcE1nv48GGYm5tDTU0Nzs7OSEhIAIfDQUlJiXx2lKo3Vh/qAgMD8e9//xudO3eGrq4uIiIisGvXLmzduhXm5uY4d+4cvLy8oK+vj0GDBqGwsBDu7u5YsGABfH19kZOTg8WLF8t7N6h6YnX416xZg6FDhwIAysvLER4ejlOnTsHR0REA0LlzZ1y4cAHbtm3DoEGDsG3bNlhaWiIyMhIAYGlpiTt37iAsLExu+0DVH6vD36dPH+a///rrL3z8+JH5Y6jy+fNn9OzZEwBw//599O3bV2C+vb19w3eUahCsDr+Ghgbz36WlpQCAo0ePwtDQUKAdl8tt1H5RjYPV4a/O2toaXC4Xjx8/xqBBg0S2sbS0RFpamsC0a9euNUb3qAZAw///aWlpYfHixVi4cCH4fD7+9a9/4e3bt7h48SJatWoFb29v/Pzzz/if//kfBAQEYMaMGcjNzWUGp67rdwWqCSJiKCsrI3l5eaSsrEyc5k1OXFwc0dbWZt5nZGQQAOTNmzcC7fh8Ptm4cSOxtLQkLVq0IPr6+sTV1ZWcPXuWaXPo0CFiZmZGuFwucXJyIlu2bCEAmu1n0xzJKo+0gF1KYWFh2Lp1K/7+u3k9xaI5k1Ue6WmPhDZv3oy+fftCT08PFy9eRGRkJPMUOKp5oeGX0IMHDxAaGorXr1+jU6dO+OWXX7Bs2TJ5d4uqB3raQzU79Lk9FCUlGn6KtWj4Kdai4adYi4afYi0afoq1aPgp1pI+/CGcxnvVg4+PD8aOHcv8N4fDwaxZs4Ta+fn5gcPhCAxCIWl7Jycn+Pv7C7WNj4+Hjo7O/31kISEi15ubmwsOh4OioiIAQFFRETgcjshXVlaWWPtfVlaG4OBgWFhYgMvlok2bNpgwYQLu3r3LtDExMalxO9X3kcPh4ODBg0LbqP4ZV70XtZ7hw4eL3Ka6ujpsbW2xY8cOsfZJVlh35Bd3NPb6theXmpoaYmJi8ODBgzrbnjp1CjweT+DVu3fvOpcrLy+Hi4sLYmNjERoaioKCAqSlpaGyshIODg7MH9C1a9eY9SYnJwP4WrhTNe0///mPxPs3fPhwoT4nJSUJtFmzZg14PB7u3LkDLy8vzJw5E8eOHZN4W/XFuvD36tULRkZGSElJYaZVjcZeVbElTXtxWVpawtnZGcuXL6+zrZ6enkBBvoGBAVq0aFHnchs3bsTly5dx5MgReHh4wNjYGPb29khOToaVlRVmzJgBQgj09fWZ9VYNuN22bVuBBwBIisvlCvVZV1dXoI2WlhYMDAzQuXNnBAQEoHXr1jh58qTE26ov1oUfkHw09vqM3i6OtWvXIjk5GdnZ2VKvS5TExEQMHToUdnZ2AtOVlJSwcOFC5OXl4ebNmw2ybUnw+XwkJyfjzZs3UFVVbbTtsjL8ko7GLml7cfXq1QseHh4ICAiotV2/fv2gqakp8BJHQUEBrKysRM6rml5QUCBRnz09PYX6snv3bqF2R44cEWoXHh4u0CYgIACamprgcrlwd3eHrq4ufH19JeqPNFh5V6eko7FL2l4SoaGhsLKywokTJ9C2bVuRbfbu3VtjiOsixn2LEomKioKLi4vAtICAAHz58kVgmrOzMzO6ZZWqU6oqS5YsgY+PD3g8HpYsWYI5c+bAzMxMpv2tDSvDD4g/Grsk7Vu1aoW3b98KTS8pKanx
vLlLly6YOXMmAgMDERMTI7KNkZFRvUJhYWHBjDv8rarpFhYWEq3TwMBAqC9aWlpCD+3S0NCos89t2rSBmZkZzMzMsH//ftja2qJPnz6wtraWqE/1xcrTHuD/RmOvqKgQORp7fdpbWlrixo0bQtNv3LhRa8hWrVqFgoIC7NmzR/wdEMOkSZNw6tQpofN6Pp+PqKgoWFtbC10PyIuRkREmTpzYqLURrD3y1zUae33az549G7/99hvmz58PX19fcLlcHD16FElJSfjzzz9rXHe7du2waNEi5mFY33r16hUzGnwVHR2dOu9lX7hwIQ4dOoTRo0djw4YNcHBwwD///IPw8HDk5+fj1KlTDVZ4X15eLtRnFRWVWk8XFyxYABsbG2RnZws8U6mhSB/+ENmeUzam2kZjr0/7zp0749y5c1i+fDlcXFzw+fNndO3aFfv37xf4gUeUxYsXY8uWLfj06ZPQvG/PsQEgKSkJkyZNqnWdampqOHPmDMLDwxEUFIRHjx5BS0sLzs7OyMrKgo2NTa3LSyM9PR3t27cXmGZpaYl79+7VuIy1tTWGDRuGVatWCT0ipiHQSi6q2aGVXBQlJRr+Zqxbt25C36XX9t07JYi1F7yKIC0tDRUVFSLntWvXrpF70/zQ8DdjxsbG8u5Cs0ZPeyjWouGnWIuGn2ItGn6KtWj4Kdai4adYS+qvOq/9KfqW2YbQd3T97mkvLi5GWFgYjh49iqdPn6Jt27bo0aMH/P39MWTIEKZdREQEVqxYgbVr12LJkiUC64iPj8e0adPg6uqK9PR0ZnpJSQl0dXWRkZEBJycnZnpGRgYiIyNx5coVlJWVwcTEBG5ubli0aBEMDQ2RmZkJZ2dnkf3l8XgwMDBASEgIVq9eDeBr9VWHDh3g5uaGtWvXCt0bT0lO4Y/8RUVF6N27N86cOYPIyEjcvn0b6enpcHZ2hp+fn0Db2NhYLF26FLGxsSLXpaKiglOnTiEjI6PWbW7btg0uLi4wMDBAcnIy8vLysHXrVrx9+xYbNmwQaFu9ULzqVb2opVu3buDxeHj8+DHi4uKQnp6O2bNn1/PToKpT+B+55syZAw6Hg6tXrwqMvtitWzdMnz6deX/27FmUlZVhzZo12LlzJy5duoR+/foJrEtDQwMeHh4IDAzElStXRG7vyZMnmD9/PubPn4+oqChmuomJCQYOHChU9NG2bVuBx5p8S0VFBQYGBgAAQ0NDTJgwQaCemKo/hT7yv379Gunp6fDz8xMIfpXqoYuJiYGnpydatGgBT0/PGquqQkJCcPv2bRw4cEDk/P379+Pz589YunSpyPm1Bb0uRUVFOH78eKMWeSsyhQ7/X3/9BUIIunbtWmu7d+/e4cCBA0xRupeXF/bt28eMzVtdhw4dsGDBAixfvhyVlZVC8x88eIBWrVoJ3ctek44dOwrckNatWzeB+bdv34ampiZatmwJU1NT3L17t86Cd0o8Ch1+cYu3k5KS0KVLF6akr0ePHjA2NsbevXtFtg8ICMCLFy9EXhsQQiSqjjp//jxyc3OZ17dFHJaWlsjNzcW1a9cQEBAAV1dXzJs3T+z1UzVT6PCbm5uDw+HUWj0EfD3luXv3LlRUVJhXXl5ejRe+Ojo6WLZsGVavXo2PHz8KzLOwsMDbt2/B4/HE6qOpqSlTxG1mZiZ0s5qqqirMzMxgY2ODtWvXQllZmfkGiJKOQoe/devWcHV1RXR0ND58+CA0v6SkBLdv30Z2djYyMzMFjsCZmZm4fPlyjX848+bNg5KSktCj/Nzd3aGqqor169eLXO7bC15JrVixAv/+97/x7NkzqdZDseDbnujoaPTv3x/29vZYs2YNunfvjsrKSpw8eRJbtmyBq6sr7O3tMXDgQKFl+/bti5iYGJGF5Wpqali9erXQ16VGRkaIiorC3Llz8e7dO/z4448wMTHBkydPsHPnTmhqagp83fn8+XOhul09Pb0aH0fo6OiI7t27Izw8HL/99lt9PhKqSmOO
eC0vz549I35+fsTY2JioqqoSQ0ND8v3335Pjx48TPT09sn79epHLrVu3jrRt25Z8/vxZaBR3QgiprKwk1tbWBADJyMgQmHfy5Eni6upKdHV1iZqaGunatStZvHgxefbsGSHk/0aBF/W6fPkyIYSQ4OBgYmdnJ9SvpKQkwuVyyePHj6X+bJojOgI7xVq0gJ2ipETDT7EWDT/FWjT8FGvR8FOsRcNPsRYNP8VaNPwUa9HwU6xFw0+xltQ3tvkN/F0W/RBL9LmZMl/nx48f8euvv2Lfvn14+vQptLS0YG1tjUWLFmHMmDEAvo6sfvbsWURERCAwMFBg+ZEjRyItLQ3BwcEICQkRmJeUlAQvLy/MmjVLrHG/GsO7d++wbt06JCcno6ioCDo6OrCxscGcOXMwbtw4gVqE2vpfUwH+8uXLERoa2uD7IQusP/LPmjULKSkp+N///V/cu3cP6enpcHd3x6tXrwTaGRkZIT4+XmDa06dPcfr06RqrtmJiYrB06VIkJSWJHHGlsZWUlKBfv37YuXMnli1bhhs3buDcuXOYOHEili5dKjSYnjj9/7YA/9uDQ1Om8OEvLS3FtGnToKWlhXbt2iEyMhJPnz6Furo6SktLcfjwYQQFBWHEiBEwMTFB7969MW/ePIHidgAYNWoUXr58iYsXLzLTEhISMGzYMJFDiBYWFuLSpUsIDAyEhYWFwAjudYmPj4eOjg6OHDkCS0tLqKurw93dHR8/fkRCQgJMTEygq6uL+fPnM0OArlmzRuQwQz169MDKlSsBAEFBQSgqKsKVK1fg7e0Na2trWFhYYObMmcjNzRUY31fc/lcfqd3AwEDsMYKbAoUPv4+PDy5duoTMzEzExcVh5cqVCAoKgouLCzQ1NWFgYIC0tDS8f/++1vWoqqpiypQpAk9OiI+PF/ojqRIXF4eRI0dCW1sbXl5eNRbE1+Tjx4/YtGkT9uzZg/T0dGRmZmLcuHFIS0tDWloa/vjjD2zbto0ppJ8+fTry8/Nx7do1Zh05OTm4desWpk2bBj6fjz179mDKlCno0KGD0PY0NTWhovJ/Z8HS9r85UOjwv3z5EikpKQgJCUHv3r0xYsQIjBs3Djt37sTYsWMBANu3b8elS5egp6eHvn37YuHChQJH9+qmT5+Offv24cOHDzh37hzevn2LUaNGCbXj8/mIj49nCuInTZqECxcuoLCwUOy+V1RUYMuWLejZsycGDhwId3d3XLhwATExMbC2tsaoUaPg7OzMPEOoY8eOcHV1FfjjjIuLw6BBg9C5c2e8fPkSb968qbOYX9L+f1uA/+3pYlOm0OGvenqDo6MjM83e3h7Kysr4/vvvAQADBw7Ef//7X5w+fRru7u64e/cuBgwYgF9//VVofXZ2djA3N8eBAwcQGxuLqVOnChwtq5w8eRIfPnzAiBEjAHwdbHno0KE11gSLoq6uji5dujDv27VrBxMTE4HTinbt2uH58+fM+5kzZzLn558/f0ZiYiLzL5MYZRv16v+3Bfi6urpib0feFLqMkcvlAoDAc2709fVhYWEhMB5sixYtMGDAAAwYMAABAQEIDQ3FmjVrEBAQIPSMnOnTpyM6Ohp5eXm4evWqyO3GxMTg9evXaNmyJTONz+fj1q1bWL16NZSU6j7mfFvGyOFwRE7j8/nM+9GjR4PL5SI1NRWqqqqoqKiAu7s7s986Ojp1FvNL2n9TU1OpnkUkTwp95Dc1NYWSkhIePHjATDt8+DAeP35c65HQ2toalZWVIr/hmDx5Mm7fvg0bGxtYW1sLzX/16hUOHTqEPXv2CBwRc3Jy8ObNG5w4cUI2OyeCiooKvL29ERcXh7i4OEyaNIkJsJKSEiZNmoTdu3eLLH4vLS1FZWWlXPvf2BT6yK+jo4MffvgBYWFhsLe3R0FBAdLT09GyZUucOXMGQ4YMgZOTEzw9PdGnTx/o6ekhLy8PQUFBcHZ2FjnotK6uLng8Xo0F5n/88Qf09PTg4eEh9PyeESNG
ICYmps4BqaXh6+sLK6uvD/T99tolLCwMmZmZcHBwQFhYGPr06YMWLVrg/PnziIiIwLVr1+Te/8Ykdfgb4ocnWYqOjoavry8MDQ2hrKyMjRs3gsvlYsqUKQgLC4OrqysSEhIQFBSEjx8/okOHDhg1ahRWrVpV4zpr+2c+NjZW6MeiKuPHj8fUqVPx8uVLgdMuWTI3N0e/fv3w+vVrODg4CMxr3bo1srKysHbtWoSGhuLRo0fQ1dWFra0tIiMjoa2tLXb/FQEtYFcwhBCYm5tjzpw5WLRokby70yBklUeFPu1hmxcvXmDPnj0oLi7GtGnT5N2dJk+hL3ibKjc3txpHTg8PD6/3etu2bYs1a9Zg+/btzeorR3mhR3452LFjB8rKykTOk2bEFUm+y6do+OXC0NBQ3l2gQE97KBaj4adYi4afYi0afoq1aPgp1qLhp1hLBuHf3ogvyb148QKzZ89Gp06dwOVyYWBgAFdXV4SFhYHD4dT6yszMRHx8PPNeSUkJ7du3x8SJE/H48WOhbd29exceHh7Q19cHl8uFhYUFVq1aJTRul4mJCTgcDvbs2SO0jm7duoHD4QjVCwNfR4hXVlYWOVJMXcrKyhAcHAwLCwtwuVy0adMGEyZMwN27d4X6VdPLx8cHwNdbqQ8ePCi0DR8fH6ZIqOq9qPVUvzGu+jbV1dVha2uLHTt2SLx/9aHwR/7x48cjJycHCQkJKCgowOHDh+Hk5ARbW1uBwmsPDw8MHz5cYFrVINStWrUCj8fD06dPkZycjPv372PChAkC28nKyoKDgwM+f/6Mo0ePoqCgAGFhYYiPj8fQoUPx+fNngfZGRkZCg0lnZWWhuLhY5JjBQN0jxNekvLwcLi4uiI2NRWhoKAoKCpCWlobKyko4ODggKysLAHDt2jVm35OTkwEIFqh/O/6YOL79THk8HpKSkgTarFmzBjweD3fu3IGXlxdmzpyJY8eOSbwtSSn0j1wlJSU4f/48MjMzMWjQIACAsbEx7O3thdq2bNkS5eXlzGjn1XE4HGZ6+/btMWPGDMyfPx/v3r1Dq1atQAjBjBkzYGVlhZSUFKbYw9jYGBYWFujZsyeioqIExs+dMmUKoqKi8Pfff8PIyAjA13BPmTIFO3fuFOqDOCPE12Tjxo24fPkycnJymOFWjY2NkZycDAcHB8yYMQN37tyBvr4+s0zVL811jRBfl6p/bWujpaXFtAkICMD69etx8uRJuLm51Xu74lDoI3/V/TIHDx5EeXm5TNb5/PlzpKamQllZGcrKygCA3Nxc5OXlYdGiRUJVWnZ2dnBxcRE62rVr1465nRr4WrC+d+/eGgvixR0hXpTExEQMHTqUCX4VJSUlLFy4EHl5ebh586bY62sofD4fycnJePPmTaOMMq/Q4VdRUUF8fDwSEhKgo6OD/v37IygoCLdu3ZJoPW/fvoWmpiY0NDTQrl07ZGRkwM/Pjzk9KSgoAACmiORbVlZWTJvqpk+fjvj4eBBCcODAAXTp0gU9evQQaifJCPGiFBQU1Nq36vsgLk9PT6Gb8nbv3i3U7siRI3XevBcQEABNTU1wuVy4u7tDV1cXvr6+EvWnPhQ6/MDXc/5nz57h8OHDGD58ODIzM9GrVy+RF5Q10dLSQm5uLrKzs7Fhwwb06tULYWFhQu0kvbFs5MiRKC0txblz5xAbG1vjUV/SEeJFkfVNb1FRUQJljrm5ucxDAapzdnYWajdr1iyBNkuWLEFubi7OnDkDBwcHREVFwczMTKb9FUWhz/mrqKmpYejQoRg6dChWrlwJX19fBAcHM99e1EVJSYn5n2FlZYWHDx9i9uzZ+OOPPwB8HXUdAPLz89GzZ0+h5fPz85k21amoqGDq1KkIDg7GlStXkJqaKnL71UeIr8Ln8xEbG4sZM2bU2X8LCwvk5+eLnFc1XVT/amNgYCAUUC0tLaFBtjU0NOoMcps2bZgR6Pfv3w9bW1v06dNHZI20LCn8kV8Ua2trkSOyiysw
MBB79+7FjRs3AHw9Enft2hVRUVECT1MAgJs3b+LUqVPw9PQUua7p06fj7NmzGDNmjMh78Os7Qnx1kyZNwqlTp4TO6/l8PqKiomBtbS10PSAvRkZGmDhxIpYtW9bg21LoI/+rV68wYcIETJ8+Hd27d4eWlhays7Oxfv165iG09WFkZIRx48Zh1apVOHLkCDgcDmJiYjB06FCMHz8ey5Ytg4GBAa5cuYJffvkFjo6O8Pf3F7kuKysrvHz5Eurq6iLnx8TE1GuE+OoWLlyIQ4cOYfTo0diwYQMcHBzwzz//IDw8HPn5+Th16pTIml1ZKC8vR3FxscA0FRWVWmuYFyxYABsbG2RnZ6NPnz4N0i9AJuH/SfpVNBBNTU3mHPLhw4eoqKiAkZERZs6ciaCgIKnWvXDhQjg6OuLq1auwt7dHv379kJWVhdWrV8PNzQ3v379Hp06d4O3tjWXLljHPEBJFT09P5PTPnz9j165dAl+RVjd+/Hhs2LAB4eHhNT5NAvh62nfmzBmEh4cjKCgIjx49gpaWFpydnZGVlSXyGZ+ykp6eLvQgX0tLy1r/xbK2tsawYcOwatUqpKWlNVjfaAE71ezQEdgpSko0/AqgW7duNRbEi/runfpKoS942SItLQ0VFRUi57Vr166Re9N80PArAGNjY3l3oVmipz0Ua9HwU6xFw0+xFg0/xVo0/BRr0fBTrCX1V52HJ4sukmgI3yeKvi23Nj4+Pky1VHWurq5IT0+HiYkJHj16hMuXL+O7775j5vv7+zN3TwJASEgIVq9eDQBQVlaGjo4OrK2t8cMPP2D27NlC9+7cvXsXq1evRkZGBt69ewdjY2NMmjQJgYGBUFdXr3EE8+oyMjJQVFQEf39/oVuFga/llampqUzReE03pyUlJWHSpEnMNt+8eSOyNDEkJAQHDx5Ebm6uwPQnT56gc+fOsLCwwJ07d+rsR3PBiiN/XUXUampqNd48Vl23bt3A4/Hw+PFjZGRkYMKECYiIiEC/fv0ExvEVp5i9X79+YhfQSyIuLk5oX6UNZXx8PDw8PPDu3TtcuXJFqnU1Jaz4kauuIuqffvoJW7duRVpaGjP8pigqKirMejp06ABbW1umNnbdunUIDQ2VqJi9ep9qK6CXhI6OjtTrqI4Qgri4OGzevBkdO3ZETEyM0HBHzRUrjvx1MTU1xaxZs7Bs2TKhYpS6dO3aFW5ubkhJSQFQv2L2piwjIwMfP36Ei4sLvLy8sGfPHqkKgZoSVoRfnCLqFStWoLCwsF43gnXt2hVFRUUA6l/MXpuqAvpvX6KIKiwX9YAtccXExGDSpElQVlaGjY0NOnfujP3799d7fU0JK057nJ2dsWXLFoFp346Aoq+vj8WLF2PVqlWYOHGiROsnhAhdbMqyYFxLS4spmazO3NxcaFpUVBRcXFwEpnXo0KFe2y0pKUFKSgouXLjATPPy8kJMTIzY9c9NGSvCL04RNQAsWrQImzdvxubNmyVaf35+PkxNTQHUv5i9NtUL6OsiqrC8vhITE/Hp0yeBc3xCCPh8PgoKCiTej6aGFac94tLU1MTKlSsRFhYm8O1Nbe7du4f09HSMHz8egHTF7E1NTEwMfvnlF4HC+Zs3b2LAgAESPzKxKWLFkV+SIuqffvoJUVFRSExMFPpWo7KyEsXFxeDz+Xj16hUyMzMRGhqKHj16YMmSJQAgVTG7LJSUlAjtq5aWlsDzP2/fvg0tLS3mPYfDEXp6Q25uLm7cuIHdu3eja9euAvM8PT2xZs0ahIaGMo9TKSwsFPp9wNzcvMbnjjYJRAxlZWUkLy+PlJWVidO8SfH29iYAhF6WlpaEEEKMjY1JVFSUwDKJiYkEABk0aBAzLTg4mFlWWVmZtG7dmvzrX/8iUVFR5NOnT0LbvXXrFhk/fjxp3bo1adGiBenSpQtZsWIF+fDhQ439HDNmjND0uLg4oq2tLXIZACQ1NVXgvahXREQEIYSQjIwMkfOVlZWZfbSzsyOEEDJ3
7lxibW0tcrs8Ho8oKSmRQ4cO1brd8+fPi1xeWrLKIy1gp5odWsBOUVKi4adYi4afYi0afoq1JAq/GNfGFNXgZJVDscJf9RzIbwdWoyh5qBrfrGpknPoS60euquKN58+fAwDU1dUb7Km+FFUbPp+PFy9eQF1dXWC8gvoQe+mqe8Sr/gAoSl6UlJTQqVMnqQ/AYv3IVd2XL19qfDQeRTUGVVVVoVqJ+pA4/BSlKOhXnRRr0fBTrEXDT7EWDT/FWjT8FGvR8FOsRcNPsdb/A22CHYqSLKorAAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 100x100 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Legend of cell subtype info only\n",
    "# Small figure with its axes switched off: only the legend is of interest\n",
    "g = plt.figure(figsize = (1,1)).add_subplot(111)\n",
    "g.axis('off')\n",
    "handles = []\n",
    "for item in cellsubtype_color_dict:\n",
    "    # zero-sized bar: carries the subtype's color and label for the legend\n",
    "    h = g.bar(0, 0, color = cellsubtype_color_dict[item],\n",
    "              label = item, linewidth = 0)\n",
    "    handles.append(h)\n",
    "# No trailing comma: first_legend must be the Legend object itself, not a 1-tuple\n",
    "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cell subtype')\n",
    "\n",
    "\n",
    "# Save the legend image to the metadata images directory\n",
    "filename = \"Cellsubtype_legend.png\"\n",
    "filename = os.path.join(metadata_images_dir, filename)\n",
    "plt.savefig(filename, bbox_inches = 'tight')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c0e92c5-e026-4868-a5f3-20cee185d893",
   "metadata": {},
   "source": [
    "## II.7. IMMUNE CHECKPOINT COLORS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "e4b41aab-bdd0-41b0-a39e-0fe3e74fe830",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Nuc_X</th>\n",
       "      <th>Sample_ID</th>\n",
       "      <th>Nucleus_Roundness</th>\n",
       "      <th>Nuc_Y_Inv</th>\n",
       "      <th>ROI_index</th>\n",
       "      <th>Cell_Size</th>\n",
       "      <th>Nucleus_Size</th>\n",
       "      <th>AF488_Cell_Intensity_Average</th>\n",
       "      <th>AF488_Cytoplasm_Intensity_Average</th>\n",
       "      <th>AF488_Nucleus_Intensity_Average</th>\n",
       "      <th>...</th>\n",
       "      <th>r8c2_Nucleus_Intensity_Average</th>\n",
       "      <th>Sting_Cell_Intensity_Average</th>\n",
       "      <th>Sting_Cytoplasm_Intensity_Average</th>\n",
       "      <th>Sting_Nucleus_Intensity_Average</th>\n",
       "      <th>Vimentin_Cell_Intensity_Average</th>\n",
       "      <th>Vimentin_Cytoplasm_Intensity_Average</th>\n",
       "      <th>Vimentin_Nucleus_Intensity_Average</th>\n",
       "      <th>cell_type</th>\n",
       "      <th>cell_subtype</th>\n",
       "      <th>immune_checkpoint</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_0</th>\n",
       "      <td>823.567871</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.835324</td>\n",
       "      <td>15699.382812</td>\n",
       "      <td>0</td>\n",
       "      <td>281</td>\n",
       "      <td>81</td>\n",
       "      <td>6.654496</td>\n",
       "      <td>6.277167</td>\n",
       "      <td>7.586173</td>\n",
       "      <td>...</td>\n",
       "      <td>11.785679</td>\n",
       "      <td>1.358562</td>\n",
       "      <td>1.318835</td>\n",
       "      <td>1.456654</td>\n",
       "      <td>19.081685</td>\n",
       "      <td>16.428900</td>\n",
       "      <td>25.631769</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_1</th>\n",
       "      <td>852.840027</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.523421</td>\n",
       "      <td>15690.533203</td>\n",
       "      <td>0</td>\n",
       "      <td>200</td>\n",
       "      <td>75</td>\n",
       "      <td>8.410667</td>\n",
       "      <td>8.166960</td>\n",
       "      <td>8.816844</td>\n",
       "      <td>...</td>\n",
       "      <td>13.032533</td>\n",
       "      <td>1.856005</td>\n",
       "      <td>1.954376</td>\n",
       "      <td>1.692053</td>\n",
       "      <td>20.050233</td>\n",
       "      <td>20.903307</td>\n",
       "      <td>18.628444</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_2</th>\n",
       "      <td>868.272705</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.686147</td>\n",
       "      <td>15682.994141</td>\n",
       "      <td>0</td>\n",
       "      <td>425</td>\n",
       "      <td>165</td>\n",
       "      <td>9.547820</td>\n",
       "      <td>9.216820</td>\n",
       "      <td>10.069394</td>\n",
       "      <td>...</td>\n",
       "      <td>14.592970</td>\n",
       "      <td>1.969553</td>\n",
       "      <td>2.171304</td>\n",
       "      <td>1.651642</td>\n",
       "      <td>13.034588</td>\n",
       "      <td>13.993846</td>\n",
       "      <td>11.523031</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_4</th>\n",
       "      <td>704.337280</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.757623</td>\n",
       "      <td>15683.059570</td>\n",
       "      <td>0</td>\n",
       "      <td>418</td>\n",
       "      <td>169</td>\n",
       "      <td>11.163413</td>\n",
       "      <td>10.062945</td>\n",
       "      <td>12.784813</td>\n",
       "      <td>...</td>\n",
       "      <td>17.325917</td>\n",
       "      <td>1.398476</td>\n",
       "      <td>1.301932</td>\n",
       "      <td>1.540722</td>\n",
       "      <td>12.410670</td>\n",
       "      <td>11.191111</td>\n",
       "      <td>14.207534</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_5</th>\n",
       "      <td>852.893799</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.714651</td>\n",
       "      <td>15683.017578</td>\n",
       "      <td>0</td>\n",
       "      <td>201</td>\n",
       "      <td>113</td>\n",
       "      <td>7.887114</td>\n",
       "      <td>6.962386</td>\n",
       "      <td>8.607257</td>\n",
       "      <td>...</td>\n",
       "      <td>13.063717</td>\n",
       "      <td>1.328602</td>\n",
       "      <td>1.144682</td>\n",
       "      <td>1.471832</td>\n",
       "      <td>9.718175</td>\n",
       "      <td>8.039015</td>\n",
       "      <td>11.025841</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91241</th>\n",
       "      <td>11890.440430</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.654341</td>\n",
       "      <td>1231.699951</td>\n",
       "      <td>45</td>\n",
       "      <td>185</td>\n",
       "      <td>50</td>\n",
       "      <td>11.222882</td>\n",
       "      <td>11.690247</td>\n",
       "      <td>9.961000</td>\n",
       "      <td>...</td>\n",
       "      <td>13.310400</td>\n",
       "      <td>2.154859</td>\n",
       "      <td>2.332244</td>\n",
       "      <td>1.675920</td>\n",
       "      <td>11.001081</td>\n",
       "      <td>11.442271</td>\n",
       "      <td>9.809867</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91243</th>\n",
       "      <td>11915.123047</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.854257</td>\n",
       "      <td>1228.222168</td>\n",
       "      <td>45</td>\n",
       "      <td>203</td>\n",
       "      <td>81</td>\n",
       "      <td>10.506076</td>\n",
       "      <td>10.907295</td>\n",
       "      <td>9.901769</td>\n",
       "      <td>...</td>\n",
       "      <td>13.592593</td>\n",
       "      <td>2.356256</td>\n",
       "      <td>2.432205</td>\n",
       "      <td>2.241864</td>\n",
       "      <td>10.545419</td>\n",
       "      <td>10.825956</td>\n",
       "      <td>10.122881</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91244</th>\n",
       "      <td>11961.339844</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.651563</td>\n",
       "      <td>1230.939941</td>\n",
       "      <td>45</td>\n",
       "      <td>157</td>\n",
       "      <td>50</td>\n",
       "      <td>11.337304</td>\n",
       "      <td>11.769533</td>\n",
       "      <td>10.412333</td>\n",
       "      <td>...</td>\n",
       "      <td>13.680000</td>\n",
       "      <td>2.176707</td>\n",
       "      <td>2.105561</td>\n",
       "      <td>2.328960</td>\n",
       "      <td>11.364076</td>\n",
       "      <td>11.828598</td>\n",
       "      <td>10.370000</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91245</th>\n",
       "      <td>11969.869141</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.764502</td>\n",
       "      <td>1230.717407</td>\n",
       "      <td>45</td>\n",
       "      <td>115</td>\n",
       "      <td>46</td>\n",
       "      <td>10.039797</td>\n",
       "      <td>10.513092</td>\n",
       "      <td>9.329855</td>\n",
       "      <td>...</td>\n",
       "      <td>12.852174</td>\n",
       "      <td>2.346356</td>\n",
       "      <td>2.526551</td>\n",
       "      <td>2.076065</td>\n",
       "      <td>11.523826</td>\n",
       "      <td>12.672657</td>\n",
       "      <td>9.800579</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91247</th>\n",
       "      <td>11965.208984</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.873094</td>\n",
       "      <td>1224.179077</td>\n",
       "      <td>45</td>\n",
       "      <td>146</td>\n",
       "      <td>67</td>\n",
       "      <td>10.225981</td>\n",
       "      <td>10.388186</td>\n",
       "      <td>10.034727</td>\n",
       "      <td>...</td>\n",
       "      <td>13.613732</td>\n",
       "      <td>2.212938</td>\n",
       "      <td>2.229595</td>\n",
       "      <td>2.193299</td>\n",
       "      <td>16.504383</td>\n",
       "      <td>21.886076</td>\n",
       "      <td>10.158806</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>350554 rows × 118 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                         Nuc_X  Sample_ID  Nucleus_Roundness     Nuc_Y_Inv  \\\n",
       "ID                                                                           \n",
       "DD3S3_Cell_0        823.567871  DD3S3.csv           0.835324  15699.382812   \n",
       "DD3S3_Cell_1        852.840027  DD3S3.csv           0.523421  15690.533203   \n",
       "DD3S3_Cell_2        868.272705  DD3S3.csv           0.686147  15682.994141   \n",
       "DD3S3_Cell_4        704.337280  DD3S3.csv           0.757623  15683.059570   \n",
       "DD3S3_Cell_5        852.893799  DD3S3.csv           0.714651  15683.017578   \n",
       "...                        ...        ...                ...           ...   \n",
       "DD3S2_Cell_91241  11890.440430  DD3S2.csv           0.654341   1231.699951   \n",
       "DD3S2_Cell_91243  11915.123047  DD3S2.csv           0.854257   1228.222168   \n",
       "DD3S2_Cell_91244  11961.339844  DD3S2.csv           0.651563   1230.939941   \n",
       "DD3S2_Cell_91245  11969.869141  DD3S2.csv           0.764502   1230.717407   \n",
       "DD3S2_Cell_91247  11965.208984  DD3S2.csv           0.873094   1224.179077   \n",
       "\n",
       "                  ROI_index  Cell_Size  Nucleus_Size  \\\n",
       "ID                                                     \n",
       "DD3S3_Cell_0              0        281            81   \n",
       "DD3S3_Cell_1              0        200            75   \n",
       "DD3S3_Cell_2              0        425           165   \n",
       "DD3S3_Cell_4              0        418           169   \n",
       "DD3S3_Cell_5              0        201           113   \n",
       "...                     ...        ...           ...   \n",
       "DD3S2_Cell_91241         45        185            50   \n",
       "DD3S2_Cell_91243         45        203            81   \n",
       "DD3S2_Cell_91244         45        157            50   \n",
       "DD3S2_Cell_91245         45        115            46   \n",
       "DD3S2_Cell_91247         45        146            67   \n",
       "\n",
       "                  AF488_Cell_Intensity_Average  \\\n",
       "ID                                               \n",
       "DD3S3_Cell_0                          6.654496   \n",
       "DD3S3_Cell_1                          8.410667   \n",
       "DD3S3_Cell_2                          9.547820   \n",
       "DD3S3_Cell_4                         11.163413   \n",
       "DD3S3_Cell_5                          7.887114   \n",
       "...                                        ...   \n",
       "DD3S2_Cell_91241                     11.222882   \n",
       "DD3S2_Cell_91243                     10.506076   \n",
       "DD3S2_Cell_91244                     11.337304   \n",
       "DD3S2_Cell_91245                     10.039797   \n",
       "DD3S2_Cell_91247                     10.225981   \n",
       "\n",
       "                  AF488_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                    \n",
       "DD3S3_Cell_0                               6.277167   \n",
       "DD3S3_Cell_1                               8.166960   \n",
       "DD3S3_Cell_2                               9.216820   \n",
       "DD3S3_Cell_4                              10.062945   \n",
       "DD3S3_Cell_5                               6.962386   \n",
       "...                                             ...   \n",
       "DD3S2_Cell_91241                          11.690247   \n",
       "DD3S2_Cell_91243                          10.907295   \n",
       "DD3S2_Cell_91244                          11.769533   \n",
       "DD3S2_Cell_91245                          10.513092   \n",
       "DD3S2_Cell_91247                          10.388186   \n",
       "\n",
       "                  AF488_Nucleus_Intensity_Average  ...  \\\n",
       "ID                                                 ...   \n",
       "DD3S3_Cell_0                             7.586173  ...   \n",
       "DD3S3_Cell_1                             8.816844  ...   \n",
       "DD3S3_Cell_2                            10.069394  ...   \n",
       "DD3S3_Cell_4                            12.784813  ...   \n",
       "DD3S3_Cell_5                             8.607257  ...   \n",
       "...                                           ...  ...   \n",
       "DD3S2_Cell_91241                         9.961000  ...   \n",
       "DD3S2_Cell_91243                         9.901769  ...   \n",
       "DD3S2_Cell_91244                        10.412333  ...   \n",
       "DD3S2_Cell_91245                         9.329855  ...   \n",
       "DD3S2_Cell_91247                        10.034727  ...   \n",
       "\n",
       "                  r8c2_Nucleus_Intensity_Average  \\\n",
       "ID                                                 \n",
       "DD3S3_Cell_0                           11.785679   \n",
       "DD3S3_Cell_1                           13.032533   \n",
       "DD3S3_Cell_2                           14.592970   \n",
       "DD3S3_Cell_4                           17.325917   \n",
       "DD3S3_Cell_5                           13.063717   \n",
       "...                                          ...   \n",
       "DD3S2_Cell_91241                       13.310400   \n",
       "DD3S2_Cell_91243                       13.592593   \n",
       "DD3S2_Cell_91244                       13.680000   \n",
       "DD3S2_Cell_91245                       12.852174   \n",
       "DD3S2_Cell_91247                       13.613732   \n",
       "\n",
       "                  Sting_Cell_Intensity_Average  \\\n",
       "ID                                               \n",
       "DD3S3_Cell_0                          1.358562   \n",
       "DD3S3_Cell_1                          1.856005   \n",
       "DD3S3_Cell_2                          1.969553   \n",
       "DD3S3_Cell_4                          1.398476   \n",
       "DD3S3_Cell_5                          1.328602   \n",
       "...                                        ...   \n",
       "DD3S2_Cell_91241                      2.154859   \n",
       "DD3S2_Cell_91243                      2.356256   \n",
       "DD3S2_Cell_91244                      2.176707   \n",
       "DD3S2_Cell_91245                      2.346356   \n",
       "DD3S2_Cell_91247                      2.212938   \n",
       "\n",
       "                  Sting_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                    \n",
       "DD3S3_Cell_0                               1.318835   \n",
       "DD3S3_Cell_1                               1.954376   \n",
       "DD3S3_Cell_2                               2.171304   \n",
       "DD3S3_Cell_4                               1.301932   \n",
       "DD3S3_Cell_5                               1.144682   \n",
       "...                                             ...   \n",
       "DD3S2_Cell_91241                           2.332244   \n",
       "DD3S2_Cell_91243                           2.432205   \n",
       "DD3S2_Cell_91244                           2.105561   \n",
       "DD3S2_Cell_91245                           2.526551   \n",
       "DD3S2_Cell_91247                           2.229595   \n",
       "\n",
       "                  Sting_Nucleus_Intensity_Average  \\\n",
       "ID                                                  \n",
       "DD3S3_Cell_0                             1.456654   \n",
       "DD3S3_Cell_1                             1.692053   \n",
       "DD3S3_Cell_2                             1.651642   \n",
       "DD3S3_Cell_4                             1.540722   \n",
       "DD3S3_Cell_5                             1.471832   \n",
       "...                                           ...   \n",
       "DD3S2_Cell_91241                         1.675920   \n",
       "DD3S2_Cell_91243                         2.241864   \n",
       "DD3S2_Cell_91244                         2.328960   \n",
       "DD3S2_Cell_91245                         2.076065   \n",
       "DD3S2_Cell_91247                         2.193299   \n",
       "\n",
       "                  Vimentin_Cell_Intensity_Average  \\\n",
       "ID                                                  \n",
       "DD3S3_Cell_0                            19.081685   \n",
       "DD3S3_Cell_1                            20.050233   \n",
       "DD3S3_Cell_2                            13.034588   \n",
       "DD3S3_Cell_4                            12.410670   \n",
       "DD3S3_Cell_5                             9.718175   \n",
       "...                                           ...   \n",
       "DD3S2_Cell_91241                        11.001081   \n",
       "DD3S2_Cell_91243                        10.545419   \n",
       "DD3S2_Cell_91244                        11.364076   \n",
       "DD3S2_Cell_91245                        11.523826   \n",
       "DD3S2_Cell_91247                        16.504383   \n",
       "\n",
       "                  Vimentin_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                       \n",
       "DD3S3_Cell_0                                 16.428900   \n",
       "DD3S3_Cell_1                                 20.903307   \n",
       "DD3S3_Cell_2                                 13.993846   \n",
       "DD3S3_Cell_4                                 11.191111   \n",
       "DD3S3_Cell_5                                  8.039015   \n",
       "...                                                ...   \n",
       "DD3S2_Cell_91241                             11.442271   \n",
       "DD3S2_Cell_91243                             10.825956   \n",
       "DD3S2_Cell_91244                             11.828598   \n",
       "DD3S2_Cell_91245                             12.672657   \n",
       "DD3S2_Cell_91247                             21.886076   \n",
       "\n",
       "                  Vimentin_Nucleus_Intensity_Average  cell_type  cell_subtype  \\\n",
       "ID                                                                              \n",
       "DD3S3_Cell_0                               25.631769       none          none   \n",
       "DD3S3_Cell_1                               18.628444       none          none   \n",
       "DD3S3_Cell_2                               11.523031       none          none   \n",
       "DD3S3_Cell_4                               14.207534       none          none   \n",
       "DD3S3_Cell_5                               11.025841       none          none   \n",
       "...                                              ...        ...           ...   \n",
       "DD3S2_Cell_91241                            9.809867       none          none   \n",
       "DD3S2_Cell_91243                           10.122881       none          none   \n",
       "DD3S2_Cell_91244                           10.370000       none          none   \n",
       "DD3S2_Cell_91245                            9.800579       none          none   \n",
       "DD3S2_Cell_91247                           10.158806       none          none   \n",
       "\n",
       "                  immune_checkpoint  \n",
       "ID                                   \n",
       "DD3S3_Cell_0                   none  \n",
       "DD3S3_Cell_1                   none  \n",
       "DD3S3_Cell_2                   none  \n",
       "DD3S3_Cell_4                   none  \n",
       "DD3S3_Cell_5                   none  \n",
       "...                             ...  \n",
       "DD3S2_Cell_91241               none  \n",
       "DD3S2_Cell_91243               none  \n",
       "DD3S2_Cell_91244               none  \n",
       "DD3S2_Cell_91245               none  \n",
       "DD3S2_Cell_91247               none  \n",
       "\n",
       "[350554 rows x 118 columns]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assign IMMUNE SUBTYPES\n",
    "df['cell_subtype'] = df['cell_type'].copy()\n",
    "df['immune_checkpoint'] = 'none'\n",
    "df"
   ]
  },
  {
   "cell_type": "raw",
   "id": "984f9fe2-41d1-4c00-be20-e2d6ca9d2394",
   "metadata": {},
   "source": [
    "immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'None']\n",
    "color_values = sb.color_palette(\"husl\",n_colors=len(immune_checkpoint))\n",
    "# each color value is a tuple of three values: (R, G, B)\n",
    "\n",
    "print(\"Unique immune checkpoint are:\",df.immune_checkpoint.unique())\n",
    "# Display those unique colors\n",
    "sb.palplot(sb.color_palette(color_values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "3b593828-a016-4d52-b722-9908aac4e2d1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unique immune checkpoint combinations are: ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAABlCAYAAACBS66rAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAADBUlEQVR4nO3aPW4TYRSG0WsT4oBkW0pp2VV6dsBmYBEgUaShZydIrAcpRUZyB0WMwPwkQ4GgwjC23miYcE47U1xdW348/jxq27YtAAga9z0AAHePuAAQJy4AxIkLAHHiAkCcuAAQJy4AxB11uenm5qbW63VNp9MajUa3PRMA/6i2bWuz2dRisajxePfzSae4rNfrWq1WseEAGLamaWq5XO683iku0+m0qqrePn1e0+OTzGT/gSePX/c9wiC9+/Cm7xEG6en7Z32PMDgX9x/1PcLgfN5u69WLl7+6sEunuPz8KWx6fFKzibh0df/hvb5HGKSj6z+/afm9ByfHfY8wOCe+LB/sb0ckDvQBiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiBMXAOLEBYA4cQEgTlwAiDvqclPbtlVVtfmyvdVh7pqvH6/7HmGQvn3a9D3CIH3aful7hMHZXvtM29fn7Y+d/ezCLqP2b3dU1cXFRZ2dnWUmA2Dwmqap5XK583qnJ5fT09Oqqrq8vKz5fJ6Z7D9wdXVVq9Wqmqap2WzW9ziDYGeHsbf92dlh2ratzWZTi8Xij/d1ist4/ONoZj6fexEOMJvN7G1PdnYYe9ufne2vy0OGA30A4sQFgLhOcZlMJnV+fl6TyeS257lT7G1/dnYYe9ufnd2uTv8WA4B9+FkMgDhxASBOXACIExcA4sQFgDhxASBOXACIExcA4r4DOnx2ZebrFYYAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 500x100 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']\n",
    "\n",
    "# Base colors for the primary checkpoints\n",
    "base_colors = sb.color_palette(\"husl\", n_colors=3)  # Three distinct colors\n",
    "\n",
    "# Function to mix two RGB colors\n",
    "def mix_colors(color1, color2):\n",
    "    return tuple((c1 + c2) / 2 for c1, c2 in zip(color1, color2))\n",
    "\n",
    "# Generate mixed colors for the combinations of checkpoints\n",
    "mixed_colors = [\n",
    "    mix_colors(base_colors[0], base_colors[1]),  # Mix B7H4 and PDL1\n",
    "#    mix_colors(base_colors[0], base_colors[2]),  # Mix B7H4 and PD1\n",
    "#    mix_colors(base_colors[1], base_colors[2]),  # Mix PDL1 and PD1\n",
    "    tuple(np.mean(base_colors, axis=0))  # Mix B7H4, PDL1, and PD1\n",
    "]\n",
    "\n",
    "# Adding the color for 'None'\n",
    "#none_color = [(0.8, 0.8, 0.8)]  # A shade of gray\n",
    "\n",
    "# Combine all colors into one list\n",
    "color_values = base_colors + mixed_colors #+ none_color\n",
    "\n",
    "# Display unique immune checkpoint combinations\n",
    "print(\"Unique immune checkpoint combinations are:\", immune_checkpoint)\n",
    "# Display the unique colors\n",
    "sb.palplot(color_values)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "ce2c7c21-f90b-4d06-a93f-418d9aae4304",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'B7H4': (0.9677975592919913, 0.44127456009157356, 0.5358103155058701),\n",
       " 'PDL1': (0.3126890019504329, 0.6928754610296064, 0.1923704830330379),\n",
       " 'PD1': (0.23299120924703914, 0.639586552066035, 0.9260706093977744),\n",
       " 'B7H4_PDL1': (0.6402432806212122, 0.56707501056059, 0.36409039926945397),\n",
       " 'None': (0.5044925901631545, 0.5912455243957383, 0.5514171359788941)}"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Store in a dctionnary\n",
    "immunecheckpoint_color_dict = dict(zip(immune_checkpoint, color_values))\n",
    "immunecheckpoint_color_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "2f38cc65-50f3-463d-be1b-9fc1f8c90aa1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/immunecheckpoint_color_data.csv was created!\n"
     ]
    }
   ],
   "source": [
    "# Save color information (mapping and legend) to metadata directory\n",
    "# Create dataframe\n",
    "immunecheckpoint_color_df = color_dict_to_df(immunecheckpoint_color_dict, \"immune_checkpoint\")\n",
    "immunecheckpoint_color_df.head()\n",
    "\n",
    "# Save to file in metadatadirectory\n",
    "filename = \"immunecheckpoint_color_data.csv\"\n",
    "filename = os.path.join(metadata_dir, filename)\n",
    "immunecheckpoint_color_df.to_csv(filename, index = False)\n",
    "print(\"File \" + filename + \" was created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "9e2ae256-bbc3-4e93-93bb-560ffa02fb71",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAK8AAACcCAYAAAAJfPt2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAWC0lEQVR4nO3de1BTZ/oH8G+4qAQCkWtorQZEFHZhEYUWFKXgSmFRpI4s1YrQwa5YL2i7gru1UO14oSJabe0WHMqv7nbAadUsKriAuJi6IhSorSy6GbytsFTBUW5Vw/v7A3LGmIBJuB55PjPMcK7vm/DN4eSc85wjYIwxEMJDRsPdAUIMReElvEXhJbxF4SW8ReElvEXhJbxF4SW8ReElvEXhJbxF4SW8ReElvEXhJbxF4SW8ReElvGWi7wJKpRKPHj0ajL6QUcLU1BTGxsb9Xo/O4WWMobGxEffu3et3o4SIxWJIJBIIBAKD16FzeFXBtbe3h1Ao7FejZPRijKG9vR1NTU0AAEdHR4PXpVN4lUolF1wbGxuDGyMEAMzMzAAATU1NsLe3N3gXQqcvbKp9XKFQaFAjhDxNlaX+fH/S62gD7SqQgTIQWaJDZYS3KLyDJDAwEImJiYPahlQqxd69ewe1jS+//BJisXhQ2zDUgIQ3NjYWixYtGohVkRHm97//Pa5cuaLXMkPxwQUMOElBRhczMzPu6MBIMyi7DYGBgVi7di0SExMxfvx4ODg4IDMzE21tbYiLi4NIJIKLiwtOnTrFLVNaWgqBQIDCwkJMnz4dZmZmCAoKQlNTE06dOgU3NzdYWlpi6dKlaG9v55bT9q/Ty8sLqamp3LBAIEBWVhYiIyMhFAoxZcoUyGQytWV+/PFHhIaGwsLCAg4ODli+fDnu3LnT5+uUy+UIDAyEUCjE+PHjERISgpaWFm56V1cXNm3aBGtra0gkErU+AcC9e/cQHx8POzs7WFpaIigoCDU1NWrz/P3vf4ePjw/GjRsHW1tbREZG9tqfrKwsiMViFBcXc3+HNWvWYM2aNbCysoKtrS22bNmCJ+8z09LSgpiYGIwfPx5CoRChoaG4evUqN/3p3YbU1FR4eXnhq6++glQqhZWVFaKjo/HgwQMA3f+Fz549i3379kEgEEAgEODatWt9vo+GGrR93pycHNja2qK8vBxr165FQkIClixZAn9/f3z//feYP38+li9frhZEoPvNOXDgAL777jvcvHkTUVFR2Lt3L/72t7/hxIkTOH36NPbv3693fz788ENERUXhhx9+QFhYGJYtW4bm5mYA3SEKCgrC9OnTUVFRgYKCAvzvf/9DVFRUr+urrq5GcHAw3N3dcf78eZw7dw4LFiyAUqlUew/Mzc1x4cIFpKWlYevWrfjHP/7BTV+yZAn34aysrIS3tzeCg4O5fp04cQKRkZEICwtDVVUViouL4evrq7U/aWlpSE5OxunTpxEcHKzWBxMTE5SXl2Pfvn3Ys2cPsrKyuOmxsbGoqKiATCbD+fPnwRhDWFhYn4ewFAoFjh07hvz8fOTn5+Ps2bPYuXMnAGDfvn3w8/PDypUr0dDQgIaGBrz00kt9/WkMx3TQ0dHBLl++zDo6OrROX7FiBYuIiOCG586dy2bPns0NP378mJmbm7Ply5dz4xoaGhgAdv78ecYYY2fOnGEAWFFRETfPjh07GACmUCi4cX/4wx9YSEgINzxp0iSWkZGh1p/f/OY3LCUlhRsGwN5//31uuLW1lQFgp06dYowxtm3bNjZ//ny1ddy8eZMBYHV1dVpf8xtvvMFmzZqldZq294Axxnx8fFhSUhJjjLGysjJmaWnJOjs71eaZPHky+8tf/sIYY8zPz48tW7as1zZUr33Tpk3M0dGR/fjjjxp9cHNzY11dXdy4pKQk5ubmxhhj7MqVKwwAk8vl3PQ7d+4wMzMzlpeXxxhjLDs7m1lZWXHTU1JSmFAoZPfv3+fG/fGPf2Qvv/yy
Wrvr16/vtd+MPTtTuhi0fV5PT0/ud2NjY9jY2MDDw4Mb5+DgAADcaUJtyzk4OEAoFMLZ2VltXHl5eb/6Y25uDktLS67tmpoanDlzBhYWFhrLKRQKuLq6aoyvrq7GkiVLdG4T6D4V+mSbra2tGmcsOzo6oFAouDZWrlzZZxvp6eloa2tDRUWF2vuk8sorr6gdU/Xz80N6ejqUSiVqa2thYmKCl19+mZtuY2ODqVOnora2ttc2pVIpRCKR1tc1lAYtvKampmrDAoFAbZzqDe3q6up1uaeXUY17chkjIyO1fThA+1mbvtbT2tqKBQsWYNeuXRrL9XbuXZcvMc9q09HREaWlpRrLqfYxdWkjICAAJ06cQF5eHpKTk585/0B41t9kqPD+OK+dnR0aGhq44fv376O+vl6vdXh7e+Onn36CVCqFi4uL2o+5ubnWZTw9PbkvRobw9vZGY2MjTExMNNq0tbXVuQ1fX1+cOnUK27dvx+7duzWmX7hwQW34X//6F6ZMmQJjY2O4ubnh8ePHavPcvXsXdXV1cHd3N/i1jRkzRm3ff7DwPrxBQUH46quvUFZWhkuXLmHFihV6X+jxzjvvoLm5GW+88QYuXrwIhUKBwsJCxMXF9fpH2Lx5My5evIjVq1fjhx9+wL///W8cPHjwmUcoVObNmwc/Pz8sWrQIp0+fxrVr1/Ddd9/hz3/+MyoqKgAAKSkp+Prrr5GSkoLa2lpcunRJ638Hf39/nDx5Eh9++KHGkZcbN25g48aNqKurw9dff439+/dj/fr1AIApU6YgIiICK1euxLlz51BTU4M333wTL774IiIiIvR4B9VJpVJcuHAB165dw507dwZtq8z78G7evBlz585FeHg4fve732HRokWYPHmyXut44YUXIJfLoVQqMX/+fHh4eCAxMRFisRhGRtrfIldXV5w+fRo1NTXw9fWFn58fjh8/DhMT3fbEBAIBTp48iTlz5iAuLg6urq6Ijo7G9evXue8DgYGBOHLkCGQyGby8vBAUFNTr/v7s2bNx4sQJvP/++2pHY2JiYtDR0QFfX1+88847WL9+Pd5++21uenZ2NmbMmIHw8HD4+fmBMYaTJ09q7Bro47333oOxsTHc3d1hZ2eHGzduGLyuvgjY0zuMWnR2dqK+vh5OTk4YN27coHSEDLzAwEB4eXkN+ilkQwxEpni/5SWjF4WX8BZd2/Ac03YY7nlCW17CWxRewlsUXsJbFF7CWxRewlsUXsJbFF7CW0NynLdzY9pQNMMZt2eTXvPHxsYiJyeHG7a2toaPjw/S0tLg6emJ0tJSvPrqq1qXLS8vh4+PDzdPS0uLRrWtVCpFYmKiRlEi66laKCgowNGjR6mIVU+05e3x2muvcWUrxcXFMDExQXh4OIDuq7ZU01Q/8fHxcHJywsyZMw1uc+/evXQjl36gM2w9xo4dC4lEAgCQSCRITk5GQEAAfv75Z9jZ2XHTgO6L3Y8fP461a9caHL7q6mqkp6ejoqKiXzebG81oy6tFa2srDh8+DBcXF603FpTJZLh79y7i4uIMWn97ezuWLl2KTz/9VO1DQfRDW94e+fn5XA1bW1sbHB0dkZ+fr/V63kOHDiEkJAQTJkzQmKZt3NMV0hs2bIC/v3+/LvgmFF7Oq6++ioMHDwLovpfBZ599htDQUJSXl2PSpEncfLdu3UJhYSHy8vK0rqesrEytOBHovq5WRSaToaSkBFVVVQP/IkYZCm8Pc3NzuLi4cMNZWVmwsrJCZmYmPvroI258dnY2bGxssHDhQq3rcXJy0jja8GR1RUlJCRQKhcY8ixcvRkBAwHN/JdhAovD2QiAQwMjICB0dHdw4xhiys7MRExNjcJlMcnIy4uPj1cZ5eHggIyMDCxYs6FefRxsKb49ffvkFjY2NALp3Gw4cOMCVxKuUlJSgvr5eI3z6kEgkWr+kTZw4EU5OTgavdzSi8PYoKCjgDlmJRCJMmzYNR44cUdtfPXToEPz9/TFt2rRh
6iV5EhVgkmFBBZhkVKPwEt6i8BLeovAS3qLwEt6i8BLeovAS3qLwEt6i8BLeovAS3hqSaxsiZd5D0Qzn6MLv9Zr/yQJMU1NTTJw4ETExMfjTn/6Ec+fOccWXAoEAIpEIzs7O+O1vf4sNGzaolfCkpqbi2LFjqK6u1trOt99+i88//xyVlZVobm5GVVUVvLy8DHqNhLa8HFUB5tWrV/Huu+8iNTUVH3/8MTe9rq4Ot2/fxsWLF5GUlISioiL8+te/xqVLl3Ruo62tDbNnz9Z6a36iP7qqrMeTBZgJCQk4evQoZDIZ/Pz8AAD29vYQi8WQSCRwdXVFREQEpk+fjoSEBJw7d06nNpYvXw4Ag/ZEyNGGtry9MDMzw8OHD/ucvmrVKsjl8mF5Bhmh8GpgjKGoqAiFhYUICgrqc17Vdb20JR0etNvQQ1U9/OjRI3R1dWHp0qVITU3FxYsXe11GdSk03ThkeFB4e6iqh8eMGYMXXnhBp0dSqR5xKpVKB7l3RBsKb4+nq4efpaOjA1988QXmzJkDOzu7QewZ6Q2FV0dNTU3o7OzEgwcPUFlZibS0NNy5cwfffvut2nwdHR0ax3lFIhEmT56M5uZm3LhxA7dv3wbQffgN6L0ok/RtSMKr70mDkWjq1KkQCASwsLCAs7Mz5s+fj40bN2qE7sqVK5g+fbrauODgYBQVFUEmk6ndIio6OhpA92NaU1NTB/01PG+oAJMMCyrAJKMahZfwFoWX8BaFl/AWhZfwFoWX8BaFl/AWhZfwFoWX8BaFl/DWkFzbMPvw3aFohnPuTc3HT/VloAowf/rpJ3zwwQeorKzE9evXkZGRofHUSzJwaMvbYyAKMNvb2+Hs7IydO3fSVWJDgMLbQ1WAOWnSJCQkJGDevHmQyWTcdHt7e674Mjo6GnK5HHZ2dkhISODm8fHxwccff4zo6GiMHTt2OF7GqELh7QUVYI58FN6nUAEmf1AlRQ8qwOQfCm8PKsDkHwpvDyrA5B8Kr450KcB8+PAhLl++zP3+3//+F9XV1bCwsNDrg0F0MyTh1fekwUikSwHm7du31Yovd+/ejd27d2Pu3Ln0QOxBQAWYZFhQASYZ1Si8hLcovIS3KLyEtyi8hLcovIS3KLyEtyi8hLcovIS3KLyEt4bk2obDn8QMRTOcN9f9n17zP1mACQDW1tbw8fFBWloaPD09UVpayhVhPq28vBw+Pj7cPC0tLRCLxWrzSKVSJCYmahRjMsYQFhaGgoICHD16FIsWLXpmX69duwYnJye1vs6YMQO7du3irqsIDAzE2bNnAQBjxoyBra0tvL29ERcXh9dff11tfQKBoNe2Ozs7sWrVKlRWVqK2thbh4eE4duzYM/s4VGjL20NVgNnQ0IDi4mKYmJggPDwcAODv789NU/3Ex8fDyckJM2fONLjNvXv3Gnwhe1FRERoaGlBYWIjW1laEhobi3r173PSVK1eioaEBCoUC33zzDdzd3REdHY23335b5zaUSiXMzMywbt06zJs3z6B+Dia6JLLHk0/AlEgkSE5ORkBAAH7++WfY2dmpXT326NEjHD9+HGvXrjU4fNXV1UhPT0dFRYVa+byubGxsuGdZ7N69G7NmzcKFCxcQEhICABAKhVyfJ0yYgFdeeQXTpk3DW2+9haioKJ3CaG5ujoMHDwIA5HK52odjJKAtrxatra04fPgwXFxcYGOjeTmnTCbD3bt31Z4voY/29nYsXboUn3766YCUyJuZmQFAnwWjALBixQqMHz9e4yEwfEVb3h6qGjag+wHXjo6OyM/Ph5GR5uf70KFDCAkJwYQJEzSmaRvX3t6uNrxhwwb4+/sjIiKi3/2+d+8etm3bBgsLC/j6+vY5r5GREVxdXZ+bglEKbw9VDRsAtLS04LPPPkNoaCjKy8sxadIkbr5bt26hsLAQeXl5WtdTVlYGkUikNi4wMJD7XSaToaSkBFVVVf3qr7+/P4yMjNDW1gZnZ2fk5ubCwcHhmcsxxp6b
glEKb4+na9iysrJgZWWFzMxMfPTRR9z47Oxs2NjYYOHChVrX4+TkpHG04clizpKSEigUCo15Fi9ejICAAJ0rLnJzc+Hu7g4bGxuNdfVGqVTi6tWr8PHx0Wn+kY7C2wuBQAAjIyN0dHRw4xhjyM7ORkxMDExNTQ1ab3JyMuLj49XGeXh4ICMjAwsWLNB5PS+99BImT56sV9s5OTloaWnB4sWL9VpupKLw9vjll1/Q2NgIoHu34cCBA2htbVULVElJCerr6zXCp4/ennY5ceJEteO3/dXe3o7GxkY8fvwYt27dwtGjR5GRkYGEhASNY9b19fUaT+2cMmUKzM3NcfnyZTx8+BDNzc148OABN5+Xl9eA9dVQQxJefU8aDIeCggLukJVIJMK0adNw5MgRtf3VQ4cOwd/fn7tTzkiWmZmJzMxMjBkzBjY2NpgxYwZyc3MRGRmpMe/GjRs1xpWVlWH27NkICwvD9evXufGqEyE6lD4OOirAJMOCCjDJqEbhHWFWrVoFCwsLrT+rVq0a7u6NKLTbMMI0NTXh/v37WqdZWlrC3t5+iHs0OAYiU3S0YYSxt7d/bgI62Gi3gfAWhZfwFoWX8BaFl/AWhZfwFoWX8NaQHCrb+vmeoWiG88EqzXP1fVEVYO7YsQPJycnc+GPHjiEyMnJEnMcnmmjL22PcuHHYtWsXWlpahrsrREcU3h7z5s2DRCLBjh07ep3nm2++wa9+9SuMHTsWUqkU6enpatOlUim2b9+Ot956CyKRCBMnTsQXX3yhNs/NmzcRFRUFsVgMa2trREREPDdlOUONwtvD2NgY27dvx/79+3Hr1i2N6ZWVlYiKikJ0dDQuXbqE1NRUbNmyBV9++aXafOnp6Zg5cyaqqqqwevVqJCQkoK6uDkB31XFISAhEIhHKysogl8thYWGB11577ZnFk0QThfcJkZGR8PLyQkpKisa0PXv2IDg4GFu2bIGrqytiY2OxZs0atYdrA0BYWBhWr14NFxcXJCUlwdbWFmfOnAHQXbrT1dWFrKwseHh4wM3NDdnZ2bhx4wY9cMUAFN6n7Nq1Czk5OdwDAlVqa2sxa9YstXGzZs3C1atXoVQquXGenp7c7wKBABKJhHs2cU1NDf7zn/9AJBJxV4pZW1ujs7MTCoViEF/V84kuzHnKnDlzEBISgs2bNyM2Nlbv5Z+ubRMIBOjq6gLQfT+IGTNm4K9//avGcvQgQv1ReLXYuXMnvLy8MHXqVG6cm5sb5HK52nxyuRyurq4wNjbWab3e3t7Izc2Fvb09LC0tB7TPoxHtNmjh4eGBZcuW4ZNPPuHGvfvuuyguLsa2bdtw5coV5OTk4MCBA3jvvfd0Xu+yZctga2uLiIgIlJWVob6+HqWlpVi3bp3WL4mkb0Oy5dX3pMFIsHXrVuTm5nLD3t7eyMvLwwcffIBt27bB0dERW7du1WvXQigU4p///CeSkpLw+uuv48GDB3jxxRcRHBxMW2IDUCUFGRZUgElGNQov4S0KL+EtCi/hLb3CS5cGkoEyEFnSKbyqs0ZP3ySZEEOpsmTo3TYBHY/zGhsbQywWc+fohULhc3ODYjK0GGNob29HU1MTxGKxzmcntdHpOK+q0cbGxhH3UA3CT2KxGBKJpF8bQZ3Dq6JUKvHo0SODGyTE1NS0X1tcFb3DS8hIQYfKCG9ReAlvUXgJb1F4CW9ReAlvUXgJb1F4CW/9P8n17cNbMm37AAAAAElFTkSuQmCC",
      "text/plain": [
       "<Figure size 100x100 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Legend of cell type info only\n",
    "g  = plt.figure(figsize = (1,1)).add_subplot(111)\n",
    "g.axis('off')\n",
    "handles = []\n",
    "for item in immunecheckpoint_color_dict.keys():\n",
    "        h = g.bar(0,0, color = immunecheckpoint_color_dict[item],\n",
    "                  label = item, linewidth =0)\n",
    "        handles.append(h)\n",
    "first_legend = plt.legend(handles=handles, loc='upper right', title = 'Immune checkpoint'),\n",
    "\n",
    "\n",
    "filename = \"Cellsubtype_legend.png\"\n",
    "filename = os.path.join(metadata_images_dir, filename)\n",
    "plt.savefig(filename, bbox_inches = 'tight')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "92332958-9ac4-4415-81b3-b3ae27354da3",
   "metadata": {},
   "source": [
    "## II.7. BACKGROUND SUBSTRACTION"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "30fb1c46-e5f9-4a8e-91eb-010c878a8785",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Do background subtraction\n",
    "# this uses a df (metadata) outside of \n",
    "# the scope of the lambda...\n",
    "# careful that this might break inside of a script...\n",
    "df.loc[:,~df.columns.isin(not_intensities)] = \\\n",
    "    df.loc[:,~df.columns.isin(not_intensities)].apply(lambda column: do_background_sub(column, df, metadata), axis = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "b47e45ac-deed-447b-b630-1ccdaa85d195",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Nuc_X</th>\n",
       "      <th>Sample_ID</th>\n",
       "      <th>Nucleus_Roundness</th>\n",
       "      <th>Nuc_Y_Inv</th>\n",
       "      <th>ROI_index</th>\n",
       "      <th>Cell_Size</th>\n",
       "      <th>Nucleus_Size</th>\n",
       "      <th>AF488_Cell_Intensity_Average</th>\n",
       "      <th>AF488_Cytoplasm_Intensity_Average</th>\n",
       "      <th>AF488_Nucleus_Intensity_Average</th>\n",
       "      <th>...</th>\n",
       "      <th>r8c2_Nucleus_Intensity_Average</th>\n",
       "      <th>Sting_Cell_Intensity_Average</th>\n",
       "      <th>Sting_Cytoplasm_Intensity_Average</th>\n",
       "      <th>Sting_Nucleus_Intensity_Average</th>\n",
       "      <th>Vimentin_Cell_Intensity_Average</th>\n",
       "      <th>Vimentin_Cytoplasm_Intensity_Average</th>\n",
       "      <th>Vimentin_Nucleus_Intensity_Average</th>\n",
       "      <th>cell_type</th>\n",
       "      <th>cell_subtype</th>\n",
       "      <th>immune_checkpoint</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_0</th>\n",
       "      <td>823.567871</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.835324</td>\n",
       "      <td>15699.382812</td>\n",
       "      <td>0</td>\n",
       "      <td>281</td>\n",
       "      <td>81</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>4.199506</td>\n",
       "      <td>0.663407</td>\n",
       "      <td>0.636738</td>\n",
       "      <td>0.729255</td>\n",
       "      <td>12.427189</td>\n",
       "      <td>10.151733</td>\n",
       "      <td>18.045597</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_1</th>\n",
       "      <td>852.840027</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.523421</td>\n",
       "      <td>15690.533203</td>\n",
       "      <td>0</td>\n",
       "      <td>200</td>\n",
       "      <td>75</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>4.215689</td>\n",
       "      <td>1.107392</td>\n",
       "      <td>1.215437</td>\n",
       "      <td>0.927316</td>\n",
       "      <td>11.639566</td>\n",
       "      <td>12.736348</td>\n",
       "      <td>9.811600</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_2</th>\n",
       "      <td>868.272705</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.686147</td>\n",
       "      <td>15682.994141</td>\n",
       "      <td>0</td>\n",
       "      <td>425</td>\n",
       "      <td>165</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>4.523576</td>\n",
       "      <td>1.151564</td>\n",
       "      <td>1.370796</td>\n",
       "      <td>0.806107</td>\n",
       "      <td>3.486768</td>\n",
       "      <td>4.777026</td>\n",
       "      <td>1.453637</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_4</th>\n",
       "      <td>704.337280</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.757623</td>\n",
       "      <td>15683.059570</td>\n",
       "      <td>0</td>\n",
       "      <td>418</td>\n",
       "      <td>169</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>4.541104</td>\n",
       "      <td>0.516323</td>\n",
       "      <td>0.459996</td>\n",
       "      <td>0.599314</td>\n",
       "      <td>1.247257</td>\n",
       "      <td>1.128166</td>\n",
       "      <td>1.422721</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S3_Cell_5</th>\n",
       "      <td>852.893799</td>\n",
       "      <td>DD3S3.csv</td>\n",
       "      <td>0.714651</td>\n",
       "      <td>15683.017578</td>\n",
       "      <td>0</td>\n",
       "      <td>201</td>\n",
       "      <td>113</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>4.456460</td>\n",
       "      <td>0.591222</td>\n",
       "      <td>0.442303</td>\n",
       "      <td>0.707195</td>\n",
       "      <td>1.831061</td>\n",
       "      <td>1.076629</td>\n",
       "      <td>2.418584</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91241</th>\n",
       "      <td>11890.440430</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.654341</td>\n",
       "      <td>1231.699951</td>\n",
       "      <td>45</td>\n",
       "      <td>185</td>\n",
       "      <td>50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.349400</td>\n",
       "      <td>1.266964</td>\n",
       "      <td>1.422560</td>\n",
       "      <td>0.846853</td>\n",
       "      <td>-0.221802</td>\n",
       "      <td>-0.247975</td>\n",
       "      <td>-0.151134</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91243</th>\n",
       "      <td>11915.123047</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.854257</td>\n",
       "      <td>1228.222168</td>\n",
       "      <td>45</td>\n",
       "      <td>203</td>\n",
       "      <td>81</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.690824</td>\n",
       "      <td>1.506736</td>\n",
       "      <td>1.552194</td>\n",
       "      <td>1.438268</td>\n",
       "      <td>0.039343</td>\n",
       "      <td>-0.081339</td>\n",
       "      <td>0.221112</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91244</th>\n",
       "      <td>11961.339844</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.651563</td>\n",
       "      <td>1230.939941</td>\n",
       "      <td>45</td>\n",
       "      <td>157</td>\n",
       "      <td>50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.267667</td>\n",
       "      <td>1.252032</td>\n",
       "      <td>1.162981</td>\n",
       "      <td>1.442600</td>\n",
       "      <td>0.026772</td>\n",
       "      <td>0.059065</td>\n",
       "      <td>-0.042333</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91245</th>\n",
       "      <td>11969.869141</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.764502</td>\n",
       "      <td>1230.717407</td>\n",
       "      <td>45</td>\n",
       "      <td>115</td>\n",
       "      <td>46</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.522319</td>\n",
       "      <td>1.508861</td>\n",
       "      <td>1.657759</td>\n",
       "      <td>1.285514</td>\n",
       "      <td>1.484028</td>\n",
       "      <td>2.159565</td>\n",
       "      <td>0.470724</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DD3S2_Cell_91247</th>\n",
       "      <td>11965.208984</td>\n",
       "      <td>DD3S2.csv</td>\n",
       "      <td>0.873094</td>\n",
       "      <td>1224.179077</td>\n",
       "      <td>45</td>\n",
       "      <td>146</td>\n",
       "      <td>67</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>3.579005</td>\n",
       "      <td>1.367290</td>\n",
       "      <td>1.387004</td>\n",
       "      <td>1.344045</td>\n",
       "      <td>6.278402</td>\n",
       "      <td>11.497890</td>\n",
       "      <td>0.124080</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "      <td>none</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>350554 rows × 118 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                         Nuc_X  Sample_ID  Nucleus_Roundness     Nuc_Y_Inv  \\\n",
       "ID                                                                           \n",
       "DD3S3_Cell_0        823.567871  DD3S3.csv           0.835324  15699.382812   \n",
       "DD3S3_Cell_1        852.840027  DD3S3.csv           0.523421  15690.533203   \n",
       "DD3S3_Cell_2        868.272705  DD3S3.csv           0.686147  15682.994141   \n",
       "DD3S3_Cell_4        704.337280  DD3S3.csv           0.757623  15683.059570   \n",
       "DD3S3_Cell_5        852.893799  DD3S3.csv           0.714651  15683.017578   \n",
       "...                        ...        ...                ...           ...   \n",
       "DD3S2_Cell_91241  11890.440430  DD3S2.csv           0.654341   1231.699951   \n",
       "DD3S2_Cell_91243  11915.123047  DD3S2.csv           0.854257   1228.222168   \n",
       "DD3S2_Cell_91244  11961.339844  DD3S2.csv           0.651563   1230.939941   \n",
       "DD3S2_Cell_91245  11969.869141  DD3S2.csv           0.764502   1230.717407   \n",
       "DD3S2_Cell_91247  11965.208984  DD3S2.csv           0.873094   1224.179077   \n",
       "\n",
       "                  ROI_index  Cell_Size  Nucleus_Size  \\\n",
       "ID                                                     \n",
       "DD3S3_Cell_0              0        281            81   \n",
       "DD3S3_Cell_1              0        200            75   \n",
       "DD3S3_Cell_2              0        425           165   \n",
       "DD3S3_Cell_4              0        418           169   \n",
       "DD3S3_Cell_5              0        201           113   \n",
       "...                     ...        ...           ...   \n",
       "DD3S2_Cell_91241         45        185            50   \n",
       "DD3S2_Cell_91243         45        203            81   \n",
       "DD3S2_Cell_91244         45        157            50   \n",
       "DD3S2_Cell_91245         45        115            46   \n",
       "DD3S2_Cell_91247         45        146            67   \n",
       "\n",
       "                  AF488_Cell_Intensity_Average  \\\n",
       "ID                                               \n",
       "DD3S3_Cell_0                               0.0   \n",
       "DD3S3_Cell_1                               0.0   \n",
       "DD3S3_Cell_2                               0.0   \n",
       "DD3S3_Cell_4                               0.0   \n",
       "DD3S3_Cell_5                               0.0   \n",
       "...                                        ...   \n",
       "DD3S2_Cell_91241                           0.0   \n",
       "DD3S2_Cell_91243                           0.0   \n",
       "DD3S2_Cell_91244                           0.0   \n",
       "DD3S2_Cell_91245                           0.0   \n",
       "DD3S2_Cell_91247                           0.0   \n",
       "\n",
       "                  AF488_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                    \n",
       "DD3S3_Cell_0                                    0.0   \n",
       "DD3S3_Cell_1                                    0.0   \n",
       "DD3S3_Cell_2                                    0.0   \n",
       "DD3S3_Cell_4                                    0.0   \n",
       "DD3S3_Cell_5                                    0.0   \n",
       "...                                             ...   \n",
       "DD3S2_Cell_91241                                0.0   \n",
       "DD3S2_Cell_91243                                0.0   \n",
       "DD3S2_Cell_91244                                0.0   \n",
       "DD3S2_Cell_91245                                0.0   \n",
       "DD3S2_Cell_91247                                0.0   \n",
       "\n",
       "                  AF488_Nucleus_Intensity_Average  ...  \\\n",
       "ID                                                 ...   \n",
       "DD3S3_Cell_0                                  0.0  ...   \n",
       "DD3S3_Cell_1                                  0.0  ...   \n",
       "DD3S3_Cell_2                                  0.0  ...   \n",
       "DD3S3_Cell_4                                  0.0  ...   \n",
       "DD3S3_Cell_5                                  0.0  ...   \n",
       "...                                           ...  ...   \n",
       "DD3S2_Cell_91241                              0.0  ...   \n",
       "DD3S2_Cell_91243                              0.0  ...   \n",
       "DD3S2_Cell_91244                              0.0  ...   \n",
       "DD3S2_Cell_91245                              0.0  ...   \n",
       "DD3S2_Cell_91247                              0.0  ...   \n",
       "\n",
       "                  r8c2_Nucleus_Intensity_Average  \\\n",
       "ID                                                 \n",
       "DD3S3_Cell_0                            4.199506   \n",
       "DD3S3_Cell_1                            4.215689   \n",
       "DD3S3_Cell_2                            4.523576   \n",
       "DD3S3_Cell_4                            4.541104   \n",
       "DD3S3_Cell_5                            4.456460   \n",
       "...                                          ...   \n",
       "DD3S2_Cell_91241                        3.349400   \n",
       "DD3S2_Cell_91243                        3.690824   \n",
       "DD3S2_Cell_91244                        3.267667   \n",
       "DD3S2_Cell_91245                        3.522319   \n",
       "DD3S2_Cell_91247                        3.579005   \n",
       "\n",
       "                  Sting_Cell_Intensity_Average  \\\n",
       "ID                                               \n",
       "DD3S3_Cell_0                          0.663407   \n",
       "DD3S3_Cell_1                          1.107392   \n",
       "DD3S3_Cell_2                          1.151564   \n",
       "DD3S3_Cell_4                          0.516323   \n",
       "DD3S3_Cell_5                          0.591222   \n",
       "...                                        ...   \n",
       "DD3S2_Cell_91241                      1.266964   \n",
       "DD3S2_Cell_91243                      1.506736   \n",
       "DD3S2_Cell_91244                      1.252032   \n",
       "DD3S2_Cell_91245                      1.508861   \n",
       "DD3S2_Cell_91247                      1.367290   \n",
       "\n",
       "                  Sting_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                    \n",
       "DD3S3_Cell_0                               0.636738   \n",
       "DD3S3_Cell_1                               1.215437   \n",
       "DD3S3_Cell_2                               1.370796   \n",
       "DD3S3_Cell_4                               0.459996   \n",
       "DD3S3_Cell_5                               0.442303   \n",
       "...                                             ...   \n",
       "DD3S2_Cell_91241                           1.422560   \n",
       "DD3S2_Cell_91243                           1.552194   \n",
       "DD3S2_Cell_91244                           1.162981   \n",
       "DD3S2_Cell_91245                           1.657759   \n",
       "DD3S2_Cell_91247                           1.387004   \n",
       "\n",
       "                  Sting_Nucleus_Intensity_Average  \\\n",
       "ID                                                  \n",
       "DD3S3_Cell_0                             0.729255   \n",
       "DD3S3_Cell_1                             0.927316   \n",
       "DD3S3_Cell_2                             0.806107   \n",
       "DD3S3_Cell_4                             0.599314   \n",
       "DD3S3_Cell_5                             0.707195   \n",
       "...                                           ...   \n",
       "DD3S2_Cell_91241                         0.846853   \n",
       "DD3S2_Cell_91243                         1.438268   \n",
       "DD3S2_Cell_91244                         1.442600   \n",
       "DD3S2_Cell_91245                         1.285514   \n",
       "DD3S2_Cell_91247                         1.344045   \n",
       "\n",
       "                  Vimentin_Cell_Intensity_Average  \\\n",
       "ID                                                  \n",
       "DD3S3_Cell_0                            12.427189   \n",
       "DD3S3_Cell_1                            11.639566   \n",
       "DD3S3_Cell_2                             3.486768   \n",
       "DD3S3_Cell_4                             1.247257   \n",
       "DD3S3_Cell_5                             1.831061   \n",
       "...                                           ...   \n",
       "DD3S2_Cell_91241                        -0.221802   \n",
       "DD3S2_Cell_91243                         0.039343   \n",
       "DD3S2_Cell_91244                         0.026772   \n",
       "DD3S2_Cell_91245                         1.484028   \n",
       "DD3S2_Cell_91247                         6.278402   \n",
       "\n",
       "                  Vimentin_Cytoplasm_Intensity_Average  \\\n",
       "ID                                                       \n",
       "DD3S3_Cell_0                                 10.151733   \n",
       "DD3S3_Cell_1                                 12.736348   \n",
       "DD3S3_Cell_2                                  4.777026   \n",
       "DD3S3_Cell_4                                  1.128166   \n",
       "DD3S3_Cell_5                                  1.076629   \n",
       "...                                                ...   \n",
       "DD3S2_Cell_91241                             -0.247975   \n",
       "DD3S2_Cell_91243                             -0.081339   \n",
       "DD3S2_Cell_91244                              0.059065   \n",
       "DD3S2_Cell_91245                              2.159565   \n",
       "DD3S2_Cell_91247                             11.497890   \n",
       "\n",
       "                  Vimentin_Nucleus_Intensity_Average  cell_type  cell_subtype  \\\n",
       "ID                                                                              \n",
       "DD3S3_Cell_0                               18.045597       none          none   \n",
       "DD3S3_Cell_1                                9.811600       none          none   \n",
       "DD3S3_Cell_2                                1.453637       none          none   \n",
       "DD3S3_Cell_4                                1.422721       none          none   \n",
       "DD3S3_Cell_5                                2.418584       none          none   \n",
       "...                                              ...        ...           ...   \n",
       "DD3S2_Cell_91241                           -0.151134       none          none   \n",
       "DD3S2_Cell_91243                            0.221112       none          none   \n",
       "DD3S2_Cell_91244                           -0.042333       none          none   \n",
       "DD3S2_Cell_91245                            0.470724       none          none   \n",
       "DD3S2_Cell_91247                            0.124080       none          none   \n",
       "\n",
       "                  immune_checkpoint  \n",
       "ID                                   \n",
       "DD3S3_Cell_0                   none  \n",
       "DD3S3_Cell_1                   none  \n",
       "DD3S3_Cell_2                   none  \n",
       "DD3S3_Cell_4                   none  \n",
       "DD3S3_Cell_5                   none  \n",
       "...                             ...  \n",
       "DD3S2_Cell_91241               none  \n",
       "DD3S2_Cell_91243               none  \n",
       "DD3S2_Cell_91244               none  \n",
       "DD3S2_Cell_91245               none  \n",
       "DD3S2_Cell_91247               none  \n",
       "\n",
       "[350554 rows x 118 columns]"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "729f24de-6494-4eae-91d5-d3eb399f1e56",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Nuc_X' 'Sample_ID' 'Nucleus_Roundness' 'Nuc_Y_Inv' 'ROI_index'\n",
      " 'Cell_Size' 'Nucleus_Size' 'aSMA_Cell_Intensity_Average'\n",
      " 'aSMA_Cytoplasm_Intensity_Average' 'aSMA_Nucleus_Intensity_Average'\n",
      " 'AXL_Cell_Intensity_Average' 'AXL_Cytoplasm_Intensity_Average'\n",
      " 'AXL_Nucleus_Intensity_Average' 'B7H4_Cell_Intensity_Average'\n",
      " 'B7H4_Cytoplasm_Intensity_Average' 'B7H4_Nucleus_Intensity_Average'\n",
      " 'CA9_Cell_Intensity_Average' 'CA9_Cytoplasm_Intensity_Average'\n",
      " 'CA9_Nucleus_Intensity_Average' 'CD4_Cell_Intensity_Average'\n",
      " 'CD4_Cytoplasm_Intensity_Average' 'CD4_Nucleus_Intensity_Average'\n",
      " 'CD8_Cell_Intensity_Average' 'CD8_Cytoplasm_Intensity_Average'\n",
      " 'CD8_Nucleus_Intensity_Average' 'CD11b_Cell_Intensity_Average'\n",
      " 'CD11b_Cytoplasm_Intensity_Average' 'CD11b_Nucleus_Intensity_Average'\n",
      " 'CD11c_Cell_Intensity_Average' 'CD11c_Cytoplasm_Intensity_Average'\n",
      " 'CD11c_Nucleus_Intensity_Average' 'CD20_Cell_Intensity_Average'\n",
      " 'CD20_Cytoplasm_Intensity_Average' 'CD20_Nucleus_Intensity_Average'\n",
      " 'CD31_Cell_Intensity_Average' 'CD31_Cytoplasm_Intensity_Average'\n",
      " 'CD31_Nucleus_Intensity_Average' 'CD44_Cell_Intensity_Average'\n",
      " 'CD44_Cytoplasm_Intensity_Average' 'CD44_Nucleus_Intensity_Average'\n",
      " 'CD45_Cell_Intensity_Average' 'CD45_Cytoplasm_Intensity_Average'\n",
      " 'CD45_Nucleus_Intensity_Average' 'CD68_Cell_Intensity_Average'\n",
      " 'CD68_Cytoplasm_Intensity_Average' 'CD68_Nucleus_Intensity_Average'\n",
      " 'CD163_Cell_Intensity_Average' 'CD163_Cytoplasm_Intensity_Average'\n",
      " 'CD163_Nucleus_Intensity_Average' 'CKs_Cell_Intensity_Average'\n",
      " 'CKs_Cytoplasm_Intensity_Average' 'CKs_Nucleus_Intensity_Average'\n",
      " 'ColVI_Cell_Intensity_Average' 'ColVI_Cytoplasm_Intensity_Average'\n",
      " 'ColVI_Nucleus_Intensity_Average' 'Desmin_Cell_Intensity_Average'\n",
      " 'Desmin_Cytoplasm_Intensity_Average' 'Desmin_Nucleus_Intensity_Average'\n",
      " 'Ecad_Cell_Intensity_Average' 'Ecad_Cytoplasm_Intensity_Average'\n",
      " 'Ecad_Nucleus_Intensity_Average' 'Fibronectin_Cell_Intensity_Average'\n",
      " 'Fibronectin_Cytoplasm_Intensity_Average'\n",
      " 'Fibronectin_Nucleus_Intensity_Average' 'FOXP3_Cell_Intensity_Average'\n",
      " 'FOXP3_Cytoplasm_Intensity_Average' 'FOXP3_Nucleus_Intensity_Average'\n",
      " 'GATA3_Cell_Intensity_Average' 'GATA3_Cytoplasm_Intensity_Average'\n",
      " 'GATA3_Nucleus_Intensity_Average' 'HLA_Cell_Intensity_Average'\n",
      " 'HLA_Cytoplasm_Intensity_Average' 'HLA_Nucleus_Intensity_Average'\n",
      " 'Ki67_Cell_Intensity_Average' 'Ki67_Cytoplasm_Intensity_Average'\n",
      " 'Ki67_Nucleus_Intensity_Average' 'MMP9_Cell_Intensity_Average'\n",
      " 'MMP9_Cytoplasm_Intensity_Average' 'MMP9_Nucleus_Intensity_Average'\n",
      " 'PD1_Cell_Intensity_Average' 'PD1_Cytoplasm_Intensity_Average'\n",
      " 'PD1_Nucleus_Intensity_Average' 'PDGFR_Cell_Intensity_Average'\n",
      " 'PDGFR_Cytoplasm_Intensity_Average' 'PDGFR_Nucleus_Intensity_Average'\n",
      " 'PDL1_Cell_Intensity_Average' 'PDL1_Cytoplasm_Intensity_Average'\n",
      " 'PDL1_Nucleus_Intensity_Average' 'r5c2_Cell_Intensity_Average'\n",
      " 'r5c2_Cytoplasm_Intensity_Average' 'r5c2_Nucleus_Intensity_Average'\n",
      " 'r7c2_Cell_Intensity_Average' 'r7c2_Cytoplasm_Intensity_Average'\n",
      " 'r7c2_Nucleus_Intensity_Average' 'r8c2_Cell_Intensity_Average'\n",
      " 'r8c2_Cytoplasm_Intensity_Average' 'r8c2_Nucleus_Intensity_Average'\n",
      " 'Sting_Cell_Intensity_Average' 'Sting_Cytoplasm_Intensity_Average'\n",
      " 'Sting_Nucleus_Intensity_Average' 'Vimentin_Cell_Intensity_Average'\n",
      " 'Vimentin_Cytoplasm_Intensity_Average'\n",
      " 'Vimentin_Nucleus_Intensity_Average' 'cell_type' 'cell_subtype'\n",
      " 'immune_checkpoint']\n"
     ]
    }
   ],
   "source": [
    "# Drop AF columns\n",
    "df = df.filter(regex='^(?!AF\\d{3}).*')\n",
    "print(df.columns.values)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5c23ca0a-2d0e-4f30-b358-54acc69ac3d0",
   "metadata": {},
   "source": [
    "## II.8. SAVE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "8c0c8c62-9b55-451f-8c2f-bdc9a33b9fff",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/DD3S3_bs.csv was created!\n",
      "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/TMA_bs.csv was created!\n",
      "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/DD3S1_bs.csv was created!\n",
      "File /Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_bs/DD3S2_bs.csv was created!\n"
     ]
    }
   ],
   "source": [
    "# Save the data by Sample_ID\n",
    "# Check for the existence of the output file first\n",
    "for sample in ls_samples:\n",
    "    sample_id = sample.split('_')[0]\n",
    "    filename = os.path.join(output_data_dir,  sample_id + \"_\" + step_suffix + \".csv\")\n",
    "    if os.path.exists(filename):\n",
    "        print(\"File by name \"+filename+\" already exists.\")\n",
    "    else:\n",
    "        sample_id_csv = sample_id + '.csv'\n",
    "        df_save = df.loc[df['Sample_ID'] == sample_id_csv, :]\n",
    "        #print(df_save)\n",
    "        filename = os.path.join(output_data_dir,  sample_id + \"_\" + step_suffix + \".csv\")\n",
    "        df_save.to_csv(filename, index=True, index_label='ID')  # Set index parameter to True to retain the index column\n",
    "        print(\"File \" + filename + \" was created!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ef5b66d-fcc6-4677-aab7-f5a748196295",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a2991f1-7ef3-430e-82f1-810ad70e769d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}