KashyapiNagaHarshitha committed on
Commit 1604536 · verified · 1 Parent(s): 302d1f0

Delete Quality_Control.py

Files changed (1)
  1. Quality_Control.py +0 -1783
Quality_Control.py DELETED
@@ -1,1783 +0,0 @@
#!/usr/bin/env python
# coding: utf-8

import warnings
import os
import plotly  # note: not aliased as plt; matplotlib.pyplot uses that alias below
import seaborn as sb
import plotly.express as px
import panel as pn
import holoviews as hv
import hvplot.pandas
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from bokeh.plotting import figure
from bokeh.io import push_notebook, show
from bokeh.io.export import export_png
from bokeh.resources import INLINE
from bokeh.embed import file_html
from bokeh.io import curdoc
from bokeh.models import Span, Label
from bokeh.models import ColumnDataSource, Button
from my_modules import *
from datasets import load_dataset

os.getcwd()

# Silence FutureWarnings & UserWarnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)


#present_dir = os.path.dirname(os.path.realpath(__file__))
#input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
base_dir = '/code/wetransfer_data-zip_2024-05-17_1431'
set_path = 'test'
selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
ls_samples = ['DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']

pn.extension()

update_button = pn.widgets.Button(name='CSV Files', button_type='primary')
def update_samples(event):
    with open('stored_variables.json', 'r') as file:
        stored_vars = json.load(file)
        # ls_samples = stored_vars['ls_samples']
    print(ls_samples)
update_button.on_click(update_samples)

csv_files_button = pn.widgets.Button(icon="clipboard", button_type="primary")
indicator = pn.indicators.LoadingSpinner(value=False, size=25)

def handle_click(clicks):
    with open('stored_variables.json', 'r') as file:
        stored_vars = json.load(file)
        # ls_samples = stored_vars['ls_samples']
    return f'CSV Files Selected: {ls_samples}'

pn.Row(
    csv_files_button,
    pn.bind(handle_click, csv_files_button.param.clicks),
)
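
# Note: pn.bind re-runs handle_click whenever the button's click count changes,
# so the row above refreshes its 'CSV Files Selected' text on every click.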


# ## I.2. *DIRECTORIES

set_path = 'test'

# Set base directory

directorio_actual = os.getcwd()
print(directorio_actual)

##### MAC WORKSTATION #####
#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'
###########################

##### WINDOWS WORKSTATION #####
#base_dir = r'C:\Users\LaboLabrie\gerz2701\cyCIF-pipeline\Set_B'
###############################
input_path = base_dir

##### LOCAL WORKSTATION #####
#base_dir = r'/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'
base_dir = input_path
print(base_dir)
#############################

#set_name = 'Set_A'
#set_name = 'test'
set_name = set_path

project_name = set_name          # Project name
step_suffix = 'qc_eda'           # Current part (here part I)
previous_step_suffix_long = ""   # Previous part (here empty)

# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + "_data")

# QC/EDA output directories
output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
# images subdirectory
output_images_dir = os.path.join(output_data_dir, "images")

# Data and Metadata directories
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
# images subdirectory
metadata_images_dir = os.path.join(metadata_dir, "images")

# Create directories if they don't already exist
for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
    if not os.path.exists(d):
        print("Creating the", d, "directory...")
        os.makedirs(d)
    else:
        print("The", d, "directory already exists.")

os.chdir(input_data_dir)
with open('stored_variables.json', 'r') as file:
    stored_vars = json.load(file)
    # ls_samples = stored_vars['ls_samples']
    selected_metadata_files = stored_vars['selected_metadata_files']

directories = [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]

directories

def print_directories(directories):
    label_path = []
    labels = [
        "base_dir",
        "input_data_dir",
        "output_data_dir",
        "output_images_dir",
        "metadata_dir",
        "metadata_images_dir",
    ]
    for label, path in zip(labels, directories):
        label_path.append(f"{label} : {path}")
    return label_path

print(print_directories(directories))


# Verify paths
print('base_dir :', base_dir)
print('input_data_dir :', input_data_dir)
print('output_data_dir :', output_data_dir)
print('output_images_dir :', output_images_dir)
print('metadata_dir :', metadata_dir)
print('metadata_images_dir :', metadata_images_dir)


# ## I.3. FILES

# Listing all the .csv files in the metadata/data directory
# Don't forget to move the csv files into the proj_data directory;
# if the data dir is empty this will not work
#ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith(".csv")]
print("The following CSV files were detected:\n\n", ls_samples, "\n\nin", input_data_dir, "directory.")


# In[26]:


def combine_and_save_metadata_files(metadata_dir, selected_metadata_files):
    if not selected_metadata_files:
        warnings.warn("No Ashlar file uploaded. Please upload a valid file.", UserWarning)
        return pd.DataFrame()

    elif len(selected_metadata_files) > 1:
        combined_metadata_df = pd.DataFrame()

        for file in selected_metadata_files:
            file_path = os.path.join(metadata_dir, file)
            df = pd.read_csv(file_path)
            combined_metadata_df = pd.concat([combined_metadata_df, df], ignore_index=True)

        combined_metadata_df.to_csv(os.path.join(metadata_dir, "combined_metadata.csv"), index=False)
        print(f"Combined metadata file saved as 'combined_metadata.csv' in {metadata_dir}")
        return combined_metadata_df

    else:
        single_file_path = os.path.join(metadata_dir, selected_metadata_files[0])
        single_file_df = pd.read_csv(single_file_path)
        print(f"Only one file selected: {selected_metadata_files[0]}")
        return single_file_df


# In[27]:


print(combine_and_save_metadata_files(metadata_dir, selected_metadata_files))


# In[28]:


ls_samples


# In[29]:


path = os.path.join(input_data_dir, ls_samples[0])
#df = load_dataset('csv', data_files=path)
df = pd.read_csv(path, index_col=0, nrows=1)
df.head(10)


# In[30]:


# First gather information on expected headers using the first file in ls_samples
# Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)

# Make sure the file was imported correctly
print("df :\n", df.head(), "\n")
print("df's columns :\n", df.columns, "\n")
print("df's index :\n", df.index, "\n")
print("df's index name :\n", df.index.name)


# In[31]:


df.head()


# In[32]:


# Verify that the ID column in the input file became the index
# Verify that the index name is "ID"; if not, rename it
if df.index.name != "ID":
    print("Expected the first column in input file (index_col = 0) to be 'ID'. \n"
          "This column will be used to set the index names (cell number for each sample). \n"
          "It appears that the column '" + df.index.name + "' was actually imported as the index column.")
    #df.index.name = 'ID'
    print("A new index name (first column) will be given ('ID') to replace the current one '" + df.index.name + "'\n")

# Apply the changes to the headers as specified with the apply_header_changes() function (in my_modules.py)
# Apply the changes to the dataframe rows as specified with the apply_df_changes() function (in my_modules.py)
#df = apply_header_changes(df)
print(df.index)
df.index = df.index.str.replace(r'@1$', '', regex=True)
df = apply_df_changes(df)

# Set variable to hold default header values
expected_headers = df.columns.values
expected_header = True
print(expected_header)

initial_dataframe = df
# Make sure the file is now formatted correctly
print("\ndf :\n", df.head(), "\n")
print("df's columns :\n", df.columns, "\n")
print("df's index :\n", df.index, "\n")
print("df's index name :\n", df.index.name)


# In[33]:


df.head()


# In[35]:


print("Used " + ls_samples[0] + " to determine the expected and corrected headers for all files.\n")
print("These headers are: \n" + ", ".join(expected_headers))

corrected_headers = True


# In[36]:


for sample in ls_samples:
    file_path = os.path.join(input_data_dir, sample)
    print(file_path)


# In[37]:


# Import all the other files
dfs = {}
###############################
# !! This may take a while !! #
###############################
errors = []

# Iterate over a copy of the list so that removing a sample inside the loop is safe
for sample in list(ls_samples):
    file_path = os.path.join(input_data_dir, sample)

    try:
        # Read the CSV file
        #df = load_dataset("csv", data_files=file_path)  # skipped: immediately overwritten by the read_csv below
        df = pd.read_csv(file_path, index_col=0)
        # Check if the DataFrame is empty; if so, don't continue trying to process it and remove it

        if not df.empty:
            # Manipulations necessary for concatenation
            df = apply_header_changes(df)
            df = apply_df_changes(df)
            # Reorder the columns to match the expected headers list
            #df = df.reindex(columns=expected_headers)
            print(df.head(1))
            print(sample, "file is processed !\n")

            # Compare df's headers against what is expected
            compare_headers(expected_headers, df.columns.values, sample)
            # Add a new column to identify the csv file (sample) the df comes from
            df['Sample_ID'] = sample

        # Add df to dfs (only when the file was read successfully)
        dfs[sample] = df

    except pd.errors.EmptyDataError:
        errors.append(f'\nEmpty data error in {sample} file. Removing from analysis...')
        print(f'\nEmpty data error in {sample} file. Removing from analysis...')
        ls_samples.remove(sample)

print(dfs)


dfs.values()

# Merge dfs into one df
df = pd.concat(dfs.values(), ignore_index=False, sort=False)
del dfs
merge = True
merged_dataframe = df
df.head()

# Set index to Sample_ID + cell number:
# create a new custom index for df based on the sample names and integer cell numbers, then remove
# the temporary columns 'level_0' and 'index' that were introduced during the operations

# Create a copy of the DataFrame df and reset its index without creating a new column for the old index
# This essentially removes the old index column and replaces it with a default integer index
df = df.copy().reset_index(drop=True)

#print(df)

# Initialize an empty list to store the new index labels for the DataFrame
index = []

for sample in ls_samples:
    # Extract the rows of the original df where the 'Sample_ID' column matches the current sample name
    # This chunk is a subset of the original data for that specific sample
    df_chunk = df.loc[df['Sample_ID'] == sample, :].copy()
    old_index = df_chunk.index
    # Reset the index of df_chunk, removing the old index and replacing it with a default integer index
    df_chunk = df_chunk.reset_index(drop=True)
    # A new index is created for df_chunk: it combines the sample name with 'Cell_' and the integer index values,
    # converting them to strings, so the labels look like 'SampleName_Cell_0', 'SampleName_Cell_1', and so on.
    sample = sample.split('.')[0]
    df_chunk = df_chunk.set_index(f'{sample}_Cell_' + df_chunk.index.astype(str))
    # The index values of df_chunk are then added to the index list
    index = index + df_chunk.index.values.tolist()

# After processing all the samples in the loop, assign the index list as the new index of the original df
df.index = index
# Remove the 'level_0' and 'index' columns from df
df = df.loc[:, ~df.columns.isin(['level_0', 'index'])]
assigned_new_index = True
df.head()
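
# The rebuilt index labels look like 'DD3S1_Cell_0', 'DD3S1_Cell_1', ..., one block per sample,
# so every cell is uniquely identified by its sample of origin and its cell number.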


# ### I.3.2. NOT_INTENSITIES

# not_intensities is the list of the columns unrelated to the markers fluorescence intensities
# Can include items that aren't in a given header.
#not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
#                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID', 'cell_type', 'cell_subtype', 'cluster', 'ID',
#                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']

# Get all column names
all_columns = df.columns.tolist()

# Create lists to store non-intensity and intensity column names
not_intensities = []
intensity_columns = []
# Iterate over each column name
for column in all_columns:
    # Check if the column name contains 'Intensity_Average'
    if 'Intensity_Average' not in column:
        not_intensities.append(column)
    else:
        intensity_columns.append(column)


# Create a new DataFrame with non-intensity columns
not_intensities_df = pd.DataFrame(not_intensities)
print("Non-intensity columns:")
print(not_intensities)

print("Non-intensity DataFrame:")
not_intensities
#print(len(intensity_columns))


pd.DataFrame(not_intensities)

path_not_intensities = os.path.join(metadata_dir, "not_intensities.csv")

# If this file already exists, add only the not_intensities items not already present in the file
if os.path.exists(path_not_intensities):
    print("'not_intensities.csv' already exists.")
    print("Reconciling file and Jupyter notebook lists.")
    with open(path_not_intensities, "r") as file_not_intensities:
        file_ni = file_not_intensities.read().splitlines()
    # Set difference to identify items not already in file
    to_add = set(not_intensities) - set(file_ni)
    # We want not_intensities to be a complete list
    not_intensities = list(set(file_ni) | set(not_intensities))
    with open(path_not_intensities, "a") as file_not_intensities:
        for item in to_add:
            file_not_intensities.write(item + "\n")

else:
    # The file does not yet exist
    print("Could not find " + path_not_intensities + ". Creating now.")
    with open(path_not_intensities, "w") as file_not_intensities:
        for item in not_intensities:
            file_not_intensities.write(item + "\n")


# In[46]:


not_intensities_df = pd.read_csv(path_not_intensities)
not_intensities_df


# In[47]:


# Columns we want to keep: not_intensities, and any intensity column that contains 'Intensity_Average'
# (drop any intensity marker column that is not a mean intensity)
to_keep = not_intensities + [x for x in df.columns.values[~df.columns.isin(not_intensities)] if 'Intensity_Average' in x]

to_keep


# In[48]:


print(len(to_keep) - 1)


# In[49]:


# However, our to_keep list contains items that might not be in our df headers!
# These items come from our not_intensities list, so keep only those items of to_keep
# that are actually found in df's headers (columns).
# This ensures that we only keep columns that exist in the df, avoiding any issues with non-existent column names.
# The result is a df containing only the specified columns.
df = df[[x for x in to_keep if x in df.columns.values]]

df.head()


# In[50]:


# Get all column names
all_columns = df.columns.tolist()

# Create an empty list to store intensity markers
intensity_marker = []

# Iterate over each column name
for column in all_columns:
    # Check if the column name contains 'Intensity_Average'
    if 'Intensity_Average' in column:
        # Split the column name by underscore
        parts = column.split('_')

        # Extract the word before the first underscore
        marker = parts[0]

        # Add the marker to the intensity_marker list
        intensity_marker.append(marker)

# Remove duplicates from the intensity_marker list
intensity_marker = list(set(intensity_marker))

print("Intensity Markers:")
print(intensity_marker)

# Create a callback function to update the intensities array
def update_intensities(event):
    global intensities
    global intensities_df
    new_intensities = []
    selected_columns = []
    for marker, cell, cytoplasm, nucleus in zip(marker_options_df['Marker'], marker_options_df['Cell'], marker_options_df['Cytoplasm'], marker_options_df['Nucleus']):
        if cell:
            new_intensities.append(f"{marker}_Cell_Intensity_Average")
            selected_columns.append(f"{marker}_Cell_Intensity_Average")
        if cytoplasm:
            new_intensities.append(f"{marker}_Cytoplasm_Intensity_Average")
            selected_columns.append(f"{marker}_Cytoplasm_Intensity_Average")
        if nucleus:
            new_intensities.append(f"{marker}_Nucleus_Intensity_Average")
            selected_columns.append(f"{marker}_Nucleus_Intensity_Average")
    intensities = new_intensities
    if selected_columns:
        intensities_df = merged_dataframe[selected_columns]
    else:
        intensities_df = pd.DataFrame()
    print("Updated intensities DataFrame:")
    print(intensities_df)
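
# Panel passes an Event object to watch callbacks, hence the 'event' argument above;
# the callback rebuilds 'intensities' from whichever Cell/Cytoplasm/Nucleus boxes are ticked.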

# In[54]:


tabulator_formatters = {
    'bool': {'type': 'tickCross'}
}

# Create a DataFrame with the intensity markers and default values
marker_options_df = pd.DataFrame({
    'Marker': intensity_marker,
    'Cell': [False] * len(intensity_marker),
    'Cytoplasm': [False] * len(intensity_marker),
    'Nucleus': [False] * len(intensity_marker)
})

# Create the Tabulator widget and link the callback function
tabulator = pn.widgets.Tabulator(marker_options_df, formatters=tabulator_formatters, sizing_mode='stretch_width')
tabulator.param.watch(update_intensities, 'value')

# Create a Panel layout with the Tabulator widget
marker_options_layout = pn.Column(tabulator, sizing_mode="stretch_width")

# Initialize the Panel extension with Tabulator
pn.extension('tabulator')

# Create a DataFrame with the intensity markers and default values (Cell selected by default)
marker_options_df = pd.DataFrame({
    'Marker': intensity_marker,
    'Cell': [True] * len(intensity_marker),
    'Cytoplasm': [False] * len(intensity_marker),
    'Nucleus': [False] * len(intensity_marker)
})

# Define formatters for the Tabulator widget
tabulator_formatters = {
    'Cell': {'type': 'tickCross'},
    'Cytoplasm': {'type': 'tickCross'},
    'Nucleus': {'type': 'tickCross'}
}

# Create the Tabulator widget
tabulator = pn.widgets.Tabulator(marker_options_df, formatters=tabulator_formatters, sizing_mode='stretch_width')

# Create a DataFrame to store the initial intensities (Cell is ticked by default)
new_data = [{'Description': f"{marker}_Cell_Intensity_Average"} for marker in intensity_marker]
new_data_df = pd.DataFrame(new_data)

# Create a widget to display the new data as a DataFrame
new_data_table = pn.widgets.Tabulator(new_data_df, name='New Data Table', sizing_mode='stretch_width')

# Create a button to start the update process
run_button = pn.widgets.Button(name="Save Selection", button_type='primary')

# Define the update_intensities function (overrides the earlier event-based version)
def update_intensities():
    global new_data, new_data_df
    new_data = []
    for _, row in tabulator.value.iterrows():
        marker = row['Marker']
        if row['Cell']:
            new_data.append({'Description': f"{marker}_Cell_Intensity_Average"})
        if row['Cytoplasm']:
            new_data.append({'Description': f"{marker}_Cytoplasm_Intensity_Average"})
        if row['Nucleus']:
            new_data.append({'Description': f"{marker}_Nucleus_Intensity_Average"})
    new_data_df = pd.DataFrame(new_data)
    new_data_table.value = new_data_df

# Define the runner function
async def runner(event):
    update_intensities()

# Bind the runner function to the button
run_button.on_click(runner)

# Layout
updated_intensities = pn.Column(tabulator, run_button, new_data_table, sizing_mode="stretch_width")

pn.extension()
# Serve the layout
#updated_intensities.servable()


intensities_df = new_data_table.value  # take the widget's underlying DataFrame, not the widget itself
intensities_df

intensities_df = pn.pane.DataFrame(intensities_df)
intensities_df

print(intensities_df)

# ## I.4. QC CHECKS

def quality_check_results(check_index, check_shape, check_no_null, check_zero_intensities):
    results = [
        f"Check Index: {check_index}",
        f"Check Shape: {check_shape}",
        f"Check No Null: {check_no_null}",
        f"Check Zero Intensities: {check_zero_intensities}"
    ]
    return pn.Column(*[pn.Row(result) for result in results], sizing_mode="stretch_width")

print(ls_samples)

def check_index_format(index_str, ls_samples):
    """
    Checks if the given index string follows the specified format.

    Args:
        index_str (str): The index string to be checked.
        ls_samples (list): A list of valid sample names.

    Returns:
        bool: True if the index string follows the format, False otherwise.
    """
    # Split the index string into parts
    parts = index_str.split('_')

    # Check if there are exactly 3 parts
    if len(parts) != 3:
        print(len(parts))
        return False

    # Check if the first part is in ls_samples
    sample_name = parts[0]
    if f'{sample_name}.csv' not in ls_samples:
        print(sample_name)
        return False

    # Check if the second part is in ['Cell', 'Cytoplasm', 'Nucleus']
    location = parts[1]
    valid_locations = ['Cell', 'Cytoplasm', 'Nucleus']
    if location not in valid_locations:
        print(location)
        return False

    # Check if the third part is a number
    try:
        index = int(parts[2])
    except ValueError:
        print(parts[2])
        return False

    # If all checks pass, return True
    return True
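
# A minimal usage sketch (hypothetical values, matching the 'Sample_Cell_N' index built above):
# check_index_format('DD3S1_Cell_0', ['DD3S1.csv'])     -> True
# check_index_format('DD3S1_Nucleus_x', ['DD3S1.csv'])  -> False (third part is not a number)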


# In[70]:


# Let's take a look at a few features to make sure our dataframe is as expected
df.index
def check_format_ofindex(index):
    for idx in index:
        check_index = check_index_format(idx, ls_samples)
        if check_index is False:
            index_format = "Bad"
            return index_format

    index_format = "Good"
    return index_format
print(check_format_ofindex(df.index))


# In[71]:


df.shape
check_index = df.index
check_shape = df.shape
print(check_shape)


# In[72]:


# Check for NaN entries (should not be any unless columns do not align)
# False means no NaN entries
# True means NaN entries
df.isnull().any().any()

check_no_null = df.isnull().any().any()


# In[73]:


# Check that all expected files were imported into the final dataframe
if sorted(df.Sample_ID.unique()) == sorted(ls_samples):
    print("All expected filenames are present in big df Sample_ID column.")
    check_all_expected_files_present = "All expected filenames are present in big df Sample_ID column."
else:
    compare_headers(['no samples'], df.Sample_ID.unique(), "big df Sample_ID column")
    check_all_expected_files_present = compare_headers(['no samples'], df.Sample_ID.unique(), "big df Sample_ID column")

print(df.Sample_ID)


# In[74]:


# Delete rows that have 0 value mean intensities for intensity columns
print("df.shape before removing 0 mean values: ", df.shape)

# Calculate the mean intensity for each row, excluding the columns listed in not_intensities
# (which are not to be considered for mean intensity calculations)
###############################
# !! This may take a while !! #
###############################
mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)

# Check if there are any 0 mean intensity values
if (mean_intensity == 0).any():
    df = df.loc[mean_intensity > 0, :]
    print("Shape after removing 0 mean values: ", df.shape)
    check_zero_intensities = f'df.shape after removing 0 mean values: {df.shape}'
else:
    print("No zero intensity values.")
    check_zero_intensities = "No zero intensity values found in the DataFrame."


# Get quantiles (5th, 50th, 95th)
# List of nucleus size percentiles to extract
#qs = [0.05, 0.50, 0.95]
#df["Nucleus_Size"].quantile(q=qs)


quality_control_df = df
quality_control_df.head()

# Function to perform quality checks
def perform_quality_checks(df, ls_samples, not_intensities):
    results = {}
    errors = []
    # Check index
    results['index'] = df.index

    # Check shape
    results['shape'] = df.shape

    # Check for NaN entries
    results['nan_entries'] = df.isnull().any().any()

    # Remove rows with 0 mean intensity values
    mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)
    if (mean_intensity == 0).any():
        df = df.loc[mean_intensity > 0, :]
        results['zero_intensity_removal'] = f"Zero intensity entries were found and removed. Shape after removing: {df.shape}"
    else:
        results['zero_intensity_removal'] = "No zero intensity values found in the DataFrame."

    return results

# Example usage of the function
# (stored under a new name so it does not shadow the quality_check_results() helper defined above)
quality_results = perform_quality_checks(df, ls_samples, not_intensities)

# Print results
for key, value in quality_results.items():
    print(f"{key}: {value}")


# In[80]:


def quality_check(file, not_intensities):
    # Load the output file
    df = file

    # Check Index
    check_index = check_format_ofindex(df.index)

    # Check Shape
    check_shape = df.shape

    # Check for NaN entries
    check_no_null = df.isnull().any().any()

    mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)
    if (mean_intensity == 0).any():
        df = df.loc[mean_intensity > 0, :]
        print("df.shape after removing 0 mean values: ", df.shape)
        check_zero_intensities = f'df.shape after removing 0 mean values: {df.shape}'
    else:
        print("No zero intensity values found in the DataFrame.")
        check_zero_intensities = "No zero intensities."

    # Create a quality check results table
    quality_check_results_table = pd.DataFrame({
        'Check': ['Index', 'Shape', 'Check for NaN Entries', 'Check for Zero Intensities'],
        'Result': [str(check_index), str(check_shape), str(check_no_null), check_zero_intensities]
    })

    # Create a quality check results component
    quality_check_results_component = pn.Card(
        pn.pane.DataFrame(quality_check_results_table),
        title="Quality Control Results",
        header_background="#2196f3",
        header_color="white",
    )

    return quality_check_results_component

quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)


# Function to calculate quantile values
def calculate_quantiles(quantile):
    quantile_value_intensity = df["AF555_Cell_Intensity_Average"].quantile(q=[quantile, 0.50, 1 - quantile])
    return quantile_value_intensity

# Function to create the Panel app
def create_app(quantile):
    quantiles = calculate_quantiles(quantile)
    output = pd.DataFrame(quantiles)

    # Create a DataFrame pane to display the output
    output_widget = pn.pane.DataFrame(output)

    return output_widget


# Bind the create_app function to the quantile slider
quantile_output_app = pn.bind(create_app, quantile_slider.param.value)
#pn.Column(quantile_slider, quantile_output_app).servable()

# Function to create the line graph plot using Bokeh
def create_line_graph2(quantile):
    # Calculate histogram
    hist, edges = np.histogram(df['Nucleus_Size'], bins=30)

    # Calculate the midpoints of bins for plotting
    midpoints = (edges[:-1] + edges[1:]) / 2

    # Calculate quantiles
    qs = [quantile, 0.50, 1.00 - quantile]
    quantiles = df['Nucleus_Size'].quantile(q=qs).values

    # Create Bokeh line graph plot
    p = figure(title='Frequency vs. Nucleus_Size',
               x_axis_label='Nucleus_Size',
               y_axis_label='Frequency',
               width=800, height=400)

    # Plot histogram
    p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
           fill_color='skyblue', line_color='black', alpha=0.6)

    # Plot line graph
    p.line(midpoints, hist, line_width=2, color='blue', alpha=0.7)

    # Add quantile lines
    for q in quantiles:
        span = Span(location=q, dimension='height', line_color='red', line_dash='dashed', line_width=2)
        p.add_layout(span)
        p.add_layout(Label(x=q, y=max(hist), text=f'{q:.1f}', text_color='red'))

    return p


# Bind the create_line_graph2 function to the quantile slider
nucleus_size_line_graph_with_histogram = pn.bind(create_line_graph2, quantile=quantile_slider.param.value)

# Clean the 'Nucleus_Size' column by removing NaN and infinite values
df = df[np.isfinite(df['Nucleus_Size'])]  # keep only finite values

# Check that the DataFrame is not empty after cleaning
if df.empty:
    raise ValueError("No valid data available after cleaning.")
else:
    # Calculate the histogram
    hist, edges = np.histogram(df['Nucleus_Size'], bins=30)
    print("Histogram calculated successfully.")
    print("Histogram:", hist)
    print("Edges:", edges)
plot1 = pn.Column(quantile_slider, pn.pane.Bokeh(nucleus_size_line_graph_with_histogram))

# Removing cells based on nucleus size

quantile = quantile_slider.value
qs = [quantile, 0.50, 1.00 - quantile]
quantiles = df['Nucleus_Size'].quantile(q=qs).values
threshold = quantiles[2]

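# With the slider's default value of 0.05, qs = [0.05, 0.50, 0.95], so 'threshold'
# is the 95th percentile of Nucleus_Size -- the cutoff referenced in the filtering below.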

# In[89]:


print(threshold)


# In[90]:


# Define the quantile slider
#quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)

# Function to update the threshold and display the number of cells removed
def update_threshold_and_display(quantile):
    qs = [quantile, 0.50, 1.00 - quantile]
    quantiles = df['Nucleus_Size'].quantile(q=qs).values
    threshold = quantiles[2]

    # Filter the DataFrame based on the new threshold
    df_filtered = df.loc[(df['Nucleus_Size'] > 42) & (df['Nucleus_Size'] < threshold)]

    # Calculate the number of cells removed
    cells_before_filter = df.shape[0]
    cells_after_filter = df_filtered.shape[0]
    cells_removed = cells_before_filter - cells_after_filter

    # Display the results
    results = pn.Column(
        f"Number of cells before filtering: {cells_before_filter}",
        f"Number of cells after filtering on nucleus size: {cells_after_filter}",
        f"Number of cells removed: {cells_removed}"
    )

    return results

# Bind the update function to the quantile slider
results_display = pn.bind(update_threshold_and_display, quantile_slider)

# Layout the components in a Panel app
layout2 = results_display


# In[91]:


print("Number of cells before filtering :", df.shape[0])
cells_before_filter = f"Number of cells before filtering :{df.shape[0]}"
# Delete small cells and objects w/ high AF555 signal (RBCs)
# We usually use the 95th percentile calculated during QC_EDA
df = df.loc[(df['Nucleus_Size'] > 42)]
df = df.loc[(df['Nucleus_Size'] < threshold)]
cells_after_filter_nucleus_shape = df.shape[0]
print("Number of cells after filtering on nucleus size:", df.shape[0])

df = df.loc[(df['AF555_Cell_Intensity_Average'] < 2000)]
print("Number of cells after filtering on AF555 intensity:", df.shape[0])
cells_after_filter_intensity_shape = df.shape[0]
cells_after_filter_nucleus = f"Number of cells after filtering on nucleus size: {cells_after_filter_nucleus_shape}"
cells_after_filter_intensity = f"Number of cells after filtering on AF555 intensity: {cells_after_filter_intensity_shape}"

num_of_cell_removal_intensity = cells_after_filter_intensity

print(num_of_cell_removal_intensity)

num_of_cell_removal = pn.Column(cells_before_filter, cells_after_filter_nucleus)


# Collect the intensity columns from df
intensities = df.filter(like='Intensity').columns.tolist()

# Create a ColumnDataSource from the DataFrame
source = ColumnDataSource(df)

# Function to calculate quantile values
def calculate_quantiles(column, quantile):
    quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile]).values
    return quantiles

# Create the dropdown menu
column_dropdown = pn.widgets.Select(name='Select Column', options=intensities)

quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)


# Function to create the Bokeh plot
def create_intensity_plot(column, quantile):
    quantiles = calculate_quantiles(column, quantile)
    hist, edges = np.histogram(df[column], bins=30)
    # Calculate the midpoints of bins for plotting
    midpoints = (edges[:-1] + edges[1:]) / 2

    # Create Bokeh plot
    p = figure(title=f'Distribution of {column} with Quantiles',
               x_axis_label=f'{column} Values',
               y_axis_label='Frequency',
               width=800, height=400)

    p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
           fill_color='skyblue', line_color='black', alpha=0.7)

    # Plot line graph
    p.line(midpoints, hist, line_width=2, color='blue', alpha=0.7)

    # Add quantile lines
    for q in quantiles:
        span = Span(location=q, dimension='height', line_color='red', line_dash='dashed', line_width=2)
        p.add_layout(span)
        p.add_layout(Label(x=q, y=max(hist), text=f'{q:.1f}', text_color='red'))

    return p


# Bind the create_intensity_plot function to the column dropdown and quantile slider
# (watch=True was dropped: it registers a side-effect watcher instead of returning a displayable bound function)
marker_intensity_with_histogram = pn.bind(create_intensity_plot, column_dropdown.param.value, quantile_slider.param.value)

# Create the button
generate_plot_button = Button(label='Generate Plot', button_type='primary')

def update_plot(column, quantile):
    plot = create_intensity_plot(column, quantile)
    plot.renderers[0].data_source = source  # Update the data source for the renderer
    return plot

# Display the dropdown menu, quantile slider, button, and plot
#plot = update_plot(column_dropdown.param.value, quantile_slider.param.value)

def generate_plot(event):
    updated_plot = update_plot(column_dropdown.param.value, quantile_slider.param.value)
    #pn.Column(pn.Row(column_dropdown, generate_plot_button), quantile_slider, updated_plot).servable()

generate_plot_button.on_click(generate_plot)
selected_marker_plot = pn.Column(pn.Row(pn.Column(column_dropdown, marker_intensity_with_histogram)))
#pn.Column(pn.Row(pn.Column(column_dropdown, marker_intensity_with_histogram), generate_plot_button)).servable()


# In[105]:


quantile_slider = pn.widgets.FloatSlider(name='Quantile', start=0.01, end=0.99, step=0.01, value=0.05)


# Bind the create_line_graph function to the quantile slider
#nucleus_size_line_graph = pn.bind(create_line_graph, quantile=quantile_slider.param.value)

# Layout the components in a Panel app
#nucleus_size_graph = pn.Column(nucleus_size_line_graph)


# In[106]:


#df["CKs_Cytoplasm_Intensity_Average"].quantile(q=qs)


# In[107]:


len(intensities)
if 'CKs_Cytoplasm_Intensity_Average' in intensities:
    print(1)


# In[108]:


df


# In[109]:


def calculate_cytoplasm_quantiles(column, quantile):
    # Print the columns of the DataFrame
    print("DataFrame columns:", df.columns)

    # Check if the column exists in the DataFrame
    if column not in df.columns:
        raise KeyError(f"Column '{column}' does not exist in the DataFrame.")

    quantiles = df[column].quantile(q=[quantile, 0.50, 1 - quantile])
    return quantiles

def create_cytoplasm_intensity_df(column, quantile):
    quantiles = calculate_cytoplasm_quantiles(column, quantile)
    output = pd.DataFrame(quantiles)
    return pn.pane.DataFrame(output)

# Bind the create_cytoplasm_intensity_df function to the quantile slider
cytoplasm_quantile_output_app = pn.bind(create_cytoplasm_intensity_df, column='CKs_Cytoplasm_Intensity_Average', quantile=quantile_slider.param.value)

pn.Column(quantile_slider, cytoplasm_quantile_output_app)


# ## I.5. COLUMNS OF INTERESTS

# In[111]:


# Remove columns containing "DAPI"
df = df[[x for x in df.columns.values if 'DAPI' not in x]]

print("Columns are now...")
print([c for c in df.columns.values])


# In[112]:


# Create lists of full names and shortened names to use in plotting
full_to_short_names, short_to_full_names = \
    shorten_feature_names(df.columns.values[~df.columns.isin(not_intensities)])

short_to_full_names


# In[113]:


# Save this data to a metadata file
filename = os.path.join(metadata_dir, "full_to_short_column_names.csv")
with open(filename, "w") as fh:
    fh.write("full_name,short_name\n")
    for k, v in full_to_short_names.items():
        fh.write(k + "," + v + "\n")

print("The full_to_short_column_names.csv file was created !")


# In[114]:


# Save this data to a metadata file
filename = os.path.join(metadata_dir, "short_to_full_column_names.csv")
with open(filename, "w") as fh:
    fh.write("short_name,full_name\n")
    for k, v in short_to_full_names.items():
        fh.write(k + "," + v + "\n")

print("The short_to_full_column_names.csv file was created !")


# ## I.6. EXPOSURE TIME

# In[115]:


# Import the ashlar analysis file
file_path = os.path.join(metadata_dir, 'combined_metadata.csv')
ashlar_analysis = pd.read_csv(file_path)
ashlar_analysis


# In[116]:


# Extract and rename columns
new_df = ashlar_analysis[['Name', 'Cycle', 'ChannelIndex', 'ExposureTime']].copy()
new_df.rename(columns={
    'Name': 'Target',
    'Cycle': 'Round',
    'ChannelIndex': 'Channel'
}, inplace=True)

# Apply prefixes to the Round and Channel values
new_df['Round'] = 'R' + new_df['Round'].astype(str)
new_df['Channel'] = 'c' + new_df['Channel'].astype(str)

# Save to CSV
new_df.to_csv('Ashlar_Exposure_Time.csv', index=False)

# Print the new dataframe
print(new_df)


# In[117]:


# Here, we want to end up with a data structure that incorporates metadata on each intensity marker column
# used in our big dataframe in an easy-to-use format.
# This is going to include the full name of the intensity marker columns in the big data frame,
# the corresponding round and channel,
# the target protein (e.g., CD45),
# and the segmentation localization information (cell, cytoplasm, nucleus)

# We can use this data structure to assign unique colors to all channels and rounds, for example, for use in later visualizations
# Exposure_time file from ASHLAR analysis
filename = "Exposure_Time.csv"
filename = os.path.join(metadata_dir, filename)
exp_df = pd.read_csv(filename)

print(exp_df)

# Verify the file imported correctly
# File length
print("df's shape: ", exp_df.shape)
# Headers
expected_headers = ['Round', 'Target', 'Exp', 'Channel']
compare_headers(expected_headers, exp_df.columns.values, "Imported metadata file")

# Missingness
if exp_df.isnull().any().any():
    print("\nexp_df has null value(s) in row(s):")
    print(exp_df[exp_df.isna().any(axis=1)])
else:
    print("\nNo null values detected.")


# In[118]:


if len(exp_df['Target']) > len(exp_df['Target'].unique()):
    print("One or more non-unique Target values in exp_df. Currently not supported.")
    exp_df = exp_df.drop_duplicates(subset='Target').reset_index(drop=True)


# In[119]:


# Sort exp_df by the values in the 'Target' column in ascending order, then retrieve the first few rows of the sorted df
exp_df.sort_values(by=['Target']).head()


# In[120]:


# Create lowercase version of target
exp_df['target_lower'] = exp_df['Target'].str.lower()
exp_df.head()


# In[121]:


# Create df that contains the marker intensity columns in our df that aren't in not_intensities
intensities = pd.DataFrame({'full_column': df.columns.values[~df.columns.isin(not_intensities)]})

intensities


# In[122]:


# Extract the marker information from 'full_column', which corresponds to the full column name in the big dataframe
# The regex '([^\W_]+)' captures the leading run of alphanumeric characters, i.e. everything before the first underscore
intensities['marker'] = intensities['full_column'].str.extract(r'([^\W_]+)')
# Convert to lowercase
intensities['marker_lower'] = intensities['marker'].str.lower()

intensities
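
# For example, a full_column of 'CD45_Cell_Intensity_Average' yields marker 'CD45'
# and marker_lower 'cd45', which is what exp_df's target_lower is matched against below.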


# In[123]:


# Subset the intensities df to exclude any column pertaining to DAPI
intensities = intensities.loc[intensities['marker_lower'] != 'dapi']

intensities.head()


# In[124]:


# Merge intensities and exp_df together to create metadata
metadata = pd.merge(exp_df, intensities, how='left', left_on='target_lower', right_on='marker_lower')
metadata = metadata.drop(columns=['marker_lower'])
metadata = metadata.dropna()

# Target is the capitalization from the Exposure_Time.csv
# target_lower is Target in small caps
# marker is the extracted first component of the full column in segmentation data, with corresponding capitalization
metadata


# In[125]:


# Add a column to signify marker target localisation.
# Use a lambda to determine the segmented location of each intensity marker column and update metadata accordingly,
# using the add_metadata_location() function in my_modules.py
metadata['localisation'] = metadata.apply(
    lambda row: add_metadata_location(row), axis=1)


# In[126]:


mlid = metadata


# In[127]:


# Save this data structure to the metadata folder
# We don't want to add color in, because it's better to treat color the same for round, channel, and sample
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)
metadata.to_csv(filename, index=False)
print("The marker_intensity_metadata.csv file was created !")


# ## I.7. COLORS WORKFLOW

# ### I.7.1. CHANNELS COLORS


# We want colors that are categorical, since Channel is a non-ordered category (yes, they are numbered, but arbitrarily).
# A categorical color palette will have dissimilar colors.
# Get those unique colors
if len(metadata.Channel.unique()) > 10:
    print("WARNING: There are more unique channel values than "
          "there are colors to choose from. Select a different palette, e.g., "
          "continuous palette 'husl'.")
channel_color_values = sb.color_palette("bright", n_colors=len(metadata.Channel.unique()))
# chose a categorical palette ('bright') because we're unlikely to have > 10 channels

# You can customize the colors for each channel here
custom_colors = {
    'c2': 'lightgreen',
    'c3': 'tomato',
    'c4': 'pink',
    'c5': 'turquoise'
}

# Note: sb.palplot() only draws the palette; it returns None
sb.palplot(sb.color_palette([custom_colors.get(ch, 'blue') for ch in metadata.Channel.unique()]))

# Display those unique custom colors
print("Unique channels are:", metadata.Channel.unique())
sb.palplot(sb.color_palette(channel_color_values))


# In[131]:


# Function to create a palette plot with custom colors
def create_palette_plot():
    # Get unique channels
    unique_channels = metadata.Channel.unique()

    # Define custom colors for each channel
    custom_colors = {
        'c2': 'lightgreen',
        'c3': 'tomato',
        'c4': 'pink',
        'c5': 'turquoise'
    }

    # Get custom colors for each channel
    colors = [custom_colors.get(ch, 'blue') for ch in unique_channels]

    # Create a palette plot (palplot draws the palette and returns None)
    palette_plot = sb.palplot(sb.color_palette(colors))
    channel_color_values = sb.color_palette("bright", n_colors=len(metadata.Channel.unique()))
    channel_color_values = sb.palplot(channel_color_values)
    return palette_plot, channel_color_values


# Create the palette plot directly
palette_plot = create_palette_plot()

# Define the Panel app layout
app_palette_plot = pn.Column(
    pn.pane.Markdown("### Custom Color Palette"),
    palette_plot,
)

# Function to create a palette plot with custom colors (overrides the version above)
def create_palette_plot(custom_colors):
    # Get unique channels
    unique_channels = metadata.Channel.unique()

    # Get custom colors for each channel
    colors = [custom_colors.get(ch, 'blue') for ch in unique_channels]

    # Create a palette plot (palplot)
    palette_plot = sb.palplot(sb.color_palette(colors))

    return palette_plot

# Define custom colors for each channel
custom_colors = {
    'c2': 'lightgreen',
    'c3': 'tomato',
    'c4': 'pink',
    'c5': 'turquoise'
}

# Display those unique custom colors
print("Unique channels are:", metadata.Channel.unique())
app_palette_plot = create_palette_plot(custom_colors)


#app_palette_plot.servable()


# In[133]:


# Store in a dictionary
channel_color_dict = dict(zip(metadata.Channel.unique(), channel_color_values))
channel_color_dict
# Cast each (R, G, B) value to plain floats (calling np.float64 on a whole tuple is deprecated in recent NumPy)
for k, v in channel_color_dict.items():
    channel_color_dict[k] = tuple(float(x) for x in v)

channel_color_dict


# In[134]:


color_df_channel = color_dict_to_df(channel_color_dict, "Channel")

# Save to file in the metadata directory
filename = "channel_color_data.csv"
filename = os.path.join(metadata_dir, filename)
color_df_channel.to_csv(filename, index=False)

color_df_channel


# In[135]:


# Legend of channel info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in channel_color_dict.keys():
    h = g.bar(0, 0, color=channel_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Channel')
# bbox_to_anchor=(10,10),
# bbox_transform=plt.gcf().transFigure)

filename = "Channel_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')

# ### I.7.2. ROUNDS COLORS


# We want colors that are sequential, since Round is an ordered category.
# We can still generate colors that are easy to distinguish. Also, many of the categorical palettes cap at about 10 unique colors, and repeat from there.
# We do not want any repeats!
round_color_values = sb.cubehelix_palette(
    len(metadata.Round.unique()), start=1, rot=-0.75, dark=0.19, light=.85, reverse=True)
# round_color_values = sb.color_palette("cubehelix", n_colors=len(metadata.Round.unique()))
# chose 'cubehelix' because it is sequential, and round is a continuous process
# each color value is a tuple of three values: (R, G, B)
print(metadata.Round.unique())

sb.palplot(sb.color_palette(round_color_values))

# cubehelix parameters: 'start' sets the starting hue, 'rot' the number of rotations
# through the hue space, 'dark'/'light' bound the lightness range, and 'reverse=True'
# flips the light-to-dark direction of the palette.


# In[137]:


# Store in a dictionary
round_color_dict = dict(zip(metadata.Round.unique(), round_color_values))

# Cast each (R, G, B) value to plain floats (as above)
for k, v in round_color_dict.items():
    round_color_dict[k] = tuple(float(x) for x in v)

round_color_dict


# In[138]:


color_df_round = color_dict_to_df(round_color_dict, "Round")

# Save to file in the metadata directory
filename = "round_color_data.csv"
filename = os.path.join(metadata_dir, filename)
color_df_round.to_csv(filename, index=False)

color_df_round

# Legend of round info only

round_legend = plt.figure(figsize=(1, 1)).add_subplot(111)
round_legend.axis('off')
handles = []
for item in round_color_dict.keys():
    h = round_legend.bar(0, 0, color=round_color_dict[item],
                         label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Round')
# bbox_to_anchor=(10,10),
# bbox_transform=plt.gcf().transFigure)

filename = "Round_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')


# ### I.7.3. SAMPLES COLORS

# In[140]:


# We want colors that are neither sequential nor categorical.
# Categorical would be ideal if we could generate an arbitrary number of colors, but I do not think that we can.
# Hence, we will choose `n` colors from a continuous palette. First we will generate the right number of colors. Later, we will assign TMA samples to gray.

# Get those unique colors
color_values = sb.color_palette("husl", n_colors=len(ls_samples))  # 'HLS'
# each color value is a tuple of three values: (R, G, B)

# Display those unique colors
sb.palplot(sb.color_palette(color_values))


# In[141]:


TMA_samples = [s for s in df.Sample_ID.unique() if 'TMA' in s]
TMA_color_values = sb.color_palette(n_colors=len(TMA_samples), palette="gray")
sb.palplot(sb.color_palette(TMA_color_values))


# In[142]:


# Store in a dictionary
color_dict = dict(zip(df.Sample_ID.unique(), color_values))

# Replace all TMA samples' colors with gray
i = 0
for key in color_dict.keys():
    if 'TMA' in key:
        color_dict[key] = TMA_color_values[i]
        i += 1

color_dict

color_df_sample = color_dict_to_df(color_dict, "Sample_ID")

# Save to file in the metadata directory
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)
color_df_sample.to_csv(filename, index=False)

color_df_sample


# Legend of sample info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in color_dict.keys():
    h = g.bar(0, 0, color=color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Sample')

filename = "Sample_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')


# ### I.7.4. CLUSTERS COLORS

'''if 'cluster' in df.columns:
    cluster_color_values = sb.color_palette("hls", n_colors=len(df.cluster.unique()))

    #print(sorted(test_df.cluster.unique()))
    # Display those unique colors
    sb.palplot(sb.color_palette(cluster_color_values))

    cluster_color_dict = dict(zip(sorted(test_df.cluster.unique()), cluster_color_values))
    print(cluster_color_dict)

    # Create dataframe
    cluster_color_df = color_dict_to_df(cluster_color_dict, "cluster")
    cluster_color_df.head()

    # Save to file in the metadata directory
    filename = "cluster_color_data.csv"
    filename = os.path.join(metadata_dir, filename)
    cluster_color_df.to_csv(filename, index=False)


# Legend of cluster info only

if 'cluster' in df.columns:
    g = plt.figure(figsize=(1, 1)).add_subplot(111)
    g.axis('off')
    handles = []
    for item in sorted(cluster_color_dict.keys()):
        h = g.bar(0, 0, color=cluster_color_dict[item],
                  label=item, linewidth=0)
        handles.append(h)
    first_legend = plt.legend(handles=handles, loc='upper right', title='Cluster')

    filename = "Clustertype_legend.png"
    filename = os.path.join(metadata_images_dir, filename)
    plt.savefig(filename, bbox_inches='tight')'''

mlid.head()


metadata


import io
pn.extension()

file_input = pn.widgets.FileInput()

file_input


def transform_data(variable, window, sigma):
    """Calculates the rolling average and identifies outliers"""
    avg = metadata[variable].rolling(window=window).mean()
    residual = metadata[variable] - avg
    std = residual.rolling(window=window).std()
    outliers = np.abs(residual) > std * sigma
    return avg, avg[outliers]


def get_plot(variable="Exp", window=30, sigma=10):
    """Plots the rolling average and the outliers"""
    avg, highlight = transform_data(variable, window, sigma)
    return avg.hvplot(
        height=300, legend=False,
    ) * highlight.hvplot.scatter(padding=0.1, legend=False)


variable_widget = pn.widgets.Select(name="Target", value="Exp", options=list(metadata.columns))
window_widget = pn.widgets.IntSlider(name="window", value=30, start=1, end=60)
sigma_widget = pn.widgets.IntSlider(name="sigma", value=10, start=0, end=20)

app = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Quality Control",
    main=[
        pn.Tabs(
            ("Dataframes", pn.Column(
                pn.Row(csv_files_button, pn.bind(handle_click, csv_files_button.param.clicks)),
                pn.pane.Markdown("### The Dataframe uploaded:"), pn.pane.DataFrame(initial_dataframe),
                #pn.pane.Markdown("### The Exposure time DataFrame is :"), pn.pane.DataFrame(exp_df.head()),
                pn.pane.Markdown("### The DataFrame after merging CycIF data x metadata :"), pn.pane.DataFrame(merged_dataframe.head()),
            )),
            ("Quality Control", pn.Column(
                quality_check(quality_control_df, not_intensities)
                #pn.pane.Markdown("### The Quality check results are:"), quality_check_results(check_index, check_shape, check_no_null, check_zero_intensities)
            )),
            ("Intensities", pn.Column(
                pn.pane.Markdown("### The Not Intensities DataFrame after processing is :"), pn.pane.DataFrame(not_intensities_df, height=250),
                pn.pane.Markdown("### Select Intensities to be included"), updated_intensities,
                #pn.pane.Markdown("### The Intensities DataFrame"), intensities_df,
                #pn.pane.Markdown("### The metadata obtained that specifies the localisation:"), pn.pane.DataFrame(mlid.head())
            )),
            ("Plots", pn.Column(
                #pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(nucleus_size_line_graph_with_histogram, num_of_cell_removal),
                #pn.pane.Markdown(" ### Nucleus Size Distribution: "), pn.Row(plot1, layout2),
                #pn.pane.Markdown("### Nucleus Distribution Plot:"), pn.Column(nucleus_size_plot, nucleus_size_graph),
                pn.pane.Markdown(" ### Intensity Average Plot:"), pn.Row(selected_marker_plot, num_of_cell_removal_intensity),
                #pn.Column(pn.Column(column_dropdown, generate_plot_button), quantile_slider, plot),
                #pn.pane.Markdown("### Cytoplasm Intensity Plot:"), cytoplasm_intensity_plot,
                #pn.pane.Markdown("### AF555_Cell_Intensity_Average:"), quantile_output_app,
                #pn.pane.Markdown("### Distribution of AF555_Cell_Intensity_Average with Quantiles:"), quantile_intensity_plot)
            )),

        ),
    ])

app.servable()
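
# 'panel serve Quality_Control.py' picks up the app marked .servable() above;
# the __main__ guard below also allows a direct 'python Quality_Control.py' launch on port 5007.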

if __name__ == "__main__":
    pn.serve(app, port=5007)