KashyapiNagaHarshitha committed on
Commit 554f382 · verified · 1 Parent(s): 8e6dd8c

Upload Background_Substraction.py

Files changed (1)
  1. Background_Substraction.py +1084 -0
Background_Substraction.py ADDED
@@ -0,0 +1,1084 @@
#!/usr/bin/env python
# coding: utf-8


# In[1]:
import os
import random
import re
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib.colors as mplc
import subprocess
import warnings

from scipy import signal

import plotly.figure_factory as ff
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px
from IPython import get_ipython  # needed for the %store magics when running this notebook export as a plain script
init_notebook_mode(connected=True)

from my_modules import *
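# NOTE (assumption): the helpers used later in this script -- compare_headers(),
# verify_line_no(), rgb_tuple_from_str(), color_dict_to_df() and divide_exp_time()
# -- are expected to come from this star import of my_modules.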


# In[2]:


# Silence FutureWarnings & UserWarnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)


# In[3]:


get_ipython().run_line_magic('store', '-r base_dir')
get_ipython().run_line_magic('store', '-r set_path')
get_ipython().run_line_magic('store', '-r ls_samples')
get_ipython().run_line_magic('store', '-r selected_metadata_files')


# In[4]:


print(base_dir)
print(set_path)
print(ls_samples)
print(selected_metadata_files)


# ## II.2. *DIRECTORIES

# In[5]:


# Set base directory

##### MAC WORKSTATION #####
#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'
###########################

##### WINDOWS WORKSTATION #####
#base_dir = r'C:\Users\LaboLabrie\gerz2701\cyCIF-pipeline\Set_B'
###############################

##### LOCAL WORKSTATION #####
#base_dir = r'/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'
#############################

#set_name = 'Set_A'
#set_name = 'test'

set_name = set_path


# In[7]:


project_name = set_name                # Project name
step_suffix = 'bs'                     # Current step (here part II: background subtraction)
previous_step_suffix_long = "_qc_eda"  # Previous step (here the QC/EDA notebook)

# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)

# BS output directory
output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
# BS images subdirectory
output_images_dir = os.path.join(output_data_dir, "images")

# Metadata directory
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
# Metadata images subdirectory
metadata_images_dir = os.path.join(metadata_dir, "images")

# Create the directories if they don't already exist
for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
    if not os.path.exists(d):
        print("Creating the", d, "directory...")
        os.makedirs(d)
    else:
        print("The", d, "directory already exists!")

os.chdir(input_data_dir)


# In[8]:


# Verify paths
print('base_dir :', base_dir)
print('input_data_dir :', input_data_dir)
print('output_data_dir :', output_data_dir)
print('output_images_dir :', output_images_dir)
print('metadata_dir :', metadata_dir)
print('metadata_images_dir :', metadata_images_dir)


# ## II.3. FILES
# Don't forget to put your data in the projname_data directory!
# ### II.3.1. METADATA

# In[9]:


# Import all the metadata we need from the QC/EDA chapter

# METADATA
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)

# Check the file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
metadata = pd.read_csv(filename)

# Verify size with the verify_line_no() function in my_modules.py
#verify_line_no(filename, metadata.shape[0] + 1)

# Verify headers
exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")

metadata = metadata.dropna()
metadata.head()


# ### II.3.2. NOT_INTENSITIES

# In[10]:


# NOT_INTENSITIES
filename = "not_intensities.csv"
filename = os.path.join(metadata_dir, filename)

# Check the file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information:
# take the file contents as a string, strip whitespace, split on the newline character
with open(filename, 'r') as fh:
    not_intensities = fh.read().strip().split("\n")

# NOTE: the list read from the file is immediately overridden by this hardcoded list
not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID', 'cell_type', 'cell_subtype', 'cluster', 'ID',
                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']

# Verify size
print("Verifying data read from file is the correct length...\n")
verify_line_no(filename, len(not_intensities))
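
# verify_line_no() is provided by my_modules; as a rough, hypothetical sketch of
# the assumed behaviour (not the actual implementation), it compares the number
# of lines in the file against an expected count and warns on mismatch:
def _verify_line_no_sketch(filename, expected_n_lines):
    with open(filename) as fh:
        n_lines = sum(1 for _ in fh)  # count the lines in the file
    if n_lines != expected_n_lines:
        print(f"WARNING: {filename} has {n_lines} lines; expected {expected_n_lines}.")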

# Print to console
print("not_intensities =\n", not_intensities)


# ### II.3.3. FULL_TO_SHORT_COLUMN_NAMES

# In[11]:


# FULL_TO_SHORT_COLUMN_NAMES
filename = "full_to_short_column_names.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]

# Print information
print('full_to_short_names =\n', full_to_short_names)


# ### II.3.4. SHORT_TO_FULL_COLUMN_NAMES

# In[12]:


# SHORT_TO_FULL_COLUMN_NAMES
filename = "short_to_full_column_names.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]

# Print information
print('short_to_full_names =\n', short_to_full_names)


# ### II.3.5. SAMPLES COLORS

# In[13]:


# COLORS INFORMATION
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])


# Our tuple of float values for rgb, (r, g, b), was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats.
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)
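
# rgb_tuple_from_str() also comes from my_modules; a minimal, hypothetical sketch
# of the assumed parsing (the real helper may differ):
def _rgb_tuple_from_str_sketch(rgb_str):
    # "(0.1, 0.2, 0.3)" -> (0.1, 0.2, 0.3)
    return tuple(float(v) for v in rgb_str.strip("()").split(","))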

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()

# Print information
print('sample_color_dict =\n', sample_color_dict)
sample_color_dict = pd.DataFrame.from_dict(sample_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[14]:


sample_color_dict


# ### II.3.6. CHANNELS COLORS

# In[15]:


# CHANNELS
filename = "channel_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# Convert the rgb strings back into float tuples, as above
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
channel_color_dict = df.set_index('Channel')['rgb'].to_dict()

# Print information
print('channel_color_dict =\n', channel_color_dict)
channel_color_dict = pd.DataFrame.from_dict(channel_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[16]:


channel_color_dict


# ### II.3.7. ROUNDS COLORS

# In[17]:


# ROUND
filename = "round_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# Convert the rgb strings back into float tuples, as above
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
round_color_dict = df.set_index('Round')['rgb'].to_dict()

# Print information
print('round_color_dict =\n', round_color_dict)
round_color_dict = pd.DataFrame.from_dict(round_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[18]:


round_color_dict


# ### II.3.8. DATA

# In[19]:


# DATA
# List the files in the input directory
# Check the directory exists first
if os.path.exists(input_data_dir):
    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_qc_eda.csv")]

    print("The following CSV files were detected:")
    print([sample for sample in ls_samples])
else:
    print(f"The directory {input_data_dir} does not exist.")


# In[20]:


# Import all the other files
dfs = {}

# Set variable to hold default header values
# First gather information on expected headers using the first file in ls_samples
# Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)
expected_headers = df.columns.values
print(expected_headers)

###############################
# !! This may take a while !! #
###############################
# Iterate over a copy so that removing a sample doesn't skip the next one
for sample in ls_samples[:]:
    file_path = os.path.join(input_data_dir, sample)

    try:
        # Read the CSV file
        df = pd.read_csv(file_path, index_col=0)
    except pd.errors.EmptyDataError:
        print(f'\nEmpty data error in {sample} file. Removing from analysis...')
        ls_samples.remove(sample)
        continue

    # If the DataFrame is empty, don't continue trying to process it
    if df.empty:
        continue

    # Reorder the columns to match the expected headers list
    df = df.reindex(columns=expected_headers)
    print(sample, "file is processed!\n")
    #print(df)

    # Add df to dfs
    dfs[sample] = df

#print(dfs)


# In[21]:


# Merge dfs into one df
df = pd.concat(dfs.values(), ignore_index=False, sort=False)
#del dfs
df.head()


# In[22]:


df.shape


# In[23]:


# Check for NaN entries (there should not be any unless columns do not align)
# False means no NaN entries
# True means NaN entries
df.isnull().any().any()


# ## II.4. *FILTERING

# In[24]:


print("Number of cells before filtering :", df.shape[0])
cells_before_filter = f"Number of cells before filtering :{df.shape[0]}"


# In[25]:


#print(df)


# In[26]:


# Delete small cells and objects with a high AF555 signal (RBCs)
# We usually use the 95th percentile calculated during QC/EDA
df = df.loc[(df['Nucleus_Size'] > 42)]
df = df.loc[(df['Nucleus_Size'] < 216)]
print("Number of cells after filtering on nucleus size:", df.shape[0])
cells_after_filter_nucleus = f"Number of cells after filtering on nucleus size: {df.shape[0]}"

df = df.loc[(df['AF555_Cell_Intensity_Average'] < 2000)]
print("Number of cells after filtering on AF555 intensity:", df.shape[0])
cells_after_filter_intensity = f"Number of cells after filtering on AF555 intensity: {df.shape[0]}"


# In[27]:


# Assign cell type
# Assign a cell type to each row at first (random assignment here just for development purposes)
# Generate random values for the cell_type column
random_values = np.random.randint(0, 10, size=len(df))

# Assign cell type based on random values
# (note: the argument n is currently unused; every call draws a fresh random type)
def assign_cell_type(n):
    return np.random.choice(['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL'])

df['cell_type'] = np.vectorize(assign_cell_type)(random_values)
df['cell_subtype'] = df['cell_type'].copy()


# In[28]:


filtered_dataframe = df
df.head()


# In[29]:


quality_control_df = filtered_dataframe


# In[30]:


def check_index_format(index_str, ls_samples):
    """
    Checks whether the given index string follows the expected format.

    Args:
        index_str (str): The index string to be checked.
        ls_samples (list): A list of valid sample file names.

    Returns:
        bool: True if the index string follows the format, False otherwise.
    """
    # Split the index string into parts
    parts = index_str.split('_')

    # Check that there are exactly 3 parts
    if len(parts) != 3:
        print(len(parts))
        return False

    # Check that the first part is in ls_samples
    sample_name = parts[0]
    if f'{sample_name}_qc_eda.csv' not in ls_samples:
        print(sample_name)
        return False

    # Check that the second part is in ['Cell', 'Cytoplasm', 'Nucleus']
    location = parts[1]
    valid_locations = ['Cell', 'Cytoplasm', 'Nucleus']
    if location not in valid_locations:
        print(location)
        return False

    # Check that the third part is a number
    try:
        int(parts[2])
    except ValueError:
        print(parts[2])
        return False

    # If all checks pass, return True
    return True
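
# Example (hypothetical sample name): if 'SampleA_qc_eda.csv' is in ls_samples,
# an index like 'SampleA_Cell_42' passes, while 'SampleA_cell_42' fails on the
# capitalisation of the location part.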


# In[31]:


# Let's take a look at a few features to make sure our dataframe is as expected
df.index

def check_format_ofindex(index):
    for idx in index:
        check_index = check_index_format(idx, ls_samples)
        if check_index is False:
            index_format = "Bad"
            return index_format

    index_format = "Good"
    return index_format

print(check_format_ofindex(df.index))


# In[32]:


import panel as pn
import pandas as pd

def quality_check(file, not_intensities):
    # Load the output file
    df = file

    # Check index
    check_index = check_format_ofindex(df.index)

    # Check shape
    check_shape = df.shape

    # Check for NaN entries
    check_no_null = df.isnull().any().any()

    mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)
    if (mean_intensity == 0).any():
        df = df.loc[mean_intensity > 0, :]
        print("df.shape after removing 0 mean values: ", df.shape)
        check_zero_intensities = f'Shape after removing 0 mean values: {df.shape}'
    else:
        print("No zero intensity values.")
        check_zero_intensities = "No zero intensity values."

    # Create a quality check results table
    quality_check_results_table = pd.DataFrame({
        'Check': ['Index', 'Shape', 'Check for NaN Entries', 'Check for Zero Intensities'],
        'Result': [str(check_index), str(check_shape), str(check_no_null), check_zero_intensities]
    })

    # Create a quality check results component
    quality_check_results_component = pn.Card(
        pn.pane.DataFrame(quality_check_results_table),
        title="Quality Control Results",
        header_background="#2196f3",
        header_color="white",
    )

    return quality_check_results_component


# ## II.5. CELL TYPES COLORS
# Establish colors to use throughout the workflow

# We want colors that are categorical, since cell type is a non-ordered category.
# A categorical color palette will have dissimilar colors.
# Get those unique colors
cell_types = ['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL']
color_values = sb.color_palette("hls", n_colors=len(cell_types))
# each color value is a tuple of three values: (R, G, B)

print("Unique cell types are:", df.cell_type.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))

# In[33]:


# Define your custom colors for each cell type
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}

# Retrieve the list of cell types
cell_types = list(custom_colors.keys())

# Extract the corresponding colors from the dictionary
color_values = [custom_colors[cell] for cell in cell_types]

# Display the colors
sb.palplot(sb.color_palette(color_values))


# In[34]:


# Store in a dictionary
celltype_color_dict = dict(zip(cell_types, color_values))
celltype_color_dict


# In[35]:


celltype_color_df = pd.DataFrame.from_dict(celltype_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[36]:


# Save color information (mapping and legend) to the metadata directory
# Create dataframe (this overwrites the R/G/B frame built above)
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
celltype_color_df.head()
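
# color_dict_to_df() is a my_modules helper; a plausible sketch (assumption, not
# the actual implementation): flatten the dict into one row per entry, keyed by
# the column named in the second argument:
def _color_dict_to_df_sketch(color_dict, key_name):
    rows = [{key_name: k, 'color': v} for k, v in color_dict.items()]
    return pd.DataFrame(rows)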

# Save to file in the metadata directory
filename = "celltype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
celltype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")


# In[37]:


celltype_color_df.head()


# In[38]:


# Legend of cell type info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in celltype_color_dict.keys():
    h = g.bar(0, 0, color=celltype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell type')


filename = "Celltype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')


# In[39]:


metadata


# In[40]:


df.columns.values


# In[41]:


df.shape


# In[42]:


metadata.shape


# ## II.6. *CELL SUBTYPES COLORS

# In[43]:


# Establish colors to use throughout the workflow

# We want colors that are categorical, since cell subtype is a non-ordered category.
# A categorical color palette will have dissimilar colors.
# Get those unique colors
cell_subtypes = ['DC', 'B', 'TCD4', 'TCD8', 'M1', 'M2', 'Treg',
                 'IMMUNE_OTHER', 'CANCER', 'αSMA_myCAF',
                 'STROMA_OTHER', 'ENDOTHELIAL']
color_values = sb.color_palette("Paired", n_colors=len(cell_subtypes))
# each color value is a tuple of three values: (R, G, B)

print("Unique cell subtypes are:", df.cell_subtype.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))


# In[44]:


# Store in a dictionary
cellsubtype_color_dict = dict(zip(cell_subtypes, color_values))
cellsubtype_color_dict


# In[45]:


cellsubtype_color_df = pd.DataFrame.from_dict(cellsubtype_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[46]:


# Save color information (mapping and legend) to the metadata directory
# Create dataframe (this overwrites the R/G/B frame built above)
cellsubtype_color_df = color_dict_to_df(cellsubtype_color_dict, "cell_subtype")

# Save to file in the metadata directory
filename = "cellsubtype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
cellsubtype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")


# In[47]:


cellsubtype_color_df.head()


# In[48]:


# Legend of cell subtype info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in cellsubtype_color_dict.keys():
    h = g.bar(0, 0, color=cellsubtype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell subtype')


filename = "Cellsubtype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')


# ## II.7. IMMUNE CHECKPOINT COLORS

# In[49]:


# Assign immune checkpoints
df['cell_subtype'] = df['cell_type'].copy()
df['immune_checkpoint'] = 'none'
df

immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'None']
color_values = sb.color_palette("husl", n_colors=len(immune_checkpoint))
# each color value is a tuple of three values: (R, G, B)

print("Unique immune checkpoints are:", df.immune_checkpoint.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))

# In[50]:


immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']

# Base colors for the primary checkpoints
base_colors = sb.color_palette("husl", n_colors=3)  # Three distinct colors

# Function to mix two RGB colors by averaging them channel-wise
def mix_colors(color1, color2):
    return tuple((c1 + c2) / 2 for c1, c2 in zip(color1, color2))
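
# e.g. mix_colors((1.0, 0.0, 0.0), (0.0, 0.0, 1.0)) returns (0.5, 0.0, 0.5)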

# Generate mixed colors for the combinations of checkpoints
mixed_colors = [
    mix_colors(base_colors[0], base_colors[1]),  # Mix B7H4 and PDL1
    # mix_colors(base_colors[0], base_colors[2]),  # Mix B7H4 and PD1
    # mix_colors(base_colors[1], base_colors[2]),  # Mix PDL1 and PD1
    tuple(np.mean(base_colors, axis=0))  # Mix B7H4, PDL1, and PD1
]

# Adding the color for 'None'
#none_color = [(0.8, 0.8, 0.8)]  # A shade of gray

# Combine all colors into one list
color_values = base_colors + mixed_colors  #+ none_color

# Display unique immune checkpoint combinations
print("Unique immune checkpoint combinations are:", immune_checkpoint)
# Display the unique colors
sb.palplot(color_values)


# In[51]:


# Store in a dictionary
immunecheckpoint_color_dict = dict(zip(immune_checkpoint, color_values))
immunecheckpoint_color_dict


# In[52]:


# Save color information (mapping and legend) to the metadata directory
# Create dataframe
immunecheckpoint_color_df = color_dict_to_df(immunecheckpoint_color_dict, "immune_checkpoint")
immunecheckpoint_color_df.head()

# Save to file in the metadata directory
filename = "immunecheckpoint_color_data.csv"
filename = os.path.join(metadata_dir, filename)
immunecheckpoint_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")


# In[53]:


# Legend of immune checkpoint info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in immunecheckpoint_color_dict.keys():
    h = g.bar(0, 0, color=immunecheckpoint_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Immune checkpoint')


filename = "Immunecheckpoint_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')


# ## II.8. BACKGROUND SUBTRACTION

# In[54]:


def do_background_sub(col, df, metadata):
    # For each intensity column, find the autofluorescence (AF) column measured
    # on the same channel and localisation, and subtract it cell by cell
    #print(col.name)
    location = metadata.loc[metadata['full_column'] == col.name, 'localisation'].values[0]
    #print('location = ' + location)
    channel = metadata.loc[metadata['full_column'] == col.name, 'Channel'].values[0]
    #print('channel = ' + channel)
    af_target = metadata.loc[
        (metadata['Channel'] == channel)
        & (metadata['localisation'] == location)
        & (metadata['target_lower'].str.contains(r'^af\d{3}$')),
        'full_column'].values[0]
    return col - df.loc[:, af_target]
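
# Toy example with illustrative values and a hypothetical marker: if
# 'CK7_Cell_Intensity_Average' was measured on the same channel and localisation
# as 'AF555_Cell_Intensity_Average', then a cell with CK7 = 1200 and AF555 = 200
# comes out with a background-subtracted CK7 of 1000.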


# In[55]:


metadata_with_localisation = metadata
metadata_with_localisation


# In[56]:


# Normalization
df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: divide_exp_time(column, 'Exp', metadata), axis=0)
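
# divide_exp_time() is another my_modules helper; a minimal, hypothetical sketch
# of the assumed behaviour (divide each intensity column by the exposure time
# recorded for it in the metadata, here in the 'Exp' column):
def _divide_exp_time_sketch(col, exp_col_name, metadata):
    exp_time = metadata.loc[metadata['full_column'] == col.name, exp_col_name].values[0]
    return col / exp_time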


# In[57]:


normalization_df = df
normalization_df.head()


# In[58]:


# Do background subtraction
# This uses a df (metadata) outside of the scope of the lambda...
# careful that this might break inside of a script...

df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: do_background_sub(column, df, metadata), axis=0)


# In[59]:


df
background_substraction_df = df
background_substraction_df.head()


# In[60]:


# Drop the AF columns (no longer needed once subtraction is done)
df = df.filter(regex=r'^(?!AF\d{3}).*')
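# The negative lookahead keeps every column whose name does NOT start with 'AF'
# plus three digits: e.g. 'AF555_Cell_Intensity_Average' is dropped, while
# columns such as 'Nucleus_Size' are kept.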
print(df.columns.values)


# In[61]:


intensities_df = df.loc[:, ~df.columns.isin(not_intensities)]
intensities_df


# In[62]:


normalization_df.head()


# In[63]:


metadata_df = metadata_with_localisation
intensities_df = intensities_df  # Assuming you have loaded the intensities DataFrame

# Create a list of column names from the intensities DataFrame
column_names = intensities_df.columns.tolist()

# Create a Select widget for choosing a column
column_selector = pn.widgets.Select(name='Select Column', options=column_names)

# Create a Markdown widget to display the selected column's information
column_info_md = pn.pane.Markdown(name='Column Information', width=400, object='Select a column to view its information.')

# Define a function to update the column information
def update_column_info(event):
    selected_column = event.new
    if selected_column:
        # Get the selected column's intensity values
        intensity = intensities_df[selected_column].values

        # Get the corresponding channel, localisation, and exposure from the metadata
        channel = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Channel'].values[0]
        localization = metadata_df.loc[metadata_df['full_column'] == selected_column, 'localisation'].values[0]
        exposure = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Exp'].values[0]

        # Create a Markdown string with the column information
        column_info_text = f"**Intensity:** {intensity}\n\n**Channel:** {channel}\n\n**Localization:** {localization}\n\n**Exposure:** {exposure}"

        # Update the Markdown widget with the column information
        column_info_md.object = column_info_text
    else:
        column_info_md.object = 'Select a column to view its information.'

# Watch for changes in the column selector and update the column information
column_selector.param.watch(update_column_info, 'value')

# Create a Panel app and display the widgets
bs_info = pn.Column(column_selector, column_info_md)
pn.extension()
bs_info.servable()


# In[64]:


normalization_df.head()


# In[65]:


import panel as pn
df_widget = pn.widgets.DataFrame(metadata, name="MetaData")
app2 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Background Subtraction",
    main=[pn.Tabs(
        ("Background-Subtraction", pn.Column(
            #pn.Column(pn.pane.Markdown("### Celltype thresholds"), pn.pane.DataFrame(celltype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cell Subtype thresholds"), pn.pane.DataFrame(cellsubtype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cells Before Filtering"), pn.pane.Str(cells_before_filter)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Nucleus"), pn.pane.Str(cells_after_filter_nucleus)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Intensity"), pn.pane.Str(cells_after_filter_intensity)),
            #pn.Column(pn.pane.Markdown("### Dataframe after filtering"), pn.pane.DataFrame(filtered_dataframe.head())),
            pn.Column(pn.pane.Markdown("### The metadata obtained that specifies the localisation:"), metadata_with_localisation.head(8)),
            pn.Column(pn.pane.Markdown("### The channels and exposure of each intensity column"), bs_info),
            pn.Column(pn.pane.Markdown("### Dataframe after performing normalization"), pn.pane.DataFrame(normalization_df.head(), width=1500)),
            pn.Column(pn.pane.Markdown("### Dataframe after background subtraction"), pn.Feed(background_substraction_df.head())),
        )),
        ("Quality Control", pn.Column(
            quality_check(quality_control_df, not_intensities)
            #pn.pane.Markdown("### The Quality check results are:"), quality_check_results(check_shape, check_no_null, check_all_expected_files_present, check_zero_intensities)
        )),
    )],
)


# In[66]:


# Note: ports below 1024 usually require elevated privileges; pick a higher port if this fails
app2.show(port=1003)


# ## II.9. SAVE

# In[67]:


# Save the data by Sample_ID
# Check for the existence of the output file first
for sample in ls_samples:
    sample_id = sample.split('_')[0]
    filename = os.path.join(output_data_dir, sample_id + "_" + step_suffix + ".csv")
    if os.path.exists(filename):
        print("File by name " + filename + " already exists.")
    else:
        sample_id_csv = sample_id + '.csv'
        df_save = df.loc[df['Sample_ID'] == sample_id_csv, :]
        #print(df_save)
        df_save.to_csv(filename, index=True, index_label='ID')  # index=True retains the index column
        print("File " + filename + " was created!")