#!/usr/bin/env python
# coding: utf-8

# In[1]:

import os
import random
import re
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib.colors as mplc
import subprocess
import warnings

from scipy import signal

import plotly.figure_factory as ff
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, plot
import plotly.express as px
import panel as pn  # needed for pn.extension() below

from my_modules import *

os.getcwd()

# In[2]:

pn.extension()

# Silence FutureWarnings & UserWarnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
# ## II.2. *DIRECTORIES

# In[5]:

# Set base directory

##### MAC WORKSTATION #####
#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'
###########################

##### WINDOWS WORKSTATION #####
#base_dir = r'C:\Users\LaboLabrie\gerz2701\cyCIF-pipeline\Set_B'
###############################

##### LOCAL WORKSTATION #####
#input_path = '/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'
#############################

#set_name = 'Set_A'
#set_name = 'test'
#present_dir = os.path.dirname(os.path.realpath(__file__))
#input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
#base_dir = input_path

'''
# Function to change permissions recursively with error handling
def change_permissions_recursive(path, mode):
    for root, dirs, files in os.walk(path):
        for dir in dirs:
            try:
                os.chmod(os.path.join(root, dir), mode)
            except Exception as e:
                print(f"An error occurred while changing permissions for directory {os.path.join(root, dir)}: {e}")
        for file in files:
            try:
                os.chmod(os.path.join(root, file), mode)
            except Exception as e:
                print(f"An error occurred while changing permissions for file {os.path.join(root, file)}: {e}")

change_permissions_recursive(base_dir, 0o777)
change_permissions_recursive('/code', 0o777)
'''

base_dir = '/code/wetransfer_data-zip_2024-05-17_1431'
set_path = 'test'
selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
ls_samples = ['Ashlar_Exposure_Time.csv', 'new_data.csv', 'DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']
set_name = set_path
# In[7]:

project_name = set_name                  # Project name
step_suffix = 'bs'                       # Current step (here part II: background subtraction)
previous_step_suffix_long = "_qc_eda"    # Previous step (here the QC/EDA notebook)

# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)

# BS output directory
output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
# BS images subdirectory
output_images_dir = os.path.join(output_data_dir, "images")

# Metadata directory
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
# Metadata images subdirectory
metadata_images_dir = os.path.join(metadata_dir, "images")

# Create directories if they don't already exist
for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
    if not os.path.exists(d):
        print("Creating the", d, "directory...")
        os.makedirs(d)
    else:
        print("The", d, "directory already exists!")

os.chdir(input_data_dir)
# In[8]:

# Verify paths
print('base_dir:', base_dir)
print('input_data_dir:', input_data_dir)
print('output_data_dir:', output_data_dir)
print('output_images_dir:', output_images_dir)
print('metadata_dir:', metadata_dir)
print('metadata_images_dir:', metadata_images_dir)
# ## II.3. FILES
# Don't forget to put your data in the projname_data directory!

# ### II.3.1. METADATA

# In[9]:

# Import all metadata we need from the QC/EDA chapter

# METADATA
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
metadata = pd.read_csv(filename)

# Verify size with the verify_line_no() function from my_modules.py
#verify_line_no(filename, metadata.shape[0] + 1)

# Verify headers
exp_cols = ['Round', 'Target', 'Channel', 'target_lower', 'full_column', 'marker', 'localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")

metadata = metadata.dropna()
metadata.head()
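# verify_line_no() is imported from my_modules and not shown in this script.
# A minimal sketch of what it presumably does, assuming it simply compares a
# file's raw line count against an expected value (the real helper in
# my_modules may behave differently):
def verify_line_no_sketch(filename, expected_no_lines):
    with open(filename, 'r') as fh:
        no_lines = sum(1 for _ in fh)  # count every line, header included
    if no_lines != expected_no_lines:
        print(f"WARNING: {filename} has {no_lines} lines, expected {expected_no_lines}.")
    else:
        print(f"{filename} has the expected number of lines ({expected_no_lines}).")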
# ### II.3.2. NOT_INTENSITIES

# In[10]:

# NOT_INTENSITIES
filename = "not_intensities.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information: take str, strip whitespace, split on newline character
with open(filename, 'r') as fh:
    not_intensities = fh.read().strip().split("\n")

# NOTE: the list read from file is immediately overridden by this hardcoded list
not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID', 'cell_type', 'cell_subtype', 'cluster', 'ID',
                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']

# Verify size
print("Verifying data read from file is the correct length...\n")
verify_line_no(filename, len(not_intensities))

# Print to console
print("not_intensities =\n", not_intensities)
import os
import pandas as pd

# Function to compare headers (also defined in my_modules.py)
def compare_headers(expected, actual, description):
    missing = [col for col in expected if col not in actual]
    if missing:
        print(f"WARNING: Missing expected columns in {description}: {missing}")
    else:
        print(f"All expected columns are present in {description}.")

# NOTE: this block redefines the directories relative to the script location,
# overriding the /code base directory set above.

# Get the current script directory
present_dir = os.path.dirname(os.path.realpath(__file__))

# Define the input path
input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
base_dir = input_path
set_path = 'test'

# Project and step names
project_name = set_path                  # Project name
previous_step_suffix_long = "_qc_eda"    # Previous step (here the QC/EDA notebook)

# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)

# Metadata directories
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
metadata_images_dir = os.path.join(metadata_dir, "images")

# Define writable directory
writable_directory = '/tmp'

# Check and read metadata file
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)

# Check if the file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
metadata = pd.read_csv(filename)

# Verify headers
exp_cols = ['Round', 'Target', 'Channel', 'target_lower', 'full_column', 'marker', 'localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")

metadata = metadata.dropna()
print(metadata.head())

# Example of writing to the writable directory
output_file_path = os.path.join(writable_directory, 'processed_metadata.csv')
try:
    metadata.to_csv(output_file_path, index=False)
    print(f"Processed metadata written successfully to {output_file_path}")
except PermissionError as e:
    print(f"Permission denied: Unable to write the file at {output_file_path}. Error: {e}")
except Exception as e:
    print(f"An error occurred: {e}")
# ### II.3.3. FULL_TO_SHORT_COLUMN_NAMES

# In[11]:

# FULL_TO_SHORT_COLUMN_NAMES
filename = "full_to_short_column_names.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn the two-column dataframe into a {full_name: short_name} dictionary
full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]

# Print information
print('full_to_short_names =\n', full_to_short_names)
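# A quick illustration of the set_index().T.to_dict('records')[0] idiom on a
# toy frame (the real CSV is assumed to have the columns 'full_name' and
# 'short_name', as used above and in section II.3.4; the values here are made up):
toy = pd.DataFrame({'full_name': ['AF555_Cell_Intensity_Average'], 'short_name': ['AF555_Cell']})
# set_index + transpose leaves a single row whose record is the desired mapping
print(toy.set_index('full_name').T.to_dict('records')[0])
# -> {'AF555_Cell_Intensity_Average': 'AF555_Cell'}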
# ### II.3.4. SHORT_TO_FULL_COLUMN_NAMES

# In[12]:

# SHORT_TO_FULL_COLUMN_NAMES
filename = "short_to_full_column_names.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]

# Print information
print('short_to_full_names =\n', short_to_full_names)
# ### II.3.5. SAMPLES COLORS

# In[13]:

# COLORS INFORMATION
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# Our tuple of float values for rgb, (r, g, b), was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats.
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()

# Print information
print('sample_color_dict =\n', sample_color_dict)
# Keep the dictionary and the dataframe view under separate names
sample_color_df = pd.DataFrame.from_dict(sample_color_dict, orient='index', columns=['R', 'G', 'B'])

# In[14]:

sample_color_df
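# rgb_tuple_from_str() comes from my_modules and is not shown here. A minimal
# sketch of the parsing it presumably performs, assuming the stored strings
# look like '(0.1, 0.2, 0.3)' (the real helper may handle more formats):
def rgb_tuple_from_str_sketch(rgb_str):
    # strip the parentheses, split on commas, convert each part to float
    return tuple(float(part) for part in rgb_str.strip('()').split(','))

print(rgb_tuple_from_str_sketch('(0.1333, 0.5451, 0.1333)'))  # -> (0.1333, 0.5451, 0.1333)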
# ### II.3.6. CHANNELS COLORS

# In[15]:

# CHANNELS
filename = "channel_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# Our tuple of float values for rgb, (r, g, b), was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats.
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
channel_color_dict = df.set_index('Channel')['rgb'].to_dict()

# Print information
print('channel_color_dict =\n', channel_color_dict)
channel_color_df = pd.DataFrame.from_dict(channel_color_dict, orient='index', columns=['R', 'G', 'B'])

# In[16]:

channel_color_df
# ### II.3.7. ROUNDS COLORS

# In[17]:

# ROUND
filename = "round_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# Our tuple of float values for rgb, (r, g, b), was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats.
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
round_color_dict = df.set_index('Round')['rgb'].to_dict()

# Print information
print('round_color_dict =\n', round_color_dict)
round_color_df = pd.DataFrame.from_dict(round_color_dict, orient='index', columns=['R', 'G', 'B'])

# In[18]:

round_color_df
# ### II.3.8. DATA

# In[19]:

# DATA
# List the QC/EDA output files in the input directory, if it exists
if os.path.exists(input_data_dir):
    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_qc_eda.csv")]
    print("The following CSV files were detected:")
    print(ls_samples)
else:
    print(f"The directory {input_data_dir} does not exist.")
# In[20]:

# Import all the other files
dfs = {}

# First gather information on expected headers using the first file in ls_samples:
# read in the first row of the file corresponding to the first sample (index = 0)
df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)
expected_headers = df.columns.values
print(expected_headers)

###############################
# !! This may take a while !! #
###############################
# Iterate over a copy of the list so that removing a sample mid-loop is safe
for sample in list(ls_samples):
    file_path = os.path.join(input_data_dir, sample)

    try:
        # Read the CSV file
        df = pd.read_csv(file_path, index_col=0)
        # Check if the DataFrame is empty; if so, don't continue processing it
        if not df.empty:
            # Reorder the columns to match the expected headers list
            df = df.reindex(columns=expected_headers)
            print(sample, "file is processed!\n")
            # Add df to dfs only on success
            dfs[sample] = df
    except pd.errors.EmptyDataError:
        print(f'\nEmpty data error in {sample} file. Removing from analysis...')
        ls_samples.remove(sample)

#print(dfs)
# In[21]:

# Merge dfs into one df
df = pd.concat(dfs.values(), ignore_index=False, sort=False)
#del dfs

df.head()

# In[22]:

df.shape

# In[23]:

# Check for NaN entries (there should not be any unless columns do not align)
# False means no NaN entries, True means NaN entries
df.isnull().any().any()
# ## II.4. *FILTERING

# In[24]:

print("Number of cells before filtering:", df.shape[0])
cells_before_filter = f"Number of cells before filtering: {df.shape[0]}"

# In[25]:

#print(df)

# In[26]:

# Delete small cells and objects with high AF555 signal (RBCs)
# We usually use the 95th percentile calculated during QC/EDA
df = df.loc[(df['Nucleus_Size'] > 42)]
df = df.loc[(df['Nucleus_Size'] < 216)]
print("Number of cells after filtering on nucleus size:", df.shape[0])
cells_after_filter_nucleus = f"Number of cells after filtering on nucleus size: {df.shape[0]}"

df = df.loc[(df['AF555_Cell_Intensity_Average'] < 2000)]
print("Number of cells after filtering on AF555 cell intensity:", df.shape[0])
cells_after_filter_intensity = f"Number of cells after filtering on AF555 cell intensity: {df.shape[0]}"
# In[27]:

# Assign cell type
# Randomly assign a cell type to each row (random assignment here is just for development purposes)
random_values = np.random.randint(0, 10, size=len(df))

# Assign cell type; note the function ignores its argument, so this amounts to
# one independent random draw per cell
def assign_cell_type(n):
    return np.random.choice(['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL'])

df['cell_type'] = np.vectorize(assign_cell_type)(random_values)
df['cell_subtype'] = df['cell_type'].copy()

# In[28]:

filtered_dataframe = df  # a reference to df, not a copy
df.head()

# In[29]:

quality_control_df = filtered_dataframe
def check_index_format(index_str, ls_samples): | |
""" | |
Checks if the given index string follows the specified format. | |
Args: | |
index_str (str): The index string to be checked. | |
ls_samples (list): A list of valid sample names. | |
Returns: | |
bool: True if the index string follows the format, False otherwise. | |
""" | |
# Split the index string into parts | |
parts = index_str.split('_') | |
# Check if there are exactly 3 parts | |
if len(parts) != 3: | |
print(len(parts)) | |
return False | |
# Check if the first part is in ls_samples | |
sample_name = parts[0] | |
if f'{sample_name}_qc_eda.csv' not in ls_samples: | |
print(sample_name) | |
return False | |
# Check if the second part is in ['cell', 'cytoplasm', 'nucleus'] | |
location = parts[1] | |
valid_locations = ['Cell', 'Cytoplasm', 'Nucleus'] | |
if location not in valid_locations: | |
print(location) | |
return False | |
# Check if the third part is a number | |
try: | |
index = int(parts[2]) | |
except ValueError: | |
print(index) | |
return False | |
# If all checks pass, return True | |
return True | |
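# A quick check of the expected "<sample>_<Location>_<number>" index format,
# using a hypothetical sample name for illustration:
print(check_index_format('DD3S1_Cell_42', ['DD3S1_qc_eda.csv']))      # True
print(check_index_format('DD3S1_membrane_42', ['DD3S1_qc_eda.csv']))  # False: invalid location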
# In[31]:

# Let's take a look at a few features to make sure our dataframe is as expected
df.index

def check_format_ofindex(index):
    # Iterate over the index that was passed in (not the global df.index)
    for idx in index:
        if check_index_format(idx, ls_samples) is False:
            return "Bad"
    return "Good"

print(check_format_ofindex(df.index))
# In[32]:

def quality_check(file, not_intensities):
    # Load the output file
    df = file

    # Check index
    check_index = check_format_ofindex(df.index)

    # Check shape
    check_shape = df.shape

    # Check for NaN entries
    check_no_null = df.isnull().any().any()

    # Remove rows whose mean intensity across the intensity columns is zero
    mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)
    if (mean_intensity == 0).any():
        df = df.loc[mean_intensity > 0, :]
        print("df.shape after removing 0 mean values: ", df.shape)
        check_zero_intensities = f'Shape after removing 0 mean values: {df.shape}'
    else:
        print("No zero intensity values.")
        check_zero_intensities = "No zero intensity values."

    # Create a quality check results table
    quality_check_results_table = pd.DataFrame({
        'Check': ['Index', 'Shape', 'Check for NaN Entries', 'Check for Zero Intensities'],
        'Result': [str(check_index), str(check_shape), str(check_no_null), check_zero_intensities]
    })

    # Create a quality check results component
    quality_check_results_component = pn.Card(
        pn.pane.DataFrame(quality_check_results_table),
        title="Quality Control Results",
        header_background="#2196f3",
        header_color="white",
    )

    return quality_check_results_component
# ## II.5. CELL TYPES COLORS

# Establish colors to use throughout the workflow.
# We want categorical colors, since cell type is a non-ordered category;
# a categorical color palette will have dissimilar colors.

# Get those unique colors
cell_types = ['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL']
color_values = sb.color_palette("hls", n_colors=len(cell_types))
# each color value is a tuple of three values: (R, G, B)

print("Unique cell types are:", df.cell_type.unique())

# Display those unique colors
sb.palplot(sb.color_palette(color_values))

# In[33]:

# Define your custom colors for each cell type
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}

# Retrieve the list of cell types
cell_types = list(custom_colors.keys())

# Extract the corresponding colors from the dictionary
color_values = [custom_colors[cell] for cell in cell_types]

# Display the colors
sb.palplot(sb.color_palette(color_values))
# In[34]:

# Store in a dictionary
celltype_color_dict = dict(zip(cell_types, color_values))
celltype_color_dict

# In[35]:

celltype_color_df = pd.DataFrame.from_dict(celltype_color_dict, orient='index', columns=['R', 'G', 'B'])

# In[36]:

# Save color information (mapping and legend) to metadata directory
# Create dataframe
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
celltype_color_df.head()

# Save to file in metadata directory
filename = "celltype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
celltype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")

# In[37]:

celltype_color_df.head()
# In[38]: | |
# Legend of cell type info only | |
g = plt.figure(figsize = (1,1)).add_subplot(111) | |
g.axis('off') | |
handles = [] | |
for item in celltype_color_dict.keys(): | |
h = g.bar(0,0, color = celltype_color_dict[item], | |
label = item, linewidth =0) | |
handles.append(h) | |
first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cell type'), | |
filename = "Celltype_legend.png" | |
filename = os.path.join(metadata_images_dir, filename) | |
plt.savefig(filename, bbox_inches = 'tight') | |
# In[39]:

metadata

# In[40]:

df.columns.values

# In[41]:

df.shape

# In[42]:

metadata.shape
# ## II.6. *CELL SUBTYPES COLORS

# In[43]:

# Establish colors to use throughout the workflow.
# We want categorical colors, since cell subtype is a non-ordered category;
# a categorical color palette will have dissimilar colors.

# Get those unique colors
cell_subtypes = ['DC', 'B', 'TCD4', 'TCD8', 'M1', 'M2', 'Treg',
                 'IMMUNE_OTHER', 'CANCER', 'αSMA_myCAF',
                 'STROMA_OTHER', 'ENDOTHELIAL']
color_values = sb.color_palette("Paired", n_colors=len(cell_subtypes))
# each color value is a tuple of three values: (R, G, B)

print("Unique cell subtypes are:", df.cell_subtype.unique())

# Display those unique colors
sb.palplot(sb.color_palette(color_values))
# In[44]:

# Store in a dictionary
cellsubtype_color_dict = dict(zip(cell_subtypes, color_values))
cellsubtype_color_dict

# In[45]:

cellsubtype_color_df = pd.DataFrame.from_dict(cellsubtype_color_dict, orient='index', columns=['R', 'G', 'B'])

# In[46]:

# Save color information (mapping and legend) to metadata directory
# Create dataframe
cellsubtype_color_df = color_dict_to_df(cellsubtype_color_dict, "cell_subtype")

# Save to file in metadata directory
filename = "cellsubtype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
cellsubtype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")

# In[47]:

cellsubtype_color_df.head()
# In[48]: | |
# Legend of cell type info only | |
g = plt.figure(figsize = (1,1)).add_subplot(111) | |
g.axis('off') | |
handles = [] | |
for item in cellsubtype_color_dict.keys(): | |
h = g.bar(0,0, color = cellsubtype_color_dict[item], | |
label = item, linewidth =0) | |
handles.append(h) | |
first_legend = plt.legend(handles=handles, loc='upper right', title = 'Cell subtype'), | |
filename = "Cellsubtype_legend.png" | |
filename = os.path.join(metadata_images_dir, filename) | |
plt.savefig(filename, bbox_inches = 'tight') | |
# ## II.7. IMMUNE CHECKPOINT COLORS

# In[49]:

# Assign immune checkpoints ('none' for every cell at this stage)
df['cell_subtype'] = df['cell_type'].copy()
df['immune_checkpoint'] = 'none'
df

immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'None']
color_values = sb.color_palette("husl", n_colors=len(immune_checkpoint))
# each color value is a tuple of three values: (R, G, B)

print("Unique immune checkpoints are:", df.immune_checkpoint.unique())

# Display those unique colors
sb.palplot(sb.color_palette(color_values))
# In[50]:

immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']

# Base colors for the primary checkpoints
base_colors = sb.color_palette("husl", n_colors=3)  # Three distinct colors

# Function to mix two RGB colors
def mix_colors(color1, color2):
    return tuple((c1 + c2) / 2 for c1, c2 in zip(color1, color2))

# Generate mixed colors for the combinations of checkpoints
mixed_colors = [
    mix_colors(base_colors[0], base_colors[1]),      # Mix B7H4 and PDL1
    # mix_colors(base_colors[0], base_colors[2]),    # Mix B7H4 and PD1
    # mix_colors(base_colors[1], base_colors[2]),    # Mix PDL1 and PD1
    tuple(np.mean(base_colors, axis=0))              # Mix B7H4, PDL1, and PD1
]

# Adding the color for 'None'
#none_color = [(0.8, 0.8, 0.8)]  # A shade of gray

# Combine all colors into one list
color_values = base_colors + mixed_colors  #+ none_color

# Display unique immune checkpoint combinations
print("Unique immune checkpoint combinations are:", immune_checkpoint)

# Display the unique colors
sb.palplot(color_values)
# In[51]:

# Store in a dictionary
immunecheckpoint_color_dict = dict(zip(immune_checkpoint, color_values))
immunecheckpoint_color_dict

# In[52]:

# Save color information (mapping and legend) to metadata directory
# Create dataframe
immunecheckpoint_color_df = color_dict_to_df(immunecheckpoint_color_dict, "immune_checkpoint")
immunecheckpoint_color_df.head()

# Save to file in metadata directory
filename = "immunecheckpoint_color_data.csv"
filename = os.path.join(metadata_dir, filename)
immunecheckpoint_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")
# In[53]:

# Legend of immune checkpoint info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')

handles = []
for item in immunecheckpoint_color_dict.keys():
    h = g.bar(0, 0, color=immunecheckpoint_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)

first_legend = plt.legend(handles=handles, loc='upper right', title='Immune checkpoint')

filename = "Immunecheckpoint_legend.png"  # own file, so the cell subtype legend is not overwritten
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')
# ## II.8. BACKGROUND SUBTRACTION

# In[54]:

def do_background_sub(col, df, metadata):
    # Look up the localisation and channel of this intensity column...
    location = metadata.loc[metadata['full_column'] == col.name, 'localisation'].values[0]
    channel = metadata.loc[metadata['full_column'] == col.name, 'Channel'].values[0]
    # ...then find the autofluorescence (AFxxx) column for the same channel
    # and localisation, and subtract it
    af_target = metadata.loc[
        (metadata['Channel'] == channel)
        & (metadata['localisation'] == location)
        & (metadata['target_lower'].str.contains(r'^af\d{3}$')),
        'full_column'].values[0]
    return col - df.loc[:, af_target]
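# A toy illustration of the subtraction on hypothetical columns (the real
# column names come from the marker metadata; these ones are made up):
toy_meta = pd.DataFrame({
    'full_column': ['CK7_Cell_Intensity_Average', 'AF555_Cell_Intensity_Average'],
    'localisation': ['Cell', 'Cell'],
    'Channel': ['c3', 'c3'],
    'target_lower': ['ck7', 'af555'],
})
toy_df = pd.DataFrame({
    'CK7_Cell_Intensity_Average': [100.0, 80.0],
    'AF555_Cell_Intensity_Average': [10.0, 20.0],
})
# Each marker value has its channel's autofluorescence subtracted: 90.0, 60.0
print(do_background_sub(toy_df['CK7_Cell_Intensity_Average'], toy_df, toy_meta))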
# In[55]:

metadata_with_localisation = metadata
metadata_with_localisation

# In[56]:

# Normalization: divide each intensity column by its exposure time
df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: divide_exp_time(column, 'Exp', metadata), axis=0)
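# divide_exp_time() is imported from my_modules and not shown here. A minimal
# sketch, assuming it divides a column by the exposure time recorded for that
# column in the metadata ('Exp' is the exposure column name passed in above;
# the exact lookup in my_modules may differ):
def divide_exp_time_sketch(col, exp_col, metadata):
    exp_time = metadata.loc[metadata['full_column'] == col.name, exp_col].values[0]
    return col / exp_time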
# In[57]:

normalization_df = df
normalization_df.head()

# In[58]:

# Do background subtraction.
# Note: this uses a dataframe (metadata) outside of the scope of the lambda;
# be careful, as this might break inside of a script.
df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: do_background_sub(column, df, metadata), axis=0)

# In[59]:

df
background_subtraction_df = df
background_subtraction_df.head()
# In[60]:

# Drop AF columns (raw string avoids an invalid-escape warning in the regex)
df = df.filter(regex=r'^(?!AF\d{3}).*')
print(df.columns.values)

# In[61]:

intensities_df = df.loc[:, ~df.columns.isin(not_intensities)]
intensities_df

# In[62]:

normalization_df.head()
# In[63]:

metadata_df = metadata_with_localisation

# Create a list of column names from the intensities DataFrame
column_names = intensities_df.columns.tolist()

# Create a Select widget for choosing a column
column_selector = pn.widgets.Select(name='Select Column', options=column_names)

# Create a Markdown widget to display the selected column's information
column_info_md = pn.pane.Markdown(name='Column Information', width=400, object='Select a column to view its information.')

# Define a function to update the column information
def update_column_info(event):
    selected_column = event.new
    if selected_column:
        # Get the selected column's intensity values
        intensity = intensities_df[selected_column].values

        # Get the corresponding channel, localisation, and exposure from the metadata
        channel = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Channel'].values[0]
        localization = metadata_df.loc[metadata_df['full_column'] == selected_column, 'localisation'].values[0]
        exposure = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Exp'].values[0]

        # Create a Markdown string with the column information
        column_info_text = f"**Intensity:** {intensity}\n\n**Channel:** {channel}\n\n**Localization:** {localization}\n\n**Exposure:** {exposure}"

        # Update the Markdown widget with the column information
        column_info_md.object = column_info_text
    else:
        column_info_md.object = 'Select a column to view its information.'

# Watch for changes in the column selector and update the column information
column_selector.param.watch(update_column_info, 'value')

# Create a Panel app and display the widgets
bs_info = pn.Column(column_selector, column_info_md)
bs_info

normalization_df.head()
# In[65]:

df_widget = pn.widgets.DataFrame(metadata, name="MetaData")

app2 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Background Subtraction",
    main=[pn.Tabs(
        ("Background Subtraction", pn.Column(
            #pn.Column(pn.pane.Markdown("### Celltype thresholds"), pn.pane.DataFrame(celltype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cell Subtype thresholds"), pn.pane.DataFrame(cellsubtype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cells Before Filtering"), pn.pane.Str(cells_before_filter)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Nucleus"), pn.pane.Str(cells_after_filter_nucleus)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Intensity"), pn.pane.Str(cells_after_filter_intensity)),
            #pn.Column(pn.pane.Markdown("### Dataframe after filtering"), pn.pane.DataFrame(filtered_dataframe.head())),
            pn.Column(pn.pane.Markdown("### The metadata obtained that specifies the localisation:"), metadata_with_localisation.head(8)),
            pn.Column(pn.pane.Markdown("### The channels and exposure of each intensities column"), bs_info),
            pn.Column(pn.pane.Markdown("### Dataframe after performing normalization"), pn.pane.DataFrame(normalization_df.head(), width=1500)),
            pn.Column(pn.pane.Markdown("### Dataframe after background subtraction"), pn.pane.DataFrame(background_subtraction_df.head())),
        )),
        ("Quality Control", pn.Column(
            quality_check(quality_control_df, not_intensities)
            #pn.pane.Markdown("### The Quality check results are:"), quality_check_results(check_shape, check_no_null, check_all_expected_files_present, check_zero_intensities)
        ))
    )],
)

app2.servable()