KashyapiNagaHarshitha committed on
Commit e70c547 · verified · 1 Parent(s): fbc6b4f

Upload Z_Score.py

Files changed (1)
  1. Z_Score.py +1128 -0
Z_Score.py ADDED
@@ -0,0 +1,1128 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ import os
+ import random
+ import re
+ import pandas as pd
+ import numpy as np
+ import seaborn as sb
+ import matplotlib.pyplot as plt
+ import matplotlib.colors as mplc
+ import subprocess
+ import warnings
+ from scipy import signal
+ from scipy.stats import pearsonr
+ import plotly.figure_factory as ff
+ import plotly
+ import plotly.graph_objs as go
+ from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
+ import plotly.express as px
+ from my_modules import *
+ import panel as pn
+
+ # Silence FutureWarnings & UserWarnings
+ warnings.filterwarnings('ignore', category=FutureWarning)
+ warnings.filterwarnings('ignore', category=UserWarning)
+
+
+ # ## III.2. DIRECTORIES
+
+ # In[4]:
+
+
+ # Set base directory
+
+ ##### MAC WORKSTATION #####
+ #base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'
+ ###########################
+
+ ##### WINDOWS WORKSTATION #####
+ #base_dir = r'C:\Users\LaboLabrie\gerz2701\cyCIF-pipeline\Set_B'
+ ###############################
+
+ ##### LOCAL WORKSTATION #####
+ base_dir = r'/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431'
+ #############################
+
+ #set_name = 'Set_A'
+ set_name = 'test'
+
+
+ # In[5]:
+
+
+ base_dir = '/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431'
+ set_path = 'test'
+ selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
+ ls_samples = ['Ashlar_Exposure_Time.csv', 'new_data.csv', 'DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']
+ print(base_dir)
+ print(set_path)
+ print(ls_samples)
+ print(selected_metadata_files)
+
+ project_name = set_name  # Project name
+ step_suffix = 'zscore'  # Current part (here part III)
+ previous_step_suffix_long = "_bs"  # Previous part (here BS NOTEBOOK)
+
+ # Initial input data directory
+ input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)
+
+ # ZSCORE/LOG2 output directories
+ output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
+ # ZSCORE/LOG2 images subdirectory
+ output_images_dir = os.path.join(output_data_dir, "images")
+
+ # Data and Metadata directories
+ # Metadata directories
+ metadata_dir = os.path.join(base_dir, project_name + "_metadata")
+ # images subdirectory
+ metadata_images_dir = os.path.join(metadata_dir, "images")
+
+ # Create directories if they don't already exist
+ for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
+     if not os.path.exists(d):
+         print("Creation of the", d, "directory...")
+         os.makedirs(d)
+     else:
+         print("The", d, "directory already exists!")
+
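+ # Editorial note: an equivalent, more compact form of the loop above relies on
+ # os.makedirs' exist_ok flag, which silently skips directories that already exist:
+ #     os.makedirs(d, exist_ok=True)
+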
+ os.chdir(input_data_dir)
+
+
+ # In[7]:
+
+
+ # Verify paths
+ print('base_dir :', base_dir)
+ print('input_data_dir :', input_data_dir)
+ print('output_data_dir :', output_data_dir)
+ print('output_images_dir :', output_images_dir)
+ print('metadata_dir :', metadata_dir)
+ print('metadata_images_dir :', metadata_images_dir)
+
+
+ # ## III.3. FILES
+ # Don't forget to put your data in the projname_data directory!
+ # ### III.3.1. METADATA
+
+ # In[8]:
+
+
+ # Import all metadata we need from the BS chapter
+
+ # METADATA
+ filename = "marker_intensity_metadata.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ metadata = pd.read_csv(filename)
+
+ # Verify size with verify_line_no() function in my_modules.py
+ #verify_line_no(filename, metadata.shape[0] + 1)
+
+ # Verify headers
+ exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']
+ compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")
+
+ metadata = metadata.dropna()
+ metadata.head()
+
+
+ # ### III.3.2. NOT_INTENSITIES
+
+ # In[9]:
+
+
+ filename = "not_intensities.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ not_intensities = []
+ with open(filename, 'r') as fh:
+     not_intensities = fh.read().strip().split("\n")
+     # take str, strip whitespace, split on new line character
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, len(not_intensities))
+
+ # Print to console
+ print("not_intensities =\n", not_intensities)
+ pd.DataFrame(not_intensities)
+
+
+ # ### III.3.3. FULL_TO_SHORT_COLUMN_NAMES
+
+ # In[10]:
+
+
+ filename = "full_to_short_column_names.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]
+
+ # CD45 instead of CD45b
+ if project_name == 'Slide_A':
+     full_to_short_names['CD45_Cytoplasm_Intensity_Average'] = full_to_short_names.pop('CD45b_Cytoplasm_Intensity_Average')
+     full_to_short_names['CD45_Cytoplasm_Intensity_Average'] = 'CD45_Cytoplasm'
+
+ # Print information
+ print('full_to_short_names =\n', full_to_short_names)
+
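+ # Editorial sketch: the set_index().T.to_dict('records')[0] idiom above collapses a
+ # two-column mapping table into a plain dict; on a toy frame (illustrative names)
+ # it is equivalent to dict(zip(...)):
+ _demo = pd.DataFrame({'full_name': ['CD45_Cytoplasm_Intensity_Average'], 'short_name': ['CD45_Cytoplasm']})
+ assert _demo.set_index('full_name').T.to_dict('records')[0] == dict(zip(_demo['full_name'], _demo['short_name']))
+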
+ # ### III.3.4. SHORT_TO_FULL_COLUMN_NAMES
+
+ # In[11]:
+
+
+ filename = "short_to_full_column_names.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]
+
+ # CD45 instead of CD45b
+ if project_name == 'Slide_A':
+     short_to_full_names['CD45_Cytoplasm'] = short_to_full_names.pop('CD45b_Cytoplasm')
+     short_to_full_names['CD45_Cytoplasm'] = 'CD45_Cytoplasm_Intensity_Average'
+
+ # Print information
+ print('short_to_full_names =\n', short_to_full_names)
+
+
+ # ### III.3.5. SAMPLES COLORS
+
+ # In[12]:
+
+
+ filename = "sample_color_data.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+ df = df.drop(columns=['hex'])
+
+ # Our tuple of float values for rgb, (r, g, b), was read in
+ # as a string '(r, g, b)'. We need to extract the r-, g-, and b-
+ # substrings and convert them back into floats
+ df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()
+
+ # Print information
+ print('sample_color_dict =\n', sample_color_dict)
+
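+ # rgb_tuple_from_str is provided by my_modules (imported above, not shown in this
+ # file); a minimal sketch of what such a parser presumably does — hypothetical
+ # reference only, not the module's actual code:
+ def _rgb_tuple_from_str_sketch(rgb_str):
+     # "(0.1, 0.2, 0.3)" -> (0.1, 0.2, 0.3)
+     return tuple(float(x) for x in rgb_str.strip('() ').split(','))
+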
+ # ### III.3.6. CHANNELS COLORS
+
+ # In[13]:
+
+
+ filename = "channel_color_data.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+ df = df.drop(columns=['hex'])
+
+ # Convert the 'rgb' strings back into float tuples
+ df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ channel_color_dict = df.set_index('Channel')['rgb'].to_dict()
+
+ # Print information
+ print('channel_color_dict =\n', channel_color_dict)
+
+
+ # ### III.3.7. ROUNDS COLORS
+
+ # In[14]:
+
+
+ # ROUND
+ filename = "round_color_data.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+ df = df.drop(columns=['hex'])
+
+ # Convert the 'rgb' strings back into float tuples
+ df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ round_color_dict = df.set_index('Round')['rgb'].to_dict()
+
+ # Print information
+ print('round_color_dict =\n', round_color_dict)
+
+
+ # ### III.3.8. CELL TYPES COLORS
+
+ # In[15]:
+
+
+ data = pd.read_csv('/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata/celltype_color_data.csv')
+ data
+
+
+ # In[16]:
+
+
+ filename = "celltype_color_data.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+ #df = df.drop(columns = ['hex'])
+
+ # Here the RGB values are already in separate columns 'R', 'G', 'B'
+ if all(col in df.columns for col in ['R', 'G', 'B']):
+     # Create the 'rgb' column as tuples
+     df['rgb'] = list(zip(df['R'], df['G'], df['B']))
+
+ # (The string-to-tuple conversion is not needed here)
+ #df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ cell_type_color_dict = df.set_index('cell_type')['rgb'].to_dict()
+
+ # Print information
+ print('cell_type_color_dict =\n', cell_type_color_dict)
+
+
+ # ### III.3.9. CELL SUBTYPES COLORS
+
+ # In[17]:
+
+
+ df = pd.read_csv(filename)
+ df.head()
+
+
+ # In[18]:
+
+
+ filename = "cellsubtype_color_data.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+ df = df.drop(columns=['hex'])
+
+ # Convert the 'rgb' strings back into float tuples
+ df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ cell_subtype_color_dict = df.set_index('cell_subtype')['rgb'].to_dict()
+
+ # Print information
+ print('cell_subtype_color_dict =\n', cell_subtype_color_dict)
+
+
+ # In[19]:
+
+
+ df = pd.read_csv(filename)
+ df.head()
+
+
+ # ### III.3.10. IMMUNE CHECKPOINT COLORS
+
+ # In[20]:
+
+
+ metadata_dir = "/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/test_metadata"
+ filename = "immunecheckpoint_color_data.csv"
+ filename = os.path.join(metadata_dir, filename)
+
+ # Check file exists
+ if not os.path.exists(filename):
+     print("WARNING: Could not find desired file: " + filename)
+ else:
+     print("The", filename, "file was imported for further analysis!")
+
+ # Open, read in information
+ df = pd.read_csv(filename, header=0)
+ df = df.drop(columns=['hex'])
+
+ # Convert the 'rgb' column from string to tuple
+ df['rgb'] = df['rgb'].apply(rgb_tuple_from_str)
+
+ # Verify size
+ print("Verifying data read from file is the correct length...\n")
+ #verify_line_no(filename, df.shape[0] + 1)
+
+ # Turn into dictionary
+ immune_checkpoint_color_dict = df.set_index('immune_checkpoint')['rgb'].to_dict()
+
+ # Print information
+ print('immune_checkpoint_color_dict =\n', immune_checkpoint_color_dict)
+ immune_checkpoint_color_df = pd.DataFrame(immune_checkpoint_color_dict)
+ immune_checkpoint_color_df
+
+
+ # ### III.3.11. DATA
+
+ # In[21]:
+
+
+ # DATA
+ # Check if the directory exists, then list the sample files it contains
+ if os.path.exists(input_data_dir):
+     # List files in the directory
+     ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_bs.csv")]
+     print("The following CSV files were detected:")
+     print(ls_samples)
+ else:
+     print(f"The directory {input_data_dir} does not exist.")
+
+
+ # In[22]:
+
+
+ # Import all the other files
+ dfs = {}
+
+ # Set variable to hold default header values
+ # First gather information on expected headers using the first file in ls_samples:
+ # read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
+ df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)
+ expected_headers = df.columns.values
+ #print(expected_headers)
+
+ ###############################
+ # !! This may take a while !! #
+ ###############################
+ # Iterate over a copy of the list so that removing a sample inside the loop is safe
+ for sample in list(ls_samples):
+     file_path = os.path.join(input_data_dir, sample)
+     print(file_path)
+     try:
+         # Read the CSV file
+         df = pd.read_csv(file_path, index_col=0)
+         # Check if the DataFrame is empty; if so, don't continue trying to process it
+         if not df.empty:
+             # Reorder the columns to match the expected headers list
+             df = df.reindex(columns=expected_headers)
+             print(sample, "file is processed!\n")
+             #print(df)
+     except pd.errors.EmptyDataError:
+         print(f'\nEmpty data error in {sample} file. Removing from analysis...')
+         ls_samples.remove(sample)
+         continue
+
+     # Add df to dfs
+     dfs[sample] = df
+
+ #print(dfs)
+
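+ # Editorial sketch of why the reindex above matters: columns missing from a file
+ # come back as all-NaN columns (caught by the isnull() checks below), and columns
+ # not in expected_headers are dropped. Toy example with illustrative names:
+ _demo = pd.DataFrame({'A': [1], 'C': [3]}).reindex(columns=['A', 'B'])
+ # _demo now has columns ['A', 'B'], where 'B' is NaN and 'C' was discarded.
+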
+ # In[23]:
+
+
+ # Merge dfs into one df
+ df = pd.concat(dfs.values(), ignore_index=False, sort=False)
+ del dfs
+ merged_df = df
+
+
+ # In[24]:
+
+
+ merged_df
+
+
+ # In[25]:
+
+
+ merged_df_shape = df.shape
+
+
+ # In[26]:
+
+
+ merged_df_index = df.index
+
+
+ # In[27]:
+
+
+ merged_df_col_values = df.columns.values
+
+
+ # In[28]:
+
+
+ # Check for NaN entries (should not be any unless columns do not align)
+ # False means no NaN entries
+ # True means NaN entries
+ merged_df_null_values = df.isnull().any().any()
+
+
+ # In[29]:
+
+
+ df.isnull().any().any()
+
+
+ # ## III.4. MARKERS
+
+ # In[30]:
+
+
+ # Listing all the markers of interest for downstream analyses
+ # !!TODO WITH MARILYNE!!
+ markers = [
+     '53BP1_Nucleus_Intensity_Average',
+     'AR_Nucleus_Intensity_Average',
+     'CCNB1_Cell_Intensity_Average',
+     'CCND1_Nucleus_Intensity_Average',
+     'CCNE_Nucleus_Intensity_Average',
+     'CD31_Cytoplasm_Intensity_Average',
+     'CKs_Cytoplasm_Intensity_Average',
+     'ERa_Nucleus_Intensity_Average',
+     'Ecad_Cytoplasm_Intensity_Average',
+     'GATA3_Nucleus_Intensity_Average',
+     'H3K27_Nucleus_Intensity_Average',
+     'H3K4me3_Nucleus_Intensity_Average',
+     'HER2_Cytoplasm_Intensity_Average',
+     'HSP90_Cell_Intensity_Average',
+     'Ki67_Nucleus_Intensity_Average',
+     'PAX8_Nucleus_Intensity_Average',
+     'PCNA_Nucleus_Intensity_Average',
+     'PRg_Nucleus_Intensity_Average',
+     'S100b_Cytoplasm_Intensity_Average',
+     'TP53_Cell_Intensity_Average',
+     'Vimentin_Cytoplasm_Intensity_Average',
+     'pAKT_Cytoplasm_Intensity_Average',
+     'pATM_Nucleus_Intensity_Average',
+     'pATR_Nucleus_Intensity_Average',
+     'pERK_Cell_Intensity_Average',
+     'pRB_Nucleus_Intensity_Average',
+     'pS6_Cytoplasm_Intensity_Average',
+     'AXL_Cytoplasm_Intensity_Average',
+     'B7H4_Cell_Intensity_Average',
+     'CD11c_Cytoplasm_Intensity_Average',
+     'CD163_Cytoplasm_Intensity_Average',
+     'CD20_Cytoplasm_Intensity_Average',
+     'CD44_Cytoplasm_Intensity_Average',
+     'CD45_Cytoplasm_Intensity_Average',
+     'CD45b_Cytoplasm_Intensity_Average',
+     'CD4_Cytoplasm_Intensity_Average',
+     'CD68_Cytoplasm_Intensity_Average',
+     'CD8_Cytoplasm_Intensity_Average',
+     'ColVI_Cytoplasm_Intensity_Average',
+     'Desmin_Cytoplasm_Intensity_Average',
+     'FOXP3_Nucleus_Intensity_Average',
+     'Fibronectin_Cytoplasm_Intensity_Average',
+     'HLA_Cytoplasm_Intensity_Average',
+     'MMP9_Cytoplasm_Intensity_Average',
+     'PD1_Cytoplasm_Intensity_Average',
+     'PDGFR_Cytoplasm_Intensity_Average',
+     'PDL1_Cytoplasm_Intensity_Average',
+     'Sting_Cytoplasm_Intensity_Average',
+     'aSMA_Cytoplasm_Intensity_Average'
+ ]
+
+
+ # In[31]:
+
+
+ # Check if all columns in the markers list are present in the DataFrame
+ missing_columns = [col for col in markers if col not in df.columns]
+ if missing_columns:
+     # Columns can be missing because those markers are present on the other slide
+     print(f"The following columns are not present in the DataFrame ({len(missing_columns)} columns missing): \n{missing_columns}\n")
+     # Keep only the columns that are in the markers list and also exist in the DataFrame
+     intersected_columns = list(set(markers).intersection(df.columns))
+     df_markers = df[intersected_columns]
+ else:
+     # Filter the DataFrame to keep only the columns in the markers list
+     df_markers = df[markers]
+
+ initial_df_marker = df_markers
+ df_markers.head()
+
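+ # Editorial note: set(markers).intersection(df.columns) above does not preserve
+ # the order of the markers list; if column order matters downstream, an
+ # order-preserving alternative is:
+ #     intersected_columns = [col for col in markers if col in df.columns]
+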
+ # In[32]:
+
+
+ # Rename CD45b into CD45 (Slide A!)
+ if project_name == 'Slide_A':
+     df_markers.rename(columns={"CD45b_Cytoplasm_Intensity_Average": "CD45_Cytoplasm_Intensity_Average"}, inplace=True)
+ df_markers.columns.values
+
+
+ # In[33]:
+
+
+ df_markers.shape
+
+
+ # In[34]:
+
+
+ min_values = df_markers.min().tolist()
+ min_values
+
+
+ # In[35]:
+
+
+ # Keep not_intensities and markers columns
+ # Combine both lists
+ combined_columns = list(set(markers) | set(not_intensities))
+
+ # Filter the DataFrame to keep only the combined columns present in both df and combined_columns
+ df_markers_not_intensities = df[df.columns.intersection(combined_columns)]
+
+
+ # In[36]:
+
+
+ df_markers_not_intensities
+
+
+ # In[37]:
+
+
+ df_markers_not_intensities.shape
+
+
+ # ## III.5. NORMALISATION
+
+ # In[38]:
+
+
+ df_markers.min().tolist()
+
+
+ # In[39]:
+
+
+ '''# LOG2 TRANSFORMATION
+ # Values need to be higher than 0 for log2 transformation.
+ print("df_marker.shape before normalisation: ", df_markers.shape)
+ df_marker_shape_before_norm = df_markers.shape
+
+ # Option 1
+ # This step might not be the best approach because it creates patterns in the data:
+ # set anything that is below 0 to 0, so that we can do the log transform, then +1 to all columns
+ #for f in df_markers.columns[~df_markers.columns.isin(not_intensities)]:
+ #    df_markers.loc[df_markers[f] < 0, f] = 0
+
+ # Option 2
+ # Add the min from min values (from above) + 1 to all columns
+ #df_markers.loc[:, ~df_markers.columns.isin(not_intensities)] = \
+ #    df_markers.loc[:, ~df_markers.columns.isin(not_intensities)].copy() + 1
+ # Add the minimum value + 1 to each column
+ # OR'''
+
+
+ # In[40]:
+
+
+ # Shift all values so the global minimum maps to 1, then log2-transform
+ min_value = df_markers.min().min()
+ print("min value = ", min_value)
+ df_markers = df_markers + np.abs(min_value)
+
+ # +1
+ df_markers = df_markers + 1
+ df_after_norm = df_markers
+ df_marker_shape_after_norm = df_markers.shape
+ print("df_markers.shape after normalisation: ", df_markers.shape)
+ df_markers.min().tolist()
+
+ # Apply log2
+ df_markers.loc[:, ~df_markers.columns.isin(not_intensities)] = \
+     np.log2(df_markers.loc[:, ~df_markers.columns.isin(not_intensities)])
+ print('log2 transform finished')
+
+ df_markers
+
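+ # Worked example (editorial) of the shift above: if the global minimum is -3.5,
+ # every value is shifted by +3.5 and then +1, so the smallest entry maps to
+ # log2(1) = 0 and everything is strictly positive before the transform:
+ _vals = np.array([-3.5, 0.0, 4.5])
+ _shifted = _vals + np.abs(_vals.min()) + 1   # [1.0, 4.5, 9.0]
+ _logged = np.log2(_shifted)                  # [0.0, ~2.17, ~3.17]
+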
+
+ # In[75]:
+
+
+ # main
+ pn.extension()
+
+ # (Resetting not_intensities here would override the list loaded in III.3.2;
+ # df_markers contains no not_intensities columns, so no reset is needed.)
+ #not_intensities = []  # Add columns to exclude from transformation, if any
+
+ # Define transformation functions
+ def modify(df):
+     # Shift so the global minimum maps to 1, then log2-transform
+     min_value = df.min().min()
+     df = df + np.abs(min_value)
+     df = df + 1
+     df.loc[:, ~df.columns.isin(not_intensities)] = np.log2(df.loc[:, ~df.columns.isin(not_intensities)])
+     return df
+
+ def shift(df):
+     # Log2-transform without shifting
+     df.loc[:, ~df.columns.isin(not_intensities)] = np.log2(df.loc[:, ~df.columns.isin(not_intensities)])
+     return df
+
+ # Define the panel widgets
+ operation = pn.widgets.RadioButtonGroup(name='Operation', options=['Modify', 'Shift'], button_type='success')
+
+ # Define a function to update the DataFrame based on the selected operation
+ def update_dataframe(operation):
+     df = df_markers.copy()
+     if operation == 'Modify':
+         modified_df = modify(df)
+     elif operation == 'Shift':
+         modified_df = shift(df)
+     return modified_df.head()
+
+ # Create a panel layout
+ layout = pn.Column(
+     pn.pane.Markdown("### Data Transformation"),
+     operation,
+     pn.pane.Markdown("### Transformed DataFrame"),
+     pn.bind(lambda op: update_dataframe(op), operation)
+ )
+
+ #df_after_norm
+
+ df_markers.columns.tolist()
+
+ # Check for NaN entries (should not be any unless columns do not align)
+ # False means no NaN entries
+ # True means NaN entries
+ df_markers.isnull().any().any()
+
+ count_nan_in_df_markers = df_markers.isnull().sum().sum()
+ print(count_nan_in_df_markers)
+
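+ # Editorial note: the layout above can be previewed on its own with pn.serve(layout)
+ # before it is embedded in the GoldenTemplate app at the end of this script.
+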
+
+ # ## III.6. Z-SCORE TRANSFORMATION
+
+ # In[49]:
+
+
+ # Filter the DataFrame df to keep only the columns specified in the not_intensities list
+ #df = df.loc[:, not_intensities]
+ #df
+
+ # Check if all columns in the not_intensities list are present in the DataFrame
+ missing_columns = [col for col in not_intensities if col not in df.columns]
+ if missing_columns:
+     print(f"The following columns are not present in the DataFrame ({len(missing_columns)} columns missing): \n{missing_columns}")
+     # Keep only the columns that are in the not_intensities list and also exist in the DataFrame
+     intersected_columns = list(set(not_intensities).intersection(df.columns))
+     df = df[intersected_columns]
+ else:
+     # Keep only the columns in the not_intensities list
+     df = df.loc[:, not_intensities]
+
+ df
+
+
+ # In[50]:
+
+
+ df
+
+
+ # In[51]:
+
+
+ df_merged = df_markers.merge(df, left_index=True, right_on='ID', how='left')
+ df_merged
+
+
+ # In[52]:
+
+
+ df_merged.columns.tolist()
+
+
+ # In[53]:
+
+
+ # Create a copy, just in case you need to restart the kernel
+ df_merged_copy = df_merged.copy()
+
+
+ # In[54]:
+
+
+ # Filter the rows of df_merged based on the values in the 'Sample_ID' column:
+ # df_subset will contain the rows of df_merged whose 'Sample_ID' matches the values in the list 'keep' ('TMA.csv' in this case)
+ keep = ['TMA.csv']
+ df_subset = df_merged.loc[df_merged['Sample_ID'].isin(keep), :].copy()
+ df_subset
+
+
+ # In[55]:
+
+ # Convert the DataFrame to numeric, forcing errors to NaN
+ df_numeric = df_subset.apply(pd.to_numeric, errors='coerce')
+
+ # Z-score normalization
+ # Z-score the rows (apply() with axis = 1, only performed on intensity data)
+ df_subset.loc[:, ~df_subset.columns.isin(not_intensities)] = \
+     df_numeric.loc[:, ~df_numeric.columns.isin(not_intensities)].apply(
+         lambda row: (row - row.median()) / row.std(ddof=0), axis=1)
+ # Drop columns with all NaN values (if any)
+ df_subset.dropna(how='all', inplace=True, axis=1)
+
+ print('zscore rows finished')
+ ###############################
+ # !! This may take a while !! #
+ ###############################
+ '''df_subset.loc[:,~df_subset.columns.isin(not_intensities)] = \
+     df_subset.loc[:,~df_subset.columns.isin(not_intensities)].apply(
+         lambda row: (row - row.median())/(row.std(ddof=0)), axis = 1)
+ df_subset.dropna(how = 'all', inplace = True, axis = 1)
+ print('zscore rows finished')'''
+
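+ # Editorial sketch of the per-row transform above: each row is centered on its
+ # median and scaled by its population std (ddof=0), i.e. a median-centered z-score:
+ _row = pd.Series([1.0, 2.0, 3.0, 10.0])
+ _z = (_row - _row.median()) / _row.std(ddof=0)   # median 2.5, std ~3.54
+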
+
+ # In[56]:
+
+
+ df_subset
+ df_numeric = df_merged.apply(pd.to_numeric, errors='coerce')
+ # Z-score the rows (apply() with axis = 1, only performed on intensity data)
+
+ ###############################
+ # !! This may take a while !! #
+ ###############################
+ df_merged.loc[:, ~df_merged.columns.isin(not_intensities)] = \
+     df_numeric.loc[:, ~df_numeric.columns.isin(not_intensities)].apply(
+         lambda row: (row - row.median()) / (row.std(ddof=0)), axis=1)
+ df_merged.dropna(how='all', inplace=True, axis=1)
+ print('zscore rows finished')
+
+ '''# Z-score the rows (apply() with axis = 1, only performed on intensity data)
+
+ ###############################
+ # !! This may take a while !! #
+ ###############################
+ df_merged.loc[:,~df_merged.columns.isin(not_intensities)] = \
+     df_merged.loc[:,~df_merged.columns.isin(not_intensities)].apply(
+         lambda row: (row - row.median())/(row.std(ddof=0)), axis = 1)
+ df_merged.dropna(how = 'all', inplace = True, axis = 1)
+ print('zscore rows finished')'''
+
+
+ df_merged
+
+
+ # In[59]:
+
+
+ # Center every intensity column of df_merged on the TMA reference subset's column medians
+ df_merged.loc[:, ~df_merged.columns.isin(not_intensities)] = \
+     df_merged.loc[:, ~df_merged.columns.isin(not_intensities)] - df_subset.loc[:, ~df_subset.columns.isin(not_intensities)].median()
+ df_merged
+
+
+ # In[60]:
+
+
+ # Scale by the TMA subset's column stds; the chained assignment evaluates the RHS
+ # once and assigns it to both df_merged_zscore and the selected columns of df_merged
+ df_merged_zscore = df_merged.loc[:, ~df_merged.columns.isin(not_intensities)] = \
+     df_merged.loc[:, ~df_merged.columns.isin(not_intensities)] / df_subset.loc[:, ~df_subset.columns.isin(not_intensities)].std(ddof=0)
+ df_merged_zscore
+
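+ # Editorial note: taken together, the two cells above re-express every intensity
+ # column of df_merged on the TMA reference sample's scale — subtract the TMA
+ # column medians, then divide by the TMA column stds (ddof=0).
+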
+
+ # In[61]:
+
+
+ # Check for NaN entries (should not be any unless columns do not align)
+ # False means no NaN entries
+ # True means NaN entries
+ df.isnull().any().any()
+
+
+ # In[62]:
+
+
+ quality_control_df = df_merged_zscore
+
+
+ # In[63]:
+
+
+ def check_index_format(index_str, ls_samples):
+     """
+     Checks if the given index string follows the specified format.
+
+     Args:
+     index_str (str): The index string to be checked.
+     ls_samples (list): A list of valid sample names.
+
+     Returns:
+     bool: True if the index string follows the format, False otherwise.
+     """
+     # Split the index string into parts
+     parts = index_str.split('_')
+
+     # Check if there are exactly 3 parts
+     if len(parts) != 3:
+         print(len(parts))
+         return False
+
+     # Check if the first part is in ls_samples
+     sample_name = parts[0]
+     if f'{sample_name}_bs.csv' not in ls_samples:
+         print(sample_name)
+         return False
+
+     # Check if the second part is in ['Cell', 'Cytoplasm', 'Nucleus']
+     location = parts[1]
+     valid_locations = ['Cell', 'Cytoplasm', 'Nucleus']
+     if location not in valid_locations:
+         print(location)
+         return False
+
+     # Check if the third part is a number
+     try:
+         index = int(parts[2])
+     except ValueError:
+         print(parts[2])
+         return False
+
+     # If all checks pass, return True
+     return True
+
+
+ # Let's take a look at a few features to make sure our dataframe is as expected
+ def check_format_ofindex(index):
+     for index in df.index:
+         check_index = check_index_format(index, ls_samples)
+         if check_index is False:
+             index_format = "Bad"
+             return index_format
+
+     index_format = "Good"
+     return index_format
+
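+ # Usage sketch (editorial; the sample name is illustrative): an index such as
+ # "DD3S1_Cell_42" passes when "DD3S1_bs.csv" is present in ls_samples:
+ #     check_index_format("DD3S1_Cell_42", ls_samples)   # -> True
+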
+
+ # In[64]:
+
+
+ import panel as pn
+ import pandas as pd
+
+ def quality_check(file, not_intensities):
+     # Load the output file
+     df = file
+
+     # Check Index
+     check_index = check_format_ofindex(df.index)
+
+     # Check Shape
+     check_shape = df.shape
+
+     # Check for NaN entries
+     check_no_null = df.isnull().any().any()
+
+     mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)
+     if (mean_intensity == 0).any():
+         df = df.loc[mean_intensity > 0, :]
+         print("df.shape after removing 0 mean values: ", df.shape)
+         check_zero_intensities = f'Shape after removing 0 mean values: {df.shape}'
+     else:
+         print("No zero intensity values.")
+         check_zero_intensities = "No zero intensity values."
+
+     # Create a quality check results table
+     quality_check_results_table = pd.DataFrame({
+         'Check': ['Index', 'Shape', 'Check for NaN Entries', 'Check for Zero Intensities'],
+         'Result': [str(check_index), str(check_shape), str(check_no_null), check_zero_intensities]
+     })
+
+     # Create a quality check results component
+     quality_check_results_component = pn.Card(
+         pn.pane.DataFrame(quality_check_results_table),
+         title="Quality Control Results",
+         header_background="#2196f3",
+         header_color="white",
+     )
+
+     return quality_check_results_component
+
+
+ # In[76]:
+
+
+ import panel as pn
+
+ # DataFrames already defined above:
+ # metadata, merged_df, initial_df_marker, df_markers_not_intensities, df_after_norm,
+ # df_markers, df_subset, df_merged_zscore
+
+ # Create widgets and panes
+ df_widget = pn.widgets.DataFrame(metadata, name="MetaData")
+
+ # Define the tabs' content
+
+ metadata_tab = pn.Column(
+     pn.pane.Markdown("### Sample Metadata"),
+     pn.pane.DataFrame(metadata.head()),
+     pn.pane.Markdown("### Initial Dataframe"),
+     pn.pane.DataFrame(initial_df_marker.head(), width=1500),
+     pn.Row(pn.pane.Markdown("### Shape: "), pn.pane.Markdown(str(initial_df_marker.shape))),
+     pn.pane.Markdown("### Merged Dataframe"),
+     pn.pane.DataFrame(merged_df.head(), width=1500),
+     pn.Row(pn.pane.Markdown("### Shape: "), pn.pane.Markdown(str(merged_df.shape))),
+     pn.pane.Markdown("### Markers and not intensities Dataframe"),
+     pn.pane.DataFrame(df_markers_not_intensities.head(), width=1500),
+     pn.Row(pn.pane.Markdown("### Shape: "),
+            pn.pane.Markdown(str(df_markers_not_intensities.shape)))
+ )
+
+ normalization_tab = pn.Column(
+     #pn.pane.Markdown("### Normalisation performed"),
+     #pn.pane.DataFrame(df_after_norm.head()),
+     #pn.Row(pn.pane.Markdown("### Shape before normalization: "),
+     #       pn.pane.Markdown(str(df_marker_shape_before_norm))),
+     #pn.Row(pn.pane.Markdown("### Shape after normalization: "),
+     #       pn.pane.Markdown(str(df_marker_shape_after_norm))),
+     #pn.pane.Markdown("### Performed log 2 transformation"),
+     #pn.pane.DataFrame(df_markers.head())
+     layout
+ )
+
+ zscore_tab = pn.Column(
+     pn.pane.Markdown("### Performed Z-score transformation"),
+     pn.pane.DataFrame(df_subset.head(), width=1500),
+     pn.pane.Markdown("### Z-score transformation finished"),
+     pn.pane.DataFrame(df_merged_zscore.head(), width=1500)
+ )
+
+ quality_control_tab = pn.Column(
+     pn.pane.Markdown("### Quality Control"),
+     quality_check(quality_control_df, not_intensities)
+ )
+
+ # Create the GoldenTemplate
+ app3 = pn.template.GoldenTemplate(
+     site="Cyc-IF",
+     title="Z-Score Computation",
+     main=[
+         pn.Tabs(
+             ("Metadata", metadata_tab),
+             ("Normalization", normalization_tab),
+             ("Z-Score", zscore_tab),
+             ("Quality Control", quality_control_tab)
+         )
+     ]
+ )
+
+ app3.servable()
+
+ if __name__ == "__main__":
+     pn.serve(app3, port=5007)
+