Spaces:

LaboLabrie
/

CycIF

Sleeping

App Files Files Community

CycIF / Step5_Marker_Threshold_Classification.py

KashyapiNagaHarshitha

Upload Step5_Marker_Threshold_Classification.py

6372547 verified 12 months ago

raw

history blame contribute delete

55.8 kB

	#!/usr/bin/env python
	# coding: utf-8
	# # IV. MARKERS THRESHOLDS NOTEBOOK
	# ## IV.1. PACKAGES IMPORT

	import os
	import random
	import re
	import pandas as pd
	import numpy as np
	import seaborn as sb
	import matplotlib.pyplot as plt
	import matplotlib.colors as mplc
	import subprocess
	import warnings
	import panel as pn
	import json
	from scipy import signal
	from scipy.stats import pearsonr
	import plotly.figure_factory as ff
	import plotly
	import plotly.graph_objs as go
	from plotly.subplots import make_subplots
	from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
	import plotly.express as px
	import sys
	sys.setrecursionlimit(5000)
	from my_modules import *
	#Silence FutureWarnings & UserWarnings
	warnings.filterwarnings('ignore', category= FutureWarning)
	warnings.filterwarnings('ignore', category= UserWarning)


	# ## IV.2. *DIRECTORIES
	# Set base directory
	#input_path = '/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431'
	#set_path = 'test'
	# Locate this script and the stored_variables.json file that sits next to it.
	present_dir = os.path.dirname(os.path.realpath(__file__))
	stored_variables_path = os.path.join(present_dir, 'stored_variables.json')
	with open(stored_variables_path, 'r') as file:
	    stored_vars = json.load(file)

	# Settings read from the JSON document (a missing key raises KeyError).
	directory = stored_vars['base_dir']
	base_dir = input_path = os.path.join(present_dir, directory)
	project_name = set_name = set_path = stored_vars['set_path']  # Project name
	selected_metadata_files = stored_vars['selected_metadata_files']
	ls_samples = stored_vars['ls_samples']

	step_suffix = 'mt'  # Current part (here part IV)
	previous_step_suffix_long = "_zscore"  # Previous part (here ZSCORE NOTEBOOK)

	# Input directory: output of the previous (z-score) step
	input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)

	# Output directory of this step and its images subdirectory
	output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
	output_images_dir = os.path.join(output_data_dir, "images")

	# Metadata directory and its images subdirectory
	metadata_dir = os.path.join(base_dir, project_name + "_metadata")
	metadata_images_dir = os.path.join(metadata_dir, "images")

	# Create directories if they don't already exist
	#for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
	# if not os.path.exists(d):
	#print("Creation of the" , d, "directory...")
	# os.makedirs(d)
	#else :
	# print("The", d, "directory already exists !")

	#os.chdir(input_data_dir)


	# Verify paths
	#print('base_dir :', base_dir)
	#print('input_data_dir :', input_data_dir)
	#print('output_data_dir :', output_data_dir)
	#print('output_images_dir :', output_images_dir)
	#print('metadata_dir :', metadata_dir)
	#print('metadata_images_dir :', metadata_images_dir)


	# ## IV.3. FILES

	# ### IV.3.1. METADATA


	# Read the marker metadata table from the project's metadata directory.
	filename = "marker_intensity_metadata.csv"
	filename = os.path.join(metadata_dir, filename)

	# Check file exists
	#if not os.path.exists(filename):
	# print("WARNING: Could not find desired file: "+filename)
	#else :
	# print("The",filename,"file was imported for further analysis!")

	# Open, read in information
	metadata = pd.read_csv(filename)

	# Verify size with verify_line_no() function in my_modules.py
	#verify_line_no(filename, metadata.shape[0] + 1)

	# Verify headers
	# NOTE(review): compare_headers is not defined in this file; presumably it is
	# provided by `from my_modules import *` -- confirm.
	exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']
	compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")

	# Drop every row that has at least one missing value
	metadata = metadata.dropna()
	# Bare expression left over from the notebook; it has no effect in a script
	metadata.head()


	# ### IV.3.2. NOT_INTENSITIES
	# Path of the file listing the columns that are not marker intensities
	filename = os.path.join(metadata_dir, "not_intensities.csv")

	# Read the whole file, strip surrounding whitespace and split it on newlines,
	# giving one entry per line.
	not_intensities = []
	with open(filename, 'r') as fh:
	    not_intensities = fh.read().strip().split("\n")
	# take str, strip whitespace, split on new line character

	# Verify size
	#print("\nVerifying data read from file is the correct length...\n")
	#verify_line_no(filename, len(not_intensities))

	# Print to console
	#print("not_intensities =\n", not_intensities)


	# ### IV.3.3. FULL_TO_SHORT_COLUMN_NAMES

	# Build the full -> short column-name lookup from the metadata directory.
	filename = "full_to_short_column_names.csv"
	filename = os.path.join(metadata_dir, filename)

	# Check file exists
	#if not os.path.exists(filename):
	# print("WARNING: Could not find desired file: " + filename)
	#else :
	# print("The",filename,"file was imported for further analysis!")

	# Open, read in information
	df = pd.read_csv(filename, header = 0)

	# Verify size
	# NOTE(review): the message is still printed although the check below is commented out
	print("Verifying data read from file is the correct length...\n")
	#verify_line_no(filename, df.shape[0] + 1)

	# Turn into dictionary
	# After the transpose each remaining column is one record; [0] keeps the first
	# one, i.e. a dict mapping every 'full_name' to its value in that column.
	full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]
	#print('full_to_short_names =\n',full_to_short_names)


	# ### IV.3.4. SHORT_TO_FULL_COLUMN_NAMES


	# Build the short -> full column-name lookup from the metadata directory.
	filename = "short_to_full_column_names.csv"
	filename = os.path.join(metadata_dir, filename)

	# Check file exists
	#if not os.path.exists(filename):
	# print("WARNING: Could not find desired file: " + filename)
	#else :
	# print("The",filename,"file was imported for further analysis!")

	# Open, read in information
	df = pd.read_csv(filename, header = 0)

	# Verify size
	#print("Verifying data read from file is the correct length...\n")
	#verify_line_no(filename, df.shape[0] + 1)

	# Turn into dictionary
	# After the transpose each remaining column is one record; [0] keeps the first
	# one, i.e. a dict mapping every 'short_name' to its value in that column.
	short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]
	# Print information
	#print('short_to_full_names =\n',short_to_full_names)


	# ### IV.3.10. DATA

	# List files in the directory
	# Check if the directory exists
	# Prefer the z-scored CSV files actually present on disk; when the input
	# directory does not exist, keep the ls_samples list read from stored_variables.json.
	if os.path.exists(input_data_dir):
	    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_zscore.csv")]
	if not ls_samples:
	    raise FileNotFoundError(f"No '_zscore.csv' sample files available in {input_data_dir}")

	# Per-sample DataFrames, keyed by file name
	dfs = {}

	# The column order of the first sample is the reference for every other file:
	# read only its first row to get the header.
	df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)
	expected_headers = df.columns.values

	###############################
	# !! This may take a while !! #
	###############################
	# Iterate over a copy: unusable samples are removed from ls_samples inside the
	# loop, and removing from the list being iterated would skip the next sample.
	for sample in list(ls_samples):
	    file_path = os.path.join(input_data_dir, sample)
	    try:
	        df = pd.read_csv(file_path, index_col=0)
	    except pd.errors.EmptyDataError:
	        # Nothing to parse: drop the sample instead of storing the previous
	        # sample's DataFrame under this sample's name.
	        ls_samples.remove(sample)
	        continue
	    if df.empty:
	        # Header-only file: it contributes no cells, so leave it out as well.
	        ls_samples.remove(sample)
	        continue
	    # Reorder the columns to match the expected headers list
	    dfs[sample] = df.reindex(columns=expected_headers)

	if not dfs:
	    raise ValueError(f"None of the sample files in {input_data_dir} contains data")

	# Merge dfs into one df
	df = pd.concat(dfs.values(), ignore_index=False, sort=False)
	del dfs

	print(df.head())

	intial_df = pn.pane.DataFrame(df.head(40), width=2500)


	# ### Marker Classification

	# ## IV.5. *DOTPLOTS

	# ### IV.7.2. DOTPLOTS-DETERMINED THRESHOLD
	# Make sure stored_variables.json contains a (possibly empty) dictionary for the
	# cell type classification and for the cell subtype classification.
	# The file is read once and rewritten only when a key had to be added.
	try:
	    with open(stored_variables_path, 'r') as f:
	        stored_variables = json.load(f)
	except FileNotFoundError:
	    stored_variables = {}

	missing_keys = [
	    key
	    for key in ('cell_type_classification', 'cell_subtype_classification')
	    if key not in stored_variables
	]
	for key in missing_keys:
	    stored_variables[key] = {}
	if missing_keys:
	    with open(stored_variables_path, 'w') as f:
	        json.dump(stored_variables, f, indent=4)

	# From here on `data` is an alias of the merged cell DataFrame
	data = df


	import json
	import panel as pn

	# Load existing stored variables
	with open(stored_variables_path, 'r') as f:
	    stored_variables = json.load(f)

	# Thresholds saved by a previous session (empty when none were saved yet)
	saved_thresholds = stored_variables.get('thresholds', {})

	# One numeric input per marker. Each widget starts at the previously saved
	# threshold (0.0 when there is none), so pressing "Save" without editing a
	# field no longer resets that marker's threshold to 0.0.
	threshold_inputs = {}
	for marker in stored_variables['markers']:
	    threshold_inputs[marker] = pn.widgets.FloatInput(
	        name=f'{marker} Threshold',
	        value=saved_thresholds.get(marker, 0.0),
	        step=0.1,
	    )

	# Check if 'thresholds' field is present, if not, add it
	if 'thresholds' not in stored_variables:
	    thresholds = {marker: input_widget.value for marker, input_widget in threshold_inputs.items()}
	    stored_variables['thresholds'] = thresholds
	    with open(stored_variables_path, 'w') as f:
	        json.dump(stored_variables, f, indent=4)

	# Save button to save thresholds to stored_variables.json
	def save_thresholds(event):
	    """Write the thresholds currently entered in the widgets to stored_variables.json.

	    Parameters
	    ----------
	    event
	        Click event passed by the button; not used.
	    """
	    thresholds = {marker: input_widget.value for marker, input_widget in threshold_inputs.items()}

	    # Read-modify-write: start from the file's current content so that keys
	    # written by other callbacks since start-up (e.g. the classification
	    # tables) are not overwritten with the stale in-memory snapshot.
	    try:
	        with open(stored_variables_path, 'r') as f:
	            latest = json.load(f)
	    except (FileNotFoundError, json.JSONDecodeError):
	        latest = dict(stored_variables)
	    latest['thresholds'] = thresholds
	    stored_variables['thresholds'] = thresholds  # keep the in-memory copy in sync

	    with open(stored_variables_path, 'w') as f:
	        json.dump(latest, f, indent=4)

	    # pn.state.notifications is None unless notifications were enabled
	    # (pn.extension(notifications=True)); skip the toast in that case.
	    if pn.state.notifications is not None:
	        pn.state.notifications.success('Thresholds saved successfully!')

	# Button that persists the thresholds through save_thresholds
	save_button2 = pn.widgets.Button(name='Save Thresholds', button_type='primary')
	save_button2.on_click(save_thresholds)

	# Create a GridSpec layout
	grid = pn.GridSpec()

	# Place the threshold inputs on the grid, five per row
	for position, marker in enumerate(stored_variables['markers']):
	    grid[position // 5, position % 5] = threshold_inputs[marker]
	# Row/column at which the next widget would have been placed
	row, col = divmod(len(stored_variables['markers']), 5)

	# Add the save button below the inputs, spanning the five columns
	grid[row + 1, :5] = save_button2

	# Panel layout
	threshold_panel = pn.Column(
	    pn.pane.Markdown("## Define Thresholds for Markers"),
	    grid,
	)


	import pandas as pd
	import json

	# Load stored variables from the JSON file
	# Re-read the stored variables so the classification below sees the file's current content
	with open(stored_variables_path, 'r') as file:
	    stored_variables = json.load(file)

	# Step 1: every column of the merged DataFrame is a candidate intensity column
	intensities = list(df.columns)

	def assign_cell_type(row):
	    """Return the cell type of one cell (one DataFrame row).

	    Columns are scanned in the order of ``intensities``. The marker name is the
	    part of the column name before the first underscore. The first column whose
	    value exceeds the stored threshold of its marker, and whose marker is listed
	    under a cell type in ``stored_variables['cell_type_classification']``,
	    decides the result. ``'STROMA'`` is returned when no column qualifies.
	    """
	    for column in intensities:
	        marker = column.split('_')[0]  # Extract marker from intensity name
	        if marker not in stored_variables['thresholds']:
	            continue
	        if not row[column] > stored_variables['thresholds'][marker]:
	            continue
	        for cell_type, markers in stored_variables['cell_type_classification'].items():
	            if marker in markers:
	                return cell_type
	    return 'STROMA'  # Default if no condition matches

	# Step 5: Apply the classification function to the DataFrame
	# (row-wise apply: assign_cell_type is called once per cell)
	df['cell_type'] = df.apply(lambda row: assign_cell_type(row), axis=1)
	df.head()
	# Count the rows whose cell_type contains each of the four type names
	present_stroma = df['cell_type'].str.contains('STROMA').sum()
	present_cancer = df['cell_type'].str.contains('CANCER').sum()
	present_immune = df['cell_type'].str.contains('IMMUNE').sum()
	present_endothelial = df['cell_type'].str.contains('ENDOTHELIAL').sum()
	# Print the result
	#print(present_stroma)
	#print(present_cancer)
	#print(present_immune)
	#print(present_endothelial)
	#print(len(df))
	# Bare expressions left over from the notebook; they have no effect in a script
	df.head(30)
	df

	# ## IV.8. *HEATMAPS
	#print(df.columns)
	# Assuming df_merged is your DataFrame
	# Some inputs carry the sample column as 'Sample_ID.1'; normalise its name
	if 'Sample_ID.1' in df.columns:
	    df = df.rename(columns={'Sample_ID.1': 'Sample_ID'})

	# Samples to keep: the list stored in stored_variables.json (already loaded
	# into stored_vars, so the file does not need to be reopened here)
	ls_samples = stored_vars['ls_samples']
	keep = ls_samples

	keep_cell_type = ['ENDOTHELIAL', 'CANCER', 'STROMA', 'IMMUNE']
	test2_df = df.loc[(df['cell_type'].isin(keep_cell_type))
	                  & (df['Sample_ID'].isin(keep)), :].copy()

	# Randomly pick up to 20,000 distinct rows of that subset. Sampling without
	# replacement avoids duplicated cells, and capping the size at the number of
	# available rows also covers small or empty subsets.
	random_rows = np.random.choice(len(test2_df), min(20000, len(test2_df)), replace=False)
	df2 = test2_df.iloc[random_rows, :].copy()
	#print(df2)


	# ### COLORS

	# #### SAMPLES COLORS
	# One colour per sample. The palette is sized so that every Sample_ID present
	# in the data gets a colour even when ls_samples is shorter than the list of
	# unique IDs (zip() would otherwise silently leave samples uncoloured).
	sample_ids = df.Sample_ID.unique()
	color_values = sb.color_palette("husl", n_colors=max(len(ls_samples), len(sample_ids)))
	sb.palplot(sb.color_palette(color_values))

	# TMA samples get shades of gray instead
	TMA_samples = [s for s in sample_ids if 'TMA' in s]
	TMA_color_values = sb.color_palette(n_colors=len(TMA_samples), palette="gray")
	sb.palplot(sb.color_palette(TMA_color_values))

	# Store in a dictionary
	color_dict = dict(zip(sample_ids, color_values))

	# Replace all TMA samples' colors with gray (next gray shade for each TMA sample)
	for key, gray in zip(TMA_samples, TMA_color_values):
	    color_dict[key] = gray

	# NOTE(review): color_dict_to_df is not defined in this file; presumably it is
	# provided by `from my_modules import *` -- confirm.
	color_df_sample = color_dict_to_df(color_dict, "Sample_ID")

	# Save to file in metadata directory
	filename = os.path.join(metadata_dir, "sample_color_data.csv")
	color_df_sample.to_csv(filename, index=False)

	# Legend of sample info only
	# Legend of sample info only: draw one zero-sized bar per sample so that
	# matplotlib has a handle for each colour, then save just the legend.
	g = plt.figure(figsize=(1, 1)).add_subplot(111)
	g.axis('off')
	handles = [
	    g.bar(0, 0, color=sample_color, label=sample_name, linewidth=0)
	    for sample_name, sample_color in color_dict.items()
	]
	first_legend = plt.legend(handles=handles, loc='upper right', title='Sample')

	filename = os.path.join(metadata_images_dir, "Sample_legend.png")
	plt.savefig(filename, bbox_inches='tight')

	# Read the sample colour table back from disk and turn it into a lookup.
	filename = "sample_color_data.csv"
	filename = os.path.join(metadata_dir, filename)

	# Check file exists
	#if not os.path.exists(filename):
	# print("WARNING: Could not find desired file: " + filename)
	#else :
	# print("The",filename,"file was imported for further analysis!")

	# Open, read in information
	# NOTE: `df` is rebound to the colour table here; the cell data stays
	# reachable through df2 / test2_df.
	df = pd.read_csv(filename, header = 0)
	df = df.drop(columns = ['hex'])

	# our tuple of float values for rgb, (r, g, b) was read in
	# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
	# substrings and convert them back into floats
	# NOTE(review): the rgb_tuple_from_str defined in this file appears further
	# down, so at this point the name must come from elsewhere (presumably
	# `from my_modules import *`) -- confirm.
	df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)

	# Verify size
	#print("Verifying data read from file is the correct length...\n")
	#verify_line_no(filename, df.shape[0] + 1)

	# Turn into dictionary: Sample_ID -> rgb value
	sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()

	# Print information
	#print('sample_color_dict =\n',sample_color_dict)


	# #### CELL TYPES COLORS

	# Define your custom colors for each cell type (rgb components as 0-1 floats)
	custom_colors = {
	'CANCER': (0.1333, 0.5451, 0.1333),
	'STROMA': (0.4, 0.4, 0.4),
	'IMMUNE': (1, 1, 0),
	'ENDOTHELIAL': (0.502, 0, 0.502)
	}

	# Retrieve the list of cell types
	cell_types = list(custom_colors.keys())

	# Extract the corresponding colors from the dictionary
	color_values = [custom_colors[cell] for cell in cell_types]

	# Display the colors
	sb.palplot(sb.color_palette(color_values))

	# Store in a dictionary
	celltype_color_dict = dict(zip(cell_types, color_values))
	celltype_color_dict

	# Save color information (mapping and legend) to metadata directory
	# Create dataframe
	# NOTE(review): color_dict_to_df is not defined in this file; presumably it is
	# provided by `from my_modules import *` -- confirm.
	celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
	celltype_color_df.head()

	# Save to file in metadata directory
	filename = "celltype_color_data.csv"
	filename = os.path.join(metadata_dir, filename)
	celltype_color_df.to_csv(filename, index = False)
	#print("File" + filename + " was created!")

	# Legend of cell type info only
	# Legend of cell type info only: draw one zero-sized bar per cell type so that
	# matplotlib has a handle for each colour, then save just the legend.
	g = plt.figure(figsize=(1, 1)).add_subplot(111)
	g.axis('off')
	handles = []
	for item in celltype_color_dict.keys():
	    h = g.bar(0, 0, color=celltype_color_dict[item],
	              label=item, linewidth=0)
	    handles.append(h)
	# No trailing comma here: it used to turn first_legend into a 1-tuple
	# instead of the Legend object.
	first_legend = plt.legend(handles=handles, loc='upper right', title='Cell type')

	filename = "Celltype_legend.png"
	filename = os.path.join(metadata_images_dir, filename)
	plt.savefig(filename, bbox_inches='tight')

	# Read the cell type colour table back from disk and turn it into a lookup.
	filename = "celltype_color_data.csv"
	filename = os.path.join(metadata_dir, filename)

	# Check file exists
	#if not os.path.exists(filename):
	# print("WARNING: Could not find desired file: "+filename)
	#else :
	# print("The",filename,"file was imported for further analysis!")

	# Open, read in information
	df = pd.read_csv(filename, header = 0)
	df = df.drop(columns = ['hex'])

	# our tuple of float values for rgb, (r, g, b) was read in
	# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
	# substrings and convert them back into floats
	df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis = 1)

	# Verify size
	#print("Verifying data read from file is the correct length...\n")
	#verify_line_no(filename, df.shape[0] + 1)

	# Turn into dictionary: cell_type -> rgb value
	cell_type_color_dict = df.set_index('cell_type')['rgb'].to_dict()

	# Print information
	#print('cell_type_color_dict =\n',cell_type_color_dict)

	# Per-row colours for the sampled cells in df2; a value without an entry in
	# the dictionary maps to NaN.
	sample_row_colors =df2.Sample_ID.map(sample_color_dict)
	#print(sample_row_colors[1:5])

	cell_type_row_colors = df2.cell_type.map(cell_type_color_dict)
	#print(cell_type_row_colors[1:5])


	# ## Cell Subtype Colours
	import pandas as pd
	import os

	def rgb_tuple_from_str(rgb_str):
	    """Convert a string such as ``'(0.1, 0.2, 0.3)'`` into a tuple of floats.

	    Parentheses, spaces and any ``np.float64`` wrapper text are removed before
	    the comma-separated components are parsed.

	    Returns
	    -------
	    tuple of float, or None
	        ``None`` when a component cannot be parsed as a float.
	    """
	    cleaned = rgb_str
	    for unwanted in ("(", ")", " ", "np.float64"):
	        cleaned = cleaned.replace(unwanted, "")
	    try:
	        return tuple(float(component) for component in cleaned.split(","))
	    except ValueError:
	        return None  # unparsable component

	# Read the cell subtype colour table from the metadata directory and turn it
	# into a lookup.
	filename = "cellsubtype_color_data.csv"
	filename = os.path.join(metadata_dir, filename)

	# Check file exists
	#if not os.path.exists(filename):
	# print("WARNING: Could not find desired file: " + filename)
	#else:
	# print("The", filename, "file was imported for further analysis!")

	# Open, read in information
	df = pd.read_csv(filename, header=0)
	df = df.drop(columns=['hex'])

	# Clean the 'rgb' column to remove unexpected strings
	# (rgb_tuple_from_str strips the same text again)
	df['rgb'] = df['rgb'].str.replace("np.float64", "", regex=False)

	# Apply the function to convert string to tuple of floats
	df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

	# Verify size
	#print("Verifying data read from file is the correct length...\n")
	# verify_line_no(filename, df.shape[0] + 1)

	# Turn into dictionary: cell_subtype -> rgb value
	cell_subtype_color_dict = df.set_index('cell_subtype')['rgb'].to_dict()

	# Print information
	#print('cell_subtype_color_dict =\n', cell_subtype_color_dict)

	df2

	# Colors dictionaries
	sample_row_colors =df2.Sample_ID.map(sample_color_dict)
	#print(sample_row_colors[1:5])

	# NOTE(review): requires a 'cell_subtype' column in df2; nothing in the code
	# shown above creates it, so it has to come from the input files -- confirm.
	cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)
	#print(cell_subtype_row_colors[1:5])


	# #### Cell Type
	# Widgets and placeholders of the interactive dot plot / reconstruction view.
	df
	#print(f"Loaded sample files: {ls_samples}")
	# NOTE(review): at this point `df` is the cell subtype colour table read just
	# above, so these are that table's columns, not intensity columns.
	selected_intensities = list(df.columns)
	selected_intensities = list(df.columns)
	#print(selected_intensities)
	df
	df2
	# From here on `df` is the random subset of cells held in df2; the callbacks
	# defined below read this global.
	df = df2
	df
	import json
	import pandas as pd
	import numpy as np
	import panel as pn
	import plotly.graph_objects as go

	pn.extension('plotly')
	# Load the sample list from the JSON file
	with open(stored_variables_path, 'r') as f:
	json_data = json.load(f)

	ls_samples = json_data["ls_samples"]
	#print(f"Loaded sample files: {ls_samples}")

	# Checkbox group to select files
	checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=ls_samples)

	# Initially empty dropdowns for X and Y axis selection
	x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
	y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])

	# Input field for the number of random samples
	random_sample_input = pn.widgets.IntInput(name='Number of Random Samples', value=20000, step=100)

	# Sliders for interactive X and Y lines
	x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', start=0, end=1, step=0.01)
	y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', start=0, end=1, step=0.01)

	# Placeholder for the dot plot
	plot_placeholder = pn.pane.Plotly()

	# Placeholder for the digital reconstruction plot
	reconstruction_placeholder = pn.pane.Plotly()

	# Function to create the dot plot
	def create_dot_plot(selected_files, x_axis, y_axis, n_samples, x_line_pos, y_line_pos):
	    """Scatter two columns of the selected samples against each other.

	    Parameters
	    ----------
	    selected_files : list
	        Sample identifiers matched against the ``Sample_ID`` column.
	    x_axis, y_axis : str
	        Names of the columns plotted on the two axes.
	    n_samples : int or None
	        Maximum number of cells to draw. ``None`` or a negative number draws
	        every cell of the selection.
	    x_line_pos, y_line_pos : float
	        Positions of the dashed vertical and horizontal threshold lines.

	    Returns
	    -------
	    plotly.graph_objects.Figure
	        An empty figure when nothing is selected or a column does not exist.
	    """
	    if not selected_files:
	        return go.Figure()
	    if x_axis not in df.columns or y_axis not in df.columns:
	        return go.Figure()

	    test2_df = df.loc[df['Sample_ID'].isin(selected_files), :]
	    if n_samples is not None and 0 <= n_samples < len(test2_df):
	        # Draw distinct rows: sampling with replacement would plot some cells
	        # several times and show fewer distinct cells than requested.
	        test_df = test2_df.sample(n=n_samples)
	    else:
	        test_df = test2_df

	    fig = go.Figure()
	    title = 'Threshold'

	    fig.add_trace(go.Scatter(
	        x=test_df[x_axis],
	        y=test_df[y_axis],
	        mode='markers',
	        marker=dict(color='LightSkyBlue', size=2)
	    ))

	    # Add vertical and horizontal lines
	    fig.add_vline(x=x_line_pos, line_width=2, line_dash="dash", line_color="red")
	    fig.add_hline(y=y_line_pos, line_width=2, line_dash="dash", line_color="red")

	    fig.update_layout(
	        title=title,
	        plot_bgcolor='white',
	        autosize=True,
	        margin=dict(l=20, r=20, t=40, b=20),
	        xaxis=dict(title=x_axis, linecolor='black', range=[test_df[x_axis].min(), test_df[x_axis].max()]),
	        yaxis=dict(title=y_axis, linecolor='black', range=[test_df[y_axis].min(), test_df[y_axis].max()])
	    )
	    return fig

	def assign_cell_types_again():
	    """Recompute the ``cell_type`` column of the global ``df`` and return ``df``.

	    Thresholds and the cell type classification are re-read from
	    stored_variables.json on every call, so values saved since the previous
	    call are taken into account.
	    """
	    with open(stored_variables_path, 'r') as file:
	        stored_variables = json.load(file)
	    intensities = list(df.columns)

	    def assign_cell_type(row):
	        # The first column whose value exceeds its marker's threshold, and whose
	        # marker is listed under a cell type, decides the result.
	        for column in intensities:
	            marker = column.split('_')[0]  # Extract marker from intensity name
	            if marker not in stored_variables['thresholds']:
	                continue
	            if not row[column] > stored_variables['thresholds'][marker]:
	                continue
	            for cell_type, markers in stored_variables['cell_type_classification'].items():
	                if marker in markers:
	                    return cell_type
	        return 'STROMA'  # Default if no condition matches

	    df['cell_type'] = df.apply(assign_cell_type, axis=1)
	    return df

	# Function to create the digital reconstruction plot
	def create_reconstruction_plot(selected_files):
	    """Build the XY map of the selected samples, coloured by cell type.

	    Cell types are recomputed with ``assign_cell_types_again`` before plotting.

	    Parameters
	    ----------
	    selected_files : list
	        Sample identifiers matched against the ``Sample_ID`` column.

	    Returns
	    -------
	    plotly.graph_objects.Figure
	        One scatter trace per (sample, cell type) pair; an empty figure when
	        nothing is selected.
	    """
	    if not selected_files:
	        return go.Figure()

	    def _plotly_rgb(color):
	        # Plotly reads the channels of an 'rgb(...)' string on a 0-255 scale,
	        # whereas the colour dictionaries built in this file hold 0-1 floats
	        # (see custom_colors), which would all render as almost black.
	        if color is None:
	            return 'rgb(128, 128, 128)'  # cell type without a usable colour
	        channels = [float(channel) for channel in color]
	        if all(0 <= channel <= 1 for channel in channels):
	            channels = [int(round(channel * 255)) for channel in channels]
	        return 'rgb(' + ', '.join(str(channel) for channel in channels) + ')'

	    df = assign_cell_types_again()
	    fig = go.Figure()

	    # Rows of every selected sample; also used for the axis ranges so the view
	    # covers all plotted samples rather than only the last one.
	    selected = df.loc[df['Sample_ID'].isin(selected_files),
	                      ['Sample_ID', 'Nuc_X', 'Nuc_Y_Inv', 'cell_type']]

	    for sample_id in selected_files:
	        location_colors = selected.loc[selected['Sample_ID'] == sample_id]
	        for celltype in location_colors['cell_type'].unique():
	            cells = location_colors.loc[location_colors['cell_type'] == celltype]
	            fig.add_scatter(
	                mode='markers',
	                marker=dict(size=3, opacity=0.5, color=_plotly_rgb(cell_type_color_dict.get(celltype))),
	                x=cells['Nuc_X'],
	                y=cells['Nuc_Y_Inv'],
	                name=celltype
	            )

	    # The title names every selected sample (the text before the first '_')
	    title = ", ".join(sample.split('_')[0] for sample in selected_files) + " Background Subtracted XY Map cell types"

	    xaxis = dict(title='Nuc_X', linecolor='black')
	    yaxis = dict(title='Nuc_Y_Inv', linecolor='black')
	    if not selected.empty:
	        xaxis['range'] = [selected['Nuc_X'].min(), selected['Nuc_X'].max()]
	        yaxis['range'] = [selected['Nuc_Y_Inv'].min(), selected['Nuc_Y_Inv'].max()]

	    fig.update_layout(
	        title=title,
	        plot_bgcolor='white',
	        autosize=True,
	        margin=dict(l=20, r=20, t=40, b=20),
	        legend=dict(
	            title='Cell Types',
	            font=dict(
	                family='Arial',
	                size=12,
	                color='black'
	            ),
	            bgcolor='white',
	            bordercolor='black',
	            borderwidth=0.4,
	            itemsizing='constant'
	        ),
	        xaxis=xaxis,
	        yaxis=yaxis
	    )

	    return fig

	def update_dropdown_options(event):
	    """Refresh the axis dropdowns when the file selection changes.

	    With at least one file selected, both dropdowns offer every column of the
	    global ``df`` whose name contains ``'_Intensity_Average'``; otherwise both
	    are emptied.

	    Parameters
	    ----------
	    event
	        Watcher event; not used.
	    """
	    if checkbox_group.value:
	        # The available columns do not depend on which rows are selected, so
	        # there is no need to filter and copy the DataFrame to list them.
	        options = [col for col in df.columns if '_Intensity_Average' in col]
	    else:
	        options = []
	    x_axis_dropdown.options = options
	    y_axis_dropdown.options = list(options)

	def update_slider_ranges(event):
	    """Rescale the two threshold-line sliders to the selected data.

	    Each slider gets a range symmetric around zero that is wide enough for
	    both the smallest and the largest value of its column, and is reset to 0.

	    Parameters
	    ----------
	    event
	        Watcher event; not used.
	    """
	    selected_files = checkbox_group.value
	    x_axis = x_axis_dropdown.value
	    y_axis = y_axis_dropdown.value
	    if not (selected_files and x_axis and y_axis):
	        return

	    rows = df['Sample_ID'].isin(selected_files)
	    for slider, axis in ((x_line_slider, x_axis), (y_line_slider, y_axis)):
	        if axis not in df.columns:
	            continue  # stale dropdown value
	        values = df.loc[rows, axis]
	        # Use the larger absolute extreme: with the maximum alone, values
	        # below -max could not be reached by the slider.
	        limit = max(abs(values.min()), abs(values.max()))
	        if not np.isfinite(limit) or limit == 0:
	            continue  # no data, or a degenerate range: keep the current bounds
	        slider.start = -limit
	        slider.end = limit
	        slider.value = 0

	def on_value_change(event):
	    """Rebuild the dot plot and the reconstruction plot from the current widget values.

	    Parameters
	    ----------
	    event
	        Watcher event; not used.
	    """
	    files = checkbox_group.value
	    dot_figure = create_dot_plot(
	        files,
	        x_axis_dropdown.value,
	        y_axis_dropdown.value,
	        random_sample_input.value,
	        x_line_slider.value,
	        y_line_slider.value,
	    )
	    map_figure = create_reconstruction_plot(files)
	    plot_placeholder.object = dot_figure
	    reconstruction_placeholder.object = map_figure

	# Link value changes to function
	# Wire the widgets to their callbacks.
	# The file selection refreshes the dropdown options and the slider ranges;
	# NOTE(review): it is not linked to on_value_change, so the plots are only
	# rebuilt when one of the other widgets changes -- confirm this is intended.
	checkbox_group.param.watch(update_dropdown_options, 'value')
	checkbox_group.param.watch(update_slider_ranges, 'value')
	x_axis_dropdown.param.watch(update_slider_ranges, 'value')
	y_axis_dropdown.param.watch(update_slider_ranges, 'value')
	x_axis_dropdown.param.watch(on_value_change, 'value')
	y_axis_dropdown.param.watch(on_value_change, 'value')
	random_sample_input.param.watch(on_value_change, 'value')
	x_line_slider.param.watch(on_value_change, 'value')
	y_line_slider.param.watch(on_value_change, 'value')

	# Layout: controls on top, the two plots side by side underneath
	plot_with_reconstruction = pn.Column(
	"## Select Files to Construct Dot Plot",
	checkbox_group,
	x_axis_dropdown,
	y_axis_dropdown,
	random_sample_input,
	pn.Row(x_line_slider, y_line_slider),
	pn.Row(
	pn.Column(
	"## Dot Plot",
	pn.Column(plot_placeholder)),
	pn.Column(
	"## Digital Reconstruction Plot",
	reconstruction_placeholder),
	))

	# Serve the app
	#plot_with_reconstruction.show()

	# ## MAKE HEATMAPS

	# ### Cell Subtype
	# Create data structure to hold everything we need for row/column annotations
	# annotations is a dictionary
	## IMPORTANT - if you use 'annotations', it MUST have both 'rows' and 'cols'
	## objects inside. These can be empty lists, but they must be there!
	# Container for the heat-map annotations: filled below with a 'rows' and a
	# 'cols' list.
	anns = {}

	# create a data structure to hold everything we need for only row annotations
	# row_annotations is a list, where each item therein is a dictionary corresponding
	# to all of the data pertaining to that particular annotation
	# Adding each item (e.g., Sample, then Cluster), one at a time to ensure ordering
	# is as anticipated on figure
	row_annotations = []
	row_annotations.append({'label':'Sample',
	'type':'row',
	'mapping':sample_row_colors,
	'dict':sample_color_dict,
	'location':'center left',
	'bbox_to_anchor':(0.1, 0.9)})
	row_annotations.append({'label':'Cell type',
	'type':'row',
	'mapping':cell_type_row_colors,
	'dict':cell_type_color_dict,
	'location':'center left',
	'bbox_to_anchor':(0.17, 0.9)})
	anns['rows'] = row_annotations

	# Now we repeat the process for column annotations (none are defined)
	col_annotations = []
	anns['cols'] = col_annotations
	# To simplify marker display in the following figures (heatmap, etc):
	# keep only the part of each short name before the first underscore
	figure_marker_names = {key: value.split('_')[0] for key, value in full_to_short_names.items()}
	not_intensities
	df2
	# NOTE(review): the result of drop() is discarded, so df2 keeps its
	# 'cell_subtype' column; the call still raises KeyError if the column is absent.
	df2.drop('cell_subtype', axis = 'columns')
	# Replaces the list read from not_intensities.csv with a hard-coded one
	not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
	'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID','cell_type', 'cell_subtype', 'cluster','ID',
	'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']
	# assign_cell_types_again() returns the global df after recomputing 'cell_type'
	df2 = assign_cell_types_again()
	# NOTE(review): result discarded again, see above
	df2.drop('cell_subtype', axis = 'columns')
	df2.head()
	# Save one heatmap

	data = df

	# Samples to keep: the list stored in stored_variables.json (already loaded
	# into stored_vars, so the file does not need to be reopened here)
	ls_samples = stored_vars['ls_samples']
	keep = list(ls_samples)
	keep_cell_type = ['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL']

	# Check the individual conditions
	cell_type_condition = data['cell_type'].isin(keep_cell_type)
	sample_id_condition = data['Sample_ID'].isin(keep)

	# Combine the conditions
	combined_condition = cell_type_condition & sample_id_condition

	# Apply the combined condition to filter the DataFrame
	test2_df = data.loc[combined_condition].copy()

	# Randomly pick up to 1000 distinct rows of that subset. Sampling without
	# replacement avoids duplicated cells, and capping the size at the number of
	# available rows also covers small or empty subsets.
	random_rows = np.random.choice(len(test2_df), min(1000, len(test2_df)), replace=False)
	test_df = test2_df.iloc[random_rows, :].copy()
	import json
	import panel as pn
	import param
	import pandas as pd

	# Initialize Panel extension
	# Load the Tabulator extension used by the classification tables below
	pn.extension('tabulator')

	# Path to the stored variables file
	# (rebinds file_path, which earlier held the path of the last sample file read)
	file_path = stored_variables_path

	# Load existing data from stored_variables.json with error handling
	def load_data():
	    """Return the content of the stored-variables JSON file.

	    Returns
	    -------
	    The parsed JSON document, or an empty dict when the file does not contain
	    valid JSON (the parse error is printed).
	    """
	    with open(file_path, 'r') as file:
	        try:
	            content = json.load(file)
	        except json.JSONDecodeError as e:
	            print(f"Error reading JSON file: {e}")
	            content = {}
	    return content

	# `data` is rebound from the cell DataFrame to the content of the JSON file;
	# save_data() later updates and writes this object.
	data = load_data()

	# Define markers, cell types, and cell subtypes from the loaded data
	# (each defaults to an empty list when its key is missing)
	markers = data.get('markers', [])
	cell_types = data.get('cell_type', [])
	cell_subtypes = data.get('cell_subtype', [])

	# Sanitize option names
	def sanitize_options(options):
	    """Return the option names with problematic characters replaced.

	    Spaces become ``_``, ``+`` becomes ``plus``, ``α`` becomes ``a`` and
	    apostrophes are removed.
	    """
	    replacements = str.maketrans({' ': '_', '+': 'plus', 'α': 'a', "'": None})
	    sanitized = []
	    for option in options:
	        sanitized.append(option.translate(replacements))
	    return sanitized

	# Sanitised names; these become the row labels of the classification tables
	sanitized_cell_types = sanitize_options(cell_types)
	sanitized_cell_subtypes = sanitize_options(cell_subtypes)

	# Helper function to create a Parameterized class and DataFrame
	def create_classification_df(items, item_label):
	    """Build the marker-selection table for a list of cell types or subtypes.

	    Parameters
	    ----------
	    items : list of str
	        Row labels (one row per item).
	    item_label : str
	        Name of the first column, which holds the items.

	    Returns
	    -------
	    pandas.DataFrame
	        Column ``item_label`` followed by one column per entry of the global
	        ``markers`` list, every marker cell initialised to ``False``. An empty
	        ``items`` list yields an empty table that still has all the columns
	        (selecting the columns from an empty frame used to raise KeyError).
	    """
	    labels = list(items)
	    table = {item_label: labels}
	    for marker in markers:
	        table[marker] = [False] * len(labels)
	    return pd.DataFrame(table, columns=[item_label] + list(markers))

	# Create DataFrames for cell types and cell subtypes (rows use the
	# sanitized names, so those are the names that end up being saved)
	cell_type_df = create_classification_df(sanitized_cell_types, 'CELL_TYPE')
	cell_subtype_df = create_classification_df(sanitized_cell_subtypes, 'CELL_SUBTYPE')

	# Define formatters for Tabulator widgets: every marker column uses the
	# 'tickCross' formatter
	tabulator_formatters = {marker: {'type': 'tickCross'} for marker in markers}

	# Create Tabulator widgets
	cell_type_table = pn.widgets.Tabulator(cell_type_df, formatters=tabulator_formatters)
	cell_subtype_table = pn.widgets.Tabulator(cell_subtype_df, formatters=tabulator_formatters)

	# Save functions for cell types and cell subtypes
	def save_data(table, classification_key, item_label):
	    """Persist the ticked markers of a classification table to the JSON file.

	    Args:
	        table: Widget whose ``value`` is the edited classification DataFrame.
	        classification_key: Key of the stored-variables JSON to write under.
	        item_label: Column of the table that holds the row names.

	    The result, ``{row name: [ticked markers]}``, is stored in the
	    module-level ``data`` dict and written to ``file_path``.
	    """
	    # Cells may hold tick/cross glyphs instead of booleans.
	    df_bool = table.value.replace({'✔': True, '✘': False})

	    classification = {}
	    for _, row in df_bool.iterrows():
	        classification[row[item_label]] = [marker for marker in markers if row[marker]]

	    data[classification_key] = classification

	    # Re-read the file before writing: other callbacks in this file save the
	    # same JSON from their own in-memory copy, and dumping the stale `data`
	    # dict wholesale would silently drop the keys they added.
	    try:
	        with open(file_path, 'r') as file:
	            on_disk = json.load(file)
	    except (FileNotFoundError, json.JSONDecodeError):
	        on_disk = {}
	    if not isinstance(on_disk, dict):
	        on_disk = {}

	    merged = {**data, **on_disk, classification_key: classification}
	    with open(file_path, 'w') as file:
	        json.dump(merged, file, indent=4)

	# Button actions
	def save_cell_type_selections(event):
	    """Button callback: save the selections of the cell-type table.

	    ``event`` is supplied by the button click and is not used.
	    """
	    save_data(
	        table=cell_type_table,
	        classification_key='cell_type_classification',
	        item_label='CELL_TYPE',
	    )

	def save_cell_subtype_selections(event):
	    """Button callback: save the selections of the cell-subtype table.

	    ``event`` is supplied by the button click and is not used.
	    """
	    save_data(
	        table=cell_subtype_table,
	        classification_key='cell_subtype_classification',
	        item_label='CELL_SUBTYPE',
	    )

	# Create save buttons and attach their click callbacks
	save_cell_type_button = pn.widgets.Button(name='Save Cell Type Selections', button_type='primary')
	save_cell_type_button.on_click(save_cell_type_selections)

	save_cell_subtype_button = pn.widgets.Button(name='Save Cell Subtype Selections', button_type='primary')
	save_cell_subtype_button.on_click(save_cell_subtype_selections)

	# Cell-type editor: heading, editable table, save button
	cell_type_classification_app_main = pn.Column(
	    pn.pane.Markdown("# Cell Type Classification"),
	    cell_type_table,
	    save_cell_type_button
	)

	# Cell-subtype editor: heading, editable table, save button
	cell_subtype_classification_app_main = pn.Column(
	    pn.pane.Markdown("# Cell Subtype Classification"),
	    cell_subtype_table,
	    save_cell_subtype_button
	)
	#cell_subtype_classification_app_main.show()

	import json
	import panel as pn

	# Load existing stored variables. A second load wrapped in
	# `except FileNotFoundError` used to follow the widget loop; it could never
	# reach its handler because this unguarded load fails first on a missing
	# file, so it has been folded into this one.
	with open(stored_variables_path, 'r') as f:
	    stored_variables = json.load(f)

	# Thresholds saved by an earlier run, if any
	saved_subtype_thresholds = stored_variables.get('subtype_thresholds') or {}

	# Initialize a dictionary to hold threshold inputs
	subtype_threshold_inputs = {}

	# One input per marker, pre-filled with the saved threshold when there is
	# one. Starting every input at 0.0 made "Save Thresholds" overwrite the
	# saved values with zeros unless each one was typed in again first.
	for marker in stored_variables['markers']:
	    saved_value = saved_subtype_thresholds.get(marker)
	    subtype_threshold_inputs[marker] = pn.widgets.FloatInput(
	        name=f'{marker} Threshold',
	        value=0.0 if saved_value is None else float(saved_value),
	        step=0.1,
	    )

	# Check if the 'subtype_thresholds' field is present; if not, add it using
	# the current widget values and write the file back
	if 'subtype_thresholds' not in stored_variables:
	    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}
	    stored_variables['subtype_thresholds'] = subtype_thresholds
	    with open(stored_variables_path, 'w') as f:
	        json.dump(stored_variables, f, indent=4)

	# Save button to save thresholds to stored_variables.json
	def save_thresholds(event):
	    """Button callback: write the current threshold inputs to the JSON file.

	    ``event`` is supplied by the button click and is not used. The values
	    are also stored in the module-level ``stored_variables`` dict under
	    'subtype_thresholds'.
	    """
	    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}
	    stored_variables['subtype_thresholds'] = subtype_thresholds

	    # Re-read the file before writing: the classification tables save to the
	    # same JSON from a different in-memory dict, and dumping the stale
	    # `stored_variables` wholesale would erase what they stored.
	    try:
	        with open(stored_variables_path, 'r') as f:
	            on_disk = json.load(f)
	    except (FileNotFoundError, json.JSONDecodeError):
	        on_disk = {}
	    if not isinstance(on_disk, dict):
	        on_disk = {}

	    merged = {**stored_variables, **on_disk, 'subtype_thresholds': subtype_thresholds}
	    with open(stored_variables_path, 'w') as f:
	        json.dump(merged, f, indent=4)
	save_button = pn.widgets.Button(name='Save Thresholds', button_type='primary')
	save_button.on_click(save_thresholds)

	# Create a GridSpec layout
	subtype_grid = pn.GridSpec()

	# Place the threshold inputs row by row, five to a row
	for position, marker in enumerate(stored_variables['markers']):
	    subtype_grid[position // 5, position % 5] = subtype_threshold_inputs[marker]

	# Grid coordinates of the cell that follows the last input
	row, col = divmod(len(stored_variables['markers']), 5)

	# Save button: one row below that cell, spanning the five columns
	subtype_grid[row + 1, :5] = save_button

	# Panel layout
	subtype_threshold_panel = pn.Column(
	    pn.pane.Markdown("## Define Thresholds for Markers"),
	    subtype_grid)

	# Display the panel
	#subtype_threshold_panel.show()

	# Reload the stored variables from disk into the module-level dict
	with open(stored_variables_path, 'r') as file:
	    stored_variables = json.load(file)

	# Every column name of df, in column order
	intensities = [column for column in df.columns]
	def assign_cell_subtypes(row):
	    """Return the cell subtype for one row of the table.

	    Columns listed in the module-level ``intensities`` are visited in order.
	    The marker is the part of the column name before the first '_'. For the
	    first marker whose value in ``row`` exceeds its entry in
	    ``stored_variables['subtype_thresholds']`` and that appears in some
	    marker list of ``stored_variables['cell_subtype_classification']``, the
	    first such subtype is returned. Without any match the result is 'DC'.
	    """
	    for column in intensities:
	        marker = column.split('_')[0]  # Extract marker from intensity name
	        thresholds = stored_variables['subtype_thresholds']
	        if marker not in thresholds:
	            continue
	        if not row[column] > thresholds[marker]:
	            continue
	        classification = stored_variables['cell_subtype_classification']
	        for subtype_name, subtype_markers in classification.items():
	            if marker in subtype_markers:
	                return subtype_name
	    return 'DC'

	# Rebuild df through assign_cell_types_again() (defined elsewhere in this
	# file), then label every row with assign_cell_subtypes
	df = assign_cell_types_again()
	df['cell_subtype'] = df.apply(lambda row: assign_cell_subtypes(row), axis=1)

	# Bare expressions left over from the notebook; they only evaluate the
	# names and have no other effect in a script
	df
	data
	# Define a color dictionary: subtype name -> (r, g, b) with channels in 0-1.
	# 'DC'/'Stroma' and 'B'/'Endothelial' share the same colour.
	cell_subtype_color_dict = {
	    'DC': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
	    'B': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
	    'TCD4': (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),
	    'Exhausted TCD4': (0.2, 0.6274509803921569, 0.17254901960784313),
	    'Exhausted TCD8': (0.984313725490196, 0.6039215686274509, 0.6),
	    'TCD8': (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),
	    'M1': (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),
	    'M2': (1.0, 0.4980392156862745, 0.0),
	    'Treg': (0.792156862745098, 0.6980392156862745, 0.8392156862745098),
	    'Other CD45+': (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),
	    'Cancer': (1.0, 1.0, 0.6),
	    'myCAF αSMA+': (0.6941176470588235, 0.34901960784313724, 0.1568627450980392),
	    'Stroma': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
	    'Endothelial': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)
	}
	# Add the 'rgb' prefix to the colors: each value becomes the text 'rgb'
	# followed by the tuple's repr, e.g. 'rgb(1.0, 1.0, 0.6)'.
	# NOTE(review): the channels stay in 0-1; rgb() strings conventionally use
	# 0-255 - confirm how consumers of this dict interpret them. Code further
	# down strips the first three characters again, so the exact format matters.
	cell_subtype_color_dict = {k: f"rgb{v}" for k, v in cell_subtype_color_dict.items()}

	# Load stored variables from JSON file
	def load_stored_variables(path):
	    """Return the parsed JSON content of the file at ``path``."""
	    with open(path, 'r') as handle:
	        text = handle.read()
	    return json.loads(text)

	# Get subtype intensities columns: the df columns whose name contains
	# '_Intensity_Average', in column order (computed once, at this point)
	subtype_intensities = [col for col in df.columns if '_Intensity_Average' in col]

	# Assign cell subtype based on thresholds and classifications
	def assign_cell_subtype(row, stored_variables=None):
	    """Return the cell subtype for one row of the intensity table.

	    Columns listed in the module-level ``subtype_intensities`` are visited in
	    order. The marker is the part of the column name before the first '_'.
	    For the first marker whose value in ``row`` exceeds its entry in
	    'subtype_thresholds' and that appears in some marker list of
	    'cell_subtype_classification', the first such subtype is returned.

	    Args:
	        row: One DataFrame row holding the ``subtype_intensities`` columns.
	        stored_variables: Already-parsed stored-variables dict. When omitted,
	            the JSON file at ``stored_variables_path`` is read.

	    Returns:
	        The matching subtype name, or 'DC' when nothing matches.
	    """
	    if stored_variables is None:
	        stored_variables = load_stored_variables(stored_variables_path)
	    for subtype_intensity in subtype_intensities:
	        marker = subtype_intensity.split('_')[0]
	        if marker in stored_variables['subtype_thresholds']:
	            subtype_threshold = stored_variables['subtype_thresholds'][marker]
	            if row[subtype_intensity] > subtype_threshold:
	                for cell_subtype, markers in stored_variables['cell_subtype_classification'].items():
	                    if marker in markers:
	                        return cell_subtype  # Return the assigned subtype
	    return 'DC'  # Default value if no conditions match

	# Main function to assign cell subtypes to DataFrame
	def assign_cell_subtypes_again():
	    """Recompute ``df['cell_subtype']`` from the saved settings and return df.

	    The module-level ``df`` is modified in place.
	    """
	    # Parse the JSON once per pass. Reading it inside the row function meant
	    # one file open and parse for every row of df.
	    stored_variables = load_stored_variables(stored_variables_path)
	    df['cell_subtype'] = df.apply(lambda row: assign_cell_subtype(row, stored_variables), axis=1)
	    return df

	import json
	import pandas as pd
	import numpy as np
	import panel as pn
	import plotly.graph_objects as go

	pn.extension('plotly')

	# Read the sample list ("ls_samples") from the stored variables JSON
	with open(stored_variables_path, 'r') as f:
	    json_data = json.load(f)
	subtype_ls_samples = json_data["ls_samples"]

	# File picker: one checkbox per sample
	subtype_checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=subtype_ls_samples)

	# Axis pickers start without options
	subtype_x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
	subtype_y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])

	# How many rows to draw for the dot plot
	subtype_random_sample_input = pn.widgets.IntInput(name='Number of Random Samples', value=20000, step=100)

	# Positions of the vertical (X) and horizontal (Y) lines
	subtype_x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', start=0, end=1, step=0.01)
	subtype_y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', start=0, end=1, step=0.01)

	# Empty panes that receive the dot plot and the digital reconstruction plot
	subtype_plot_placeholder = pn.pane.Plotly()
	subtype_reconstruction_placeholder = pn.pane.Plotly()

	def update_color_dict():
	    """Return the cell-subtype palette as ``'rgb(r, g, b)'`` strings.

	    Returns:
	        dict mapping each subtype name to an rgb() string with integer
	        channels in 0-255. 'DC'/'Stroma' and 'B'/'Endothelial' share a colour.
	    """
	    # Palette as (r, g, b) fractions in 0-1
	    unit_rgb = {
	        'DC': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
	        'B': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
	        'TCD4': (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),
	        'Exhausted TCD4': (0.2, 0.6274509803921569, 0.17254901960784313),
	        'Exhausted TCD8': (0.984313725490196, 0.6039215686274509, 0.6),
	        'TCD8': (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),
	        'M1': (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),
	        'M2': (1.0, 0.4980392156862745, 0.0),
	        'Treg': (0.792156862745098, 0.6980392156862745, 0.8392156862745098),
	        'Other CD45+': (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),
	        'Cancer': (1.0, 1.0, 0.6),
	        'myCAF αSMA+': (0.6941176470588235, 0.34901960784313724, 0.1568627450980392),
	        'Stroma': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
	        'Endothelial': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
	    }
	    # rgb() strings take channels in 0-255. Prefixing the raw 0-1 tuple with
	    # 'rgb' produced values such as 'rgb(0.65, 0.80, 0.89)', which render as
	    # almost black, so the fractions are scaled first.
	    return {
	        subtype: "rgb({}, {}, {})".format(*(int(round(channel * 255)) for channel in fractions))
	        for subtype, fractions in unit_rgb.items()
	    }

	# Function to create the dot plot
	def create_subtype_dot_plot(subtype_selected_files, subtype_x_axis, subtype_y_axis, subtype_n_samples, subtype_x_line_pos, subtype_y_line_pos):
	    """Scatter two columns of df for the selected samples.

	    Args:
	        subtype_selected_files: Sample_ID values to include.
	        subtype_x_axis: Column of df plotted on the x axis.
	        subtype_y_axis: Column of df plotted on the y axis.
	        subtype_n_samples: Maximum number of rows to plot. None or a
	            non-positive value disables subsampling.
	        subtype_x_line_pos: x position of the dashed vertical line.
	        subtype_y_line_pos: y position of the dashed horizontal line.

	    Returns:
	        The scatter figure, or an empty figure when no files are selected,
	        an axis is not a column of df, or the selection matches no rows.
	    """
	    if not subtype_selected_files:
	        return go.Figure()
	    if subtype_x_axis not in df.columns or subtype_y_axis not in df.columns:
	        return go.Figure()

	    subtype_test2_df = df.loc[df['Sample_ID'].isin(subtype_selected_files), :]
	    if subtype_test2_df.empty:
	        # Without rows, min()/max() below would give NaN axis limits.
	        return go.Figure()

	    if subtype_n_samples and 0 < subtype_n_samples < len(subtype_test2_df):
	        # replace=False: a subsample drawn with replacement repeats rows, so
	        # fewer distinct cells than requested would be shown.
	        subtype_random_rows = np.random.choice(len(subtype_test2_df), subtype_n_samples, replace=False)
	        subtype_test_df = subtype_test2_df.iloc[subtype_random_rows, :]
	    else:
	        subtype_test_df = subtype_test2_df

	    x_values = subtype_test_df[subtype_x_axis]
	    y_values = subtype_test_df[subtype_y_axis]

	    fig = go.Figure()
	    fig.add_trace(go.Scatter(
	        x=x_values,
	        y=y_values,
	        mode='markers',
	        marker=dict(color='LightSkyBlue', size=2)
	    ))

	    # Add vertical and horizontal lines
	    fig.add_vline(x=subtype_x_line_pos, line_width=2, line_dash="dash", line_color="red")
	    fig.add_hline(y=subtype_y_line_pos, line_width=2, line_dash="dash", line_color="red")

	    # Axis limits follow the plotted (possibly subsampled) points
	    fig.update_layout(
	        title='Threshold',
	        plot_bgcolor='white',
	        autosize=True,
	        margin=dict(l=20, r=20, t=40, b=20),
	        xaxis=dict(title=subtype_x_axis, linecolor='black', range=[x_values.min(), x_values.max()]),
	        yaxis=dict(title=subtype_y_axis, linecolor='black', range=[y_values.min(), y_values.max()])
	    )
	    return fig

	def create_subtype_reconstruction_plot(subtype_selected_files):
	    """Plot cell positions of the selected samples, coloured by subtype.

	    Every call re-runs assign_cell_subtypes_again() before plotting. All
	    selected samples are drawn into one figure, one trace per sample and
	    subtype; the title is derived from the last selected sample.

	    Args:
	        subtype_selected_files: Sample_ID values to draw.

	    Returns:
	        The scatter figure, or an empty figure when nothing is selected.
	    """
	    if not subtype_selected_files:
	        return go.Figure()

	    cell_subtype_color_dict = update_color_dict()
	    # Subtype names saved by the classification table went through
	    # sanitize_options (e.g. 'Exhausted_TCD4'), while the palette is keyed by
	    # the display names, so register the sanitized spelling of each key too.
	    for palette_name, palette_color in list(cell_subtype_color_dict.items()):
	        cell_subtype_color_dict.setdefault(sanitize_options([palette_name])[0], palette_color)
	    # Colour for subtypes the palette does not know; indexing the dict
	    # directly raised KeyError for them and left the pane without a plot.
	    fallback_color = 'grey'

	    df = assign_cell_subtypes_again()
	    subtype_fig = go.Figure()
	    title = ''

	    for sample_id in subtype_selected_files:
	        title = sample_id.split('_')[0] + " Background Subtracted XY Map cell subtypes"
	        location_colors = df.loc[df['Sample_ID'] == sample_id, ['Nuc_X', 'Nuc_Y_Inv', 'cell_subtype']]
	        for cellsubtype in location_colors['cell_subtype'].unique():
	            cells = location_colors.loc[location_colors['cell_subtype'] == cellsubtype]
	            subtype_fig.add_scatter(
	                mode='markers',
	                marker=dict(
	                    size=2,
	                    opacity=0.5,
	                    color=str(cell_subtype_color_dict.get(cellsubtype, fallback_color)),
	                ),
	                x=cells['Nuc_X'],
	                y=cells['Nuc_Y_Inv'],
	                name=cellsubtype
	            )

	    subtype_fig.update_xaxes(title_text='Nuc_X', linecolor='black')
	    subtype_fig.update_yaxes(title_text='Nuc_Y_Inv', linecolor='black')
	    subtype_fig.update_layout(
	        title=title,
	        plot_bgcolor='white',
	        legend=dict(
	            title='Cell Subtypes',  # Legend title
	            font=dict(
	                family='Arial',
	                size=12,
	                color='black'
	            ),
	            bgcolor='white',
	            bordercolor='black',
	            borderwidth=0.4,
	            itemsizing='constant'
	        )
	    )
	    # Save the figure as an image if needed
	    #subtype_fig.write_image(output_images_dir + "/" + title.replace(" ", "_") + ".png", width=1200, height=800, scale=4)

	    return subtype_fig

	def update_subtype_dropdown_options(event):
	    """Refresh the axis dropdowns after the file selection changed.

	    With at least one file selected, both dropdowns list the df columns
	    whose name contains '_Intensity_Average'; otherwise both are emptied.
	    ``event`` is supplied by the watcher and is not used.
	    """
	    if subtype_checkbox_group.value:
	        # Column names do not depend on which rows are selected, so read them
	        # from df directly instead of copying the filtered frame first.
	        subtype_selected_intensities = [col for col in df.columns if '_Intensity_Average' in col]
	    else:
	        subtype_selected_intensities = []
	    subtype_x_axis_dropdown.options = subtype_selected_intensities
	    subtype_y_axis_dropdown.options = list(subtype_selected_intensities)

	def update_subtype_slider_ranges(event):
	    """Fit both line sliders to the selected data and reset them to 0.

	    Each slider gets a range symmetric around 0 that reaches the larger of
	    |min| and |max| of its axis column over the selected samples. A slider
	    is left untouched when that bound is zero or not finite (for example
	    when no row matches the selection). ``event`` is not used.
	    """
	    subtype_selected_files = subtype_checkbox_group.value
	    subtype_x_axis = subtype_x_axis_dropdown.value
	    subtype_y_axis = subtype_y_axis_dropdown.value
	    if not (subtype_selected_files and subtype_x_axis and subtype_y_axis):
	        return

	    in_selection = df['Sample_ID'].isin(subtype_selected_files)
	    axis_sliders = (
	        (subtype_x_axis, subtype_x_line_slider),
	        (subtype_y_axis, subtype_y_line_slider),
	    )

	    adjusted = []
	    for axis, slider in axis_sliders:
	        values = df.loc[in_selection, axis]
	        # Using only max() left values below -|max| out of the slider's reach.
	        bound = max(abs(values.min()), abs(values.max()))
	        if not np.isfinite(bound) or bound == 0:
	            continue  # start == end or NaN limits would break the slider
	        slider.start = -bound
	        slider.end = bound
	        adjusted.append(slider)

	    # Reset the positions only after both ranges are in place
	    for slider in adjusted:
	        slider.value = 0

	def on_subtype_value_change(event):
	    """Rebuild both plots from the current widget values.

	    ``event`` is supplied by the watcher and is not used. Both figures are
	    created first and only then placed into their panes.
	    """
	    selected_files = subtype_checkbox_group.value
	    dot_plot = create_subtype_dot_plot(
	        selected_files,
	        subtype_x_axis_dropdown.value,
	        subtype_y_axis_dropdown.value,
	        subtype_random_sample_input.value,
	        subtype_x_line_slider.value,
	        subtype_y_line_slider.value,
	    )
	    reconstruction = create_subtype_reconstruction_plot(selected_files)
	    subtype_plot_placeholder.object = dot_plot
	    subtype_reconstruction_placeholder.object = reconstruction

	# Link value changes to function
	subtype_checkbox_group.param.watch(update_subtype_dropdown_options, 'value')
	subtype_checkbox_group.param.watch(update_subtype_slider_ranges, 'value')
	# Redraw on a changed file selection as well. Without this watcher the
	# plots only refreshed when the change happened to alter a dropdown or
	# slider value, so ticking another sample left stale plots on screen.
	subtype_checkbox_group.param.watch(on_subtype_value_change, 'value')
	subtype_x_axis_dropdown.param.watch(update_subtype_slider_ranges, 'value')
	subtype_y_axis_dropdown.param.watch(update_subtype_slider_ranges, 'value')
	subtype_x_axis_dropdown.param.watch(on_subtype_value_change, 'value')
	subtype_y_axis_dropdown.param.watch(on_subtype_value_change, 'value')
	subtype_random_sample_input.param.watch(on_subtype_value_change, 'value')
	subtype_x_line_slider.param.watch(on_subtype_value_change, 'value')
	subtype_y_line_slider.param.watch(on_subtype_value_change, 'value')

	# Layout: controls on top, then the dot plot and the reconstruction plot
	# side by side
	plot_with_subtype_reconstruction = pn.Column(
	    "## Select Files to Construct Dot Plot",
	    subtype_checkbox_group,
	    subtype_x_axis_dropdown,
	    subtype_y_axis_dropdown,
	    subtype_random_sample_input,
	    pn.Row(subtype_x_line_slider, subtype_y_line_slider),
	    pn.Row(
	        pn.Column(
	            "## Dot Plot",
	            pn.Column(subtype_plot_placeholder)),
	        pn.Column(
	            "## Cell Subtype Digital Reconstruction Plot",
	            subtype_reconstruction_placeholder),
	    )
	)

	# Snapshot of the dropdown values at the moment this line runs; these two
	# module-level names are not updated when the dropdowns change later
	subtype_x_axis = subtype_x_axis_dropdown.value
	subtype_y_axis = subtype_y_axis_dropdown.value
	#print(subtype_x_axis ,subtype_y_axis)


	# Normalize the values in df2.cell_subtype (strip whitespace, lower-case)
	df2['cell_subtype'] = df2['cell_subtype'].str.strip().str.lower()

	# Normalize the keys in cell_subtype_color_dict the same way
	cell_subtype_color_dict = {k.strip().lower(): v for k, v in cell_subtype_color_dict.items()}

	# Map the cell_subtype values to colors
	cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)

	# Debugging: print the unique values and the resulting mapped colors
	#print("Unique values in df2.cell_subtype:", df2.cell_subtype.unique())
	#print("Keys in cell_subtype_color_dict:", cell_subtype_color_dict.keys())
	#print(cell_subtype_row_colors[1:5])
	# Bare expressions left over from the notebook (no effect in a script)
	data
	cell_subtype_color_dict
	# Remove the 'rgb' prefix: drops the first three characters of every value,
	# which therefore remain strings such as '(1.0, 1.0, 0.6)'

	cell_subtype_color_dict = {k: v[3:] for k, v in cell_subtype_color_dict.items()}
	cell_subtype_color_dict

	# Colors dictionaries
	sample_row_colors =df.Sample_ID.map(sample_color_dict)
	#print(sample_row_colors[1:5])

	# NOTE(review): the dict keys were lower-cased above, but df.cell_subtype is
	# not lower-cased in this section (only df2's column is), so values such as
	# 'DC' presumably map to NaN here - confirm how df and df2 are related.
	# This also overwrites the df2-based cell_subtype_row_colors from above.
	cell_subtype_row_colors = df.cell_subtype.map(cell_subtype_color_dict)
	#print(cell_subtype_row_colors[1:5])

	# Number of rows for each (cell_type, cell_subtype) pair
	counts = df.groupby(['cell_type', 'cell_subtype']).size().reset_index(name='count')
	counts

	# Share of each pair in the grand total, in percent. `total` is the sum
	# over all pairs, so the grouping by cell_subtype does not change the
	# arithmetic: every count is divided by the same overall total.
	total = sum(counts['count'])
	counts['percentage'] = counts.groupby('cell_subtype')['count'].transform(lambda x: (x / total) * 100)

	#print(counts)


	# ## IV.10. SAVE

	# Save the data by Sample_ID.
	# No existence check is needed: to_csv opens its target in write mode by
	# default, so the same call overwrites an existing file or creates a new one.
	for sample in ls_samples:
	    sample_id = sample
	    filename = os.path.join(output_data_dir, sample_id + "_" + step_suffix + ".csv")
	    df_save = df.loc[df['Sample_ID'] == sample_id, :]
	    df_save.to_csv(filename, index=True, index_label='ID')

	# All samples in one file
	filename = os.path.join(output_data_dir, "all_Samples_" + project_name + ".csv")
	df.to_csv(filename, index=True, index_label='ID')

	# ## Panel App
	# Create widgets and panes
	# (df_widget is not referenced by the layout built below)
	df_widget = pn.widgets.DataFrame(metadata, name="MetaData")
	# Define the content of the five tabs. intial_df, plot_with_reconstruction
	# and threshold_panel are defined elsewhere in this file.
	metadata_tab = pn.Column(pn.pane.Markdown("## Initial DataFrame"),intial_df)
	dotplot_tab = pn.Column(plot_with_reconstruction)
	celltype_classification_tab = pn.Column(cell_type_classification_app_main, threshold_panel)
	cellsubtype_classification_tab = pn.Column(cell_subtype_classification_app_main, subtype_threshold_panel)
	subtype_dotplot_tab = pn.Column(plot_with_subtype_reconstruction,)

	# Assemble the tabs into a GoldenTemplate application
	app4_5 = pn.template.GoldenTemplate(
	    site="Cyc-IF",
	    title="Marker Threshold & Classification",
	    main=[
	        pn.Tabs(
	            ("Metadata", metadata_tab),
	            ("Classify-Celltype-Marker",celltype_classification_tab),
	            ("Cell_Types", dotplot_tab),
	            ("Classify-Cell Subtype-Marker",cellsubtype_classification_tab),
	            ("Cell-Subtypes", subtype_dotplot_tab),
	            # ("Heatmap",pn.Column(celltype_heatmap, cell_subtype_heatmap))
	        )
	    ]
	)
	# Runs at module level, i.e. whenever this script is executed
	app4_5.show()