CycIF / Step5_Marker_Threshold_Classification.py
KashyapiNagaHarshitha's picture
Upload Step5_Marker_Threshold_Classification.py
6372547 verified
#!/usr/bin/env python
# coding: utf-8
# # IV. MARKERS THRESHOLDS NOTEBOOK
# ## IV.1. PACKAGES IMPORT
import os
import random
import re
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib.colors as mplc
import subprocess
import warnings
import panel as pn
import json
from scipy import signal
from scipy.stats import pearsonr
import plotly.figure_factory as ff
import plotly
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px
import sys
sys.setrecursionlimit(5000)
from my_modules import *
#Silence FutureWarnings & UserWarnings
warnings.filterwarnings('ignore', category= FutureWarning)
warnings.filterwarnings('ignore', category= UserWarning)
# ## IV.2. *DIRECTORIES
# Every path is resolved relative to the folder holding this script; the run
# configuration is read from the stored_variables.json file next to it.
present_dir = os.path.dirname(os.path.realpath(__file__))
stored_variables_path = os.path.join(present_dir, 'stored_variables.json')
with open(stored_variables_path, 'r') as config_file:
    stored_vars = json.load(config_file)

# Values taken from the configuration file
directory = stored_vars['base_dir']
input_path = os.path.join(present_dir, directory)
set_path = stored_vars['set_path']
selected_metadata_files = stored_vars['selected_metadata_files']
ls_samples = stored_vars['ls_samples']

# Aliases used by the rest of the script
base_dir = input_path
set_name = set_path
project_name = set_name  # Project name

step_suffix = 'mt'  # Current part (here part IV)
previous_step_suffix_long = "_zscore"  # Previous part (here ZSCORE NOTEBOOK)

# Folder names are <project_name><suffix>, located directly under base_dir
input_folder_name = project_name + previous_step_suffix_long
output_folder_name = project_name + "_" + step_suffix
metadata_folder_name = project_name + "_metadata"

# Initial input data directory
input_data_dir = os.path.join(base_dir, input_folder_name)
# Output directory of this step and its images subdirectory
output_data_dir = os.path.join(base_dir, output_folder_name)
output_images_dir = os.path.join(output_data_dir, "images")
# Metadata directory and its images subdirectory
metadata_dir = os.path.join(base_dir, metadata_folder_name)
metadata_images_dir = os.path.join(metadata_dir, "images")

# NOTE: this section only builds the paths; it does not create any directory.
# ## IV.3. FILES
# ### IV.3.1. METADATA
filename = os.path.join(metadata_dir, "marker_intensity_metadata.csv")
metadata = pd.read_csv(filename)
# Check the headers with compare_headers() (star-imported helper, presumably
# from my_modules), then discard every row that has a missing value.
exp_cols = ['Round','Target','Channel','target_lower','full_column','marker','localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")
metadata = metadata.dropna()

# ### IV.3.2. NOT_INTENSITIES
filename = os.path.join(metadata_dir, "not_intensities.csv")
with open(filename, 'r') as fh:
    # One entry per line: trim surrounding whitespace, then split on newlines
    not_intensities = fh.read().strip().split("\n")

# ### IV.3.3. FULL_TO_SHORT_COLUMN_NAMES
filename = os.path.join(metadata_dir, "full_to_short_column_names.csv")
df = pd.read_csv(filename, header=0)
print("Verifying data read from file is the correct length...\n")
# After indexing by 'full_name' and transposing, the first record maps every
# full name to the value found in the first remaining column.
full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]

# ### IV.3.4. SHORT_TO_FULL_COLUMN_NAMES
filename = os.path.join(metadata_dir, "short_to_full_column_names.csv")
df = pd.read_csv(filename, header=0)
# Same construction, keyed by 'short_name'
short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]
# ### IV.3.10. DATA
# Prefer the z-score files actually present on disk; when the input directory
# does not exist, the sample list read from stored_variables.json is kept.
if os.path.exists(input_data_dir):
    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_zscore.csv")]

# Import all the files
dfs = {}

# The header of the first sample defines the expected column order
df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)
expected_headers = df.columns.values

###############################
# !! This may take a while !! #
###############################
# Samples that load successfully are collected in a new list rather than
# removing failures from ls_samples while it is being iterated: removing
# during iteration skips the following entry, and the previous sample's
# DataFrame used to be stored under the failed sample's name.
loaded_samples = []
for sample in ls_samples:
    file_path = os.path.join(input_data_dir, sample)
    try:
        df = pd.read_csv(file_path, index_col=0)
    except pd.errors.EmptyDataError:
        # Empty file: leave it out of the analysis
        continue
    if df.empty:
        # Header only, no rows: nothing to analyse either
        continue
    # Reorder the columns to match the expected headers list
    dfs[sample] = df.reindex(columns=expected_headers)
    loaded_samples.append(sample)
ls_samples = loaded_samples

# Merge dfs into one df
df = pd.concat(dfs.values(), ignore_index=False, sort=False)
del dfs
print(df.head())
intial_df = pn.pane.DataFrame(df.head(40), width=2500)
# ### Marker Classification
# ## IV.5. *DOTPLOTS
# Load existing data from stored_variables.json; malformed JSON yields {}
try:
    with open(stored_variables_path, 'r') as file:
        data = json.load(file)
except json.JSONDecodeError:
    data = {}

# ### IV.7.2. DOTPLOTS-DETERMINED THRESHOLD
# Make sure stored_variables.json holds one (initially empty) dictionary for
# the cell type classification and one for the cell subtype classification.
# The file is re-read before each check, as in the two original passes.
for classification_key in ('cell_type_classification', 'cell_subtype_classification'):
    try:
        with open(stored_variables_path, 'r') as f:
            stored_variables = json.load(f)
    except FileNotFoundError:
        stored_variables = {}
    if classification_key in stored_variables:
        continue
    # Key is missing: add it and persist the change
    cell_type_classification = {}
    stored_variables[classification_key] = cell_type_classification
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)

# From here on `data` refers to the merged intensity table
data = df
import json
import panel as pn
# Load existing stored variables
with open(stored_variables_path, 'r') as f:
    stored_variables = json.load(f)

# One numeric input widget per marker, keyed by marker name, all starting at 0.0
threshold_inputs = {
    marker: pn.widgets.FloatInput(name=f'{marker} Threshold', value=0.0, step=0.1)
    for marker in stored_variables['markers']
}

# Reload stored_variables.json (a missing file is treated as empty)
try:
    with open(stored_variables_path, 'r') as f:
        stored_variables = json.load(f)
except FileNotFoundError:
    stored_variables = {}

# If no 'thresholds' entry exists yet, seed it with the widgets' current values
if 'thresholds' not in stored_variables:
    thresholds = {name: widget.value for name, widget in threshold_inputs.items()}
    stored_variables['thresholds'] = thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
# Save button to save thresholds to stored_variables.json
def save_thresholds(event):
    """Persist the current widget values under 'thresholds' in stored_variables.json.

    Parameters
    ----------
    event : object
        Click event passed by the button; not used.

    The JSON file is re-read immediately before writing so that entries saved
    by other handlers since the module-level copy was loaded (for example the
    classification dictionaries) are not overwritten by stale data.
    """
    thresholds = {marker: input_widget.value for marker, input_widget in threshold_inputs.items()}

    try:
        with open(stored_variables_path, 'r') as f:
            latest = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # File unavailable or unreadable: fall back to the in-memory copy
        latest = dict(stored_variables) if isinstance(stored_variables, dict) else {}

    latest['thresholds'] = thresholds
    # Keep the module-level copy in sync for code that reads it directly
    if isinstance(stored_variables, dict):
        stored_variables['thresholds'] = thresholds

    with open(stored_variables_path, 'w') as f:
        json.dump(latest, f, indent=4)

    # pn.state.notifications is None unless notifications were enabled
    if pn.state.notifications is not None:
        pn.state.notifications.success('Thresholds saved successfully!')
save_button2 = pn.widgets.Button(name='Save Thresholds', button_type='primary')
save_button2.on_click(save_thresholds)

# Create a GridSpec layout and fill it with the widgets, five per row
grid = pn.GridSpec()
marker_names = stored_variables['markers']
for position, marker in enumerate(marker_names):
    grid[position // 5, position % 5] = threshold_inputs[marker]

# (row, col) is the first free cell after the last widget; the save button
# goes one row below it and spans the five columns.
row, col = divmod(len(marker_names), 5)
grid[row + 1, :5] = save_button2

# Panel layout
threshold_panel = pn.Column(
    pn.pane.Markdown("## Define Thresholds for Markers"),
    grid,
)
import pandas as pd
import json
# Refresh the stored variables from the JSON file
with open(stored_variables_path, 'r') as json_file:
    stored_variables = json.load(json_file)

# Step 1: every column of df is a candidate intensity column
intensities = df.columns.tolist()
def assign_cell_type(row):
    """Return the cell type of one row of the intensity table.

    Columns are visited in the order of the module-level ``intensities`` list.
    The marker is the part of the column name before the first underscore.
    The first column whose value exceeds its marker's stored threshold, and
    whose marker is listed under some cell type in
    ``stored_variables['cell_type_classification']``, decides the result.
    'STROMA' is returned when no column qualifies.
    """
    for column_name in intensities:
        marker_name = column_name.split('_')[0]
        if marker_name not in stored_variables['thresholds']:
            continue
        cutoff = stored_variables['thresholds'][marker_name]
        if not row[column_name] > cutoff:
            continue
        # Above threshold: report the first cell type that lists this marker
        for label, members in stored_variables['cell_type_classification'].items():
            if marker_name in members:
                return label
    return 'STROMA'  # Default if no condition matches
# Step 5: classify every row of the DataFrame
df['cell_type'] = df.apply(assign_cell_type, axis=1)

# Number of rows whose cell_type contains each label
present_stroma, present_cancer, present_immune, present_endothelial = (
    df['cell_type'].str.contains(label).sum()
    for label in ('STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL')
)
# ## IV.8. *HEATMAPS
# Expose a 'Sample_ID.1' column under the plain 'Sample_ID' name
# (presumably a duplicate-header artefact of read_csv — TODO confirm)
if 'Sample_ID.1' in df.columns:
    df = df.rename(columns={'Sample_ID.1': 'Sample_ID'})

# Keep the rows of the configured samples and known cell types, then draw
# 20,000 row positions at random (with replacement) to build df2.
# The sample list comes from the configuration already held in stored_vars;
# the previous `with open(...)` around this line opened the JSON file without
# ever reading it.
ls_samples = stored_vars['ls_samples']
keep = ls_samples
keep_cell_type = ['ENDOTHELIAL', 'CANCER', 'STROMA', 'IMMUNE']

test2_df = df.loc[(df['cell_type'].isin(keep_cell_type))
                  & (df['Sample_ID'].isin(keep)), :].copy()

if len(test2_df) > 0:
    random_rows = np.random.choice(len(test2_df), 20000)
else:
    # np.random.choice raises on an empty population; keep an empty selection
    random_rows = np.array([], dtype=int)
df2 = test2_df.iloc[random_rows, :].copy()
# ### COLORS
# #### SAMPLES COLORS
# One husl colour per sample actually present in the data. Sizing the palette
# from ls_samples could yield fewer colours than samples, and zip() would then
# silently leave the extra samples without a colour.
sample_ids = df.Sample_ID.unique()
color_values = sb.color_palette("husl", n_colors=len(sample_ids))
sb.palplot(sb.color_palette(color_values))

# TMA samples get a gray palette of their own
TMA_samples = [s for s in sample_ids if 'TMA' in s]
if TMA_samples:
    TMA_color_values = sb.color_palette(n_colors=len(TMA_samples), palette="gray")
    sb.palplot(sb.color_palette(TMA_color_values))
else:
    # No TMA sample: there is no palette to build or preview
    TMA_color_values = []

# Store in a dictionary
color_dict = dict(zip(sample_ids, color_values))
# Replace all TMA samples' colors with gray (TMA_samples follows key order)
for key, gray in zip(TMA_samples, TMA_color_values):
    color_dict[key] = gray

color_df_sample = color_dict_to_df(color_dict, "Sample_ID")
# Save to file in metadata directory
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)
color_df_sample.to_csv(filename, index=False)
# Legend of sample info only: zero-size bars on a hidden axis act as handles
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = [
    g.bar(0, 0, color=sample_color, label=sample_name, linewidth=0)
    for sample_name, sample_color in color_dict.items()
]
first_legend = plt.legend(handles=handles, loc='upper right', title='Sample')

# Save the legend image in the metadata images directory
filename = os.path.join(metadata_images_dir, "Sample_legend.png")
plt.savefig(filename, bbox_inches='tight')
# Read the sample colour table back from the metadata directory.
# NOTE: `df` is rebound to the colour table here, so the merged intensity
# table is no longer reachable under that name after this point.
filename = os.path.join(metadata_dir, "sample_color_data.csv")
df = pd.read_csv(filename, header=0).drop(columns=['hex'])

# The (r, g, b) tuple of floats was read in as the string '(r, g, b)';
# rgb_tuple_from_str turns each string back into a tuple of floats.
df['rgb'] = df.apply(lambda record: rgb_tuple_from_str(record['rgb']), axis=1)

# Turn into dictionary: Sample_ID -> rgb tuple
sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()
# #### CELL TYPES COLORS
# Fixed RGB colour (components in 0-1) for each cell type
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}

# Cell types and their colours, in the order declared above
cell_types = [*custom_colors]
color_values = [custom_colors[name] for name in cell_types]

# Display the colors
sb.palplot(sb.color_palette(color_values))

# Store in a dictionary
celltype_color_dict = dict(zip(cell_types, color_values))

# Save the colour mapping to the metadata directory
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
filename = os.path.join(metadata_dir, "celltype_color_data.csv")
celltype_color_df.to_csv(filename, index=False)
# Legend of cell type info only: zero-size bars on a hidden axis act as handles
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in celltype_color_dict.keys():
    h = g.bar(0, 0, color=celltype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
# The stray trailing comma that used to follow this call made first_legend a
# 1-tuple; it now holds the Legend object, like the sample legend above.
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell type')

# Save the legend image in the metadata images directory
filename = "Celltype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')
# Read the cell type colour table back from the metadata directory
# (`df` is rebound to this colour table)
filename = os.path.join(metadata_dir, "celltype_color_data.csv")
df = pd.read_csv(filename, header=0).drop(columns=['hex'])

# The (r, g, b) tuple of floats was read in as the string '(r, g, b)';
# rgb_tuple_from_str turns each string back into a tuple of floats.
df['rgb'] = df.apply(lambda record: rgb_tuple_from_str(record['rgb']), axis=1)

# Turn into dictionary: cell_type -> rgb tuple
cell_type_color_dict = df.set_index('cell_type')['rgb'].to_dict()

# Per-row colours for the sampled table df2
sample_row_colors = df2['Sample_ID'].map(sample_color_dict)
cell_type_row_colors = df2['cell_type'].map(cell_type_color_dict)
# ## Cell Subtype Colours
import pandas as pd
import os
def rgb_tuple_from_str(rgb_str):
    """Convert the text form of an RGB tuple back into a tuple of floats.

    Parameters
    ----------
    rgb_str : str
        Text such as ``'(0.1, 0.2, 0.3)'``. Components wrapped as
        ``np.float64(...)`` are accepted too. A tuple or list of numbers is
        also accepted and converted component by component.

    Returns
    -------
    tuple of float, or None
        ``None`` when the value cannot be parsed. This includes non-string
        cells such as NaN or None, which previously raised AttributeError.
    """
    if isinstance(rgb_str, (tuple, list)):
        try:
            return tuple(float(component) for component in rgb_str)
        except (TypeError, ValueError):
            return None
    if not isinstance(rgb_str, str):
        return None

    # Strip the numpy wrapper, the parentheses and any spaces
    cleaned = rgb_str
    for token in ("np.float64", "(", ")", " "):
        cleaned = cleaned.replace(token, "")
    try:
        return tuple(float(component) for component in cleaned.split(","))
    except ValueError:
        return None
# Read the cell subtype colour table from the metadata directory
# (`df` is rebound to this colour table)
filename = os.path.join(metadata_dir, "cellsubtype_color_data.csv")
df = pd.read_csv(filename, header=0).drop(columns=['hex'])

# Remove any 'np.float64' text from the 'rgb' strings, then turn each string
# into a tuple of floats
df['rgb'] = df['rgb'].str.replace("np.float64", "", regex=False)
df['rgb'] = df.apply(lambda record: rgb_tuple_from_str(record['rgb']), axis=1)

# Turn into dictionary: cell_subtype -> rgb tuple
cell_subtype_color_dict = df.set_index('cell_subtype')['rgb'].to_dict()

# Per-row colours for the sampled table df2
sample_row_colors = df2['Sample_ID'].map(sample_color_dict)
cell_subtype_row_colors = df2['cell_subtype'].map(cell_subtype_color_dict)
# #### Cell Type
# Column names of whatever table `df` currently refers to
selected_intensities = df.columns.tolist()

# From here on `df` is the randomly sampled table df2
df = df2
import json
import pandas as pd
import numpy as np
import panel as pn
import plotly.graph_objects as go
pn.extension('plotly')

# Load the list of sample files from the JSON file
with open(stored_variables_path, 'r') as f:
    json_data = json.load(f)
ls_samples = json_data["ls_samples"]

# Checkbox group to select files
checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=ls_samples)

# X and Y axis dropdowns start empty; update_dropdown_options fills them
x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])

# Input field for the number of random samples
random_sample_input = pn.widgets.IntInput(
    name='Number of Random Samples',
    value=20000,
    step=100,
)

# Sliders for the interactive X and Y lines; both start with the same 0-1 range
line_slider_range = dict(start=0, end=1, step=0.01)
x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', **line_slider_range)
y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', **line_slider_range)

# Placeholders for the dot plot and for the digital reconstruction plot
plot_placeholder = pn.pane.Plotly()
reconstruction_placeholder = pn.pane.Plotly()
# Function to create the dot plot
def create_dot_plot(selected_files, x_axis, y_axis, n_samples, x_line_pos, y_line_pos):
    """Build the scatter plot of two intensity columns with two threshold lines.

    Parameters
    ----------
    selected_files : list
        Sample_ID values whose rows of the module-level ``df`` are plotted.
    x_axis, y_axis : str
        Names of the columns drawn on the X and Y axes.
    n_samples : int or None
        Maximum number of rows to draw. When the selection holds more rows,
        ``n_samples`` distinct rows are picked at random. ``None`` or a
        negative value disables subsampling.
    x_line_pos, y_line_pos : float
        Positions of the dashed vertical and horizontal lines.

    Returns
    -------
    plotly.graph_objects.Figure
        An empty figure when no file is selected or an axis column is missing.
    """
    if not selected_files:
        return go.Figure()

    test2_df = df.loc[df['Sample_ID'].isin(selected_files), :].copy()

    # Draw without replacement so that no row is plotted twice; this is safe
    # because subsampling only happens when the selection exceeds n_samples.
    if n_samples is not None and 0 <= n_samples < len(test2_df):
        random_rows = np.random.choice(len(test2_df), n_samples, replace=False)
        test_df = test2_df.iloc[random_rows, :].copy()
    else:
        test_df = test2_df

    if x_axis not in test_df.columns or y_axis not in test_df.columns:
        return go.Figure()

    fig = go.Figure()
    title = 'Threshold'
    fig.add_trace(go.Scatter(
        x=test_df[x_axis],
        y=test_df[y_axis],
        mode='markers',
        marker=dict(color='LightSkyBlue', size=2)
    ))

    # Add vertical and horizontal lines
    fig.add_vline(x=x_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.add_hline(y=y_line_pos, line_width=2, line_dash="dash", line_color="red")

    # Axis ranges are clamped to the extent of the plotted data
    fig.update_layout(
        title=title,
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(title=x_axis, linecolor='black', range=[test_df[x_axis].min(), test_df[x_axis].max()]),
        yaxis=dict(title=y_axis, linecolor='black', range=[test_df[y_axis].min(), test_df[y_axis].max()])
    )
    return fig
def assign_cell_types_again():
    """Recompute the 'cell_type' column of the module-level ``df`` and return ``df``.

    Thresholds and the cell type classification are re-read from
    stored_variables.json on every call. Columns are considered in DataFrame
    order and the marker is the part of the column name before the first
    underscore. A row receives the cell type of the first column whose value
    exceeds its marker's threshold, provided that marker is listed under some
    cell type. Rows matching no column are labelled 'STROMA'.

    The marker/threshold/cell-type lookups do not depend on the row, so they
    are resolved once per call and applied column-wise instead of being
    repeated for every row.
    """
    with open(stored_variables_path, 'r') as file:
        stored_variables = json.load(file)
    thresholds = stored_variables.get('thresholds', {})
    classification = stored_variables.get('cell_type_classification', {})

    # (column, threshold, cell type) for every column that can decide a row
    rules = []
    for intensity in list(df.columns):
        marker = intensity.split('_')[0]  # Extract marker from intensity name
        if marker not in thresholds:
            continue
        for cell_type, markers in classification.items():
            if marker in markers:
                rules.append((intensity, thresholds[marker], cell_type))
                break

    labels = np.full(len(df), 'STROMA', dtype=object)  # Default if no rule matches
    # Apply the rules last-to-first so the earliest matching column wins
    for intensity, threshold, cell_type in reversed(rules):
        labels[(df[intensity] > threshold).to_numpy()] = cell_type

    df['cell_type'] = labels
    return df
# Function to create the digital reconstruction plot
def create_reconstruction_plot(selected_files):
    """Build the XY map of the selected samples, coloured by cell type.

    Parameters
    ----------
    selected_files : list
        Sample_ID values to draw. Cell types are recomputed first through
        assign_cell_types_again().

    Returns
    -------
    plotly.graph_objects.Figure
        One scatter trace per (sample, cell type) pair; an empty figure when
        nothing is selected.

    The title and the axis ranges cover every selected sample. They used to be
    taken from the last sample only, which could crop the others out of view.
    """
    if not selected_files:
        return go.Figure()

    df = assign_cell_types_again()
    fig = go.Figure()

    selected = df.loc[df['Sample_ID'].isin(selected_files),
                      ['Sample_ID', 'Nuc_X', 'Nuc_Y_Inv', 'cell_type']]

    for sample in selected_files:
        sample_cells = selected.loc[selected['Sample_ID'] == sample]
        for celltype in sample_cells['cell_type'].unique():
            points = sample_cells.loc[sample_cells['cell_type'] == celltype]
            # Cell types without a registered colour are drawn in neutral gray
            red, green, blue = cell_type_color_dict.get(celltype, (0.5, 0.5, 0.5))
            # Format the components explicitly so the colour string does not
            # depend on how the tuple's elements are represented
            color = 'rgb({}, {}, {})'.format(float(red), float(green), float(blue))
            fig.add_scatter(
                mode='markers',
                marker=dict(size=3, opacity=0.5, color=color),
                x=points['Nuc_X'],
                y=points['Nuc_Y_Inv'],
                name=celltype
            )

    sample_names = ", ".join(sample.split('_')[0] for sample in selected_files)
    title = sample_names + " Background Subtracted XY Map cell types"

    fig.update_layout(
        title=title,
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        legend=dict(
            title='Cell Types',
            font=dict(
                family='Arial',
                size=12,
                color='black'
            ),
            bgcolor='white',
            bordercolor='black',
            borderwidth=0.4,
            itemsizing='constant'
        ),
        xaxis=dict(title='Nuc_X', linecolor='black', range=[selected['Nuc_X'].min(), selected['Nuc_X'].max()]),
        yaxis=dict(title='Nuc_Y_Inv', linecolor='black', range=[selected['Nuc_Y_Inv'].min(), selected['Nuc_Y_Inv'].max()])
    )
    return fig
def update_dropdown_options(event):
    """Refresh the X/Y axis dropdowns after the file selection changed.

    With at least one file ticked, both dropdowns offer the columns of ``df``
    whose name contains '_Intensity_Average'; otherwise both are emptied.
    ``event`` is the watcher event and is not used.
    """
    chosen_files = checkbox_group.value
    if not chosen_files:
        x_axis_dropdown.options = []
        y_axis_dropdown.options = []
        return

    subset = df.loc[df['Sample_ID'].isin(chosen_files), :].copy()
    intensity_columns = [name for name in list(subset.columns) if '_Intensity_Average' in name]
    x_axis_dropdown.options = intensity_columns
    y_axis_dropdown.options = intensity_columns
def update_slider_ranges(event):
    """Rescale both line sliders to the currently selected data.

    Nothing happens unless files and both axes are selected. Each slider then
    spans [-|max|, +|max|] of its column over the selected samples and is
    reset to 0. ``event`` is the watcher event and is not used.
    """
    selected_files = checkbox_group.value
    x_axis = x_axis_dropdown.value
    y_axis = y_axis_dropdown.value
    if not (selected_files and x_axis and y_axis):
        return

    subset = df.loc[df['Sample_ID'].isin(selected_files), :].copy()
    # Both limits are computed before any slider is touched
    x_limit = abs(subset[x_axis].max())
    y_limit = abs(subset[y_axis].max())

    x_line_slider.start = -x_limit
    x_line_slider.end = x_limit
    y_line_slider.start = -y_limit
    y_line_slider.end = y_limit
    x_line_slider.value = 0
    y_line_slider.value = 0
def on_value_change(event):
    """Redraw both plots from the current widget values.

    Both figures are built before either placeholder is updated. ``event`` is
    the watcher event and is not used.
    """
    files = checkbox_group.value
    dot_figure = create_dot_plot(
        files,
        x_axis_dropdown.value,
        y_axis_dropdown.value,
        random_sample_input.value,
        x_line_slider.value,
        y_line_slider.value,
    )
    reconstruction_figure = create_reconstruction_plot(files)

    plot_placeholder.object = dot_figure
    reconstruction_placeholder.object = reconstruction_figure
# Link value changes to their handlers (registration order is unchanged)
for handler in (update_dropdown_options, update_slider_ranges):
    checkbox_group.param.watch(handler, 'value')
for dropdown in (x_axis_dropdown, y_axis_dropdown):
    dropdown.param.watch(update_slider_ranges, 'value')
for widget in (x_axis_dropdown, y_axis_dropdown, random_sample_input,
               x_line_slider, y_line_slider):
    widget.param.watch(on_value_change, 'value')

# Layout: controls on top, the two plots side by side underneath
dot_plot_column = pn.Column(
    "## Dot Plot",
    pn.Column(plot_placeholder))
reconstruction_column = pn.Column(
    "## Digital Reconstruction Plot",
    reconstruction_placeholder)
plot_with_reconstruction = pn.Column(
    "## Select Files to Construct Dot Plot",
    checkbox_group,
    x_axis_dropdown,
    y_axis_dropdown,
    random_sample_input,
    pn.Row(x_line_slider, y_line_slider),
    pn.Row(dot_plot_column, reconstruction_column),
)

# Serve the app
#plot_with_reconstruction.show()
# ## MAKE HEATMAPS
# ### Cell Subtype
# Row annotations: one dictionary per annotation, listed in the order they
# should appear on the figure (Sample first, then Cell type).
row_annotations = [
    {'label': 'Sample',
     'type': 'row',
     'mapping': sample_row_colors,
     'dict': sample_color_dict,
     'location': 'center left',
     'bbox_to_anchor': (0.1, 0.9)},
    {'label': 'Cell type',
     'type': 'row',
     'mapping': cell_type_row_colors,
     'dict': cell_type_color_dict,
     'location': 'center left',
     'bbox_to_anchor': (0.17, 0.9)},
]
# No column annotations
col_annotations = []

## IMPORTANT - 'annotations' MUST contain both 'rows' and 'cols'.
## They can be empty lists, but they must be there!
anns = {'rows': row_annotations, 'cols': col_annotations}

# To simplify marker display in the following figures (heatmap, etc):
# keep only the part of each short name before the first underscore
figure_marker_names = {full: short.split('_')[0] for full, short in full_to_short_names.items()}

# NOTE(review): the result of this drop is discarded, so df2 keeps its
# 'cell_subtype' column — confirm whether an assignment was intended.
df2.drop('cell_subtype', axis='columns')

# Columns that are not marker intensities
not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID','cell_type', 'cell_subtype', 'cluster','ID',
                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']

# Recompute the cell types; the function returns the module-level df
df2 = assign_cell_types_again()
# NOTE(review): as above, the result of this drop is discarded.
df2.drop('cell_subtype', axis='columns')
# Save one heatmap
data = df

# Keep the rows of the configured samples and known cell types, then draw
# 1,000 row positions at random (with replacement) to build test_df.
# The sample list comes from the configuration already held in stored_vars;
# the previous `with open(...)` around this line opened the JSON file without
# ever reading it.
ls_samples = stored_vars['ls_samples']
keep = list(ls_samples)
keep_cell_type = ['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL']

# Check the individual conditions, then combine them
cell_type_condition = data['cell_type'].isin(keep_cell_type)
sample_id_condition = data['Sample_ID'].isin(keep)
combined_condition = cell_type_condition & sample_id_condition

# Apply the combined condition to filter the DataFrame
test2_df = data.loc[combined_condition].copy()

if len(test2_df) > 0:
    random_rows = np.random.choice(len(test2_df), 1000)
else:
    # np.random.choice raises on an empty population; keep an empty selection
    random_rows = np.array([], dtype=int)
test_df = test2_df.iloc[random_rows, :].copy()
import json
import panel as pn
import param
import pandas as pd
# The classification tables read and write the same stored-variables file
file_path = stored_variables_path

# Load the Tabulator extension used by the classification tables
pn.extension('tabulator')
# Load existing data from stored_variables.json with error handling
def load_data():
    """Return the content of the JSON file at ``file_path``.

    Returns
    -------
    dict
        The parsed JSON content, or an empty dict when the file is missing or
        does not contain valid JSON. The error is printed in both cases. A
        missing file is handled here as it is by the other loaders in this
        script.
    """
    try:
        with open(file_path, 'r') as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error reading JSON file: {e}")
        return {}
data = load_data()

# Markers, cell types and cell subtypes from the loaded data; each one
# defaults to an empty list when its key is absent
markers, cell_types, cell_subtypes = (
    data.get(key, []) for key in ('markers', 'cell_type', 'cell_subtype')
)
# Sanitize option names
def sanitize_options(options):
    """Return the option names with problematic characters replaced.

    Spaces become '_', '+' becomes 'plus', 'α' becomes 'a' and apostrophes
    are removed. The input is not modified.
    """
    substitutions = str.maketrans({' ': '_', '+': 'plus', 'α': 'a', "'": ''})
    cleaned = []
    for option in options:
        cleaned.append(option.translate(substitutions))
    return cleaned
# Sanitized names of the cell types and of the cell subtypes
sanitized_cell_types, sanitized_cell_subtypes = (
    sanitize_options(names) for names in (cell_types, cell_subtypes)
)
# Helper function to create the classification DataFrame
def create_classification_df(items, item_label):
    """Build the table edited in a classification widget.

    Parameters
    ----------
    items : list of str
        One table row per item (cell type or cell subtype name).
    item_label : str
        Name of the first column, which holds the items.

    Returns
    -------
    pandas.DataFrame
        Column ``item_label`` followed by one column per entry of the
        module-level ``markers`` list, every marker cell set to False.

    The columns are passed explicitly, so an empty ``items`` list yields an
    empty table with the right columns instead of raising KeyError. That was
    the outcome whenever the JSON file listed no cell types or subtypes.
    """
    rows = []
    for item in items:
        row = {marker: False for marker in markers}
        row[item_label] = item
        rows.append(row)

    # dict.fromkeys drops repeated names while keeping their order
    unique_columns = list(dict.fromkeys([item_label] + list(markers)))
    classification_df = pd.DataFrame(rows, columns=unique_columns)
    classification_df = classification_df[[item_label] + list(markers)]
    return classification_df
# Classification tables: one row per cell type / cell subtype
cell_type_df = create_classification_df(sanitized_cell_types, 'CELL_TYPE')
cell_subtype_df = create_classification_df(sanitized_cell_subtypes, 'CELL_SUBTYPE')

# Every marker column is rendered with the 'tickCross' formatter
tabulator_formatters = {}
for marker_name in markers:
    tabulator_formatters[marker_name] = {'type': 'tickCross'}

# Tabulator widgets showing the two tables
cell_type_table = pn.widgets.Tabulator(cell_type_df, formatters=tabulator_formatters)
cell_subtype_table = pn.widgets.Tabulator(cell_subtype_df, formatters=tabulator_formatters)
# Save functions for cell types and cell subtypes
def save_data(table, classification_key, item_label):
    """Store the ticked markers of every table row in the JSON file.

    Parameters
    ----------
    table : object
        Widget whose ``value`` attribute is the edited DataFrame.
    classification_key : str
        Key of the JSON file under which the classification is stored.
    item_label : str
        Name of the column holding the row's item (cell type or subtype).

    The JSON file is re-read immediately before writing so that entries saved
    by other handlers since ``data`` was loaded (for example 'thresholds') are
    not overwritten by stale content.
    """
    current_data = table.value
    df_bool = current_data.replace({'✔': True, '✘': False})

    # item -> list of the markers ticked on its row
    classification = {}
    for _, row in df_bool.iterrows():
        classification[row[item_label]] = [marker for marker in markers if row[marker]]

    try:
        with open(file_path, 'r') as file:
            latest = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        # File unavailable or unreadable: fall back to the in-memory copy
        latest = dict(data) if isinstance(data, dict) else {}

    latest[classification_key] = classification
    # Keep the module-level copy in sync for code that reads it directly
    if isinstance(data, dict):
        data[classification_key] = classification

    with open(file_path, 'w') as file:
        json.dump(latest, file, indent=4)
# Button action for the cell type table
def save_cell_type_selections(event):
    """Click handler: store the cell type table under 'cell_type_classification'."""
    save_data(
        table=cell_type_table,
        classification_key='cell_type_classification',
        item_label='CELL_TYPE',
    )
# Button action for the cell subtype table
def save_cell_subtype_selections(event):
    """Click handler: store the cell subtype table under 'cell_subtype_classification'."""
    save_data(
        table=cell_subtype_table,
        classification_key='cell_subtype_classification',
        item_label='CELL_SUBTYPE',
    )
# Save buttons, one per classification table
save_cell_type_button = pn.widgets.Button(
    name='Save Cell Type Selections',
    button_type='primary',
)
save_cell_subtype_button = pn.widgets.Button(
    name='Save Cell Subtype Selections',
    button_type='primary',
)
# Each button triggers the save handler of its own table
save_cell_type_button.on_click(save_cell_type_selections)
save_cell_subtype_button.on_click(save_cell_subtype_selections)

# Heading, table and save button stacked vertically for each level
cell_type_classification_app_main = pn.Column(
    pn.pane.Markdown("# Cell Type Classification"),
    cell_type_table,
    save_cell_type_button,
)
cell_subtype_classification_app_main = pn.Column(
    pn.pane.Markdown("# Cell Subtype Classification"),
    cell_subtype_table,
    save_cell_subtype_button,
)
import json
import panel as pn
# Load the stored variables once; a missing file is treated as empty
try:
    with open(stored_variables_path, 'r') as f:
        stored_variables = json.load(f)
except FileNotFoundError:
    stored_variables = {}

# Thresholds saved by an earlier session, if any
saved_subtype_thresholds = stored_variables.get('subtype_thresholds') or {}

# One FloatInput per marker. Each widget starts from the saved threshold when
# there is one, so pressing "Save" does not reset earlier choices to 0.0.
subtype_threshold_inputs = {}
for marker in stored_variables.get('markers', []):
    saved_value = saved_subtype_thresholds.get(marker)
    subtype_threshold_inputs[marker] = pn.widgets.FloatInput(
        name=f'{marker} Threshold',
        value=0.0 if saved_value is None else float(saved_value),
        step=0.1,
    )

# Make sure the 'subtype_thresholds' field exists in the file
if 'subtype_thresholds' not in stored_variables:
    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}
    stored_variables['subtype_thresholds'] = subtype_thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
# Click handler that saves the thresholds to the stored variables file
def save_thresholds(event):
    """Write the current widget values to the file under 'subtype_thresholds'.

    The file is re-read first and merged into the module-level
    ``stored_variables`` dict, so entries written since that dict was loaded
    (for example classifications saved from the tables) are kept.
    """
    subtype_thresholds = {
        marker: input_widget.value
        for marker, input_widget in subtype_threshold_inputs.items()
    }
    try:
        with open(stored_variables_path, 'r') as f:
            stored_variables.update(json.load(f))
    except (FileNotFoundError, json.JSONDecodeError):
        # Unreadable file: fall back to the in-memory copy and rewrite it below.
        pass
    stored_variables['subtype_thresholds'] = subtype_thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
# Button that triggers save_thresholds
save_button = pn.widgets.Button(name='Save Thresholds', button_type='primary')
save_button.on_click(save_thresholds)

# Grid holding the threshold inputs, five per row
subtype_grid = pn.GridSpec()
for position, marker in enumerate(stored_variables['markers']):
    subtype_grid[position // 5, position % 5] = subtype_threshold_inputs[marker]
# Next free grid position after the last input
row, col = divmod(len(stored_variables['markers']), 5)
# The save button is placed at row index row + 1 and spans the five columns
subtype_grid[row + 1, :5] = save_button

# Heading above the grid
subtype_threshold_panel = pn.Column(
    pn.pane.Markdown("## Define Thresholds for Markers"),
    subtype_grid,
)
# Display the panel
#subtype_threshold_panel.show()

# Re-read the stored variables from disk
with open(stored_variables_path, 'r') as file:
    stored_variables = json.load(file)
# All column names of df; assign_cell_subtypes() iterates over them
intensities = list(df.columns)
def assign_cell_subtypes(row):
    """Return the cell subtype for one row, or 'DC' when nothing matches.

    Columns listed in the module-level ``intensities`` are visited in order.
    The text before the first ``_`` of a column name is taken as the marker.
    For the first marker whose value exceeds its stored threshold and that
    appears in a subtype's marker list, that subtype is returned.
    """
    for column in intensities:
        marker = column.split('_')[0]  # Extract marker from intensity name
        if marker not in stored_variables['subtype_thresholds']:
            continue
        threshold = stored_variables['subtype_thresholds'][marker]
        if not row[column] > threshold:
            continue
        for subtype_name, subtype_markers in stored_variables['cell_subtype_classification'].items():
            if marker in subtype_markers:
                return subtype_name
    return 'DC'
# Refresh the cell types, then label every row with its cell subtype
df = assign_cell_types_again()
df['cell_subtype'] = df.apply(assign_cell_subtypes, axis=1)
# Colour of each cell subtype, stored as an "rgb(r, g, b)" string built from a
# tuple of float components
cell_subtype_color_dict = {
    subtype: f"rgb{components}"
    for subtype, components in {
        'DC': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
        'B': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
        'TCD4': (0.6980392156862745, 0.8745098039215686, 0.5411764705882353),
        'Exhausted TCD4': (0.2, 0.6274509803921569, 0.17254901960784313),
        'Exhausted TCD8': (0.984313725490196, 0.6039215686274509, 0.6),
        'TCD8': (0.8901960784313725, 0.10196078431372549, 0.10980392156862745),
        'M1': (0.9921568627450981, 0.7490196078431373, 0.43529411764705883),
        'M2': (1.0, 0.4980392156862745, 0.0),
        'Treg': (0.792156862745098, 0.6980392156862745, 0.8392156862745098),
        'Other CD45+': (0.41568627450980394, 0.23921568627450981, 0.6039215686274509),
        'Cancer': (1.0, 1.0, 0.6),
        'myCAF αSMA+': (0.6941176470588235, 0.34901960784313724, 0.1568627450980392),
        'Stroma': (0.6509803921568628, 0.807843137254902, 0.8901960784313725),
        'Endothelial': (0.12156862745098039, 0.47058823529411764, 0.7058823529411765),
    }.items()
}
# Load stored variables from a JSON file
def load_stored_variables(path):
    """Return the parsed JSON content of the file at ``path``."""
    with open(path, 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
# Columns of df whose name contains '_Intensity_Average', in column order
subtype_intensities = [
    column_name
    for column_name in df.columns
    if '_Intensity_Average' in column_name
]
# Assign a cell subtype based on thresholds and classifications
def assign_cell_subtype(row, stored_variables=None):
    """Return the cell subtype for one row, or 'DC' when nothing matches.

    Args:
        row: Mapping from column name to value (a DataFrame row).
        stored_variables: Already-loaded stored variables. When omitted they
            are read from ``stored_variables_path``, which is the original
            behaviour. Callers labelling many rows should load them once and
            pass them in, to avoid re-parsing the file for every row.

    Columns in the module-level ``subtype_intensities`` are visited in order.
    The text before the first ``_`` of a column name is taken as the marker.
    The first marker whose value exceeds its threshold and that belongs to a
    subtype's marker list decides the result. Missing 'subtype_thresholds' or
    'cell_subtype_classification' entries are treated as empty, so every row
    gets the default.
    """
    if stored_variables is None:
        stored_variables = load_stored_variables(stored_variables_path)
    thresholds = stored_variables.get('subtype_thresholds', {})
    classification = stored_variables.get('cell_subtype_classification', {})
    for subtype_intensity in subtype_intensities:
        marker = subtype_intensity.split('_')[0]
        if marker not in thresholds:
            continue
        if row[subtype_intensity] > thresholds[marker]:
            for cell_subtype, subtype_markers in classification.items():
                if marker in subtype_markers:
                    return cell_subtype  # Return the assigned subtype
    return 'DC'  # Default value if no conditions match


# Main function to assign cell subtypes to the DataFrame
def assign_cell_subtypes_again():
    """Recompute ``df['cell_subtype']`` from the stored variables on disk and return ``df``."""
    # Read the file once per pass rather than once per row.
    stored_variables = load_stored_variables(stored_variables_path)
    df['cell_subtype'] = df.apply(
        lambda row: assign_cell_subtype(row, stored_variables), axis=1
    )
    return df
import json
import pandas as pd
import numpy as np
import panel as pn
import plotly.graph_objects as go
pn.extension('plotly')

# Sample names recorded in the stored variables file
with open(stored_variables_path, 'r') as f:
    json_data = json.load(f)
subtype_ls_samples = json_data["ls_samples"]

# Checkbox group to select files
subtype_checkbox_group = pn.widgets.CheckBoxGroup(
    name='Select Files',
    options=subtype_ls_samples,
)
# X and Y axis dropdowns start empty
subtype_x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
subtype_y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])
# Number of rows drawn at random for the dot plot
subtype_random_sample_input = pn.widgets.IntInput(
    name='Number of Random Samples',
    value=20000,
    step=100,
)
# Sliders positioning the vertical (X) and horizontal (Y) lines
subtype_x_line_slider = pn.widgets.FloatSlider(
    name='X Axis Line Position',
    start=0,
    end=1,
    step=0.01,
)
subtype_y_line_slider = pn.widgets.FloatSlider(
    name='Y Axis Line Position',
    start=0,
    end=1,
    step=0.01,
)
# Empty Plotly panes for the dot plot and the digital reconstruction plot
subtype_plot_placeholder = pn.pane.Plotly()
subtype_reconstruction_placeholder = pn.pane.Plotly()
def update_color_dict():
    """Return a fresh dict mapping each cell subtype name to an 'rgb(r, g, b)' string."""
    components_by_subtype = (
        ('DC', (0.6509803921568628, 0.807843137254902, 0.8901960784313725)),
        ('B', (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)),
        ('TCD4', (0.6980392156862745, 0.8745098039215686, 0.5411764705882353)),
        ('Exhausted TCD4', (0.2, 0.6274509803921569, 0.17254901960784313)),
        ('Exhausted TCD8', (0.984313725490196, 0.6039215686274509, 0.6)),
        ('TCD8', (0.8901960784313725, 0.10196078431372549, 0.10980392156862745)),
        ('M1', (0.9921568627450981, 0.7490196078431373, 0.43529411764705883)),
        ('M2', (1.0, 0.4980392156862745, 0.0)),
        ('Treg', (0.792156862745098, 0.6980392156862745, 0.8392156862745098)),
        ('Other CD45+', (0.41568627450980394, 0.23921568627450981, 0.6039215686274509)),
        ('Cancer', (1.0, 1.0, 0.6)),
        ('myCAF αSMA+', (0.6941176470588235, 0.34901960784313724, 0.1568627450980392)),
        ('Stroma', (0.6509803921568628, 0.807843137254902, 0.8901960784313725)),
        ('Endothelial', (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)),
    )
    # Formatting the tuple directly after 'rgb' gives "rgb(r, g, b)"
    return {subtype: f"rgb{components}" for subtype, components in components_by_subtype}
# Function to create the dot plot
def create_subtype_dot_plot(subtype_selected_files, subtype_x_axis, subtype_y_axis, subtype_n_samples, subtype_x_line_pos, subtype_y_line_pos):
    """Return a scatter plot of two columns of ``df`` with two dashed threshold lines.

    Args:
        subtype_selected_files: ``Sample_ID`` values whose rows are plotted.
        subtype_x_axis: Column plotted on the X axis.
        subtype_y_axis: Column plotted on the Y axis.
        subtype_n_samples: Maximum number of rows to draw. ``None`` or a
            negative value disables down-sampling.
        subtype_x_line_pos: X position of the vertical dashed line.
        subtype_y_line_pos: Y position of the horizontal dashed line.

    Returns:
        A ``go.Figure``. It is empty when no file is selected, when an axis is
        not a column of ``df``, or when no row is left to plot.
    """
    if not subtype_selected_files:
        return go.Figure()

    subtype_test_df = df.loc[df['Sample_ID'].isin(subtype_selected_files), :]
    if subtype_x_axis not in subtype_test_df.columns or subtype_y_axis not in subtype_test_df.columns:
        return go.Figure()

    n_rows = len(subtype_test_df)
    if subtype_n_samples is not None and 0 <= subtype_n_samples < n_rows:
        # replace=False: each row is drawn at most once, so the plot shows
        # subtype_n_samples distinct cells instead of duplicated points.
        subtype_random_rows = np.random.choice(n_rows, subtype_n_samples, replace=False)
        subtype_test_df = subtype_test_df.iloc[subtype_random_rows, :]

    if subtype_test_df.empty:
        # min()/max() of an empty column are NaN and would give an invalid axis range.
        return go.Figure()

    x_values = subtype_test_df[subtype_x_axis]
    y_values = subtype_test_df[subtype_y_axis]

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        marker=dict(color='LightSkyBlue', size=2)
    ))
    # Add vertical and horizontal lines
    fig.add_vline(x=subtype_x_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.add_hline(y=subtype_y_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.update_layout(
        title='Threshold',
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(title=subtype_x_axis, linecolor='black', range=[x_values.min(), x_values.max()]),
        yaxis=dict(title=subtype_y_axis, linecolor='black', range=[y_values.min(), y_values.max()])
    )
    return fig
def create_subtype_reconstruction_plot(subtype_selected_files):
    """Return an XY scatter of nuclei coloured by cell subtype for the selected samples.

    Cell subtypes are recomputed through ``assign_cell_subtypes_again()``.
    For every selected ``Sample_ID`` one trace per subtype is added, using the
    ``Nuc_X`` and ``Nuc_Y_Inv`` columns. The title is built from the last
    selected sample. An empty figure is returned when nothing is selected.
    """
    if not subtype_selected_files:
        return go.Figure()

    color_lookup = update_color_dict()
    # The classification tables store sanitized names (e.g. 'Other_CD45plus'),
    # so register those spellings as aliases of the display names.
    for label, color in list(color_lookup.items()):
        color_lookup.setdefault(sanitize_options([label])[0], color)
    # Subtypes absent from the palette get a neutral colour instead of a KeyError.
    fallback_color = 'rgb(0.5, 0.5, 0.5)'

    df = assign_cell_subtypes_again()
    subtype_fig = go.Figure()
    title = None
    for sample in subtype_selected_files:
        location_colors = df.loc[df['Sample_ID'] == sample, ['Nuc_X', 'Nuc_Y_Inv', 'cell_subtype']]
        title = sample.split('_')[0] + " Background Subtracted XY Map cell subtypes"
        for cellsubtype in location_colors['cell_subtype'].unique():
            in_subtype = location_colors['cell_subtype'] == cellsubtype
            subtype_fig.add_scatter(
                mode='markers',
                marker=dict(
                    size=2,
                    opacity=0.5,
                    color=str(color_lookup.get(cellsubtype, fallback_color)),
                ),
                x=location_colors.loc[in_subtype, 'Nuc_X'],
                y=location_colors.loc[in_subtype, 'Nuc_Y_Inv'],
                name=cellsubtype
            )

    subtype_fig.update_xaxes(title_text='Nuc_X', linecolor='black')
    subtype_fig.update_yaxes(title_text='Nuc_Y_Inv', linecolor='black')
    subtype_fig.update_layout(
        title=title,
        plot_bgcolor='white',
        legend=dict(
            title='Cell Subtypes',  # Legend title
            font=dict(
                family='Arial',
                size=12,
                color='black'
            ),
            bgcolor='white',
            bordercolor='black',
            borderwidth=0.4,
            itemsizing='constant'
        )
    )
    return subtype_fig
def update_subtype_dropdown_options(event):
    """Refresh both axis dropdowns from the files ticked in the checkbox group.

    With at least one file selected, the options are the columns of ``df``
    whose name contains '_Intensity_Average'. Otherwise both are emptied.
    """
    chosen_files = subtype_checkbox_group.value
    if not chosen_files:
        subtype_x_axis_dropdown.options = []
        subtype_y_axis_dropdown.options = []
        return

    subset = df.loc[df['Sample_ID'].isin(chosen_files), :].copy()
    intensity_columns = [
        column for column in list(subset.columns) if '_Intensity_Average' in column
    ]
    subtype_x_axis_dropdown.options = intensity_columns
    subtype_y_axis_dropdown.options = intensity_columns
def update_subtype_slider_ranges(event):
    """Resize both line sliders to the data of the selected files and axes, then reset them to 0.

    Each slider gets a range symmetric around zero that is wide enough to
    reach both the smallest and the largest value of its column. Nothing
    happens unless files and both axes are selected. A column with no finite
    non-zero extent leaves its slider's range unchanged.
    """
    subtype_selected_files = subtype_checkbox_group.value
    subtype_x_axis = subtype_x_axis_dropdown.value
    subtype_y_axis = subtype_y_axis_dropdown.value
    if not (subtype_selected_files and subtype_x_axis and subtype_y_axis):
        return

    subset = df.loc[df['Sample_ID'].isin(subtype_selected_files), :]
    axis_sliders = (
        (subtype_x_axis, subtype_x_line_slider),
        (subtype_y_axis, subtype_y_line_slider),
    )
    for axis_name, slider in axis_sliders:
        column = subset[axis_name]
        # Use the larger magnitude of min and max so values below -|max|
        # stay reachable by the slider.
        bound = float(max(abs(column.min()), abs(column.max())))
        if not np.isfinite(bound) or bound == 0:
            continue
        slider.start = -bound
        slider.end = bound
    subtype_x_line_slider.value = 0
    subtype_y_line_slider.value = 0
def on_subtype_value_change(event):
    """Rebuild the dot plot and the reconstruction plot from the current widget values."""
    chosen_files = subtype_checkbox_group.value
    # Both figures are built before either pane is updated
    dot_figure = create_subtype_dot_plot(
        chosen_files,
        subtype_x_axis_dropdown.value,
        subtype_y_axis_dropdown.value,
        subtype_random_sample_input.value,
        subtype_x_line_slider.value,
        subtype_y_line_slider.value,
    )
    reconstruction_figure = create_subtype_reconstruction_plot(chosen_files)
    subtype_plot_placeholder.object = dot_figure
    subtype_reconstruction_placeholder.object = reconstruction_figure
# Link value changes to their callbacks (registration order is kept per widget)
for _checkbox_callback in (update_subtype_dropdown_options, update_subtype_slider_ranges):
    subtype_checkbox_group.param.watch(_checkbox_callback, 'value')
for _axis_dropdown in (subtype_x_axis_dropdown, subtype_y_axis_dropdown):
    _axis_dropdown.param.watch(update_subtype_slider_ranges, 'value')
for _plot_input in (
    subtype_x_axis_dropdown,
    subtype_y_axis_dropdown,
    subtype_random_sample_input,
    subtype_x_line_slider,
    subtype_y_line_slider,
):
    _plot_input.param.watch(on_subtype_value_change, 'value')

# Layout: controls on top, the two plots side by side underneath
_subtype_dot_plot_column = pn.Column(
    "## Dot Plot",
    pn.Column(subtype_plot_placeholder),
)
_subtype_reconstruction_column = pn.Column(
    "## Cell Subtype Digital Reconstruction Plot",
    subtype_reconstruction_placeholder,
)
plot_with_subtype_reconstruction = pn.Column(
    "## Select Files to Construct Dot Plot",
    subtype_checkbox_group,
    subtype_x_axis_dropdown,
    subtype_y_axis_dropdown,
    subtype_random_sample_input,
    pn.Row(subtype_x_line_slider, subtype_y_line_slider),
    pn.Row(_subtype_dot_plot_column, _subtype_reconstruction_column),
)
# Current axis selections of the two dropdowns, read once at import time
subtype_x_axis = subtype_x_axis_dropdown.value
subtype_y_axis = subtype_y_axis_dropdown.value
# Normalize the values in df2.cell_subtype (strip whitespace, lower-case)
# NOTE(review): df2 is defined outside this section; confirm it has a string 'cell_subtype' column.
df2['cell_subtype'] = df2['cell_subtype'].str.strip().str.lower()
# Normalize the keys in cell_subtype_color_dict the same way (this rebinds the module-level dict)
cell_subtype_color_dict = {k.strip().lower(): v for k, v in cell_subtype_color_dict.items()}
# Map the cell_subtype values to colors
cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)
# The bare names below are notebook-style display expressions; they have no effect in a script
data
cell_subtype_color_dict
# Remove the 'rgb' prefix: each value is a string, so slicing leaves the text "(r, g, b)", not a tuple
cell_subtype_color_dict = {k: v[3:] for k, v in cell_subtype_color_dict.items()}
cell_subtype_color_dict
# Per-row colors
# NOTE(review): sample_color_dict is defined outside this section.
sample_row_colors =df.Sample_ID.map(sample_color_dict)
# NOTE(review): the dict keys were lower-cased above, while df.cell_subtype is not normalized in
# this section; mixed-case subtype names would map to NaN here - verify. This also overwrites the
# cell_subtype_row_colors computed from df2 above.
cell_subtype_row_colors = df.cell_subtype.map(cell_subtype_color_dict)
# Number of rows for each (cell_type, cell_subtype) combination
counts = df.groupby(['cell_type', 'cell_subtype']).size().reset_index(name='count')
counts
# Total number of counted rows across all combinations
total = sum(counts['count'])
# Each combination's share of the overall total, in percent
counts['percentage'] = counts.groupby('cell_subtype')['count'].transform(lambda x: (x / total) * 100)
# ## IV.10. SAVE
# Save the data by Sample_ID. to_csv opens the target in 'w' mode by default,
# so an existing file is overwritten and a missing one is created by the same call.
for sample in ls_samples:
    sample_id = sample
    filename = os.path.join(output_data_dir, sample_id + f"_{step_suffix}.csv")
    df_save = df.loc[df['Sample_ID'] == sample_id, :]
    df_save.to_csv(filename, index=True, index_label='ID')

# All samples in a single merged file
filename = os.path.join(output_data_dir, f"all_Samples_{project_name}.csv")
df.to_csv(filename, index=True, index_label='ID')
# ## Panel App
# Metadata widget
df_widget = pn.widgets.DataFrame(metadata, name="MetaData")

# Content of the five tabs
metadata_tab = pn.Column(pn.pane.Markdown("## Initial DataFrame"), intial_df)
dotplot_tab = pn.Column(plot_with_reconstruction)
celltype_classification_tab = pn.Column(cell_type_classification_app_main, threshold_panel)
cellsubtype_classification_tab = pn.Column(cell_subtype_classification_app_main, subtype_threshold_panel)
subtype_dotplot_tab = pn.Column(plot_with_subtype_reconstruction,)

# Tabs in display order
_app_tabs = pn.Tabs(
    ("Metadata", metadata_tab),
    ("Classify-Celltype-Marker", celltype_classification_tab),
    ("Cell_Types", dotplot_tab),
    ("Classify-Cell Subtype-Marker", cellsubtype_classification_tab),
    ("Cell-Subtypes", subtype_dotplot_tab),
)

# Template wrapping the tabs, then launch
app4_5 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Marker Threshold & Classification",
    main=[_app_tabs],
)
app4_5.show()