KashyapiNagaHarshitha committed on
Commit
6372547
·
verified ·
1 Parent(s): ea9e947

Upload Step5_Marker_Threshold_Classification.py

Browse files
Step5_Marker_Threshold_Classification.py ADDED
@@ -0,0 +1,1508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ # # IV. MARKERS THRESHOLDS NOTEBOOK
4
+ # ## IV.1. PACKAGES IMPORT
5
+
6
+ import os
7
+ import random
8
+ import re
9
+ import pandas as pd
10
+ import numpy as np
11
+ import seaborn as sb
12
+ import matplotlib.pyplot as plt
13
+ import matplotlib.colors as mplc
14
+ import subprocess
15
+ import warnings
16
+ import panel as pn
17
+ import json
18
+ from scipy import signal
19
+ from scipy.stats import pearsonr
20
+ import plotly.figure_factory as ff
21
+ import plotly
22
+ import plotly.graph_objs as go
23
+ from plotly.subplots import make_subplots
24
+ from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
25
+ import plotly.express as px
26
+ import sys
27
+ sys.setrecursionlimit(5000)
28
+ from my_modules import *
29
+ #Silence FutureWarnings & UserWarnings
30
+ warnings.filterwarnings('ignore', category= FutureWarning)
31
+ warnings.filterwarnings('ignore', category= UserWarning)
32
+
33
+
34
# ## IV.2. *DIRECTORIES
# Every path is resolved relative to the folder that contains this script.
# Project settings come from the stored_variables.json file sitting next to it.
present_dir = os.path.dirname(os.path.realpath(__file__))
stored_variables_path = os.path.join(present_dir, 'stored_variables.json')
with open(stored_variables_path, 'r') as file:
    stored_vars = json.load(file)

# Settings read from the JSON file
directory = stored_vars['base_dir']
set_path = stored_vars['set_path']
selected_metadata_files = stored_vars['selected_metadata_files']
ls_samples = stored_vars['ls_samples']

# Base directory and project naming
input_path = os.path.join(present_dir, directory)
base_dir = input_path
set_name = set_path
project_name = set_name  # Project name
step_suffix = 'mt'  # Current part (here part IV)
previous_step_suffix_long = "_zscore"  # Previous part (here ZSCORE NOTEBOOK)

# Input data (previous step), output data (this step) and metadata
# directories all live under base_dir and share the project-name prefix.
input_data_dir, output_data_dir, metadata_dir = (
    os.path.join(base_dir, project_name + suffix)
    for suffix in (previous_step_suffix_long, "_" + step_suffix, "_metadata")
)

# Image subdirectories
output_images_dir = os.path.join(output_data_dir, "images")
metadata_images_dir = os.path.join(metadata_dir, "images")
66
+
67
+ # Create directories if they don't already exist
68
+ #for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
69
+ # if not os.path.exists(d):
70
+ #print("Creation of the" , d, "directory...")
71
+ # os.makedirs(d)
72
+ #else :
73
+ # print("The", d, "directory already exists !")
74
+
75
+ #os.chdir(input_data_dir)
76
+
77
+
78
+ # Verify paths
79
+ #print('base_dir :', base_dir)
80
+ #print('input_data_dir :', input_data_dir)
81
+ #print('output_data_dir :', output_data_dir)
82
+ #print('output_images_dir :', output_images_dir)
83
+ #print('metadata_dir :', metadata_dir)
84
+ #print('metadata_images_dir :', metadata_images_dir)
85
+
86
+
87
+ # ## IV.3. FILES
88
+
89
+ # ### IV.3.1. METADATA
90
+
91
+
92
# Marker metadata table, read from the metadata directory
filename = os.path.join(metadata_dir, "marker_intensity_metadata.csv")
metadata = pd.read_csv(filename)

# Verify headers with compare_headers() (star-imported from my_modules)
exp_cols = [
    'Round',
    'Target',
    'Channel',
    'target_lower',
    'full_column',
    'marker',
    'localisation',
]
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")

# Keep only rows without missing values
metadata = metadata.dropna()
113
+
114
+
115
+ # ### IV.3.2. NOT_INTENSITIES
116
# One entry per line of not_intensities.csv; surrounding whitespace of the
# whole file is stripped before splitting on newlines.
filename = os.path.join(metadata_dir, "not_intensities.csv")
with open(filename, 'r') as fh:
    raw_text = fh.read()
not_intensities = raw_text.strip().split("\n")
128
+ # take str, strip whitespace, split on new line character
129
+
130
+ # Verify size
131
+ #print("\nVerifying data read from file is the correct length...\n")
132
+ #verify_line_no(filename, len(not_intensities))
133
+
134
+ # Print to console
135
+ #print("not_intensities =\n", not_intensities)
136
+
137
+
138
+ # ### IV.3.3. FULL_TO_SHORT_COLUMN_NAMES
139
+
140
# Mapping table from full column names to short column names
filename = os.path.join(metadata_dir, "full_to_short_column_names.csv")

# Open, read in information
df = pd.read_csv(filename, header=0)

print("Verifying data read from file is the correct length...\n")

# Turn into dictionary: 'full_name' becomes the key; the first record of the
# transposed table (i.e. the first remaining column) supplies the values.
transposed_names = df.set_index('full_name').T
full_to_short_names = transposed_names.to_dict('records')[0]
158
+ #print('full_to_short_names =\n',full_to_short_names)
159
+
160
+
161
+ # ### IV.3.4. SHORT_TO_FULL_COLUMN_NAMES
162
+
163
+
164
# Mapping table from short column names back to full column names
filename = os.path.join(metadata_dir, "short_to_full_column_names.csv")

# Open, read in information
df = pd.read_csv(filename, header=0)

# Turn into dictionary: 'short_name' becomes the key; the first record of the
# transposed table (i.e. the first remaining column) supplies the values.
transposed_names = df.set_index('short_name').T
short_to_full_names = transposed_names.to_dict('records')[0]
182
+ # Print information
183
+ #print('short_to_full_names =\n',short_to_full_names)
184
+
185
+
186
+ # ### IV.3.10. DATA
187
+
188
+ # List files in the directory
189
+ # Check if the directory exists
190
# Detect the z-score CSV files of the previous step. If the directory does
# not exist, the sample list read from stored_variables.json is kept.
if os.path.exists(input_data_dir):
    ls_samples = [
        sample for sample in os.listdir(input_data_dir)
        if sample.endswith("_zscore.csv")
    ]

# Per-sample DataFrames, keyed by file name
dfs = {}

# Column order shared by all samples; taken from the first usable file
expected_headers = None

# Samples that were actually loaded. Built separately so the list being
# iterated is never modified during the loop.
loaded_samples = []

###############################
# !! This may take a while !! #
###############################
for sample in ls_samples:
    file_path = os.path.join(input_data_dir, sample)
    try:
        sample_df = pd.read_csv(file_path, index_col=0)
    except pd.errors.EmptyDataError:
        # File without any content: leave it out of the analysis
        continue

    if sample_df.empty:
        # Header only, no rows: nothing to analyse for this sample
        continue

    if expected_headers is None:
        expected_headers = sample_df.columns.values

    # Reorder the columns to match the expected headers list
    dfs[sample] = sample_df.reindex(columns=expected_headers)
    loaded_samples.append(sample)

ls_samples = loaded_samples

if not dfs:
    raise ValueError(
        f"No non-empty '*_zscore.csv' file could be loaded from {input_data_dir}"
    )

# Merge dfs into one df
df = pd.concat(dfs.values(), ignore_index=False, sort=False)
del dfs

print(df.head())

# Preview pane with the first 40 rows (variable name kept as in the original)
intial_df = pn.pane.DataFrame(df.head(40), width=2500)
240
+
241
+
242
+ # ### Marker Classification
243
+
244
+ # ## IV.5. *DOTPLOTS
245
+
246
+ df
247
# ### IV.7.2. DOTPLOTS-DETERMINED THRESHOLD
# Make sure stored_variables.json contains the two (initially empty)
# dictionaries that hold the marker lists per cell type and per cell subtype.
try:
    with open(stored_variables_path, 'r') as f:
        stored_variables = json.load(f)
except FileNotFoundError:
    # No settings file yet: start from an empty mapping, written out below.
    # A malformed file is deliberately NOT caught here, so that it is never
    # overwritten with a near-empty one.
    stored_variables = {}

missing_keys = [
    key
    for key in ('cell_type_classification', 'cell_subtype_classification')
    if key not in stored_variables
]
for key in missing_keys:
    stored_variables[key] = {}

# Rewrite the file only when something was actually added
if missing_keys:
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)

# 'data' is an alias of the merged intensity table
data = df
295
+
296
+
297
# Load existing stored variables (json and panel are imported at the top of
# the file)
with open(stored_variables_path, 'r') as f:
    stored_variables = json.load(f)

# Thresholds saved by a previous run, if any
saved_thresholds = stored_variables.get('thresholds', {})
if not isinstance(saved_thresholds, dict):
    saved_thresholds = {}

# One numeric input per marker. Each widget starts from the saved threshold
# so that pressing "Save Thresholds" does not reset untouched markers to 0.0;
# markers without a saved numeric value start at 0.0.
threshold_inputs = {}
for marker in stored_variables['markers']:
    saved_value = saved_thresholds.get(marker)
    start_value = float(saved_value) if isinstance(saved_value, (int, float)) else 0.0
    threshold_inputs[marker] = pn.widgets.FloatInput(
        name=f'{marker} Threshold', value=start_value, step=0.1
    )

# Check if 'thresholds' field is present, if not, add it to the file
if 'thresholds' not in stored_variables:
    thresholds = {
        marker: input_widget.value
        for marker, input_widget in threshold_inputs.items()
    }
    stored_variables['thresholds'] = thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
325
+
326
+ # Save button to save thresholds to stored_variables.json
327
def save_thresholds(event):
    """Persist the current widget values as thresholds.

    Reads the value of every widget in ``threshold_inputs``, stores the
    resulting marker -> value mapping under ``stored_variables['thresholds']``,
    rewrites the JSON file at ``stored_variables_path`` and shows a success
    notification.

    Args:
        event: Click event passed by the button; not used.
    """
    current_values = {}
    for marker_name, widget in threshold_inputs.items():
        current_values[marker_name] = widget.value
    stored_variables['thresholds'] = current_values

    with open(stored_variables_path, 'w') as handle:
        json.dump(stored_variables, handle, indent=4)

    pn.state.notifications.success('Thresholds saved successfully!')
333
+
334
# Button that triggers save_thresholds()
save_button2 = pn.widgets.Button(name='Save Thresholds', button_type='primary')
save_button2.on_click(save_thresholds)

# Create a GridSpec layout
grid = pn.GridSpec()

# Add the widgets to the grid, five per row (row-major order)
WIDGETS_PER_ROW = 5
marker_names = stored_variables['markers']
for position, marker in enumerate(marker_names):
    grid[position // WIDGETS_PER_ROW, position % WIDGETS_PER_ROW] = threshold_inputs[marker]

# Row/column the next widget would have gone to
row, col = divmod(len(marker_names), WIDGETS_PER_ROW)

# Add the save button one row further down, spanning the five columns
grid[row + 1, :5] = save_button2

# Panel layout
threshold_panel = pn.Column(
    pn.pane.Markdown("## Define Thresholds for Markers"),
    grid,
)
357
+
358
+
359
# Reload the stored variables so that thresholds and classifications saved in
# the meantime are picked up (pandas and json are imported at the top of the
# file).
with open(stored_variables_path, 'r') as file:
    stored_variables = json.load(file)

# Step 1: Identify intensities - every column of df is a candidate
intensities = df.columns.tolist()
367
+
368
def assign_cell_type(row):
    """Return the cell type of one row of the intensity table.

    The columns listed in the module-level ``intensities`` are visited in
    order. The marker name is the part of the column name before the first
    underscore. For the first column whose marker has a threshold in
    ``stored_variables['thresholds']``, whose value in ``row`` is strictly
    above that threshold, and whose marker is listed under some cell type in
    ``stored_variables['cell_type_classification']``, that cell type is
    returned.

    Args:
        row: Mapping from column name to value (a DataFrame row when used
            with ``DataFrame.apply(..., axis=1)``).

    Returns:
        The matching cell type, or ``'STROMA'`` when nothing matches.
    """
    for column in intensities:
        marker = column.split('_')[0]  # Marker is the prefix of the column name
        if marker not in stored_variables['thresholds']:
            continue
        if row[column] > stored_variables['thresholds'][marker]:
            classification = stored_variables['cell_type_classification']
            matching_types = [
                cell_type
                for cell_type, markers in classification.items()
                if marker in markers
            ]
            if matching_types:
                return matching_types[0]
    return 'STROMA'  # Default if no condition matches
378
+
379
# Step 5: Apply the classification function to every row of the DataFrame
df['cell_type'] = df.apply(assign_cell_type, axis=1)

# Number of rows whose cell_type contains each of the four labels
cell_type_labels = df['cell_type']
present_stroma = cell_type_labels.str.contains('STROMA').sum()
present_cancer = cell_type_labels.str.contains('CANCER').sum()
present_immune = cell_type_labels.str.contains('IMMUNE').sum()
present_endothelial = cell_type_labels.str.contains('ENDOTHELIAL').sum()
395
+
396
+ # ## IV.8. *HEATMAPS
397
+ #print(df.columns)
398
+ # Assuming df_merged is your DataFrame
399
# A duplicated 'Sample_ID' header is read back by pandas as 'Sample_ID.1';
# restore the expected column name.
if 'Sample_ID.1' in df.columns:
    df = df.rename(columns={'Sample_ID.1': 'Sample_ID'})

# Samples to keep: the list stored in the settings loaded at start-up.
# (The original opened stored_variables.json here without reading from it.)
ls_samples = stored_vars['ls_samples']
keep = ls_samples

keep_cell_type = ['ENDOTHELIAL', 'CANCER', 'STROMA', 'IMMUNE']

# Rows belonging to the kept samples and to one of the four cell types
test2_df = df.loc[
    df['cell_type'].isin(keep_cell_type) & df['Sample_ID'].isin(keep), :
].copy()

# Randomly pick up to 20,000 distinct rows. Sampling without replacement
# avoids duplicated cells, and capping at the number of available rows keeps
# this working for small (or empty) subsets.
n_random_rows = min(20000, len(test2_df))
random_rows = np.random.choice(len(test2_df), size=n_random_rows, replace=False)
df2 = test2_df.iloc[random_rows, :].copy()
419
+ #print(df2)
420
+
421
+
422
+ # ### COLORS
423
+
424
+ # #### SAMPLES COLORS
425
# #### SAMPLES COLORS
# Sample identifiers actually present in the data
sample_ids = df.Sample_ID.unique()

# One husl colour per sample. The palette is sized to cover both the stored
# sample list and the identifiers found in the data, so no sample is left
# without a colour when the two differ.
n_sample_colors = max(len(ls_samples), len(sample_ids))
color_values = sb.color_palette("husl", n_colors=n_sample_colors)
sb.palplot(sb.color_palette(color_values))

# Samples whose identifier contains 'TMA' get a gray palette instead
TMA_samples = [s for s in sample_ids if 'TMA' in s]
TMA_color_values = sb.color_palette(n_colors=len(TMA_samples), palette="gray")
sb.palplot(sb.color_palette(TMA_color_values))

# Store in a dictionary: Sample_ID -> colour
color_dict = dict(zip(sample_ids, color_values))

# Replace all TMA samples' colors with gray, in order of appearance
for tma_sample, gray in zip(TMA_samples, TMA_color_values):
    color_dict[tma_sample] = gray

# color_dict_to_df() is star-imported from my_modules
color_df_sample = color_dict_to_df(color_dict, "Sample_ID")

# Save to file in the metadata directory
filename = os.path.join(metadata_dir, "sample_color_data.csv")
color_df_sample.to_csv(filename, index=False)
453
+
454
+ # Legend of sample info only
455
# Legend of sample info only: an invisible axis holding one zero-size bar per
# sample, used purely to generate the legend handles.
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = [
    g.bar(0, 0, color=sample_color, label=sample_name, linewidth=0)
    for sample_name, sample_color in color_dict.items()
]
first_legend = plt.legend(handles=handles, loc='upper right', title='Sample')

# Save the legend image in the metadata images directory
filename = os.path.join(metadata_images_dir, "Sample_legend.png")
plt.savefig(filename, bbox_inches='tight')
467
+
468
# Read the sample colour table back from the metadata directory.
# NOTE: this rebinds the module-level name `df` to the colour table.
filename = os.path.join(metadata_dir, "sample_color_data.csv")
df = pd.read_csv(filename, header=0).drop(columns=['hex'])

# The (r, g, b) tuples were written to the CSV as strings '(r, g, b)';
# convert each one back into a tuple of floats.
df['rgb'] = df.apply(lambda record: rgb_tuple_from_str(record['rgb']), axis=1)

# Turn into dictionary: Sample_ID -> rgb tuple
sample_rgb = df.set_index('Sample_ID')['rgb']
sample_color_dict = sample_rgb.to_dict()
492
+
493
+ # Print information
494
+ #print('sample_color_dict =\n',sample_color_dict)
495
+
496
+
497
+ # #### CELL TYPES COLORS
498
+
499
+ # Define your custom colors for each cell type
500
# Custom (r, g, b) colour for each cell type
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}

# Cell types and their colours, in the insertion order of custom_colors
cell_types = list(custom_colors)
color_values = list(custom_colors.values())

# Display the colors
sb.palplot(sb.color_palette(color_values))

# Store in a dictionary: cell type -> colour
celltype_color_dict = dict(custom_colors)

# Save color information (mapping) to the metadata directory.
# color_dict_to_df() is star-imported from my_modules.
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
filename = os.path.join(metadata_dir, "celltype_color_data.csv")
celltype_color_df.to_csv(filename, index=False)
529
+ #print("File" + filename + " was created!")
530
+
531
+ # Legend of cell type info only
532
# Legend of cell type info only: an invisible axis holding one zero-size bar
# per cell type, used purely to generate the legend handles.
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in celltype_color_dict.keys():
    h = g.bar(0, 0, color=celltype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)

# No trailing comma here: first_legend is the Legend object itself, exactly
# like the sample legend, and not a 1-tuple wrapping it.
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell type')

# Save the legend image in the metadata images directory
filename = "Celltype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')
545
+
546
# Read the cell-type colour table back from the metadata directory.
# NOTE: this rebinds the module-level name `df` to the colour table.
filename = os.path.join(metadata_dir, "celltype_color_data.csv")
df = pd.read_csv(filename, header=0).drop(columns=['hex'])

# The (r, g, b) tuples were written to the CSV as strings '(r, g, b)';
# convert each one back into a tuple of floats.
df['rgb'] = df.apply(lambda record: rgb_tuple_from_str(record['rgb']), axis=1)

# Turn into dictionary: cell_type -> rgb tuple
cell_type_rgb = df.set_index('cell_type')['rgb']
cell_type_color_dict = cell_type_rgb.to_dict()

# Per-row colours for the subsample df2
sample_row_colors = df2.Sample_ID.map(sample_color_dict)
cell_type_row_colors = df2.cell_type.map(cell_type_color_dict)
579
+ #print(cell_type_row_colors[1:5])
580
+
581
+
582
+ # ## Cell Subtype Colours
583
+ import pandas as pd
584
+ import os
585
+
586
def rgb_tuple_from_str(rgb_str):
    """Parse a colour string such as '(0.1, 0.2, 0.3)' into a tuple of floats.

    Parentheses, spaces and any 'np.float64' wrappers are removed before the
    remaining text is split on commas.

    Args:
        rgb_str: String representation of a colour tuple.

    Returns:
        A tuple of floats, or ``None`` when a component cannot be converted
        to a float.
    """
    # Drop '(', ')' and ' ' in a single pass, then the 'np.float64' wrappers
    cleaned = rgb_str.translate(str.maketrans('', '', '() '))
    cleaned = cleaned.replace("np.float64", "")
    try:
        return tuple(float(component) for component in cleaned.split(","))
    except ValueError:
        return None  # Unparseable colour string
595
+
596
# Read the cell-subtype colour table from the metadata directory.
# NOTE: this rebinds the module-level name `df` to the colour table.
filename = os.path.join(metadata_dir, "cellsubtype_color_data.csv")
df = pd.read_csv(filename, header=0).drop(columns=['hex'])

# Clean the 'rgb' column to remove unexpected 'np.float64' strings
rgb_text = df['rgb'].str.replace("np.float64", "", regex=False)
df['rgb'] = rgb_text

# Convert each colour string into a tuple of floats
df['rgb'] = df.apply(lambda record: rgb_tuple_from_str(record['rgb']), axis=1)

# Turn into dictionary: cell_subtype -> rgb tuple
cell_subtype_rgb = df.set_index('cell_subtype')['rgb']
cell_subtype_color_dict = cell_subtype_rgb.to_dict()

# Per-row colours for the subsample df2
sample_row_colors = df2.Sample_ID.map(sample_color_dict)
cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)
632
+ #print(cell_subtype_row_colors[1:5])
633
+
634
+
635
+ # #### Cell Type
636
+ df
637
+ #print(f"Loaded sample files: {ls_samples}")
638
# Column names of the table currently bound to df
selected_intensities = df.columns.tolist()

# From here on the interactive widgets work on the random subsample df2
df = df2

# json, pandas, numpy and panel are already imported at the top of the file
import plotly.graph_objects as go

pn.extension('plotly')

# Load the sample list from the JSON file
with open(stored_variables_path, 'r') as f:
    json_data = json.load(f)
ls_samples = json_data["ls_samples"]

# Checkbox group to select files
checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=ls_samples)

# Initially empty dropdowns for X and Y axis selection
x_axis_dropdown, y_axis_dropdown = (
    pn.widgets.Select(name=label, options=[])
    for label in ('Select X-Axis', 'Select Y-Axis')
)

# Input field for the number of random samples
random_sample_input = pn.widgets.IntInput(
    name='Number of Random Samples', value=20000, step=100
)

# Sliders for interactive X and Y lines
x_line_slider, y_line_slider = (
    pn.widgets.FloatSlider(name=label, start=0, end=1, step=0.01)
    for label in ('X Axis Line Position', 'Y Axis Line Position')
)

# Placeholders for the dot plot and for the digital reconstruction plot
plot_placeholder = pn.pane.Plotly()
reconstruction_placeholder = pn.pane.Plotly()
678
+
679
+ # Function to create the dot plot
680
def create_dot_plot(selected_files, x_axis, y_axis, n_samples, x_line_pos, y_line_pos):
    """Build the threshold dot plot for the selected samples.

    Rows of the module-level ``df`` whose ``Sample_ID`` is in
    ``selected_files`` are plotted as a scatter of ``x_axis`` against
    ``y_axis``, with a dashed red vertical line at ``x_line_pos`` and a dashed
    red horizontal line at ``y_line_pos``.

    Args:
        selected_files: Sample identifiers to include.
        x_axis: Column plotted on the X axis.
        y_axis: Column plotted on the Y axis.
        n_samples: Maximum number of points drawn; ``None`` means no limit.
        x_line_pos: X position of the vertical line.
        y_line_pos: Y position of the horizontal line.

    Returns:
        A ``go.Figure``; it is empty when no file is selected or when one of
        the axes is not a column of the data.
    """
    if not selected_files:
        return go.Figure()

    test_df = df.loc[df['Sample_ID'].isin(selected_files), :]

    if x_axis not in test_df.columns or y_axis not in test_df.columns:
        return go.Figure()

    # Subsample only when there are more rows than requested. Rows are drawn
    # WITHOUT replacement so no cell is plotted twice; an empty input widget
    # (None) is treated as "no limit" instead of raising.
    if n_samples is not None and len(test_df) > n_samples:
        test_df = test_df.sample(n=max(n_samples, 0))

    fig = go.Figure()
    title = 'Threshold'

    fig.add_trace(go.Scatter(
        x=test_df[x_axis],
        y=test_df[y_axis],
        mode='markers',
        marker=dict(color='LightSkyBlue', size=2)
    ))

    # Add vertical and horizontal lines
    fig.add_vline(x=x_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.add_hline(y=y_line_pos, line_width=2, line_dash="dash", line_color="red")

    # Axes span exactly the range of the plotted values
    fig.update_layout(
        title=title,
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(title=x_axis, linecolor='black',
                   range=[test_df[x_axis].min(), test_df[x_axis].max()]),
        yaxis=dict(title=y_axis, linecolor='black',
                   range=[test_df[y_axis].min(), test_df[y_axis].max()])
    )
    return fig
724
+
725
def assign_cell_types_again():
    """Recompute the ``cell_type`` column of the module-level ``df``.

    The thresholds and the cell-type classification are re-read from the JSON
    file at ``stored_variables_path`` so that values saved in the meantime
    are used. For every row the columns are visited in order and the first
    one whose value is strictly above the threshold of its marker (the part
    of the column name before the first underscore) determines the cell
    type; rows without any match are labelled ``'STROMA'``.

    Returns:
        The module-level ``df``, modified in place.
    """
    with open(stored_variables_path, 'r') as file:
        stored_variables = json.load(file)

    thresholds = stored_variables['thresholds']
    classification = stored_variables['cell_type_classification']

    # Build the (column, threshold, cell type) rules once instead of
    # re-deriving them for every row. Columns whose marker has no threshold,
    # or is not listed under any cell type, can never decide a row and are
    # left out.
    rules = []
    for intensity in df.columns:
        marker = intensity.split('_')[0]  # Extract marker from intensity name
        if marker not in thresholds:
            continue
        for cell_type, markers in classification.items():
            if marker in markers:
                rules.append((intensity, thresholds[marker], cell_type))
                break

    def assign_cell_type(row):
        """Return the cell type of the first rule that ``row`` satisfies."""
        for intensity, threshold, cell_type in rules:
            if row[intensity] > threshold:
                return cell_type
        return 'STROMA'  # Default if no condition matches

    df['cell_type'] = df.apply(assign_cell_type, axis=1)
    return df
741
+
742
+ # Function to create the digital reconstruction plot
743
def create_reconstruction_plot(selected_files):
    """Build the XY reconstruction plot of the selected samples.

    Cell types are first recomputed with ``assign_cell_types_again()``. Each
    selected sample then contributes one scatter trace per cell type, placed
    at the ``Nuc_X`` / ``Nuc_Y_Inv`` coordinates and coloured from
    ``cell_type_color_dict``.

    Args:
        selected_files: Sample identifiers to draw.

    Returns:
        A ``go.Figure``; it is empty when no file is selected.
    """
    if not selected_files:
        return go.Figure()

    def _plotly_rgb(rgb):
        """Format an (r, g, b) tuple as a Plotly 'rgb(r, g, b)' string.

        Plotly expects channels on the 0-255 scale, while the colour
        dictionaries of this script hold 0-1 floats; tuples whose channels
        all lie in [0, 1] are therefore scaled by 255.
        """
        channels = list(rgb)[:3]
        if all(0 <= channel <= 1 for channel in channels):
            channels = [channel * 255 for channel in channels]
        return 'rgb({}, {}, {})'.format(*(int(round(channel)) for channel in channels))

    df = assign_cell_types_again()
    fig = go.Figure()

    # Coordinates and cell types of every selected sample
    coords = df.loc[
        df['Sample_ID'].isin(selected_files),
        ['Sample_ID', 'Nuc_X', 'Nuc_Y_Inv', 'cell_type'],
    ]

    for sample_id in selected_files:
        location_colors = coords.loc[coords['Sample_ID'] == sample_id]
        for celltype in location_colors['cell_type'].unique():
            cells = location_colors.loc[location_colors['cell_type'] == celltype]
            marker = dict(size=3, opacity=0.5)
            # Cell types without a known colour keep Plotly's default colour
            # instead of raising a KeyError.
            rgb = cell_type_color_dict.get(celltype)
            if rgb is not None:
                marker['color'] = _plotly_rgb(rgb)
            fig.add_scatter(
                mode='markers',
                marker=marker,
                x=cells['Nuc_X'],
                y=cells['Nuc_Y_Inv'],
                name=celltype
            )

    # Title names every drawn sample (the part before the first underscore)
    sample_names = ", ".join(sample.split('_')[0] for sample in selected_files)
    title = sample_names + " Background Subtracted XY Map cell types"

    # Axis ranges span ALL drawn samples, not only the last one
    xaxis = dict(title='Nuc_X', linecolor='black')
    yaxis = dict(title='Nuc_Y_Inv', linecolor='black')
    if not coords.empty:
        xaxis['range'] = [coords['Nuc_X'].min(), coords['Nuc_X'].max()]
        yaxis['range'] = [coords['Nuc_Y_Inv'].min(), coords['Nuc_Y_Inv'].max()]

    fig.update_layout(
        title=title,
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        legend=dict(
            title='Cell Types',
            font=dict(
                family='Arial',
                size=12,
                color='black'
            ),
            bgcolor='white',
            bordercolor='black',
            borderwidth=0.4,
            itemsizing='constant'
        ),
        xaxis=xaxis,
        yaxis=yaxis
    )

    return fig
788
+
789
def update_dropdown_options(event):
    """Refresh the options of the X/Y axis dropdowns.

    When at least one file is ticked in ``checkbox_group``, both dropdowns
    offer every column of the module-level ``df`` whose name contains
    ``'_Intensity_Average'``; otherwise both are emptied.

    Args:
        event: Watcher event passed by Panel; not used.
    """
    if checkbox_group.value:
        # The column list does not depend on which rows are selected, so the
        # table is neither filtered nor copied here.
        options = [col for col in df.columns if '_Intensity_Average' in col]
    else:
        options = []

    x_axis_dropdown.options = options
    y_axis_dropdown.options = list(options)
803
+
804
def update_slider_ranges(event):
    """Re-scale the threshold-line sliders to the selected data.

    Each slider is made symmetric around zero, spanning
    ``[-|max|, +|max|]`` of the chosen axis column over the selected samples,
    and its value is reset to 0. Nothing happens until files and both axes
    are selected. ``event`` is the watcher event and is not inspected.
    """
    chosen = checkbox_group.value
    x_axis = x_axis_dropdown.value
    y_axis = y_axis_dropdown.value
    if not (chosen and x_axis and y_axis):
        return

    subset = df.loc[df['Sample_ID'].isin(chosen), :].copy()
    x_bound = abs(subset[x_axis].max())
    y_bound = abs(subset[y_axis].max())

    x_line_slider.start, x_line_slider.end = -x_bound, x_bound
    y_line_slider.start, y_line_slider.end = -y_bound, y_bound
    x_line_slider.value = 0
    y_line_slider.value = 0
821
+
822
def on_value_change(event):
    """Rebuild both plots from the current widget state.

    Reads the file selection, axes, sample count and line positions from the
    widgets, regenerates the dot plot and the reconstruction plot, and only
    then pushes both into their placeholders. ``event`` is not inspected.
    """
    selected_files = checkbox_group.value
    dot_figure = create_dot_plot(
        selected_files,
        x_axis_dropdown.value,
        y_axis_dropdown.value,
        random_sample_input.value,
        x_line_slider.value,
        y_line_slider.value,
    )
    reconstruction_figure = create_reconstruction_plot(selected_files)
    plot_placeholder.object = dot_figure
    reconstruction_placeholder.object = reconstruction_figure
835
+
836
# Link value changes to the callbacks.
# The file selection drives the dropdown options and the slider ranges.
for _callback in (update_dropdown_options, update_slider_ranges):
    checkbox_group.param.watch(_callback, 'value')

# Changing either axis re-scales the sliders ...
for _widget in (x_axis_dropdown, y_axis_dropdown):
    _widget.param.watch(update_slider_ranges, 'value')

# ... and any plot-affecting widget triggers a redraw.
for _widget in (x_axis_dropdown, y_axis_dropdown, random_sample_input,
                x_line_slider, y_line_slider):
    _widget.param.watch(on_value_change, 'value')
846
+
847
# Layout: controls on top, dot plot and reconstruction plot side by side below.
dot_plot_column = pn.Column(
    "## Dot Plot",
    pn.Column(plot_placeholder),
)
reconstruction_column = pn.Column(
    "## Digital Reconstruction Plot",
    reconstruction_placeholder,
)
plot_with_reconstruction = pn.Column(
    "## Select Files to Construct Dot Plot",
    checkbox_group,
    x_axis_dropdown,
    y_axis_dropdown,
    random_sample_input,
    pn.Row(x_line_slider, y_line_slider),
    pn.Row(dot_plot_column, reconstruction_column),
)
863
+
864
+ # Serve the app
865
+ #plot_with_reconstruction.show()
866
+
867
# ## MAKE HEATMAPS

# ### Cell Subtype
# `anns` holds everything needed for the heatmap row/column annotations.
# IMPORTANT: if 'annotations' is used it MUST contain both 'rows' and 'cols';
# either may be an empty list, but both keys must be present.
#
# Row annotations are listed in the order they should appear on the figure
# (Sample first, then Cell type). Each entry is a dictionary with all the
# data for that annotation.
row_annotations = [
    {'label': 'Sample',
     'type': 'row',
     'mapping': sample_row_colors,
     'dict': sample_color_dict,
     'location': 'center left',
     'bbox_to_anchor': (0.1, 0.9)},
    {'label': 'Cell type',
     'type': 'row',
     'mapping': cell_type_row_colors,
     'dict': cell_type_color_dict,
     'location': 'center left',
     'bbox_to_anchor': (0.17, 0.9)},
]

# No column annotations are used, but the key must still exist.
col_annotations = []
anns = {'rows': row_annotations, 'cols': col_annotations}

# To simplify marker display in the following figures (heatmap, etc.),
# keep only the part of each short name before the first underscore.
figure_marker_names = {key: value.split('_')[0] for key, value in full_to_short_names.items()}

# Columns that are metadata rather than marker intensities.
not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID', 'cell_type', 'cell_subtype', 'cluster', 'ID',
                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']

# The notebook-style bare expressions (`not_intensities`, `df2`, `df2.head()`,
# `data`) and the `df2.drop('cell_subtype', ...)` calls were removed: their
# results were discarded, so they had no effect, and the 'cell_subtype'
# column is still read from df2 further down.
df2 = assign_cell_types_again()

# Save one heatmap
data = df
914
+ #print(data.columns)
915
# Select the rows of `data` that belong to the stored samples and to one of the
# main cell types, then randomly pick up to 1,000 of them for the heatmap.
import json

# Actually read the stored variables: the original opened the file but never
# parsed it and relied on a previously defined `stored_vars`.
with open(stored_variables_path, 'r') as file:
    stored_vars = json.load(file)
ls_samples = stored_vars['ls_samples']
keep = list(ls_samples)
keep_cell_type = ['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL']

# Individual conditions, then their conjunction.
cell_type_condition = data['cell_type'].isin(keep_cell_type)
sample_id_condition = data['Sample_ID'].isin(keep)
combined_condition = cell_type_condition & sample_id_condition

# Apply the combined condition to filter the DataFrame.
test2_df = data.loc[combined_condition].copy()

# Draw at most 1,000 distinct row positions. A permutation never repeats a
# row and also works when the filtered frame is empty or has < 1,000 rows,
# whereas np.random.choice(len, 1000) sampled with replacement and raised on
# an empty frame.
random_rows = np.random.permutation(len(test2_df))[:1000]
test_df = test2_df.iloc[random_rows, :].copy()
949
+ import json
950
+ import panel as pn
951
+ import param
952
+ import pandas as pd
953
+
954
# Initialize Panel extension
pn.extension('tabulator')

# Path to the stored variables file
file_path = stored_variables_path


def load_data():
    """Return the parsed contents of the stored-variables JSON file.

    A malformed file is reported on stdout and an empty dict is returned
    instead; any other error (for example a missing file) propagates.
    """
    try:
        with open(file_path, 'r') as handle:
            contents = json.load(handle)
    except json.JSONDecodeError as e:
        print(f"Error reading JSON file: {e}")
        return {}
    return contents


# NOTE: from here on `data` is the stored-variables dict, no longer the
# DataFrame that carried this name earlier in the script.
data = load_data()

# Markers, cell types and cell subtypes from the loaded data (empty if absent)
markers, cell_types, cell_subtypes = (
    data.get(key, []) for key in ('markers', 'cell_type', 'cell_subtype')
)
975
+
976
# Sanitize option names
def sanitize_options(options):
    """Return the option names with awkward characters replaced.

    Spaces become underscores, ``+`` becomes ``plus``, ``α`` becomes ``a`` and
    apostrophes are removed. The input is not modified.
    """
    replacements = str.maketrans({' ': '_', '+': 'plus', 'α': 'a', "'": None})
    cleaned = []
    for option in options:
        cleaned.append(option.translate(replacements))
    return cleaned
979
+
980
# Sanitized names are used as the row labels of the classification tables.
sanitized_cell_types, sanitized_cell_subtypes = (
    sanitize_options(names) for names in (cell_types, cell_subtypes)
)
982
+
983
# Helper function to create the classification DataFrame
def create_classification_df(items, item_label, marker_names=None):
    """Build an all-False item x marker classification table.

    The previous implementation generated a ``param.Parameterized`` class on
    the fly only to read its default values back into a DataFrame, and raised
    ``KeyError`` when ``items`` was empty. The table is now built directly.

    Args:
        items: Row labels (cell types or cell subtypes).
        item_label: Name of the first column, which holds ``items``.
        marker_names: Marker columns to create. Defaults to the module-level
            ``markers`` list, which is what existing callers rely on.

    Returns:
        A ``pandas.DataFrame`` with columns ``[item_label, *marker_names]``,
        one row per item and every marker cell set to ``False``.
    """
    if marker_names is None:
        marker_names = markers
    marker_names = list(marker_names)

    rows = [
        {item_label: item, **dict.fromkeys(marker_names, False)}
        for item in items
    ]
    # Passing `columns` fixes the column order and keeps the columns present
    # even when there are no rows.
    return pd.DataFrame(rows, columns=[item_label] + marker_names)
1000
+
1001
# Classification tables (one row per sanitized name, one column per marker)
cell_type_df = create_classification_df(sanitized_cell_types, 'CELL_TYPE')
cell_subtype_df = create_classification_df(sanitized_cell_subtypes, 'CELL_SUBTYPE')

# Render every marker column as a tick/cross cell in the Tabulator widgets
tabulator_formatters = {}
for marker in markers:
    tabulator_formatters[marker] = {'type': 'tickCross'}

# Tabulator widgets backing the two classification tabs
cell_type_table = pn.widgets.Tabulator(cell_type_df, formatters=tabulator_formatters)
cell_subtype_table = pn.widgets.Tabulator(cell_subtype_df, formatters=tabulator_formatters)
1011
+
1012
# Save function shared by the cell type and cell subtype tables
def save_data(table, classification_key, item_label):
    """Persist the marker selections of a classification table.

    For every row of ``table`` the markers whose cell is truthy are collected
    and stored under ``classification_key`` in the stored-variables JSON file.

    The file is re-read immediately before writing so that keys written by
    other parts of the app since start-up (e.g. thresholds) are not
    overwritten by a stale in-memory copy.

    Args:
        table: Widget whose ``value`` is the classification DataFrame.
        classification_key: Top-level JSON key to store the mapping under.
        item_label: Name of the column holding the row labels.
    """
    current_data = table.value
    df_bool = current_data.replace({'✔': True, '✘': False})

    classification = {
        row[item_label]: [marker for marker in markers if row[marker]]
        for _, row in df_bool.iterrows()
    }

    try:
        with open(file_path, 'r') as file:
            stored = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        # Fall back to what was loaded at start-up rather than losing it.
        stored = dict(data)

    stored[classification_key] = classification
    data[classification_key] = classification  # keep the in-memory copy in sync
    with open(file_path, 'w') as file:
        json.dump(stored, file, indent=4)
1027
+ # print(f"{classification_key} saved successfully.")
1028
+ # except IOError as e:
1029
+ # print(f"Error writing JSON file: {e}")
1030
+
1031
# Button actions
def save_cell_type_selections(event):
    """Button callback: save the cell type table (``event`` is unused)."""
    save_data(table=cell_type_table,
              classification_key='cell_type_classification',
              item_label='CELL_TYPE')


def save_cell_subtype_selections(event):
    """Button callback: save the cell subtype table (``event`` is unused)."""
    save_data(table=cell_subtype_table,
              classification_key='cell_subtype_classification',
              item_label='CELL_SUBTYPE')


# Save buttons, each wired to its callback
save_cell_type_button = pn.widgets.Button(name='Save Cell Type Selections', button_type='primary')
save_cell_type_button.on_click(save_cell_type_selections)

save_cell_subtype_button = pn.widgets.Button(name='Save Cell Subtype Selections', button_type='primary')
save_cell_subtype_button.on_click(save_cell_subtype_selections)

# One column per classification: heading, table, save button
cell_type_heading = pn.pane.Markdown("# Cell Type Classification")
cell_type_classification_app_main = pn.Column(
    cell_type_heading,
    cell_type_table,
    save_cell_type_button,
)
cell_subtype_heading = pn.pane.Markdown("# Cell Subtype Classification")
cell_subtype_classification_app_main = pn.Column(
    cell_subtype_heading,
    cell_subtype_table,
    save_cell_subtype_button,
)
1054
+ #cell_subtype_classification_app_main.show()
1055
+
1056
+ import json
1057
+ import panel as pn
1058
+
1059
# Load existing stored variables (a missing file raises here; the former
# second load wrapped in `except FileNotFoundError` could never be reached
# in that situation and has been removed).
with open(stored_variables_path, 'r') as f:
    stored_variables = json.load(f)

# Thresholds saved by a previous session, if any.
saved_subtype_thresholds = stored_variables.get('subtype_thresholds', {})

# One numeric input per marker. Each starts at its previously saved threshold
# (0.0 when none exists) so that pressing "Save" does not silently reset
# earlier values to zero.
subtype_threshold_inputs = {
    marker: pn.widgets.FloatInput(
        name=f'{marker} Threshold',
        value=saved_subtype_thresholds.get(marker, 0.0),
        step=0.1,
    )
    for marker in stored_variables['markers']
}

# Make sure the 'subtype_thresholds' field exists in the file.
if 'subtype_thresholds' not in stored_variables:
    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}
    stored_variables['subtype_thresholds'] = subtype_thresholds
    with open(stored_variables_path, 'w') as f:
        json.dump(stored_variables, f, indent=4)
1082
+
1083
# Save button to save thresholds to stored_variables.json
def save_thresholds(event):
    """Write the current threshold inputs to the stored-variables file.

    The file is re-read right before writing so that entries saved elsewhere
    since it was loaded (e.g. the classification tables) are preserved
    instead of being overwritten by the stale in-memory copy.
    ``event`` is the button click event and is not inspected.
    """
    subtype_thresholds = {marker: input_widget.value for marker, input_widget in subtype_threshold_inputs.items()}

    try:
        with open(stored_variables_path, 'r') as f:
            latest = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Fall back to the copy loaded at start-up rather than losing it.
        latest = dict(stored_variables)

    latest['subtype_thresholds'] = subtype_thresholds
    stored_variables['subtype_thresholds'] = subtype_thresholds  # keep in sync
    with open(stored_variables_path, 'w') as f:
        json.dump(latest, f, indent=4)


save_button = pn.widgets.Button(name='Save Thresholds', button_type='primary')
save_button.on_click(save_thresholds)
1091
+
1092
# Create a GridSpec layout
subtype_grid = pn.GridSpec()

# Place the threshold inputs five per row, filling rows left to right
for position, marker in enumerate(stored_variables['markers']):
    subtype_grid[position // 5, position % 5] = subtype_threshold_inputs[marker]

# Grid position that the next widget would take
row, col = divmod(len(stored_variables['markers']), 5)

# Save button one row further down, spanning all five columns
subtype_grid[row + 1, :5] = save_button

# Panel layout
subtype_threshold_panel = pn.Column(
    pn.pane.Markdown("## Define Thresholds for Markers"),
    subtype_grid,
)
1112
+
1113
+ # Display the panel
1114
+ #subtype_threshold_panel.show()
1115
+
1116
# Reload the stored variables so the latest thresholds/classification are used
with open(stored_variables_path, 'r') as file:
    stored_variables = json.load(file)

# Every column is a candidate; columns whose prefix is not a thresholded
# marker are skipped inside assign_cell_subtypes.
intensities = list(df.columns)


def assign_cell_subtypes(row):
    """Return the cell subtype for one row of the DataFrame.

    Columns are scanned in order. The text before the first underscore is
    taken as the marker name; for the first marker whose value exceeds its
    stored threshold and that is listed under some subtype in
    ``cell_subtype_classification``, that subtype is returned. ``'DC'`` is
    the fallback when nothing matches.
    """
    for intensity in intensities:
        marker = intensity.split('_')[0]
        if marker not in stored_variables['subtype_thresholds']:
            continue
        if not row[intensity] > stored_variables['subtype_thresholds'][marker]:
            continue
        for cell_subtype, subtype_markers in stored_variables['cell_subtype_classification'].items():
            if marker in subtype_markers:
                return cell_subtype
    return 'DC'


df = assign_cell_types_again()
df['cell_subtype'] = df.apply(assign_cell_subtypes, axis=1)
1135
# Colour (as 0-1 RGB fractions) for every cell subtype.
# 'Stroma' shares its colour with 'DC', and 'Endothelial' with 'B'.
_cell_subtype_palette = [
    ('DC', (0.6509803921568628, 0.807843137254902, 0.8901960784313725)),
    ('B', (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)),
    ('TCD4', (0.6980392156862745, 0.8745098039215686, 0.5411764705882353)),
    ('Exhausted TCD4', (0.2, 0.6274509803921569, 0.17254901960784313)),
    ('Exhausted TCD8', (0.984313725490196, 0.6039215686274509, 0.6)),
    ('TCD8', (0.8901960784313725, 0.10196078431372549, 0.10980392156862745)),
    ('M1', (0.9921568627450981, 0.7490196078431373, 0.43529411764705883)),
    ('M2', (1.0, 0.4980392156862745, 0.0)),
    ('Treg', (0.792156862745098, 0.6980392156862745, 0.8392156862745098)),
    ('Other CD45+', (0.41568627450980394, 0.23921568627450981, 0.6039215686274509)),
    ('Cancer', (1.0, 1.0, 0.6)),
    ('myCAF αSMA+', (0.6941176470588235, 0.34901960784313724, 0.1568627450980392)),
    ('Stroma', (0.6509803921568628, 0.807843137254902, 0.8901960784313725)),
    ('Endothelial', (0.12156862745098039, 0.47058823529411764, 0.7058823529411765)),
]

# Each value becomes the text 'rgb' followed by the tuple's repr,
# e.g. 'rgb(1.0, 1.0, 0.6)'.
cell_subtype_color_dict = {
    subtype: "rgb" + str(fractions) for subtype, fractions in _cell_subtype_palette
}
1154
+
1155
# Load stored variables from JSON file
def load_stored_variables(path):
    """Parse the JSON file at ``path`` and return its contents."""
    with open(path, 'r') as handle:
        payload = json.load(handle)
    return payload
1159
+
1160
# Columns of df that hold per-marker average intensities
subtype_intensities = list(filter(lambda name: '_Intensity_Average' in name, df.columns))
1162
+
1163
# Assign cell subtype based on thresholds and classifications
def assign_cell_subtype(row, stored_variables=None, intensity_columns=None):
    """Return the cell subtype for a single row.

    Intensity columns are scanned in order. The text before the first
    underscore of a column name is its marker; for the first marker whose
    value exceeds its stored threshold and that is listed under a subtype in
    ``cell_subtype_classification``, that subtype is returned.

    Args:
        row: Mapping (e.g. a DataFrame row) from column name to value.
        stored_variables: Parsed stored-variables dict. When omitted it is
            loaded from ``stored_variables_path`` (the previous behaviour,
            which re-read the file on every call).
        intensity_columns: Columns to scan; defaults to the module-level
            ``subtype_intensities``.

    Returns:
        The matching subtype name, or ``'DC'`` when nothing matches.
    """
    if stored_variables is None:
        stored_variables = load_stored_variables(stored_variables_path)
    if intensity_columns is None:
        intensity_columns = subtype_intensities

    thresholds = stored_variables['subtype_thresholds']
    for column in intensity_columns:
        marker = column.split('_')[0]
        if marker not in thresholds:
            continue
        if row[column] > thresholds[marker]:
            for cell_subtype, subtype_markers in stored_variables['cell_subtype_classification'].items():
                if marker in subtype_markers:
                    return cell_subtype  # first matching subtype wins
    return 'DC'  # default when no condition matches


# Main function to assign cell subtypes to DataFrame
def assign_cell_subtypes_again():
    """Recompute ``df['cell_subtype']`` from the stored settings; return ``df``.

    The stored-variables file is read once here and shared by all rows,
    instead of being re-opened and re-parsed for every row.
    """
    stored_variables = load_stored_variables(stored_variables_path)
    df['cell_subtype'] = df.apply(
        lambda row: assign_cell_subtype(row, stored_variables), axis=1
    )
    return df
1183
+
1184
+ import json
1185
+ import pandas as pd
1186
+ import numpy as np
1187
+ import panel as pn
1188
+ import plotly.graph_objects as go
1189
+
1190
pn.extension('plotly')

# Sample files offered in the subtype tab come from the stored variables
with open(stored_variables_path, 'r') as f:
    json_data = json.load(f)
subtype_ls_samples = json_data["ls_samples"]

# Checkbox group to select files
subtype_checkbox_group = pn.widgets.CheckBoxGroup(name='Select Files', options=subtype_ls_samples)

# X/Y axis dropdowns start empty and are filled once files are selected
subtype_x_axis_dropdown = pn.widgets.Select(name='Select X-Axis', options=[])
subtype_y_axis_dropdown = pn.widgets.Select(name='Select Y-Axis', options=[])

# Number of cells to draw at random for the dot plot
subtype_random_sample_input = pn.widgets.IntInput(name='Number of Random Samples', value=20000, step=100)

# Sliders positioning the vertical (X) and horizontal (Y) threshold lines
_subtype_slider_kwargs = dict(start=0, end=1, step=0.01)
subtype_x_line_slider = pn.widgets.FloatSlider(name='X Axis Line Position', **_subtype_slider_kwargs)
subtype_y_line_slider = pn.widgets.FloatSlider(name='Y Axis Line Position', **_subtype_slider_kwargs)

# Empty Plotly panes, filled in by the callbacks:
# the dot plot ...
subtype_plot_placeholder = pn.pane.Plotly()
# ... and the digital reconstruction plot
subtype_reconstruction_placeholder = pn.pane.Plotly()
1219
+
1220
def update_color_dict():
    """Return the Plotly colour string for every cell subtype.

    Colours are emitted as ``'rgb(r, g, b)'`` with integer components in the
    0-255 range. The previous version formatted 0-1 fractions (e.g.
    ``'rgb(1.0, 1.0, 0.6)'``); CSS-style ``rgb()`` strings are interpreted on
    a 0-255 scale, so those values would be read as almost black. The
    integers below are the same colours scaled by 255.

    Returns:
        dict mapping subtype name to its ``'rgb(r, g, b)'`` string.
    """
    # 'Stroma' shares its colour with 'DC', and 'Endothelial' with 'B'.
    palette = {
        'DC': (166, 206, 227),
        'B': (31, 120, 180),
        'TCD4': (178, 223, 138),
        'Exhausted TCD4': (51, 160, 44),
        'Exhausted TCD8': (251, 154, 153),
        'TCD8': (227, 26, 28),
        'M1': (253, 191, 111),
        'M2': (255, 127, 0),
        'Treg': (202, 178, 214),
        'Other CD45+': (106, 61, 154),
        'Cancer': (255, 255, 153),
        'myCAF αSMA+': (177, 89, 40),
        'Stroma': (166, 206, 227),
        'Endothelial': (31, 120, 180),
    }
    return {name: f"rgb({r}, {g}, {b})" for name, (r, g, b) in palette.items()}
1241
+
1242
# Function to create the dot plot
def create_subtype_dot_plot(subtype_selected_files, subtype_x_axis, subtype_y_axis, subtype_n_samples, subtype_x_line_pos, subtype_y_line_pos):
    """Scatter two intensity columns for the selected samples.

    Args:
        subtype_selected_files: ``Sample_ID`` values to include.
        subtype_x_axis: Column plotted on the X axis.
        subtype_y_axis: Column plotted on the Y axis.
        subtype_n_samples: Maximum number of cells to draw. When the selection
            is larger, that many distinct cells are picked at random. ``None``
            or a negative value means "no limit".
        subtype_x_line_pos: X position of the vertical dashed line.
        subtype_y_line_pos: Y position of the horizontal dashed line.

    Returns:
        A ``go.Figure``; an empty one when no file is selected or an axis is
        not a column of the data.
    """
    if not subtype_selected_files:
        return go.Figure()

    subtype_test2_df = df.loc[df['Sample_ID'].isin(subtype_selected_files), :].copy()

    # Validate the axes before doing any sampling work.
    if subtype_x_axis not in subtype_test2_df.columns or subtype_y_axis not in subtype_test2_df.columns:
        return go.Figure()

    n_rows = len(subtype_test2_df)
    if subtype_n_samples is not None and 0 <= subtype_n_samples < n_rows:
        # replace=False: each cell is drawn at most once. Sampling with
        # replacement (the numpy default) plotted duplicates and therefore
        # fewer distinct cells than requested.
        subtype_random_rows = np.random.choice(n_rows, subtype_n_samples, replace=False)
        subtype_test_df = subtype_test2_df.iloc[subtype_random_rows, :].copy()
    else:
        subtype_test_df = subtype_test2_df

    x_values = subtype_test_df[subtype_x_axis]
    y_values = subtype_test_df[subtype_y_axis]

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='markers',
        marker=dict(color='LightSkyBlue', size=2),
    ))

    # Dashed red lines marking the candidate thresholds
    fig.add_vline(x=subtype_x_line_pos, line_width=2, line_dash="dash", line_color="red")
    fig.add_hline(y=subtype_y_line_pos, line_width=2, line_dash="dash", line_color="red")

    fig.update_layout(
        title='Threshold',
        plot_bgcolor='white',
        autosize=True,
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis=dict(title=subtype_x_axis, linecolor='black', range=[x_values.min(), x_values.max()]),
        yaxis=dict(title=subtype_y_axis, linecolor='black', range=[y_values.min(), y_values.max()]),
    )
    return fig
1287
+
1288
def create_subtype_reconstruction_plot(subtype_selected_files):
    """Build the spatial scatter plot of cells coloured by cell subtype.

    Subtypes are re-assigned via ``assign_cell_subtypes_again()``; then, for
    each selected sample, one trace per subtype is drawn at the cells'
    (``Nuc_X``, ``Nuc_Y_Inv``) positions.

    A subtype without an entry in the colour dictionary is drawn in neutral
    grey instead of raising ``KeyError`` (subtype names come from the
    user-edited classification and need not match the colour keys).

    Args:
        subtype_selected_files: ``Sample_ID`` values to draw.

    Returns:
        A ``go.Figure``; an empty one when nothing is selected.
    """
    if not subtype_selected_files:
        return go.Figure()

    cell_subtype_color_dict = update_color_dict()
    fallback_color = 'rgb(128, 128, 128)'

    df = assign_cell_subtypes_again()
    subtype_fig = go.Figure()

    for sample_id in subtype_selected_files:
        title = sample_id.split('_')[0] + " Background Subtracted XY Map cell subtypes"
        location_colors = df.loc[df['Sample_ID'] == sample_id, ['Nuc_X', 'Nuc_Y_Inv', 'cell_subtype']]

        for cellsubtype in location_colors['cell_subtype'].unique():
            members = location_colors['cell_subtype'] == cellsubtype
            color = str(cell_subtype_color_dict.get(cellsubtype, fallback_color))
            subtype_fig.add_scatter(
                mode='markers',
                # size 2 is what every trace ended up with before (3 was
                # immediately overridden for all traces).
                marker=dict(size=2, opacity=0.5, color=color),
                x=location_colors.loc[members, 'Nuc_X'],
                y=location_colors.loc[members, 'Nuc_Y_Inv'],
                name=cellsubtype,
            )

    # Axes and layout are figure-wide, so they are set once; the title is
    # that of the last sample processed, as before.
    subtype_fig.update_xaxes(title_text='Nuc_X', linecolor='black')
    subtype_fig.update_yaxes(title_text='Nuc_Y_Inv', linecolor='black')
    subtype_fig.update_layout(
        title=title,
        plot_bgcolor='white',
        legend=dict(
            title='Cell Subtypes',
            font=dict(family='Arial', size=12, color='black'),
            bgcolor='white',
            bordercolor='black',
            borderwidth=0.4,
            itemsizing='constant',
        ),
    )

    return subtype_fig
1342
+
1343
def update_subtype_dropdown_options(event):
    """Refresh the subtype X/Y dropdowns when the file selection changes.

    With at least one file selected, both dropdowns offer every column of the
    selected rows whose name contains ``_Intensity_Average``; otherwise both
    are emptied. ``event`` is the watcher event and is not inspected.
    """
    chosen = subtype_checkbox_group.value
    if not chosen:
        subtype_x_axis_dropdown.options = []
        subtype_y_axis_dropdown.options = []
        return

    subset = df.loc[df['Sample_ID'].isin(chosen), :].copy()
    intensity_columns = [name for name in list(subset.columns) if '_Intensity_Average' in name]
    subtype_x_axis_dropdown.options = intensity_columns
    subtype_y_axis_dropdown.options = intensity_columns
1358
+
1359
def update_subtype_slider_ranges(event):
    """Re-scale the subtype threshold-line sliders to the selected data.

    Each slider is made symmetric around zero, spanning
    ``[-|max|, +|max|]`` of the chosen axis column over the selected samples,
    and its value is reset to 0. Nothing happens until files and both axes
    are selected. ``event`` is the watcher event and is not inspected.
    """
    chosen = subtype_checkbox_group.value
    x_column = subtype_x_axis_dropdown.value
    y_column = subtype_y_axis_dropdown.value
    if not (chosen and x_column and y_column):
        return

    subset = df.loc[df['Sample_ID'].isin(chosen), :].copy()
    x_bound = abs(subset[x_column].max())
    y_bound = abs(subset[y_column].max())

    subtype_x_line_slider.start, subtype_x_line_slider.end = -x_bound, x_bound
    subtype_y_line_slider.start, subtype_y_line_slider.end = -y_bound, y_bound
    subtype_x_line_slider.value = 0
    subtype_y_line_slider.value = 0
1375
+
1376
def on_subtype_value_change(event):
    """Rebuild both subtype plots from the current widget state.

    Reads the file selection, axes, sample count and line positions from the
    subtype widgets, regenerates the dot plot and the reconstruction plot,
    and only then pushes both into their placeholders. ``event`` is not
    inspected.
    """
    chosen_files = subtype_checkbox_group.value
    dot_figure = create_subtype_dot_plot(
        chosen_files,
        subtype_x_axis_dropdown.value,
        subtype_y_axis_dropdown.value,
        subtype_random_sample_input.value,
        subtype_x_line_slider.value,
        subtype_y_line_slider.value,
    )
    reconstruction_figure = create_subtype_reconstruction_plot(chosen_files)
    subtype_plot_placeholder.object = dot_figure
    subtype_reconstruction_placeholder.object = reconstruction_figure
1389
+
1390
# Link value changes to the subtype callbacks.
# The file selection drives the dropdown options and the slider ranges.
for _callback in (update_subtype_dropdown_options, update_subtype_slider_ranges):
    subtype_checkbox_group.param.watch(_callback, 'value')

# Changing either axis re-scales the sliders ...
for _widget in (subtype_x_axis_dropdown, subtype_y_axis_dropdown):
    _widget.param.watch(update_subtype_slider_ranges, 'value')

# ... and any plot-affecting widget triggers a redraw.
for _widget in (subtype_x_axis_dropdown, subtype_y_axis_dropdown,
                subtype_random_sample_input,
                subtype_x_line_slider, subtype_y_line_slider):
    _widget.param.watch(on_subtype_value_change, 'value')
1400
+
1401
# Layout: controls on top, dot plot and subtype reconstruction side by side.
subtype_dot_plot_column = pn.Column(
    "## Dot Plot",
    pn.Column(subtype_plot_placeholder),
)
subtype_reconstruction_column = pn.Column(
    "## Cell Subtype Digital Reconstruction Plot",
    subtype_reconstruction_placeholder,
)
plot_with_subtype_reconstruction = pn.Column(
    "## Select Files to Construct Dot Plot",
    subtype_checkbox_group,
    subtype_x_axis_dropdown,
    subtype_y_axis_dropdown,
    subtype_random_sample_input,
    pn.Row(subtype_x_line_slider, subtype_y_line_slider),
    pn.Row(subtype_dot_plot_column, subtype_reconstruction_column),
)
1418
+
1419
# Snapshot of the axis selections at script time
subtype_x_axis = subtype_x_axis_dropdown.value
subtype_y_axis = subtype_y_axis_dropdown.value

# Normalize subtype labels and colour keys the same way (trimmed, lower case)
# so that they can be matched against each other.
df2['cell_subtype'] = df2['cell_subtype'].str.strip().str.lower()
cell_subtype_color_dict = {
    key.strip().lower(): value for key, value in cell_subtype_color_dict.items()
}

# Map the cell_subtype values to colors (recomputed from df further down)
cell_subtype_row_colors = df2.cell_subtype.map(cell_subtype_color_dict)

# Drop the first three characters (the 'rgb' prefix) of every colour string.
# NOTE(review): what remains is still a string such as '(1.0, 1.0, 0.6)', not
# a tuple - confirm that downstream consumers accept that.
cell_subtype_color_dict = {key: value[3:] for key, value in cell_subtype_color_dict.items()}

# Per-row colours for the annotations
sample_row_colors = df.Sample_ID.map(sample_color_dict)
# NOTE(review): this assumes df.cell_subtype was lower-cased together with
# df2.cell_subtype (i.e. both names refer to the same frame) - verify.
cell_subtype_row_colors = df.cell_subtype.map(cell_subtype_color_dict)

# Number of cells per (cell_type, cell_subtype) combination
counts = df.groupby(['cell_type', 'cell_subtype']).size().reset_index(name='count')

# Share of each combination among all counted cells, in percent
total = sum(counts['count'])
counts['percentage'] = (counts['count'] / total) * 100
1457
+
1458
+ #print(counts)
1459
+
1460
+
1461
# ## IV.10. SAVE

# One CSV per sample. The file is written in 'w' mode, so an existing file is
# overwritten and a missing one is created - no existence check is needed.
for sample in ls_samples:
    sample_id = sample
    filename = os.path.join(output_data_dir, f"{sample_id}_{step_suffix}.csv")
    df_save = df.loc[df['Sample_ID'] == sample_id, :]
    df_save.to_csv(filename, index=True, index_label='ID', mode='w')

# All samples merged into a single CSV
filename = os.path.join(output_data_dir, f"all_Samples_{project_name}.csv")
df.to_csv(filename, index=True, index_label='ID')
1483
+
1484
# ## Panel App
# Metadata table widget
df_widget = pn.widgets.DataFrame(metadata, name="MetaData")

# Content of each tab
metadata_tab = pn.Column(pn.pane.Markdown("## Initial DataFrame"), intial_df)
dotplot_tab = pn.Column(plot_with_reconstruction)
celltype_classification_tab = pn.Column(cell_type_classification_app_main, threshold_panel)
cellsubtype_classification_tab = pn.Column(cell_subtype_classification_app_main, subtype_threshold_panel)
subtype_dotplot_tab = pn.Column(plot_with_subtype_reconstruction)

# Tabs in display order (the heatmap tab is currently disabled)
app_tabs = pn.Tabs(
    ("Metadata", metadata_tab),
    ("Classify-Celltype-Marker", celltype_classification_tab),
    ("Cell_Types", dotplot_tab),
    ("Classify-Cell Subtype-Marker", cellsubtype_classification_tab),
    ("Cell-Subtypes", subtype_dotplot_tab),
    # ("Heatmap",pn.Column(celltype_heatmap, cell_subtype_heatmap))
)

app4_5 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Marker Threshold & Classification",
    main=[app_tabs],
)
app4_5.show()