KashyapiNagaHarshitha committed on
Commit 31a4d07 · verified · 1 Parent(s): f63698c

Delete Background_Substraction.py

Files changed (1)
  1. Background_Substraction.py +0 -1130
Background_Substraction.py DELETED
@@ -1,1130 +0,0 @@
#!/usr/bin/env python
# coding: utf-8


# In[1]:
import os
import random
import re
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib.colors as mplc
import subprocess
import warnings
from scipy import signal
import plotly.figure_factory as ff
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, plot
import plotly.express as px
from my_modules import *

os.getcwd()


# In[2]:


# Silence FutureWarnings & UserWarnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)


# ## II.2. *DIRECTORIES

# In[5]:


# Set base directory

##### MAC WORKSTATION #####
#base_dir = r'/Volumes/LaboLabrie/Projets/OC_TMA_Pejovic/Temp/Zoe/CyCIF_pipeline/'
###########################

##### WINDOWS WORKSTATION #####
#base_dir = r'C:\Users\LaboLabrie\gerz2701\cyCIF-pipeline\Set_B'
###############################

##### LOCAL WORKSTATION #####
#base_dir = r'/Users/harshithakolipaka/Downloads/wetransfer_data-zip_2024-05-17_1431/'
#############################

#set_name = 'Set_A'
#set_name = 'test'


present_dir = os.path.dirname(os.path.realpath(__file__))

input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
base_dir = input_path

'''
# Function to change permissions recursively with error handling
def change_permissions_recursive(path, mode):
    for root, dirs, files in os.walk(path):
        for dir in dirs:
            try:
                os.chmod(os.path.join(root, dir), mode)
            except Exception as e:
                print(f"An error occurred while changing permissions for directory {os.path.join(root, dir)}: {e}")
        for file in files:
            try:
                os.chmod(os.path.join(root, file), mode)
            except Exception as e:
                print(f"An error occurred while changing permissions for file {os.path.join(root, file)}: {e}")


change_permissions_recursive(base_dir, 0o777)
change_permissions_recursive('/code', 0o777)
'''

set_path = 'test'
selected_metadata_files = ['Slide_B_DD1s1.one_1.tif.csv', 'Slide_B_DD1s1.one_2.tif.csv']
ls_samples = ['Ashlar_Exposure_Time.csv', 'new_data.csv', 'DD3S1.csv', 'DD3S2.csv', 'DD3S3.csv', 'TMA.csv']

set_name = set_path


# In[7]:


project_name = set_name                  # Project name
step_suffix = 'bs'                       # Current part (here part II)
previous_step_suffix_long = "_qc_eda"    # Previous part (here QC/EDA notebook)

# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)

# BS output directories
output_data_dir = os.path.join(base_dir, project_name + "_" + step_suffix)
# BS images subdirectory
output_images_dir = os.path.join(output_data_dir, "images")

# Data and metadata directories
# Metadata directories
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
# images subdirectory
metadata_images_dir = os.path.join(metadata_dir, "images")

# Create directories if they don't already exist
for d in [base_dir, input_data_dir, output_data_dir, output_images_dir, metadata_dir, metadata_images_dir]:
    if not os.path.exists(d):
        print("Creation of the", d, "directory...")
        os.makedirs(d)
    else:
        print("The", d, "directory already exists!")

os.chdir(input_data_dir)


# In[8]:


# Verify paths
print('base_dir :', base_dir)
print('input_data_dir :', input_data_dir)
print('output_data_dir :', output_data_dir)
print('output_images_dir :', output_images_dir)
print('metadata_dir :', metadata_dir)
print('metadata_images_dir :', metadata_images_dir)

# ## II.3. FILES
# Don't forget to put your data in the projname_data directory!

# ### II.3.1. METADATA

# In[9]:


# Import all metadata we need from the QC/EDA chapter

# METADATA
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
metadata = pd.read_csv(filename)

# Verify size with verify_line_no() function in my_modules.py
#verify_line_no(filename, metadata.shape[0] + 1)

# Verify headers
exp_cols = ['Round', 'Target', 'Channel', 'target_lower', 'full_column', 'marker', 'localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")

metadata = metadata.dropna()
metadata.head()

# ### II.3.2. NOT_INTENSITIES

# In[10]:


# NOT_INTENSITIES
filename = "not_intensities.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
#not_intensities = []
with open(filename, 'r') as fh:
    not_intensities = fh.read().strip().split("\n")
    # take str, strip whitespace, split on new line character

# NOTE: this hard-coded list overrides what was just read from file, so the
# verification below only passes when the two agree
not_intensities = ['Nuc_X', 'Nuc_X_Inv', 'Nuc_Y', 'Nuc_Y_Inv', 'Nucleus_Roundness', 'Nucleus_Size', 'Cell_Size',
                   'ROI_index', 'Sample_ID', 'replicate_ID', 'Cell_ID', 'cell_type', 'cell_subtype', 'cluster', 'ID',
                   'Cytoplasm_Size', 'immune_checkpoint', 'Unique_ROI_index', 'Patient', 'Primary_chem(1)_vs_surg(0)']

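# verify_line_no() is imported from my_modules; a guarded, minimal sketch of
# what it presumably does (compare a file's line count to an expected value),
# assuming this signature -- the real implementation may differ:
if 'verify_line_no' not in globals():
    def verify_line_no(filename, expected_n):
        # Count physical lines in the file and warn on mismatch
        with open(filename, 'r') as fh:
            n_lines = sum(1 for _ in fh)
        if n_lines != expected_n:
            print(f"WARNING: {filename} has {n_lines} lines; expected {expected_n}.")
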
# Verify size
print("Verifying data read from file is the correct length...\n")
verify_line_no(filename, len(not_intensities))

# Print to console
print("not_intensities =\n", not_intensities)

import os
import pandas as pd

# Function to compare headers (assuming you have this function defined in your my_modules.py)
def compare_headers(expected, actual, description):
    missing = [col for col in expected if col not in actual]
    if missing:
        print(f"WARNING: Missing expected columns in {description}: {missing}")
    else:
        print(f"All expected columns are present in {description}.")

# Get the current script directory
present_dir = os.path.dirname(os.path.realpath(__file__))

# Define the input path
input_path = os.path.join(present_dir, 'wetransfer_data-zip_2024-05-17_1431')
base_dir = input_path
set_path = 'test'

# Project and step names
project_name = set_path                  # Project name
previous_step_suffix_long = "_qc_eda"    # Previous part (here QC/EDA notebook)

# Initial input data directory
input_data_dir = os.path.join(base_dir, project_name + previous_step_suffix_long)

# Metadata directories
metadata_dir = os.path.join(base_dir, project_name + "_metadata")
metadata_images_dir = os.path.join(metadata_dir, "images")

# Define writable directory
writable_directory = '/tmp'

# Check and read metadata file
filename = "marker_intensity_metadata.csv"
filename = os.path.join(metadata_dir, filename)

# Check if the file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
metadata = pd.read_csv(filename)

# Verify headers
exp_cols = ['Round', 'Target', 'Channel', 'target_lower', 'full_column', 'marker', 'localisation']
compare_headers(exp_cols, metadata.columns.values, "Marker metadata file")

metadata = metadata.dropna()
print(metadata.head())

# Example of writing to the writable directory
output_file_path = os.path.join(writable_directory, 'processed_metadata.csv')
try:
    metadata.to_csv(output_file_path, index=False)
    print(f"Processed metadata written successfully to {output_file_path}")
except PermissionError as e:
    print(f"Permission denied: Unable to write the file at {output_file_path}. Error: {e}")
except Exception as e:
    print(f"An error occurred: {e}")


# ### II.3.3. FULL_TO_SHORT_COLUMN_NAMES

# In[11]:


# FULL_TO_SHORT_COLUMN_NAMES
filename = "full_to_short_column_names.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
full_to_short_names = df.set_index('full_name').T.to_dict('records')[0]

# Print information
print('full_to_short_names =\n', full_to_short_names)


# ### II.3.4. SHORT_TO_FULL_COLUMN_NAMES

# In[12]:


# SHORT_TO_FULL_COLUMN_NAMES
filename = "short_to_full_column_names.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
short_to_full_names = df.set_index('short_name').T.to_dict('records')[0]

# Print information
print('short_to_full_names =\n', short_to_full_names)

# ### II.3.5. SAMPLES COLORS

# In[13]:


# COLORS INFORMATION
filename = "sample_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
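# rgb_tuple_from_str() is imported from my_modules; a guarded, minimal sketch
# of what it presumably does, assuming the stored format is '(r, g, b)' --
# the real implementation may differ:
if 'rgb_tuple_from_str' not in globals():
    def rgb_tuple_from_str(rgb_str):
        # Strip the parentheses, split on commas, convert each part to float
        return tuple(float(part) for part in rgb_str.strip('()').split(','))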
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
sample_color_dict = df.set_index('Sample_ID')['rgb'].to_dict()

# Print information
print('sample_color_dict =\n', sample_color_dict)
sample_color_dict = pd.DataFrame.from_dict(sample_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[14]:


sample_color_dict

# ### II.3.6. CHANNELS COLORS

# In[15]:


# CHANNELS
filename = "channel_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
channel_color_dict = df.set_index('Channel')['rgb'].to_dict()

# Print information
print('channel_color_dict =\n', channel_color_dict)
channel_color_dict = pd.DataFrame.from_dict(channel_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[16]:


channel_color_dict


# ### II.3.7. ROUNDS COLORS

# In[17]:


# ROUND
filename = "round_color_data.csv"
filename = os.path.join(metadata_dir, filename)

# Check file exists
if not os.path.exists(filename):
    print("WARNING: Could not find desired file: " + filename)
else:
    print("The", filename, "file was imported for further analysis!")

# Open, read in information
df = pd.read_csv(filename, header=0)
df = df.drop(columns=['hex'])

# our tuple of float values for rgb, (r, g, b) was read in
# as a string '(r, g, b)'. We need to extract the r-, g-, and b-
# substrings and convert them back into floats
df['rgb'] = df.apply(lambda row: rgb_tuple_from_str(row['rgb']), axis=1)

# Verify size
print("Verifying data read from file is the correct length...\n")
#verify_line_no(filename, df.shape[0] + 1)

# Turn into dictionary
round_color_dict = df.set_index('Round')['rgb'].to_dict()

# Print information
print('round_color_dict =\n', round_color_dict)
round_color_dict = pd.DataFrame.from_dict(round_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[18]:


round_color_dict

# ### II.3.8. DATA

# In[19]:


# DATA
# List files in the directory
# Check if the directory exists
if os.path.exists(input_data_dir):
    ls_samples = [sample for sample in os.listdir(input_data_dir) if sample.endswith("_qc_eda.csv")]
    print("The following CSV files were detected:")
    print([sample for sample in ls_samples])
else:
    print(f"The directory {input_data_dir} does not exist.")


# In[20]:


# Import all the other files
dfs = {}

# Set variable to hold default header values
# First gather information on expected headers using first file in ls_samples
# Read in the first row of the file corresponding to the first sample (index = 0) in ls_samples
df = pd.read_csv(os.path.join(input_data_dir, ls_samples[0]), index_col=0, nrows=1)
expected_headers = df.columns.values
print(expected_headers)

###############################
# !! This may take a while !! #
###############################
# Iterate over a copy of ls_samples so that removing a bad sample
# does not skip the next one mid-iteration
for sample in list(ls_samples):
    file_path = os.path.join(input_data_dir, sample)

    try:
        # Read the CSV file
        df = pd.read_csv(file_path, index_col=0)
        # Check if the DataFrame is empty; if so, don't continue trying to process df and remove it

        if not df.empty:
            # Reorder the columns to match the expected headers list
            df = df.reindex(columns=expected_headers)
            print(sample, "file is processed !\n")
            #print(df)

        # Add df to dfs only when reading succeeded
        dfs[sample] = df

    except pd.errors.EmptyDataError:
        print(f'\nEmpty data error in {sample} file. Removing from analysis...')
        ls_samples.remove(sample)

#print(dfs)


# In[21]:


# Merge dfs into one df
df = pd.concat(dfs.values(), ignore_index=False, sort=False)
#del dfs
df.head()


# In[22]:


df.shape


# In[23]:


# Check for NaN entries (should not be any unless columns do not align)
# False means no NaN entries
# True means NaN entries
df.isnull().any().any()


# ## II.4. *FILTERING

# In[24]:


print("Number of cells before filtering :", df.shape[0])
cells_before_filter = f"Number of cells before filtering :{df.shape[0]}"


# In[25]:


#print(df)


# In[26]:


# Delete small cells and objects w/ high AF555 signal (RBCs)
# We usually use the 95th percentile calculated during QC_EDA
df = df.loc[(df['Nucleus_Size'] > 42)]
df = df.loc[(df['Nucleus_Size'] < 216)]
print("Number of cells after filtering on nucleus size:", df.shape[0])
cells_after_filter_nucleus = f"Number of cells after filtering on nucleus size: {df.shape[0]}"

df = df.loc[(df['AF555_Cell_Intensity_Average'] < 2000)]
print("Number of cells after filtering on AF555A ___ intensity:", df.shape[0])
cells_after_filter_intensity = f"Number of cells after filtering on AF555A ___ intensity: {df.shape[0]}"


# In[27]:


# Assign cell type
# Assign tumor cells at each row at first (random assigning here just for development purposes)
# Generate random values for cell_type column
random_values = np.random.randint(0, 10, size=len(df))

# Assign cell type based on random values
# (note: the argument n is currently unused -- the choice is uniformly random)
def assign_cell_type(n):
    return np.random.choice(['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL'])

df['cell_type'] = np.vectorize(assign_cell_type)(random_values)
df['cell_subtype'] = df['cell_type'].copy()


# In[28]:


filtered_dataframe = df
df.head()


# In[29]:


quality_control_df = filtered_dataframe


# In[30]:


def check_index_format(index_str, ls_samples):
    """
    Checks if the given index string follows the specified format.

    Args:
        index_str (str): The index string to be checked.
        ls_samples (list): A list of valid sample names.

    Returns:
        bool: True if the index string follows the format, False otherwise.
    """
    # Split the index string into parts
    parts = index_str.split('_')

    # Check if there are exactly 3 parts
    if len(parts) != 3:
        print(len(parts))
        return False

    # Check if the first part is in ls_samples
    sample_name = parts[0]
    if f'{sample_name}_qc_eda.csv' not in ls_samples:
        print(sample_name)
        return False

    # Check if the second part is in ['Cell', 'Cytoplasm', 'Nucleus']
    location = parts[1]
    valid_locations = ['Cell', 'Cytoplasm', 'Nucleus']
    if location not in valid_locations:
        print(location)
        return False

    # Check if the third part is a number
    try:
        index = int(parts[2])
    except ValueError:
        # print parts[2]: `index` is never bound when the cast fails
        print(parts[2])
        return False

    # If all checks pass, return True
    return True
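
# Quick illustration with a hypothetical index value -- the expected format
# is '<sample>_<Cell|Cytoplasm|Nucleus>_<number>', e.g.:
#
#   check_index_format('DD3S1_Cell_42', ls_samples)
#
# returns True only if 'DD3S1_qc_eda.csv' is present in ls_samples.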


# In[31]:


# Let's take a look at a few features to make sure our dataframe is as expected
df.index

# (note: this iterates over the global df.index; the `index` parameter is unused)
def check_format_ofindex(index):
    for index in df.index:
        check_index = check_index_format(index, ls_samples)
        if check_index is False:
            index_format = "Bad"
            return index_format

    index_format = "Good"
    return index_format

print(check_format_ofindex(df.index))


# In[32]:


import panel as pn
import pandas as pd

def quality_check(file, not_intensities):
    # Load the output file
    df = file

    # Check Index
    check_index = check_format_ofindex(df.index)

    # Check Shape
    check_shape = df.shape

    # Check for NaN entries
    check_no_null = df.isnull().any().any()

    mean_intensity = df.loc[:, ~df.columns.isin(not_intensities)].mean(axis=1)
    if (mean_intensity == 0).any():
        df = df.loc[mean_intensity > 0, :]
        print("df.shape after removing 0 mean values: ", df.shape)
        check_zero_intensities = f'Shape after removing 0 mean values: {df.shape}'
    else:
        print("No zero intensity values.")
        check_zero_intensities = "No zero intensity values."

    # Create a quality check results table
    quality_check_results_table = pd.DataFrame({
        'Check': ['Index', 'Shape', 'Check for NaN Entries', 'Check for Zero Intensities'],
        'Result': [str(check_index), str(check_shape), str(check_no_null), check_zero_intensities]
    })

    # Create a quality check results component
    quality_check_results_component = pn.Card(
        pn.pane.DataFrame(quality_check_results_table),
        title="Quality Control Results",
        header_background="#2196f3",
        header_color="white",
    )

    return quality_check_results_component


# ## II.5. CELL TYPES COLORS
# Establish colors to use throughout workflow

# we want colors that are categorical, since Cell Type is a non-ordered category.
# A categorical color palette will have dissimilar colors.
# Get those unique colors
cell_types = ['STROMA', 'CANCER', 'IMMUNE', 'ENDOTHELIAL']
color_values = sb.color_palette("hls", n_colors=len(cell_types))
# each color value is a tuple of three values: (R, G, B)

print("Unique cell types are:", df.cell_type.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))

# In[33]:


# Define your custom colors for each cell type
custom_colors = {
    'CANCER': (0.1333, 0.5451, 0.1333),
    'STROMA': (0.4, 0.4, 0.4),
    'IMMUNE': (1, 1, 0),
    'ENDOTHELIAL': (0.502, 0, 0.502)
}

# Retrieve the list of cell types
cell_types = list(custom_colors.keys())

# Extract the corresponding colors from the dictionary
color_values = [custom_colors[cell] for cell in cell_types]

# Display the colors
sb.palplot(sb.color_palette(color_values))


# In[34]:


# Store in a dictionary
celltype_color_dict = dict(zip(cell_types, color_values))
celltype_color_dict


# In[35]:


celltype_color_df = pd.DataFrame.from_dict(celltype_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[36]:


# Save color information (mapping and legend) to metadata directory
# Create dataframe
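# color_dict_to_df() is imported from my_modules; a guarded, minimal sketch
# of what it presumably does (one row per label, with R, G, B columns),
# assuming this signature -- the real implementation may differ:
if 'color_dict_to_df' not in globals():
    def color_dict_to_df(color_dict, label_name):
        # Flatten {label: (r, g, b)} into a tidy frame, one row per label
        df_ = pd.DataFrame.from_dict(color_dict, orient='index', columns=['R', 'G', 'B'])
        df_.index.name = label_name
        return df_.reset_index()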
celltype_color_df = color_dict_to_df(celltype_color_dict, "cell_type")
celltype_color_df.head()

# Save to file in metadata directory
present_dir = os.path.dirname(os.path.realpath(__file__))
filename = os.path.join(present_dir, "celltype_color_data.csv")
#filename = "celltype_color_data.csv"
# NOTE: filename is already absolute here, so os.path.join ignores
# metadata_dir and the file lands next to the script
filename = os.path.join(metadata_dir, filename)
celltype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")


# In[37]:


celltype_color_df.head()


# In[38]:


# Legend of cell type info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in celltype_color_dict.keys():
    h = g.bar(0, 0, color=celltype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell type')


filename = "Celltype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')


# In[39]:


metadata


# In[40]:


df.columns.values


# In[41]:


df.shape


# In[42]:


metadata.shape


# ## II.6. *CELL SUBTYPES COLORS

# In[43]:


# Establish colors to use throughout workflow

# we want colors that are categorical, since Cell Subtype is a non-ordered category.
# A categorical color palette will have dissimilar colors.
# Get those unique colors
cell_subtypes = ['DC', 'B', 'TCD4', 'TCD8', 'M1', 'M2', 'Treg',
                 'IMMUNE_OTHER', 'CANCER', 'αSMA_myCAF',
                 'STROMA_OTHER', 'ENDOTHELIAL']
color_values = sb.color_palette("Paired", n_colors=len(cell_subtypes))
# each color value is a tuple of three values: (R, G, B)

print("Unique cell subtypes are:", df.cell_subtype.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))


# In[44]:


# Store in a dictionary
cellsubtype_color_dict = dict(zip(cell_subtypes, color_values))
cellsubtype_color_dict


# In[45]:


cellsubtype_color_df = pd.DataFrame.from_dict(cellsubtype_color_dict, orient='index', columns=['R', 'G', 'B'])


# In[46]:


# Save color information (mapping and legend) to metadata directory
# Create dataframe
cellsubtype_color_df = color_dict_to_df(cellsubtype_color_dict, "cell_subtype")

# Save to file in metadata directory
filename = "cellsubtype_color_data.csv"
filename = os.path.join(metadata_dir, filename)
cellsubtype_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")


# In[47]:


cellsubtype_color_df.head()


# In[48]:


# Legend of cell subtype info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in cellsubtype_color_dict.keys():
    h = g.bar(0, 0, color=cellsubtype_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Cell subtype')


filename = "Cellsubtype_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')


# ## II.7. IMMUNE CHECKPOINT COLORS

# In[49]:


# Assign IMMUNE SUBTYPES
df['cell_subtype'] = df['cell_type'].copy()
df['immune_checkpoint'] = 'none'
df

immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'None']
color_values = sb.color_palette("husl", n_colors=len(immune_checkpoint))
# each color value is a tuple of three values: (R, G, B)

print("Unique immune checkpoints are:", df.immune_checkpoint.unique())
# Display those unique colors
sb.palplot(sb.color_palette(color_values))

# In[50]:


immune_checkpoint = ['B7H4', 'PDL1', 'PD1', 'B7H4_PDL1', 'None']

# Base colors for the primary checkpoints
base_colors = sb.color_palette("husl", n_colors=3)  # Three distinct colors

# Function to mix two RGB colors
def mix_colors(color1, color2):
    return tuple((c1 + c2) / 2 for c1, c2 in zip(color1, color2))
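# For example, mixing pure red with pure blue gives the channel-wise mean:
#   mix_colors((1.0, 0.0, 0.0), (0.0, 0.0, 1.0)) == (0.5, 0.0, 0.5)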

# Generate mixed colors for the combinations of checkpoints
mixed_colors = [
    mix_colors(base_colors[0], base_colors[1]),  # Mix B7H4 and PDL1
    # mix_colors(base_colors[0], base_colors[2]),  # Mix B7H4 and PD1
    # mix_colors(base_colors[1], base_colors[2]),  # Mix PDL1 and PD1
    tuple(np.mean(base_colors, axis=0))  # Mix B7H4, PDL1, and PD1
]

# Adding the color for 'None'
#none_color = [(0.8, 0.8, 0.8)]  # A shade of gray

# Combine all colors into one list (3 base + 2 mixed, matching the 5 labels above)
color_values = base_colors + mixed_colors  #+ none_color

# Display unique immune checkpoint combinations
print("Unique immune checkpoint combinations are:", immune_checkpoint)
# Display the unique colors
sb.palplot(color_values)


# In[51]:


# Store in a dictionary
immunecheckpoint_color_dict = dict(zip(immune_checkpoint, color_values))
immunecheckpoint_color_dict


# In[52]:


# Save color information (mapping and legend) to metadata directory
# Create dataframe
immunecheckpoint_color_df = color_dict_to_df(immunecheckpoint_color_dict, "immune_checkpoint")
immunecheckpoint_color_df.head()

# Save to file in metadata directory
filename = "immunecheckpoint_color_data.csv"
filename = os.path.join(metadata_dir, filename)
immunecheckpoint_color_df.to_csv(filename, index=False)
print("File " + filename + " was created!")


# In[53]:


# Legend of immune checkpoint info only
g = plt.figure(figsize=(1, 1)).add_subplot(111)
g.axis('off')
handles = []
for item in immunecheckpoint_color_dict.keys():
    h = g.bar(0, 0, color=immunecheckpoint_color_dict[item],
              label=item, linewidth=0)
    handles.append(h)
first_legend = plt.legend(handles=handles, loc='upper right', title='Immune checkpoint')


filename = "Immunecheckpoint_legend.png"
filename = os.path.join(metadata_images_dir, filename)
plt.savefig(filename, bbox_inches='tight')
967
- # ## II.7. BACKGROUND SUBSTRACTION
968
-
969
- # In[54]:
970
-
971
-
972
- def do_background_sub(col, df, metadata):
973
- #print(col.name)
974
- location = metadata.loc[metadata['full_column'] == col.name, 'localisation'].values[0]
975
- #print('location = ' + location)
976
- channel = metadata.loc[metadata['full_column'] == col.name, 'Channel'].values[0]
977
- #print('channel = ' + channel)
978
- af_target = metadata.loc[
979
- (metadata['Channel']==channel) \
980
- & (metadata['localisation']==location) \
981
- & (metadata['target_lower'].str.contains(r'^af\d{3}$')),\
982
- 'full_column'].values[0]
983
- return col - df.loc[:,af_target]
984
-
985
-
986
- # In[55]:
987
-
988
-
989
- metadata_with_localisation = metadata
990
- metadata_with_localisation
991
-
992
-
993
- # In[56]:
994
-
995
-
996
- #Normalization
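# divide_exp_time() is imported from my_modules; a guarded, minimal sketch of
# what it presumably does (scale each intensity column by the exposure time
# recorded for it in the metadata), assuming an 'Exp' metadata column -- the
# real implementation may differ:
if 'divide_exp_time' not in globals():
    def divide_exp_time(col, exp_col, metadata):
        # Look up this column's exposure time and normalize to per-unit exposure
        exp_time = metadata.loc[metadata['full_column'] == col.name, exp_col].values[0]
        return col / exp_time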

df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: divide_exp_time(column, 'Exp', metadata), axis=0)


# In[57]:


normalization_df = df
normalization_df.head()


# In[58]:


# Do background subtraction
# this uses a df (metadata) outside of
# the scope of the lambda...
# careful that this might break inside of a script...

df.loc[:, ~df.columns.isin(not_intensities)] = \
    df.loc[:, ~df.columns.isin(not_intensities)].apply(lambda column: do_background_sub(column, df, metadata), axis=0)
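
# One way to avoid leaning on enclosing-scope names, as the comment above
# warns, is to bind the extra arguments explicitly -- a sketch:
#
#   from functools import partial
#   bg_sub = partial(do_background_sub, df=df, metadata=metadata)
#   df.loc[:, ~df.columns.isin(not_intensities)] = \
#       df.loc[:, ~df.columns.isin(not_intensities)].apply(bg_sub, axis=0)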
1019
-
1020
-
1021
- # In[59]:
1022
-
1023
-
1024
- df
1025
- background_substraction_df = df
1026
- background_substraction_df.head()
1027
-
1028
-
1029
- # In[60]:
1030
-
1031
-
1032
- # Drop AF columns
1033
- df = df.filter(regex='^(?!AF\d{3}).*')
1034
- print(df.columns.values)
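
# For example, 'AF555_Cell_Intensity_Average' is dropped by the pattern above,
# while a marker column such as 'CK7_Cell_Intensity_Average' (hypothetical
# name) is kept.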


# In[61]:


intensities_df = df.loc[:, ~df.columns.isin(not_intensities)]
intensities_df


# In[62]:


normalization_df.head()


# In[63]:


metadata_df = metadata_with_localisation
intensities_df = intensities_df  # Assuming you have loaded the intensities DataFrame

# Create a list of column names from the intensities DataFrame
column_names = intensities_df.columns.tolist()

# Create a Select widget for choosing a column
column_selector = pn.widgets.Select(name='Select Column', options=column_names)

# Create a Markdown widget to display the selected column's information
column_info_md = pn.pane.Markdown(name='Column Information', width=400, object='Select a column to view its information.')

# Define a function to update the column information
def update_column_info(event):
    selected_column = event.new
    if selected_column:
        # Get the selected column's intensity
        intensity = intensities_df[selected_column].values

        # Get the corresponding channel, localization, and exposure from the metadata
        channel = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Channel'].values[0]
        localization = metadata_df.loc[metadata_df['full_column'] == selected_column, 'localisation'].values[0]
        exposure = metadata_df.loc[metadata_df['full_column'] == selected_column, 'Exp'].values[0]

        # Create a Markdown string with the column information
        column_info_text = f"**Intensity:** {intensity}\n\n**Channel:** {channel}\n\n**Localization:** {localization}\n\n**Exposure:** {exposure}"

        # Update the Markdown widget with the column information
        column_info_md.object = column_info_text
    else:
        column_info_md.object = 'Select a column to view its information.'

# Watch for changes in the column selector and update the column information
column_selector.param.watch(update_column_info, 'value')

# Create a Panel app and display the widgets
bs_info = pn.Column(column_selector, column_info_md)
pn.extension()
bs_info.servable()


# In[64]:


normalization_df.head()


# In[65]:


import panel as pn

df_widget = pn.widgets.DataFrame(metadata, name="MetaData")
app2 = pn.template.GoldenTemplate(
    site="Cyc-IF",
    title="Background-Subtraction",
    main=[pn.Tabs(
        ("Background-Subtraction", pn.Column(
            #pn.Column(pn.pane.Markdown("### Celltype thresholds"), pn.pane.DataFrame(celltype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cell Subtype thresholds"), pn.pane.DataFrame(cellsubtype_color_df)),
            #pn.Column(pn.pane.Markdown("### Cells Before Filtering"), pn.pane.Str(cells_before_filter)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Nucleus"), pn.pane.Str(cells_after_filter_nucleus)),
            #pn.Column(pn.pane.Markdown("### Cells After Filtering Intensity"), pn.pane.Str(cells_after_filter_intensity)),
            #pn.Column(pn.pane.Markdown("### Dataframe after filtering"), pn.pane.DataFrame(filtered_dataframe.head())),
            pn.Column(pn.pane.Markdown("### The metadata obtained that specifies the localisation:"), metadata_with_localisation.head(8)),
            pn.Column(pn.pane.Markdown("### The channels and exposure of each intensities column"), bs_info),
            pn.Column(pn.pane.Markdown("### Dataframe after performing normalization"), pn.pane.DataFrame(normalization_df.head(), width=1500)),
            pn.Column(pn.pane.Markdown("### Dataframe after background subtraction"), pn.pane.DataFrame(background_substraction_df.head())),
        )),
        ("Quality Control", pn.Column(
            quality_check(quality_control_df, not_intensities)
            #pn.pane.Markdown("### The Quality check results are:"), quality_check_results(check_shape, check_no_null, check_all_expected_files_present, check_zero_intensities)
        ))
    )],
)


# In[66]:


app2.servable()