JMLizano JMLizano darabos commited on
Commit
7baf2a1
·
unverified ·
1 Parent(s): 10c9dc3

BioNemo demo (#84)

Browse files

* Add BioNeMo integration, single demo for now
---------

Co-authored-by: JMLizano <[email protected]>
Co-authored-by: Daniel Darabos <[email protected]>

examples/BioNemo demo ADDED
@@ -0,0 +1,985 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "edges": [
3
+ {
4
+ "id": "BioNeMo > Import H5AD file 1 BioNeMo > Get labels 1",
5
+ "source": "BioNeMo > Import H5AD file 1",
6
+ "sourceHandle": "output",
7
+ "target": "BioNeMo > Get labels 1",
8
+ "targetHandle": "adata"
9
+ },
10
+ {
11
+ "id": "BioNeMo > Download CELLxGENE dataset 1 BioNeMo > Infer 1",
12
+ "source": "BioNeMo > Download CELLxGENE dataset 1",
13
+ "sourceHandle": "output",
14
+ "target": "BioNeMo > Infer 1",
15
+ "targetHandle": "dataset_path"
16
+ },
17
+ {
18
+ "id": "BioNeMo > Download model 2 BioNeMo > Infer 1",
19
+ "source": "BioNeMo > Download model 2",
20
+ "sourceHandle": "output",
21
+ "target": "BioNeMo > Infer 1",
22
+ "targetHandle": "model_path"
23
+ },
24
+ {
25
+ "id": "BioNeMo > Download CELLxGENE dataset 1 BioNeMo > Infer 2",
26
+ "source": "BioNeMo > Download CELLxGENE dataset 1",
27
+ "sourceHandle": "output",
28
+ "target": "BioNeMo > Infer 2",
29
+ "targetHandle": "dataset_path"
30
+ },
31
+ {
32
+ "id": "BioNeMo > Download model 1 BioNeMo > Infer 2",
33
+ "source": "BioNeMo > Download model 1",
34
+ "sourceHandle": "output",
35
+ "target": "BioNeMo > Infer 2",
36
+ "targetHandle": "model_path"
37
+ },
38
+ {
39
+ "id": "BioNeMo > Infer 2 BioNeMo > Load results 1",
40
+ "source": "BioNeMo > Infer 2",
41
+ "sourceHandle": "output",
42
+ "target": "BioNeMo > Load results 1",
43
+ "targetHandle": "results_path"
44
+ },
45
+ {
46
+ "id": "BioNeMo > Load results 1 BioNeMo > Run benchmark 1",
47
+ "source": "BioNeMo > Load results 1",
48
+ "sourceHandle": "output",
49
+ "target": "BioNeMo > Run benchmark 1",
50
+ "targetHandle": "data"
51
+ },
52
+ {
53
+ "id": "BioNeMo > Get labels 1 BioNeMo > Run benchmark 1",
54
+ "source": "BioNeMo > Get labels 1",
55
+ "sourceHandle": "output",
56
+ "target": "BioNeMo > Run benchmark 1",
57
+ "targetHandle": "labels"
58
+ },
59
+ {
60
+ "id": "BioNeMo > Infer 1 BioNeMo > Load results 2",
61
+ "source": "BioNeMo > Infer 1",
62
+ "sourceHandle": "output",
63
+ "target": "BioNeMo > Load results 2",
64
+ "targetHandle": "results_path"
65
+ },
66
+ {
67
+ "id": "BioNeMo > Load results 2 BioNeMo > Run benchmark 2",
68
+ "source": "BioNeMo > Load results 2",
69
+ "sourceHandle": "output",
70
+ "target": "BioNeMo > Run benchmark 2",
71
+ "targetHandle": "data"
72
+ },
73
+ {
74
+ "id": "BioNeMo > Get labels 1 BioNeMo > Run benchmark 2",
75
+ "source": "BioNeMo > Get labels 1",
76
+ "sourceHandle": "output",
77
+ "target": "BioNeMo > Run benchmark 2",
78
+ "targetHandle": "labels"
79
+ },
80
+ {
81
+ "id": "BioNeMo > Run benchmark 2 BioNeMo > Plot f1 comparison 1",
82
+ "source": "BioNeMo > Run benchmark 2",
83
+ "sourceHandle": "output",
84
+ "target": "BioNeMo > Plot f1 comparison 1",
85
+ "targetHandle": "benchmark_output10m"
86
+ },
87
+ {
88
+ "id": "BioNeMo > Run benchmark 1 BioNeMo > Plot f1 comparison 1",
89
+ "source": "BioNeMo > Run benchmark 1",
90
+ "sourceHandle": "output",
91
+ "target": "BioNeMo > Plot f1 comparison 1",
92
+ "targetHandle": "benchmark_output100m"
93
+ },
94
+ {
95
+ "id": "BioNeMo > Run benchmark 2 BioNeMo > Plot accuracy comparison 1",
96
+ "source": "BioNeMo > Run benchmark 2",
97
+ "sourceHandle": "output",
98
+ "target": "BioNeMo > Plot accuracy comparison 1",
99
+ "targetHandle": "benchmark_output10m"
100
+ },
101
+ {
102
+ "id": "BioNeMo > Run benchmark 1 BioNeMo > Plot accuracy comparison 1",
103
+ "source": "BioNeMo > Run benchmark 1",
104
+ "sourceHandle": "output",
105
+ "target": "BioNeMo > Plot accuracy comparison 1",
106
+ "targetHandle": "benchmark_output100m"
107
+ }
108
+ ],
109
+ "env": "LynxKite Graph Analytics",
110
+ "nodes": [
111
+ {
112
+ "data": {
113
+ "__execution_delay": 0.0,
114
+ "collapsed": null,
115
+ "display": null,
116
+ "error": null,
117
+ "meta": {
118
+ "inputs": {},
119
+ "name": "BioNeMo > Import H5AD file",
120
+ "outputs": {
121
+ "output": {
122
+ "name": "output",
123
+ "position": "right",
124
+ "type": {
125
+ "type": "None"
126
+ }
127
+ }
128
+ },
129
+ "params": {
130
+ "file_path": {
131
+ "default": null,
132
+ "name": "file_path",
133
+ "type": {
134
+ "type": "<class 'str'>"
135
+ }
136
+ }
137
+ },
138
+ "position": {
139
+ "x": 504.0,
140
+ "y": 355.0
141
+ },
142
+ "type": "basic"
143
+ },
144
+ "params": {
145
+ "file_path": "hs-celltype-bench.h5ad"
146
+ },
147
+ "status": "done",
148
+ "title": "BioNeMo > Import H5AD file"
149
+ },
150
+ "dragHandle": ".bg-primary",
151
+ "height": 347.0,
152
+ "id": "BioNeMo > Import H5AD file 1",
153
+ "position": {
154
+ "x": 975.3920617976814,
155
+ "y": 246.19491328410817
156
+ },
157
+ "type": "basic",
158
+ "width": 295.0
159
+ },
160
+ {
161
+ "data": {
162
+ "display": null,
163
+ "error": null,
164
+ "meta": {
165
+ "inputs": {
166
+ "adata": {
167
+ "name": "adata",
168
+ "position": "left",
169
+ "type": {
170
+ "type": "<class 'inspect._empty'>"
171
+ }
172
+ }
173
+ },
174
+ "name": "BioNeMo > Get labels",
175
+ "outputs": {
176
+ "output": {
177
+ "name": "output",
178
+ "position": "right",
179
+ "type": {
180
+ "type": "None"
181
+ }
182
+ }
183
+ },
184
+ "params": {},
185
+ "position": {
186
+ "x": 389.0,
187
+ "y": 633.0
188
+ },
189
+ "type": "basic"
190
+ },
191
+ "params": {},
192
+ "status": "done",
193
+ "title": "BioNeMo > Get labels"
194
+ },
195
+ "dragHandle": ".bg-primary",
196
+ "height": 200.0,
197
+ "id": "BioNeMo > Get labels 1",
198
+ "position": {
199
+ "x": 1330.5731290863628,
200
+ "y": 322.77821619446473
201
+ },
202
+ "type": "basic",
203
+ "width": 200.0
204
+ },
205
+ {
206
+ "data": {
207
+ "__execution_delay": 0.0,
208
+ "collapsed": null,
209
+ "display": null,
210
+ "error": null,
211
+ "meta": {
212
+ "inputs": {},
213
+ "name": "BioNeMo > Download model",
214
+ "outputs": {
215
+ "output": {
216
+ "name": "output",
217
+ "position": "right",
218
+ "type": {
219
+ "type": "None"
220
+ }
221
+ }
222
+ },
223
+ "params": {
224
+ "model_name": {
225
+ "default": null,
226
+ "name": "model_name",
227
+ "type": {
228
+ "type": "<class 'str'>"
229
+ }
230
+ }
231
+ },
232
+ "position": {
233
+ "x": 1026.0,
234
+ "y": 839.0
235
+ },
236
+ "type": "basic"
237
+ },
238
+ "params": {
239
+ "model_name": "geneformer_100m"
240
+ },
241
+ "status": "done",
242
+ "title": "BioNeMo > Download model"
243
+ },
244
+ "dragHandle": ".bg-primary",
245
+ "height": 200.0,
246
+ "id": "BioNeMo > Download model 1",
247
+ "position": {
248
+ "x": 551.1714527812203,
249
+ "y": 629.2951247275757
250
+ },
251
+ "type": "basic",
252
+ "width": 200.0
253
+ },
254
+ {
255
+ "data": {
256
+ "__execution_delay": 0.0,
257
+ "collapsed": null,
258
+ "display": null,
259
+ "error": null,
260
+ "meta": {
261
+ "inputs": {},
262
+ "name": "BioNeMo > Download model",
263
+ "outputs": {
264
+ "output": {
265
+ "name": "output",
266
+ "position": "right",
267
+ "type": {
268
+ "type": "None"
269
+ }
270
+ }
271
+ },
272
+ "params": {
273
+ "model_name": {
274
+ "default": null,
275
+ "name": "model_name",
276
+ "type": {
277
+ "type": "<class 'str'>"
278
+ }
279
+ }
280
+ },
281
+ "position": {
282
+ "x": 939.0,
283
+ "y": 523.0
284
+ },
285
+ "type": "basic"
286
+ },
287
+ "params": {
288
+ "model_name": "geneformer_10m"
289
+ },
290
+ "status": "done",
291
+ "title": "BioNeMo > Download model"
292
+ },
293
+ "dragHandle": ".bg-primary",
294
+ "height": 200.0,
295
+ "id": "BioNeMo > Download model 2",
296
+ "position": {
297
+ "x": 556.2267014450949,
298
+ "y": 313.55564323889297
299
+ },
300
+ "type": "basic",
301
+ "width": 200.0
302
+ },
303
+ {
304
+ "data": {
305
+ "__execution_delay": 0.0,
306
+ "collapsed": null,
307
+ "display": null,
308
+ "error": null,
309
+ "meta": {
310
+ "inputs": {},
311
+ "name": "BioNeMo > Download CELLxGENE dataset",
312
+ "outputs": {
313
+ "output": {
314
+ "name": "output",
315
+ "position": "right",
316
+ "type": {
317
+ "type": "None"
318
+ }
319
+ }
320
+ },
321
+ "params": {
322
+ "census_version": {
323
+ "default": "2023-12-15",
324
+ "name": "census_version",
325
+ "type": {
326
+ "type": "<class 'str'>"
327
+ }
328
+ },
329
+ "max_workers": {
330
+ "default": 1.0,
331
+ "name": "max_workers",
332
+ "type": {
333
+ "type": "<class 'int'>"
334
+ }
335
+ },
336
+ "organism": {
337
+ "default": "Homo sapiens",
338
+ "name": "organism",
339
+ "type": {
340
+ "type": "<class 'str'>"
341
+ }
342
+ },
343
+ "save_path": {
344
+ "default": null,
345
+ "name": "save_path",
346
+ "type": {
347
+ "type": "<class 'str'>"
348
+ }
349
+ },
350
+ "use_mp": {
351
+ "default": false,
352
+ "name": "use_mp",
353
+ "type": {
354
+ "type": "<class 'bool'>"
355
+ }
356
+ },
357
+ "value_filter": {
358
+ "default": "dataset_id==\"8e47ed12-c658-4252-b126-381df8d52a3d\"",
359
+ "name": "value_filter",
360
+ "type": {
361
+ "type": "<class 'str'>"
362
+ }
363
+ }
364
+ },
365
+ "position": {
366
+ "x": 1020.0,
367
+ "y": 262.0
368
+ },
369
+ "type": "basic"
370
+ },
371
+ "params": {
372
+ "census_version": "2023-12-15",
373
+ "max_workers": 1.0,
374
+ "organism": "Homo sapiens",
375
+ "save_path": "celltype-bench-dataset",
376
+ "use_mp": false,
377
+ "value_filter": "dataset_id==\"8e47ed12-c658-4252-b126-381df8d52a3d\""
378
+ },
379
+ "status": "done",
380
+ "title": "BioNeMo > Download CELLxGENE dataset"
381
+ },
382
+ "dragHandle": ".bg-primary",
383
+ "height": 421.0,
384
+ "id": "BioNeMo > Download CELLxGENE dataset 1",
385
+ "position": {
386
+ "x": 414.9692093497506,
387
+ "y": -221.8644693915577
388
+ },
389
+ "type": "basic",
390
+ "width": 240.0
391
+ },
392
+ {
393
+ "data": {
394
+ "__execution_delay": 0.0,
395
+ "collapsed": null,
396
+ "display": null,
397
+ "error": null,
398
+ "meta": {
399
+ "inputs": {
400
+ "dataset_path": {
401
+ "name": "dataset_path",
402
+ "position": "left",
403
+ "type": {
404
+ "type": "<class 'str'>"
405
+ }
406
+ },
407
+ "model_path": {
408
+ "name": "model_path",
409
+ "position": "left",
410
+ "type": {
411
+ "type": "str | None"
412
+ }
413
+ }
414
+ },
415
+ "name": "BioNeMo > Infer",
416
+ "outputs": {
417
+ "output": {
418
+ "name": "output",
419
+ "position": "right",
420
+ "type": {
421
+ "type": "None"
422
+ }
423
+ }
424
+ },
425
+ "params": {
426
+ "results_path": {
427
+ "default": null,
428
+ "name": "results_path",
429
+ "type": {
430
+ "type": "<class 'str'>"
431
+ }
432
+ }
433
+ },
434
+ "position": {
435
+ "x": 1544.0,
436
+ "y": 356.0
437
+ },
438
+ "type": "basic"
439
+ },
440
+ "params": {
441
+ "results_path": "results_10m"
442
+ },
443
+ "status": "done",
444
+ "title": "BioNeMo > Infer"
445
+ },
446
+ "dragHandle": ".bg-primary",
447
+ "height": 200.0,
448
+ "id": "BioNeMo > Infer 1",
449
+ "position": {
450
+ "x": 1039.04712219626,
451
+ "y": -43.33924107744772
452
+ },
453
+ "type": "basic",
454
+ "width": 200.0
455
+ },
456
+ {
457
+ "data": {
458
+ "__execution_delay": 0.0,
459
+ "collapsed": null,
460
+ "display": null,
461
+ "error": null,
462
+ "meta": {
463
+ "inputs": {
464
+ "dataset_path": {
465
+ "name": "dataset_path",
466
+ "position": "left",
467
+ "type": {
468
+ "type": "<class 'str'>"
469
+ }
470
+ },
471
+ "model_path": {
472
+ "name": "model_path",
473
+ "position": "left",
474
+ "type": {
475
+ "type": "str | None"
476
+ }
477
+ }
478
+ },
479
+ "name": "BioNeMo > Infer",
480
+ "outputs": {
481
+ "output": {
482
+ "name": "output",
483
+ "position": "right",
484
+ "type": {
485
+ "type": "None"
486
+ }
487
+ }
488
+ },
489
+ "params": {
490
+ "results_path": {
491
+ "default": null,
492
+ "name": "results_path",
493
+ "type": {
494
+ "type": "<class 'str'>"
495
+ }
496
+ }
497
+ },
498
+ "position": {
499
+ "x": 1256.0,
500
+ "y": 1005.0
501
+ },
502
+ "type": "basic"
503
+ },
504
+ "params": {
505
+ "results_path": "results_100m"
506
+ },
507
+ "status": "done",
508
+ "title": "BioNeMo > Infer"
509
+ },
510
+ "dragHandle": ".bg-primary",
511
+ "height": 200.0,
512
+ "id": "BioNeMo > Infer 2",
513
+ "position": {
514
+ "x": 1030.3289199948294,
515
+ "y": 636.5914302771178
516
+ },
517
+ "type": "basic",
518
+ "width": 200.0
519
+ },
520
+ {
521
+ "data": {
522
+ "display": null,
523
+ "error": null,
524
+ "meta": {
525
+ "inputs": {
526
+ "results_path": {
527
+ "name": "results_path",
528
+ "position": "left",
529
+ "type": {
530
+ "type": "<class 'str'>"
531
+ }
532
+ }
533
+ },
534
+ "name": "BioNeMo > Load results",
535
+ "outputs": {
536
+ "output": {
537
+ "name": "output",
538
+ "position": "right",
539
+ "type": {
540
+ "type": "None"
541
+ }
542
+ }
543
+ },
544
+ "params": {},
545
+ "position": {
546
+ "x": 1506.0,
547
+ "y": 804.0
548
+ },
549
+ "type": "basic"
550
+ },
551
+ "params": {},
552
+ "status": "done",
553
+ "title": "BioNeMo > Load results"
554
+ },
555
+ "dragHandle": ".bg-primary",
556
+ "height": 200.0,
557
+ "id": "BioNeMo > Load results 1",
558
+ "position": {
559
+ "x": 1316.753212112243,
560
+ "y": 588.3511253627433
561
+ },
562
+ "type": "basic",
563
+ "width": 200.0
564
+ },
565
+ {
566
+ "data": {
567
+ "display": null,
568
+ "error": null,
569
+ "meta": {
570
+ "inputs": {
571
+ "data": {
572
+ "name": "data",
573
+ "position": "left",
574
+ "type": {
575
+ "type": "<class 'inspect._empty'>"
576
+ }
577
+ },
578
+ "labels": {
579
+ "name": "labels",
580
+ "position": "left",
581
+ "type": {
582
+ "type": "<class 'inspect._empty'>"
583
+ }
584
+ }
585
+ },
586
+ "name": "BioNeMo > Run benchmark",
587
+ "outputs": {
588
+ "output": {
589
+ "name": "output",
590
+ "position": "right",
591
+ "type": {
592
+ "type": "None"
593
+ }
594
+ }
595
+ },
596
+ "params": {
597
+ "use_pca": {
598
+ "default": false,
599
+ "name": "use_pca",
600
+ "type": {
601
+ "type": "<class 'bool'>"
602
+ }
603
+ }
604
+ },
605
+ "position": {
606
+ "x": 1698.0,
607
+ "y": 929.0
608
+ },
609
+ "type": "basic"
610
+ },
611
+ "params": {
612
+ "use_pca": false
613
+ },
614
+ "status": "done",
615
+ "title": "BioNeMo > Run benchmark"
616
+ },
617
+ "dragHandle": ".bg-primary",
618
+ "height": 254.0,
619
+ "id": "BioNeMo > Run benchmark 1",
620
+ "position": {
621
+ "x": 1717.5260843687468,
622
+ "y": 601.9085109739857
623
+ },
624
+ "type": "basic",
625
+ "width": 218.0
626
+ },
627
+ {
628
+ "data": {
629
+ "display": null,
630
+ "error": null,
631
+ "meta": {
632
+ "inputs": {
633
+ "results_path": {
634
+ "name": "results_path",
635
+ "position": "left",
636
+ "type": {
637
+ "type": "<class 'str'>"
638
+ }
639
+ }
640
+ },
641
+ "name": "BioNeMo > Load results",
642
+ "outputs": {
643
+ "output": {
644
+ "name": "output",
645
+ "position": "right",
646
+ "type": {
647
+ "type": "None"
648
+ }
649
+ }
650
+ },
651
+ "params": {},
652
+ "position": {
653
+ "x": 1314.0,
654
+ "y": 286.0
655
+ },
656
+ "type": "basic"
657
+ },
658
+ "params": {},
659
+ "status": "done",
660
+ "title": "BioNeMo > Load results"
661
+ },
662
+ "dragHandle": ".bg-primary",
663
+ "height": 200.0,
664
+ "id": "BioNeMo > Load results 2",
665
+ "position": {
666
+ "x": 1371.1643035406682,
667
+ "y": -38.628856650688306
668
+ },
669
+ "type": "basic",
670
+ "width": 200.0
671
+ },
672
+ {
673
+ "data": {
674
+ "display": null,
675
+ "error": null,
676
+ "meta": {
677
+ "inputs": {
678
+ "data": {
679
+ "name": "data",
680
+ "position": "left",
681
+ "type": {
682
+ "type": "<class 'inspect._empty'>"
683
+ }
684
+ },
685
+ "labels": {
686
+ "name": "labels",
687
+ "position": "left",
688
+ "type": {
689
+ "type": "<class 'inspect._empty'>"
690
+ }
691
+ }
692
+ },
693
+ "name": "BioNeMo > Run benchmark",
694
+ "outputs": {
695
+ "output": {
696
+ "name": "output",
697
+ "position": "right",
698
+ "type": {
699
+ "type": "None"
700
+ }
701
+ }
702
+ },
703
+ "params": {
704
+ "use_pca": {
705
+ "default": false,
706
+ "name": "use_pca",
707
+ "type": {
708
+ "type": "<class 'bool'>"
709
+ }
710
+ }
711
+ },
712
+ "position": {
713
+ "x": 1576.0,
714
+ "y": 395.0
715
+ },
716
+ "type": "basic"
717
+ },
718
+ "params": {
719
+ "use_pca": false
720
+ },
721
+ "status": "done",
722
+ "title": "BioNeMo > Run benchmark"
723
+ },
724
+ "dragHandle": ".bg-primary",
725
+ "height": 200.0,
726
+ "id": "BioNeMo > Run benchmark 2",
727
+ "position": {
728
+ "x": 1740.0,
729
+ "y": 120.0
730
+ },
731
+ "type": "basic",
732
+ "width": 200.0
733
+ },
734
+ {
735
+ "data": {
736
+ "display": {
737
+ "grid": {
738
+ "bottom": "10%",
739
+ "height": "70%",
740
+ "left": "20%",
741
+ "right": "10%",
742
+ "top": "10%",
743
+ "width": "70%"
744
+ },
745
+ "series": [
746
+ {
747
+ "data": [
748
+ 0.7020536292780548,
749
+ 0.843335333719808
750
+ ],
751
+ "itemStyle": {
752
+ "color": "#440154"
753
+ },
754
+ "name": "F1 Score",
755
+ "type": "bar"
756
+ },
757
+ {
758
+ "data": [
759
+ [
760
+ 0.6853106016807672,
761
+ 0.7187966568753424
762
+ ],
763
+ [
764
+ 0.8270726644727397,
765
+ 0.8595980029668762
766
+ ]
767
+ ],
768
+ "itemStyle": {
769
+ "color": "#1f77b4"
770
+ },
771
+ "name": "Error Bars",
772
+ "type": "errorbar"
773
+ }
774
+ ],
775
+ "title": {
776
+ "left": "center",
777
+ "text": "F1 Score Comparison",
778
+ "textStyle": {
779
+ "fontSize": 20,
780
+ "fontWeight": "bold"
781
+ }
782
+ },
783
+ "tooltip": {
784
+ "axisPointer": {
785
+ "type": "shadow"
786
+ },
787
+ "trigger": "axis"
788
+ },
789
+ "xAxis": {
790
+ "axisLabel": {
791
+ "align": "right",
792
+ "rotate": 45,
793
+ "textStyle": {
794
+ "fontSize": 14,
795
+ "fontWeight": "bold"
796
+ }
797
+ },
798
+ "data": [
799
+ "10M parameters",
800
+ "106M parameters"
801
+ ],
802
+ "type": "category"
803
+ },
804
+ "yAxis": {
805
+ "axisLabel": {
806
+ "textStyle": {
807
+ "fontSize": 14,
808
+ "fontWeight": "bold"
809
+ }
810
+ },
811
+ "interval": 0.1,
812
+ "max": 1,
813
+ "min": 0,
814
+ "name": "F1 Score",
815
+ "type": "value"
816
+ }
817
+ },
818
+ "error": null,
819
+ "meta": {
820
+ "inputs": {
821
+ "benchmark_output100m": {
822
+ "name": "benchmark_output100m",
823
+ "position": "left",
824
+ "type": {
825
+ "type": "<class 'inspect._empty'>"
826
+ }
827
+ },
828
+ "benchmark_output10m": {
829
+ "name": "benchmark_output10m",
830
+ "position": "left",
831
+ "type": {
832
+ "type": "<class 'inspect._empty'>"
833
+ }
834
+ }
835
+ },
836
+ "name": "BioNeMo > Plot f1 comparison",
837
+ "outputs": {},
838
+ "params": {},
839
+ "position": {
840
+ "x": 1716.0,
841
+ "y": 309.0
842
+ },
843
+ "type": "visualization"
844
+ },
845
+ "params": {},
846
+ "status": "done",
847
+ "title": "BioNeMo > Plot f1 comparison"
848
+ },
849
+ "dragHandle": ".bg-primary",
850
+ "height": 863.0,
851
+ "id": "BioNeMo > Plot f1 comparison 1",
852
+ "position": {
853
+ "x": 2091.687426186124,
854
+ "y": -368.096892874947
855
+ },
856
+ "type": "visualization",
857
+ "width": 1034.0
858
+ },
859
+ {
860
+ "data": {
861
+ "display": {
862
+ "grid": {
863
+ "bottom": "10%",
864
+ "height": "70%",
865
+ "left": "20%",
866
+ "right": "10%",
867
+ "top": "10%",
868
+ "width": "70%"
869
+ },
870
+ "series": [
871
+ {
872
+ "data": [
873
+ 0.8385031821273431,
874
+ 0.9053958718388249
875
+ ],
876
+ "itemStyle": {
877
+ "color": "#440154"
878
+ },
879
+ "name": "Accuracy",
880
+ "type": "bar"
881
+ },
882
+ {
883
+ "data": [
884
+ [
885
+ 0.8221974395834195,
886
+ 0.8548089246712667
887
+ ],
888
+ [
889
+ 0.8901141406971089,
890
+ 0.9206776029805408
891
+ ]
892
+ ],
893
+ "itemStyle": {
894
+ "color": "#1f77b4"
895
+ },
896
+ "name": "Error Bars",
897
+ "type": "errorbar"
898
+ }
899
+ ],
900
+ "title": {
901
+ "left": "center",
902
+ "text": "Accuracy Comparison",
903
+ "textStyle": {
904
+ "fontSize": 20,
905
+ "fontWeight": "bold"
906
+ }
907
+ },
908
+ "tooltip": {
909
+ "axisPointer": {
910
+ "type": "shadow"
911
+ },
912
+ "trigger": "axis"
913
+ },
914
+ "xAxis": {
915
+ "axisLabel": {
916
+ "align": "right",
917
+ "rotate": 45,
918
+ "textStyle": {
919
+ "fontSize": 14,
920
+ "fontWeight": "bold"
921
+ }
922
+ },
923
+ "data": [
924
+ "10M parameters",
925
+ "106M parameters"
926
+ ],
927
+ "type": "category"
928
+ },
929
+ "yAxis": {
930
+ "axisLabel": {
931
+ "textStyle": {
932
+ "fontSize": 14,
933
+ "fontWeight": "bold"
934
+ }
935
+ },
936
+ "interval": 0.1,
937
+ "max": 1,
938
+ "min": 0,
939
+ "name": "Accuracy",
940
+ "type": "value"
941
+ }
942
+ },
943
+ "error": null,
944
+ "meta": {
945
+ "inputs": {
946
+ "benchmark_output100m": {
947
+ "name": "benchmark_output100m",
948
+ "position": "left",
949
+ "type": {
950
+ "type": "<class 'inspect._empty'>"
951
+ }
952
+ },
953
+ "benchmark_output10m": {
954
+ "name": "benchmark_output10m",
955
+ "position": "left",
956
+ "type": {
957
+ "type": "<class 'inspect._empty'>"
958
+ }
959
+ }
960
+ },
961
+ "name": "BioNeMo > Plot accuracy comparison",
962
+ "outputs": {},
963
+ "params": {},
964
+ "position": {
965
+ "x": 1574.0,
966
+ "y": 720.0
967
+ },
968
+ "type": "visualization"
969
+ },
970
+ "params": {},
971
+ "status": "done",
972
+ "title": "BioNeMo > Plot accuracy comparison"
973
+ },
974
+ "dragHandle": ".bg-primary",
975
+ "height": 200.0,
976
+ "id": "BioNeMo > Plot accuracy comparison 1",
977
+ "position": {
978
+ "x": 2160.0,
979
+ "y": 915.0
980
+ },
981
+ "type": "visualization",
982
+ "width": 200.0
983
+ }
984
+ ]
985
+ }
lynxkite-app/src/lynxkite_app/__main__.py CHANGED
@@ -6,7 +6,13 @@ import os
6
  def main():
7
  port = int(os.environ.get("PORT", "8000"))
8
  reload = bool(os.environ.get("LYNXKITE_RELOAD", ""))
9
- uvicorn.run("lynxkite_app.main:app", host="0.0.0.0", port=port, reload=reload)
 
 
 
 
 
 
10
 
11
 
12
  if __name__ == "__main__":
 
6
  def main():
7
  port = int(os.environ.get("PORT", "8000"))
8
  reload = bool(os.environ.get("LYNXKITE_RELOAD", ""))
9
+ uvicorn.run(
10
+ "lynxkite_app.main:app",
11
+ host="0.0.0.0",
12
+ port=port,
13
+ reload=reload,
14
+ loop="asyncio",
15
+ )
16
 
17
 
18
  if __name__ == "__main__":
lynxkite-app/web/src/workspace/nodes/NodeWithVisualization.tsx CHANGED
@@ -10,8 +10,8 @@ const NodeWithVisualization = (props: any) => {
10
  if (!opts || !chartsRef.current) return;
11
  chartsInstanceRef.current = echarts.init(chartsRef.current, null, {
12
  renderer: "canvas",
13
- width: 250,
14
- height: 250,
15
  });
16
  chartsInstanceRef.current.setOption(opts);
17
  const onResize = () => chartsInstanceRef.current?.resize();
 
10
  if (!opts || !chartsRef.current) return;
11
  chartsInstanceRef.current = echarts.init(chartsRef.current, null, {
12
  renderer: "canvas",
13
+ width: 800,
14
+ height: 800,
15
  });
16
  chartsInstanceRef.current.setOption(opts);
17
  const onResize = () => chartsInstanceRef.current?.resize();
lynxkite-graph-analytics/.dockerignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ lynxkite_data
2
+ lynxkite_crdt_data
3
+ .venv
lynxkite-graph-analytics/Dockerfile.bionemo ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvcr.io/nvidia/clara/bionemo-framework:nightly
2
+
3
+ ENV LYNXKITE_BIONEMO_INSTALLED=true
4
+
5
+ WORKDIR /app
6
+
7
+ # Download and install nvm
8
+ RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.2/install.sh | bash
9
+ RUN echo node > .nvmrc
10
+ RUN source /root/.nvm/nvm.sh --install
11
+
12
+ COPY . /app
13
+
14
+ RUN uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-bio -e lynxkite-pillow-example/
15
+
16
+ # bionemo cellxgene_census needs this version of numpy
17
+ RUN uv pip install numpy==1.26.4
lynxkite-graph-analytics/README.md CHANGED
@@ -11,3 +11,44 @@ pip install lynxkite lynxkite-graph-analytics
11
  ```
12
 
13
  Run LynxKite with `NX_CUGRAPH_AUTOCONFIG=True` to enable GPU-accelerated graph data science operations.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ```
12
 
13
  Run LynxKite with `NX_CUGRAPH_AUTOCONFIG=True` to enable GPU-accelerated graph data science operations.
14
+
15
+
16
+ ## BioNemo
17
+
18
+ If you want to use BioNemo operations, then you will have to use the provided Docker image, or
19
+ install BioNemo manually in your environment.
20
+ Note that BioNemo needs a GPU to work. You can find the specific requirements
21
+ [here](https://docs.nvidia.com/bionemo-framework/latest/user-guide/getting-started/pre-reqs/).
22
+
23
+ The import of BioNemo operations is gated behind the `LYNXKITE_BIONEMO_INSTALLED` environment variable.
24
+ BioNemo operations will only be imported if this environment variable is set to true.
25
+
26
+ To build the image:
27
+
28
+ ```bash
29
+ # in lynxkite-graph-analytics folder
30
+ $ docker build -f Dockerfile.bionemo -t lynxkite-bionemo ..
31
+ ```
32
+
33
+ Take into account that this Dockerfile does not include the lynxkite-lynxscribe package. If you want to include it you will
34
+ need to set up git credentials inside the container.
35
+
36
+ Then, inside the image you can start LynxKite as usual.
37
+
38
+ If you want to do some development, then it is recommended to use the [devcontainers](https://code.visualstudio.com/docs/devcontainers/containers)
39
+ vscode extension. The following is a basic configuration to get started:
40
+
41
+ ```json
42
+ // .devcontainer/devcontainer.json
43
+ {
44
+ "name": "Existing Dockerfile",
45
+ "runArgs": [
46
+ "--gpus=all",
47
+ "--shm-size=4g"
48
+ ],
49
+ "build": {
50
+ "context": "..",
51
+ "dockerfile": "../lynxkite-graph-analytics/Dockerfile.bionemo"
52
+ }
53
+ }
54
+ ```
lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py CHANGED
@@ -14,3 +14,6 @@ from .core import * # noqa (easier access for core classes)
14
  from . import lynxkite_ops # noqa (imported to trigger registration)
15
  from . import networkx_ops # noqa (imported to trigger registration)
16
  from . import pytorch_model_ops # noqa (imported to trigger registration)
 
 
 
 
14
  from . import lynxkite_ops # noqa (imported to trigger registration)
15
  from . import networkx_ops # noqa (imported to trigger registration)
16
  from . import pytorch_model_ops # noqa (imported to trigger registration)
17
+
18
+ if os.environ.get("LYNXKITE_BIONEMO_INSTALLED", "").strip().lower() == "true":
19
+ from . import bionemo_ops # noqa (imported to trigger registration)
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """BioNeMo related operations
2
+
3
+ The intention is to showcase how BioNeMo can be integrated with LynxKite. This should be
4
+ considered a reference implementation, not production-ready code.
5
+ The operations are quite specific for this example notebook:
6
+ https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/examples/bionemo-geneformer/geneformer-celltype-classification.ipynb
7
+ """
8
+
9
+ from lynxkite.core import ops
10
+ import requests
11
+ import tarfile
12
+ import os
13
+ from collections import Counter
14
+ from . import core
15
+ import joblib
16
+ import numpy as np
17
+ import torch
18
+ from pathlib import Path
19
+ import random
20
+ from contextlib import contextmanager
21
+ import cellxgene_census # TODO: This needs numpy < 2
22
+ import tempfile
23
+ from sklearn.ensemble import RandomForestClassifier
24
+ from sklearn.pipeline import Pipeline
25
+ from sklearn.model_selection import StratifiedKFold, cross_validate
26
+ from sklearn.metrics import (
27
+ make_scorer,
28
+ accuracy_score,
29
+ precision_score,
30
+ recall_score,
31
+ f1_score,
32
+ roc_auc_score,
33
+ confusion_matrix,
34
+ )
35
+ from sklearn.decomposition import PCA
36
+ from sklearn.model_selection import cross_val_predict
37
+ from sklearn.preprocessing import LabelEncoder
38
+ from bionemo.scdl.io.single_cell_collection import SingleCellCollection
39
+
40
+ import scanpy
41
+
42
+
43
# On-disk joblib cache used (via ``@mem.cache``) by the expensive operations below.
# The path is relative to the process working directory.
mem = joblib.Memory("../joblib-cache")
# Decorator factory bound to this package's environment; used as ``@op("...")``.
op = ops.op_registration(core.ENV)
# Base directory that dataset, model and result paths are resolved against.
DATA_PATH = Path("/workspace")
46
+
47
+
48
@contextmanager
def random_seed(seed: int):
    """Temporarily seed Python's global ``random`` generator.

    The generator state is captured on entry and restored on exit (also when the
    body raises), so code outside the ``with`` block sees an undisturbed stream.
    """
    saved_state = random.getstate()
    try:
        random.seed(seed)
        yield
    finally:
        # Put the generator back exactly where it was before seeding.
        random.setstate(saved_state)
57
+
58
+
59
@op("BioNeMo > Download CELLxGENE dataset")
@mem.cache()
def download_cellxgene_dataset(
    *,
    save_path: str,
    census_version: str = "2023-12-15",
    organism: str = "Homo sapiens",
    value_filter: str = 'dataset_id=="8e47ed12-c658-4252-b126-381df8d52a3d"',
    max_workers: int = 1,
    use_mp: bool = False,
) -> Path:
    """Download a CELLxGENE dataset, subsample it and convert it for BioNeMo.

    Args:
        save_path: Output location, relative to ``DATA_PATH``, of the flattened
            single-cell collection.
        census_version: CELLxGENE census version to open.
        organism: Organism passed to ``cellxgene_census.get_anndata``.
        value_filter: ``obs_value_filter`` expression selecting the cells.
        max_workers: Worker count passed to ``load_h5ad_multi``.
        use_mp: Passed to ``load_h5ad_multi`` as ``use_processes``.

    Returns:
        ``DATA_PATH / save_path``.

    Side effects:
        Writes ``hs-celltype-bench.h5ad`` into ``DATA_PATH`` and the flattened
        collection into ``DATA_PATH / save_path``. Because the function is wrapped
        in ``mem.cache``, a cache hit skips these writes.
    """
    # NOTE: there's a current constraint that predict_step needs to be a function of micro-batch-size.
    # this is something we are working on fixing. A quick hack is to set micro-batch-size=1, but this is
    # slow. In this notebook we are going to use mbs=32 and subsample the anndata.
    micro_batch_size: int = 32
    num_steps: int = 256

    with cellxgene_census.open_soma(census_version=census_version) as census:
        adata = cellxgene_census.get_anndata(
            census,
            organism,
            obs_value_filter=value_filter,
        )

    # Fixed seed so the same subsample is picked on every run.
    with random_seed(32):
        indices = list(range(len(adata)))
        random.shuffle(indices)
    selection = sorted(indices[: micro_batch_size * num_steps])
    adata = adata[selection].copy()  # so it's not a view

    h5ad_outfile = DATA_PATH / Path("hs-celltype-bench.h5ad")
    adata.write_h5ad(h5ad_outfile)

    with tempfile.TemporaryDirectory() as temp_dir:
        coll = SingleCellCollection(temp_dir)
        # NOTE(review): this is given the whole DATA_PATH directory, not just the
        # file written above -- presumably any other .h5ad files there are picked
        # up too; confirm this is intended.
        coll.load_h5ad_multi(
            h5ad_outfile.parent, max_workers=max_workers, use_processes=use_mp
        )
        coll.flatten(DATA_PATH / save_path, destroy_on_copy=True)
    return DATA_PATH / save_path
97
+
98
+
99
@op("BioNeMo > Import H5AD file")
def import_h5ad(*, file_path: str):
    """Read the H5AD file at ``file_path`` (resolved against ``DATA_PATH``) with scanpy."""
    h5ad_path = DATA_PATH / Path(file_path)
    return scanpy.read_h5ad(h5ad_path)
102
+
103
+
104
@op("BioNeMo > Download model")
@mem.cache(verbose=1)
def download_model(*, model_name: str) -> str:
    """Download a Geneformer checkpoint archive from NGC and unpack it.

    Args:
        model_name: One of ``"geneformer_100m"``, ``"geneformer_10m"`` or
            ``"geneformer_10m2"``.

    Returns:
        Path (as a string) of the directory under ``DATA_PATH`` that the archive
        was extracted into.

    Raises:
        KeyError: If ``model_name`` is not one of the known models.
        requests.HTTPError: If the download responds with a 4xx/5xx status.
    """
    model_download_parameters = {
        "geneformer_100m": {
            "name": "geneformer_100m",
            "version": "2.0",
            "path": "geneformer_106M_240530_nemo2",
        },
        "geneformer_10m": {
            "name": "geneformer_10m",
            "version": "2.0",
            "path": "geneformer_10M_240530_nemo2",
        },
        "geneformer_10m2": {
            "name": "geneformer_10m",
            "version": "2.1",
            "path": "geneformer_10M_241113_nemo2",
        },
    }
    try:
        parameters = model_download_parameters[model_name]
    except KeyError:
        known = ", ".join(sorted(model_download_parameters))
        raise KeyError(
            f"Unknown model {model_name!r}; expected one of: {known}"
        ) from None

    # Define the URL and output file
    url_template = "https://api.ngc.nvidia.com/v2/models/org/nvidia/team/clara/{name}/{version}/files?redirect=true&path={path}.tar.gz"
    url = url_template.format(**parameters)
    model_filename = f"{DATA_PATH}/{parameters['path']}"
    output_file = f"{model_filename}.tar.gz"

    # Stream the archive to disk. The context manager releases the connection even
    # if writing fails; the timeout (connect, read) keeps a stalled server from
    # hanging the operation forever.
    with requests.get(
        url, allow_redirects=True, stream=True, timeout=(10, 300)
    ) as response:
        response.raise_for_status()  # Raise an error for bad responses (4xx and 5xx)
        with open(output_file, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    # Extract the tar.gz file
    os.makedirs(model_filename, exist_ok=True)
    with tarfile.open(output_file, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # Refuse members that would escape the target directory (absolute
            # paths, "..", unsafe links) where the interpreter supports filters.
            tar.extractall(path=model_filename, filter="data")
        else:
            tar.extractall(path=model_filename)

    return model_filename
147
+
148
+
149
@op("BioNeMo > Infer")
@mem.cache(verbose=1)
def infer(
    dataset_path: str, model_path: str | None = None, *, results_path: str
) -> str:
    """Run Geneformer inference over a dataset and return where the results are.

    Args:
        dataset_path: Passed to ``infer_model`` as ``data_path``.
        model_path: Passed to ``infer_model`` as ``checkpoint_path``.
        results_path: Output location, resolved against ``DATA_PATH``.

    Returns:
        ``DATA_PATH / results_path``.
    """
    # Deferred to call time because importing this module is slow.
    from bionemo.geneformer.scripts.infer_geneformer import infer_model

    output_location = DATA_PATH / results_path
    # Fixed inference settings: only embeddings are requested.
    inference_settings = dict(
        include_hiddens=False,
        micro_batch_size=32,
        include_embeddings=True,
        include_logits=False,
        seq_length=2048,
        precision="bf16-mixed",
        devices=1,
        num_nodes=1,
        num_dataset_workers=10,
    )
    infer_model(
        data_path=dataset_path,
        checkpoint_path=model_path,
        results_path=output_location,
        **inference_settings,
    )
    return output_location
173
+
174
+
175
@op("BioNeMo > Load results")
def load_results(results_path: str):
    """Load the embeddings written by inference as a float32 NumPy array.

    Args:
        results_path: Directory containing ``predictions__rank_0.pt``.

    Returns:
        The ``"embeddings"`` entry of the predictions file, converted to a
        float32 NumPy array on the CPU.
    """
    # map_location="cpu" lets the file load on hosts without the device the
    # tensors were saved from; the result is moved to the CPU below anyway.
    # NOTE: torch.load unpickles the file -- only use it on trusted results.
    predictions = torch.load(
        f"{results_path}/predictions__rank_0.pt", map_location="cpu"
    )
    embeddings = predictions["embeddings"].float().cpu().numpy()
    return embeddings
184
+
185
+
186
@op("BioNeMo > Get labels")
def get_labels(adata):
    """Fit a ``LabelEncoder`` on the ``cell_type`` column of ``adata.obs``.

    Returns:
        The fitted encoder, with the encoded labels additionally stored on it as
        the ``integer_labels`` attribute.
    """
    cell_types = adata.obs["cell_type"].values
    encoder = LabelEncoder()
    # Attach the encoded labels to the encoder so one object carries both.
    encoder.integer_labels = encoder.fit_transform(cell_types)
    return encoder
194
+
195
+
196
@op("BioNeMo > Plot labels", view="visualization")
def plot_labels(adata):
    """Build bar-chart options showing how many cells each ``cell_type`` has."""
    counts = Counter(adata.obs["cell_type"].values)
    categories = list(counts)
    frequencies = list(counts.values())

    title = {
        "text": "Cell type counts for classification dataset",
        "left": "center",
    }
    x_axis = {
        "type": "category",
        "data": categories,
        "axisLabel": {"rotate": 45, "align": "right"},
    }
    bar_series = {
        "name": "Count",
        "type": "bar",
        "data": frequencies,
        "itemStyle": {"color": "#4285F4"},
    }
    return {
        "title": title,
        "tooltip": {"trigger": "axis", "axisPointer": {"type": "shadow"}},
        "xAxis": x_axis,
        "yAxis": {"type": "value"},
        "series": [bar_series],
    }
226
+
227
+
228
@op("BioNeMo > Run benchmark")
@mem.cache(verbose=1)
def run_benchmark(data, labels, *, use_pca: bool = False):
    """Cross-validate a random-forest cell-type classifier on the given features.

    Args:
        data: Feature matrix with one row per cell, shape ``(R, C)``.
        labels: Object exposing the integer-encoded labels, shape ``(R,)``, as
            its ``integer_labels`` attribute.
        use_pca: If true, project the features to 10 PCA components before
            classification.

    Returns:
        A tuple ``(results_out, conf_matrix)``. ``results_out`` maps each
        ``test_<metric>`` name to ``(mean, std)`` over the folds, and
        ``conf_matrix`` is the confusion matrix of the cross-validated predictions.
    """
    np.random.seed(1337)
    # Define the target dimension 'n_components'
    n_components = 10  # for example, adjust based on your specific needs

    # Create a pipeline with an optional PCA projection followed by a RandomForestClassifier
    steps = []
    if use_pca:
        steps.append(("projection", PCA(n_components=n_components)))
    steps.append(("classifier", RandomForestClassifier(class_weight="balanced")))
    pipeline = Pipeline(steps)

    # Set up StratifiedKFold to ensure each fold reflects the overall distribution of labels
    cv = StratifiedKFold(n_splits=5)

    # Define the scoring functions
    scoring = {
        "accuracy": make_scorer(accuracy_score),
        "precision": make_scorer(
            precision_score, average="macro"
        ),  # 'macro' averages over classes
        "recall": make_scorer(recall_score, average="macro"),
        "f1_score": make_scorer(f1_score, average="macro"),
        # Multiclass ROC AUC must be computed from class probabilities, not from
        # hard predictions. A plain make_scorer(roc_auc_score, ...) scores the
        # output of predict(), so use the built-in one-vs-rest scorer instead.
        "roc_auc": "roc_auc_ovr",
    }
    labels = labels.integer_labels
    # Perform stratified cross-validation with multiple metrics using the pipeline
    results = cross_validate(
        pipeline, data, labels, cv=cv, scoring=scoring, return_train_score=False
    )

    # Print the cross-validation results
    print("Cross-validation metrics:")
    results_out = {}
    for metric, scores in results.items():
        if metric.startswith("test_"):
            results_out[metric] = (scores.mean(), scores.std())
            print(f"{metric[5:]}: {scores.mean():.3f} (+/- {scores.std():.3f})")

    predictions = cross_val_predict(pipeline, data, labels, cv=cv)

    # Return confusion matrix and metrics.
    conf_matrix = confusion_matrix(labels, predictions)

    return results_out, conf_matrix
289
+
290
+
291
@op("BioNeMo > Plot confusion matrix", view="visualization")
@mem.cache(verbose=1)
def plot_confusion_matrix(benchmark_output, labels):
    """Build heatmap options for the row-normalized confusion matrix.

    Args:
        benchmark_output: Sequence whose element ``1`` is the confusion matrix.
        labels: Object whose ``classes_`` attribute lists the class names.
    """
    matrix = benchmark_output[1]
    class_names = [str(name) for name in labels.classes_]
    size = len(class_names)

    # Divide every row by its total; rows that sum to zero become all zeros.
    normalized = []
    for row in matrix:
        total = sum(row)
        normalized.append([float(cell / total) if total else 0 for cell in row])

    # heatmap has the 0,0 at the bottom left corner
    cells = [
        [col, size - row - 1, normalized[row][col]]
        for row in range(size)
        for col in range(size)
    ]

    x_axis = {
        "type": "category",
        "data": class_names,
        "splitArea": {"show": True},
        "axisLabel": {"rotate": 70, "align": "right"},
    }
    y_axis = {
        "type": "category",
        "data": list(reversed(class_names)),
        "splitArea": {"show": True},
    }
    grid = {
        "height": "70%",
        "width": "70%",
        "left": "20%",
        "right": "10%",
        "bottom": "10%",
        "top": "10%",
    }
    visual_map = {
        "min": 0,
        "max": 1,
        "calculable": True,
        "orient": "vertical",
        "right": 10,
        "top": "center",
        "inRange": {
            "color": ["#E0F7FA", "#81D4FA", "#29B6F6", "#0288D1", "#01579B"]
        },
    }
    heatmap_series = {
        "name": "Confusion matrix",
        "type": "heatmap",
        "data": cells,
        "emphasis": {"itemStyle": {"borderColor": "#333", "borderWidth": 1}},
        "itemStyle": {"borderColor": "#D3D3D3", "borderWidth": 2},
    }
    return {
        "title": {"text": "Confusion Matrix", "left": "center"},
        "tooltip": {"position": "top"},
        "xAxis": x_axis,
        "yAxis": y_axis,
        "grid": grid,
        "visualMap": visual_map,
        "series": [heatmap_series],
    }
350
+
351
+
352
def _metric_comparison_options(
    benchmark_output10m, benchmark_output100m, metric_key, metric_name
):
    """Build bar-chart options comparing one benchmark metric across two models.

    Args:
        benchmark_output10m: Benchmark output of the 10M model; element ``0`` maps
            metric keys to ``(mean, std)``.
        benchmark_output100m: Same, for the 106M model.
        metric_key: Key to read from the results, e.g. ``"test_accuracy"``.
        metric_name: Human-readable metric name used in the title, the Y-axis name
            and the bar series name.
    """
    results_10m = benchmark_output10m[0]
    results_106M = benchmark_output100m[0]

    labels = ["10M parameters", "106M parameters"]  # X-axis labels
    values = [results_10m[metric_key][0], results_106M[metric_key][0]]  # means
    error_bars = [results_10m[metric_key][1], results_106M[metric_key][1]]  # stds

    options = {
        "title": {
            "text": f"{metric_name} Comparison",
            "left": "center",
            "textStyle": {
                "fontSize": 20,  # Bigger font for title
                "fontWeight": "bold",  # Make title bold
            },
        },
        "grid": {
            "height": "70%",
            "width": "70%",
            "left": "20%",
            "right": "10%",
            "bottom": "10%",
            "top": "10%",
        },
        "tooltip": {"trigger": "axis", "axisPointer": {"type": "shadow"}},
        "xAxis": {
            "type": "category",
            "data": labels,
            "axisLabel": {
                "rotate": 45,  # Rotate labels for better readability
                "align": "right",
                "textStyle": {
                    "fontSize": 14,  # Bigger font for X-axis labels
                    "fontWeight": "bold",
                },
            },
        },
        "yAxis": {
            "type": "value",
            "name": metric_name,
            "min": 0,
            "max": 1,
            "interval": 0.1,  # One tick every 0.1 between 0 and 1
            "axisLabel": {
                "textStyle": {
                    "fontSize": 14,  # Bigger font for Y-axis labels
                    "fontWeight": "bold",
                }
            },
        },
        "series": [
            {
                "name": metric_name,
                "type": "bar",
                "data": values,
                "itemStyle": {
                    "color": "#440154"  # Viridis color palette (dark purple)
                },
            },
            {
                # NOTE(review): "errorbar" is not one of the built-in ECharts
                # series types -- confirm the frontend renders it, otherwise a
                # "custom" series is needed for error bars.
                "name": "Error Bars",
                "type": "errorbar",
                "data": [
                    [val - err, val + err] for val, err in zip(values, error_bars)
                ],
                "itemStyle": {"color": "#1f77b4"},
            },
        ],
    }
    return options


@op("BioNeMo > Plot accuracy comparison", view="visualization")
def accuracy_comparison(benchmark_output10m, benchmark_output100m):
    """Plot mean test accuracy (with +/- one std) of the 10M and 106M models."""
    return _metric_comparison_options(
        benchmark_output10m, benchmark_output100m, "test_accuracy", "Accuracy"
    )


@op("BioNeMo > Plot f1 comparison", view="visualization")
def f1_comparison(benchmark_output10m, benchmark_output100m):
    """Plot mean test macro-F1 (with +/- one std) of the 10M and 106M models."""
    return _metric_comparison_options(
        benchmark_output10m, benchmark_output100m, "test_f1_score", "F1 Score"
    )