Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

JMLizano JMLizano

darabos committed on Mar 6

Commit

7baf2a1

unverified ·

1 Parent(s): 10c9dc3

BioNemo demo (#84)

Browse files

* Add BioNeMo integration, single demo for now
---------

Co-authored-by: JMLizano <[email protected]>
Co-authored-by: Daniel Darabos <[email protected]>

Files changed (8) hide show

examples/BioNemo demo +985 -0
lynxkite-app/src/lynxkite_app/__main__.py +7 -1
lynxkite-app/web/src/workspace/nodes/NodeWithVisualization.tsx +2 -2
lynxkite-graph-analytics/.dockerignore +3 -0
lynxkite-graph-analytics/Dockerfile.bionemo +17 -0
lynxkite-graph-analytics/README.md +41 -0
lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py +3 -0
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py +519 -0

examples/BioNemo demo ADDED Viewed

	@@ -0,0 +1,985 @@

+{
+  "edges": [
+    {
+      "id": "BioNeMo > Import H5AD file 1 BioNeMo > Get labels 1",
+      "source": "BioNeMo > Import H5AD file 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Get labels 1",
+      "targetHandle": "adata"
+    },
+    {
+      "id": "BioNeMo > Download CELLxGENE dataset 1 BioNeMo > Infer 1",
+      "source": "BioNeMo > Download CELLxGENE dataset 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Infer 1",
+      "targetHandle": "dataset_path"
+    },
+    {
+      "id": "BioNeMo > Download model 2 BioNeMo > Infer 1",
+      "source": "BioNeMo > Download model 2",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Infer 1",
+      "targetHandle": "model_path"
+    },
+    {
+      "id": "BioNeMo > Download CELLxGENE dataset 1 BioNeMo > Infer 2",
+      "source": "BioNeMo > Download CELLxGENE dataset 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Infer 2",
+      "targetHandle": "dataset_path"
+    },
+    {
+      "id": "BioNeMo > Download model 1 BioNeMo > Infer 2",
+      "source": "BioNeMo > Download model 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Infer 2",
+      "targetHandle": "model_path"
+    },
+    {
+      "id": "BioNeMo > Infer 2 BioNeMo > Load results 1",
+      "source": "BioNeMo > Infer 2",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Load results 1",
+      "targetHandle": "results_path"
+    },
+    {
+      "id": "BioNeMo > Load results 1 BioNeMo > Run benchmark 1",
+      "source": "BioNeMo > Load results 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Run benchmark 1",
+      "targetHandle": "data"
+    },
+    {
+      "id": "BioNeMo > Get labels 1 BioNeMo > Run benchmark 1",
+      "source": "BioNeMo > Get labels 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Run benchmark 1",
+      "targetHandle": "labels"
+    },
+    {
+      "id": "BioNeMo > Infer 1 BioNeMo > Load results 2",
+      "source": "BioNeMo > Infer 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Load results 2",
+      "targetHandle": "results_path"
+    },
+    {
+      "id": "BioNeMo > Load results 2 BioNeMo > Run benchmark 2",
+      "source": "BioNeMo > Load results 2",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Run benchmark 2",
+      "targetHandle": "data"
+    },
+    {
+      "id": "BioNeMo > Get labels 1 BioNeMo > Run benchmark 2",
+      "source": "BioNeMo > Get labels 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Run benchmark 2",
+      "targetHandle": "labels"
+    },
+    {
+      "id": "BioNeMo > Run benchmark 2 BioNeMo > Plot f1 comparison 1",
+      "source": "BioNeMo > Run benchmark 2",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Plot f1 comparison 1",
+      "targetHandle": "benchmark_output10m"
+    },
+    {
+      "id": "BioNeMo > Run benchmark 1 BioNeMo > Plot f1 comparison 1",
+      "source": "BioNeMo > Run benchmark 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Plot f1 comparison 1",
+      "targetHandle": "benchmark_output100m"
+    },
+    {
+      "id": "BioNeMo > Run benchmark 2 BioNeMo > Plot accuracy comparison 1",
+      "source": "BioNeMo > Run benchmark 2",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Plot accuracy comparison 1",
+      "targetHandle": "benchmark_output10m"
+    },
+    {
+      "id": "BioNeMo > Run benchmark 1 BioNeMo > Plot accuracy comparison 1",
+      "source": "BioNeMo > Run benchmark 1",
+      "sourceHandle": "output",
+      "target": "BioNeMo > Plot accuracy comparison 1",
+      "targetHandle": "benchmark_output100m"
+    }
+  ],
+  "env": "LynxKite Graph Analytics",
+  "nodes": [
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {},
+          "name": "BioNeMo > Import H5AD file",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "file_path": {
+              "default": null,
+              "name": "file_path",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 504.0,
+            "y": 355.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "file_path": "hs-celltype-bench.h5ad"
+        },
+        "status": "done",
+        "title": "BioNeMo > Import H5AD file"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 347.0,
+      "id": "BioNeMo > Import H5AD file 1",
+      "position": {
+        "x": 975.3920617976814,
+        "y": 246.19491328410817
+      },
+      "type": "basic",
+      "width": 295.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "adata": {
+              "name": "adata",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "BioNeMo > Get labels",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {},
+          "position": {
+            "x": 389.0,
+            "y": 633.0
+          },
+          "type": "basic"
+        },
+        "params": {},
+        "status": "done",
+        "title": "BioNeMo > Get labels"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Get labels 1",
+      "position": {
+        "x": 1330.5731290863628,
+        "y": 322.77821619446473
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {},
+          "name": "BioNeMo > Download model",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "model_name": {
+              "default": null,
+              "name": "model_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1026.0,
+            "y": 839.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "model_name": "geneformer_100m"
+        },
+        "status": "done",
+        "title": "BioNeMo > Download model"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Download model 1",
+      "position": {
+        "x": 551.1714527812203,
+        "y": 629.2951247275757
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {},
+          "name": "BioNeMo > Download model",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "model_name": {
+              "default": null,
+              "name": "model_name",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 939.0,
+            "y": 523.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "model_name": "geneformer_10m"
+        },
+        "status": "done",
+        "title": "BioNeMo > Download model"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Download model 2",
+      "position": {
+        "x": 556.2267014450949,
+        "y": 313.55564323889297
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {},
+          "name": "BioNeMo > Download CELLxGENE dataset",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "census_version": {
+              "default": "2023-12-15",
+              "name": "census_version",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "max_workers": {
+              "default": 1.0,
+              "name": "max_workers",
+              "type": {
+                "type": "<class 'int'>"
+              }
+            },
+            "organism": {
+              "default": "Homo sapiens",
+              "name": "organism",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "save_path": {
+              "default": null,
+              "name": "save_path",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "use_mp": {
+              "default": false,
+              "name": "use_mp",
+              "type": {
+                "type": "<class 'bool'>"
+              }
+            },
+            "value_filter": {
+              "default": "dataset_id==\"8e47ed12-c658-4252-b126-381df8d52a3d\"",
+              "name": "value_filter",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1020.0,
+            "y": 262.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "census_version": "2023-12-15",
+          "max_workers": 1.0,
+          "organism": "Homo sapiens",
+          "save_path": "celltype-bench-dataset",
+          "use_mp": false,
+          "value_filter": "dataset_id==\"8e47ed12-c658-4252-b126-381df8d52a3d\""
+        },
+        "status": "done",
+        "title": "BioNeMo > Download CELLxGENE dataset"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 421.0,
+      "id": "BioNeMo > Download CELLxGENE dataset 1",
+      "position": {
+        "x": 414.9692093497506,
+        "y": -221.8644693915577
+      },
+      "type": "basic",
+      "width": 240.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "dataset_path": {
+              "name": "dataset_path",
+              "position": "left",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "model_path": {
+              "name": "model_path",
+              "position": "left",
+              "type": {
+                "type": "str | None"
+              }
+            }
+          },
+          "name": "BioNeMo > Infer",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "results_path": {
+              "default": null,
+              "name": "results_path",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1544.0,
+            "y": 356.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "results_path": "results_10m"
+        },
+        "status": "done",
+        "title": "BioNeMo > Infer"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Infer 1",
+      "position": {
+        "x": 1039.04712219626,
+        "y": -43.33924107744772
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "__execution_delay": 0.0,
+        "collapsed": null,
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "dataset_path": {
+              "name": "dataset_path",
+              "position": "left",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            },
+            "model_path": {
+              "name": "model_path",
+              "position": "left",
+              "type": {
+                "type": "str | None"
+              }
+            }
+          },
+          "name": "BioNeMo > Infer",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "results_path": {
+              "default": null,
+              "name": "results_path",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1256.0,
+            "y": 1005.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "results_path": "results_100m"
+        },
+        "status": "done",
+        "title": "BioNeMo > Infer"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Infer 2",
+      "position": {
+        "x": 1030.3289199948294,
+        "y": 636.5914302771178
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "results_path": {
+              "name": "results_path",
+              "position": "left",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "name": "BioNeMo > Load results",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {},
+          "position": {
+            "x": 1506.0,
+            "y": 804.0
+          },
+          "type": "basic"
+        },
+        "params": {},
+        "status": "done",
+        "title": "BioNeMo > Load results"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Load results 1",
+      "position": {
+        "x": 1316.753212112243,
+        "y": 588.3511253627433
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "data": {
+              "name": "data",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "labels": {
+              "name": "labels",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "BioNeMo > Run benchmark",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "use_pca": {
+              "default": false,
+              "name": "use_pca",
+              "type": {
+                "type": "<class 'bool'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1698.0,
+            "y": 929.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "use_pca": false
+        },
+        "status": "done",
+        "title": "BioNeMo > Run benchmark"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 254.0,
+      "id": "BioNeMo > Run benchmark 1",
+      "position": {
+        "x": 1717.5260843687468,
+        "y": 601.9085109739857
+      },
+      "type": "basic",
+      "width": 218.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "results_path": {
+              "name": "results_path",
+              "position": "left",
+              "type": {
+                "type": "<class 'str'>"
+              }
+            }
+          },
+          "name": "BioNeMo > Load results",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {},
+          "position": {
+            "x": 1314.0,
+            "y": 286.0
+          },
+          "type": "basic"
+        },
+        "params": {},
+        "status": "done",
+        "title": "BioNeMo > Load results"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Load results 2",
+      "position": {
+        "x": 1371.1643035406682,
+        "y": -38.628856650688306
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "display": null,
+        "error": null,
+        "meta": {
+          "inputs": {
+            "data": {
+              "name": "data",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "labels": {
+              "name": "labels",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "BioNeMo > Run benchmark",
+          "outputs": {
+            "output": {
+              "name": "output",
+              "position": "right",
+              "type": {
+                "type": "None"
+              }
+            }
+          },
+          "params": {
+            "use_pca": {
+              "default": false,
+              "name": "use_pca",
+              "type": {
+                "type": "<class 'bool'>"
+              }
+            }
+          },
+          "position": {
+            "x": 1576.0,
+            "y": 395.0
+          },
+          "type": "basic"
+        },
+        "params": {
+          "use_pca": false
+        },
+        "status": "done",
+        "title": "BioNeMo > Run benchmark"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Run benchmark 2",
+      "position": {
+        "x": 1740.0,
+        "y": 120.0
+      },
+      "type": "basic",
+      "width": 200.0
+    },
+    {
+      "data": {
+        "display": {
+          "grid": {
+            "bottom": "10%",
+            "height": "70%",
+            "left": "20%",
+            "right": "10%",
+            "top": "10%",
+            "width": "70%"
+          },
+          "series": [
+            {
+              "data": [
+                0.7020536292780548,
+                0.843335333719808
+              ],
+              "itemStyle": {
+                "color": "#440154"
+              },
+              "name": "F1 Score",
+              "type": "bar"
+            },
+            {
+              "data": [
+                [
+                  0.6853106016807672,
+                  0.7187966568753424
+                ],
+                [
+                  0.8270726644727397,
+                  0.8595980029668762
+                ]
+              ],
+              "itemStyle": {
+                "color": "#1f77b4"
+              },
+              "name": "Error Bars",
+              "type": "errorbar"
+            }
+          ],
+          "title": {
+            "left": "center",
+            "text": "F1 Score Comparison",
+            "textStyle": {
+              "fontSize": 20,
+              "fontWeight": "bold"
+            }
+          },
+          "tooltip": {
+            "axisPointer": {
+              "type": "shadow"
+            },
+            "trigger": "axis"
+          },
+          "xAxis": {
+            "axisLabel": {
+              "align": "right",
+              "rotate": 45,
+              "textStyle": {
+                "fontSize": 14,
+                "fontWeight": "bold"
+              }
+            },
+            "data": [
+              "10M parameters",
+              "106M parameters"
+            ],
+            "type": "category"
+          },
+          "yAxis": {
+            "axisLabel": {
+              "textStyle": {
+                "fontSize": 14,
+                "fontWeight": "bold"
+              }
+            },
+            "interval": 0.1,
+            "max": 1,
+            "min": 0,
+            "name": "F1 Score",
+            "type": "value"
+          }
+        },
+        "error": null,
+        "meta": {
+          "inputs": {
+            "benchmark_output100m": {
+              "name": "benchmark_output100m",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "benchmark_output10m": {
+              "name": "benchmark_output10m",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "BioNeMo > Plot f1 comparison",
+          "outputs": {},
+          "params": {},
+          "position": {
+            "x": 1716.0,
+            "y": 309.0
+          },
+          "type": "visualization"
+        },
+        "params": {},
+        "status": "done",
+        "title": "BioNeMo > Plot f1 comparison"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 863.0,
+      "id": "BioNeMo > Plot f1 comparison 1",
+      "position": {
+        "x": 2091.687426186124,
+        "y": -368.096892874947
+      },
+      "type": "visualization",
+      "width": 1034.0
+    },
+    {
+      "data": {
+        "display": {
+          "grid": {
+            "bottom": "10%",
+            "height": "70%",
+            "left": "20%",
+            "right": "10%",
+            "top": "10%",
+            "width": "70%"
+          },
+          "series": [
+            {
+              "data": [
+                0.8385031821273431,
+                0.9053958718388249
+              ],
+              "itemStyle": {
+                "color": "#440154"
+              },
+              "name": "Accuracy",
+              "type": "bar"
+            },
+            {
+              "data": [
+                [
+                  0.8221974395834195,
+                  0.8548089246712667
+                ],
+                [
+                  0.8901141406971089,
+                  0.9206776029805408
+                ]
+              ],
+              "itemStyle": {
+                "color": "#1f77b4"
+              },
+              "name": "Error Bars",
+              "type": "errorbar"
+            }
+          ],
+          "title": {
+            "left": "center",
+            "text": "Accuracy Comparison",
+            "textStyle": {
+              "fontSize": 20,
+              "fontWeight": "bold"
+            }
+          },
+          "tooltip": {
+            "axisPointer": {
+              "type": "shadow"
+            },
+            "trigger": "axis"
+          },
+          "xAxis": {
+            "axisLabel": {
+              "align": "right",
+              "rotate": 45,
+              "textStyle": {
+                "fontSize": 14,
+                "fontWeight": "bold"
+              }
+            },
+            "data": [
+              "10M parameters",
+              "106M parameters"
+            ],
+            "type": "category"
+          },
+          "yAxis": {
+            "axisLabel": {
+              "textStyle": {
+                "fontSize": 14,
+                "fontWeight": "bold"
+              }
+            },
+            "interval": 0.1,
+            "max": 1,
+            "min": 0,
+            "name": "Accuracy",
+            "type": "value"
+          }
+        },
+        "error": null,
+        "meta": {
+          "inputs": {
+            "benchmark_output100m": {
+              "name": "benchmark_output100m",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            },
+            "benchmark_output10m": {
+              "name": "benchmark_output10m",
+              "position": "left",
+              "type": {
+                "type": "<class 'inspect._empty'>"
+              }
+            }
+          },
+          "name": "BioNeMo > Plot accuracy comparison",
+          "outputs": {},
+          "params": {},
+          "position": {
+            "x": 1574.0,
+            "y": 720.0
+          },
+          "type": "visualization"
+        },
+        "params": {},
+        "status": "done",
+        "title": "BioNeMo > Plot accuracy comparison"
+      },
+      "dragHandle": ".bg-primary",
+      "height": 200.0,
+      "id": "BioNeMo > Plot accuracy comparison 1",
+      "position": {
+        "x": 2160.0,
+        "y": 915.0
+      },
+      "type": "visualization",
+      "width": 200.0
+    }
+  ]
+}

lynxkite-app/src/lynxkite_app/__main__.py CHANGED Viewed

@@ -6,7 +6,13 @@ import os
 def main():
     port = int(os.environ.get("PORT", "8000"))
     reload = bool(os.environ.get("LYNXKITE_RELOAD", ""))
-    uvicorn.run("lynxkite_app.main:app", host="0.0.0.0", port=port, reload=reload)
 if __name__ == "__main__":

 def main():
     port = int(os.environ.get("PORT", "8000"))
     reload = bool(os.environ.get("LYNXKITE_RELOAD", ""))
+    uvicorn.run(
+        "lynxkite_app.main:app",
+        host="0.0.0.0",
+        port=port,
+        reload=reload,
+        loop="asyncio",
+    )
 if __name__ == "__main__":

lynxkite-app/web/src/workspace/nodes/NodeWithVisualization.tsx CHANGED Viewed

@@ -10,8 +10,8 @@ const NodeWithVisualization = (props: any) => {
     if (!opts || !chartsRef.current) return;
     chartsInstanceRef.current = echarts.init(chartsRef.current, null, {
       renderer: "canvas",
-      width: 250,
-      height: 250,
     });
     chartsInstanceRef.current.setOption(opts);
     const onResize = () => chartsInstanceRef.current?.resize();

     if (!opts || !chartsRef.current) return;
     chartsInstanceRef.current = echarts.init(chartsRef.current, null, {
       renderer: "canvas",
+      width: 800,
+      height: 800,
     });
     chartsInstanceRef.current.setOption(opts);
     const onResize = () => chartsInstanceRef.current?.resize();

lynxkite-graph-analytics/.dockerignore ADDED Viewed

	@@ -0,0 +1,3 @@

+lynxkite_data
+lynxkite_crdt_data
+.venv

lynxkite-graph-analytics/Dockerfile.bionemo ADDED Viewed

	@@ -0,0 +1,17 @@

+FROM nvcr.io/nvidia/clara/bionemo-framework:nightly
+ENV LYNXKITE_BIONEMO_INSTALLED=true
+WORKDIR /app
+# Download and install nvm
+RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.2/install.sh |  bash
+RUN echo node > .nvmrc
+RUN source /root/.nvm/nvm.sh --install
+COPY . /app
+RUN uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-bio  -e lynxkite-pillow-example/
+# bionemo cellxgene_census needs this version of numpy
+RUN uv pip install numpy==1.26.4

lynxkite-graph-analytics/README.md CHANGED Viewed

@@ -11,3 +11,44 @@ pip install lynxkite lynxkite-graph-analytics
 ```
 Run LynxKite with `NX_CUGRAPH_AUTOCONFIG=True` to enable GPU-accelerated graph data science operations.

 ```
 Run LynxKite with `NX_CUGRAPH_AUTOCONFIG=True` to enable GPU-accelerated graph data science operations.
+## BioNemo
+If you want to use BioNemo operations, then you will have to use the provided Docker image, or
+install BioNemo manually in your environment.
+Note that BioNemo needs a GPU to work; you can find the specific requirements
+[here](https://docs.nvidia.com/bionemo-framework/latest/user-guide/getting-started/pre-reqs/).
+The import of BioNemo operations is gated behind the `LYNXKITE_BIONEMO_INSTALLED` variable.
+BioNemo operations will only be imported if this environment variable is set to true.
+To build the image:
+```bash
+# in lynxkite-graph-analytics folder
+$ docker build -f Dockerfile.bionemo -t lynxkite-bionemo ..
+```
+Take into account that this Dockerfile does not include the lynxkite-lynxscribe package. If you want to include it you will
+need to set up git credentials inside the container.
+Then, inside the image you can start LynxKite as usual.
+If you want to do some development, then it is recommended to use the [devcontainers](https://code.visualstudio.com/docs/devcontainers/containers)
+vscode extension. The following is a basic configuration to get started:
+```json
+// .devcontainer/devcontainer.json
+{
+	"name": "Existing Dockerfile",
+	"runArgs": [
+		"--gpus=all",
+		"--shm-size=4g"
+	],
+	"build": {
+		"context": "..",
+		"dockerfile": "../lynxkite-graph-analytics/Dockerfile.bionemo"
+	}
+}
+```

lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py CHANGED Viewed

@@ -14,3 +14,6 @@ from .core import *  # noqa (easier access for core classes)
 from . import lynxkite_ops  # noqa (imported to trigger registration)
 from . import networkx_ops  # noqa (imported to trigger registration)
 from . import pytorch_model_ops  # noqa (imported to trigger registration)

 from . import lynxkite_ops  # noqa (imported to trigger registration)
 from . import networkx_ops  # noqa (imported to trigger registration)
 from . import pytorch_model_ops  # noqa (imported to trigger registration)
+# BioNeMo ops pull in optional dependencies at import time, so they are opt-in:
+# they are only imported (and thereby registered) when LYNXKITE_BIONEMO_INSTALLED
+# is "true" (case-insensitive, surrounding whitespace ignored).
+if os.environ.get("LYNXKITE_BIONEMO_INSTALLED", "").strip().lower() == "true":
+    from . import bionemo_ops  # noqa (imported to trigger registration)

lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py ADDED Viewed

	@@ -0,0 +1,519 @@

+"""BioNeMo related operations
+The intention is to showcase how BioNeMo can be integrated with LynxKite. This should be
+considered a reference implementation, not production-ready code.
+The operations are quite specific for this example notebook:
+https://github.com/NVIDIA/bionemo-framework/blob/main/docs/docs/user-guide/examples/bionemo-geneformer/geneformer-celltype-classification.ipynb
+"""
+from lynxkite.core import ops
+import requests
+import tarfile
+import os
+from collections import Counter
+from . import core
+import joblib
+import numpy as np
+import torch
+from pathlib import Path
+import random
+from contextlib import contextmanager
+import cellxgene_census  # TODO: This needs numpy < 2
+import tempfile
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.pipeline import Pipeline
+from sklearn.model_selection import StratifiedKFold, cross_validate
+from sklearn.metrics import (
+    make_scorer,
+    accuracy_score,
+    precision_score,
+    recall_score,
+    f1_score,
+    roc_auc_score,
+    confusion_matrix,
+)
+from sklearn.decomposition import PCA
+from sklearn.model_selection import cross_val_predict
+from sklearn.preprocessing import LabelEncoder
+from bionemo.scdl.io.single_cell_collection import SingleCellCollection
+import scanpy
+# joblib cache used by the `@mem.cache` decorators below. The location is a
+# relative path, so it is resolved against the process's working directory.
+mem = joblib.Memory("../joblib-cache")
+# Decorator factory used as `@op("<name>")` on every operation in this module.
+op = ops.op_registration(core.ENV)
+# Base directory under which datasets, models and inference results are stored.
+DATA_PATH = Path("/workspace")
+@contextmanager
+def random_seed(seed: int):
+    state = random.getstate()
+    random.seed(seed)
+    try:
+        yield
+    finally:
+        # Go back to previous state
+        random.setstate(state)
+@op("BioNeMo > Download CELLxGENE dataset")
+@mem.cache()
+def download_cellxgene_dataset(
+    *,
+    save_path: str,
+    census_version: str = "2023-12-15",
+    organism: str = "Homo sapiens",
+    value_filter='dataset_id=="8e47ed12-c658-4252-b126-381df8d52a3d"',
+    max_workers: int = 1,
+    use_mp: bool = False,
+) -> None:
+    """Downloads a CELLxGENE dataset"""
+    with cellxgene_census.open_soma(census_version=census_version) as census:
+        adata = cellxgene_census.get_anndata(
+            census,
+            organism,
+            obs_value_filter=value_filter,
+        )
+    with random_seed(32):
+        indices = list(range(len(adata)))
+        random.shuffle(indices)
+    micro_batch_size: int = 32
+    num_steps: int = 256
+    selection = sorted(indices[: micro_batch_size * num_steps])
+    # NOTE: there's a current constraint that predict_step needs to be a function of micro-batch-size.
+    #  this is something we are working on fixing. A quick hack is to set micro-batch-size=1, but this is
+    #  slow. In this notebook we are going to use mbs=32 and subsample the anndata.
+    adata = adata[selection].copy()  # so it's not a view
+    h5ad_outfile = DATA_PATH / Path("hs-celltype-bench.h5ad")
+    adata.write_h5ad(h5ad_outfile)
+    with tempfile.TemporaryDirectory() as temp_dir:
+        coll = SingleCellCollection(temp_dir)
+        coll.load_h5ad_multi(
+            h5ad_outfile.parent, max_workers=max_workers, use_processes=use_mp
+        )
+        coll.flatten(DATA_PATH / save_path, destroy_on_copy=True)
+    return DATA_PATH / save_path
+@op("BioNeMo > Import H5AD file")
+def import_h5ad(*, file_path: str):
+    return scanpy.read_h5ad(DATA_PATH / Path(file_path))
+@op("BioNeMo > Download model")
+@mem.cache(verbose=1)
+def download_model(*, model_name: str) -> str:
+    """Downloads a model."""
+    model_download_parameters = {
+        "geneformer_100m": {
+            "name": "geneformer_100m",
+            "version": "2.0",
+            "path": "geneformer_106M_240530_nemo2",
+        },
+        "geneformer_10m": {
+            "name": "geneformer_10m",
+            "version": "2.0",
+            "path": "geneformer_10M_240530_nemo2",
+        },
+        "geneformer_10m2": {
+            "name": "geneformer_10m",
+            "version": "2.1",
+            "path": "geneformer_10M_241113_nemo2",
+        },
+    }
+    # Define the URL and output file
+    url_template = "https://api.ngc.nvidia.com/v2/models/org/nvidia/team/clara/{name}/{version}/files?redirect=true&path={path}.tar.gz"
+    url = url_template.format(**model_download_parameters[model_name])
+    model_filename = f"{DATA_PATH}/{model_download_parameters[model_name]['path']}"
+    output_file = f"{model_filename}.tar.gz"
+    # Send the request
+    response = requests.get(url, allow_redirects=True, stream=True)
+    response.raise_for_status()  # Raise an error for bad responses (4xx and 5xx)
+    # Save the file to disk
+    with open(f"{output_file}", "wb") as file:
+        for chunk in response.iter_content(chunk_size=8192):
+            file.write(chunk)
+    # Extract the tar.gz file
+    os.makedirs(model_filename, exist_ok=True)
+    with tarfile.open(output_file, "r:gz") as tar:
+        tar.extractall(path=model_filename)
+    return model_filename
+@op("BioNeMo > Infer")
+@mem.cache(verbose=1)
+def infer(
+    dataset_path: str, model_path: str | None = None, *, results_path: str
+) -> str:
+    """Infer on a dataset."""
+    # This import is slow, so we only import it when we need it.
+    from bionemo.geneformer.scripts.infer_geneformer import infer_model
+    infer_model(
+        data_path=dataset_path,
+        checkpoint_path=model_path,
+        results_path=DATA_PATH / results_path,
+        include_hiddens=False,
+        micro_batch_size=32,
+        include_embeddings=True,
+        include_logits=False,
+        seq_length=2048,
+        precision="bf16-mixed",
+        devices=1,
+        num_nodes=1,
+        num_dataset_workers=10,
+    )
+    return DATA_PATH / results_path
+@op("BioNeMo > Load results")
+def load_results(results_path: str):
+    embeddings = (
+        torch.load(f"{results_path}/predictions__rank_0.pt")["embeddings"]
+        .float()
+        .cpu()
+        .numpy()
+    )
+    return embeddings
+@op("BioNeMo > Get labels")
+def get_labels(adata):
+    infer_metadata = adata.obs
+    labels = infer_metadata["cell_type"].values
+    label_encoder = LabelEncoder()
+    integer_labels = label_encoder.fit_transform(labels)
+    label_encoder.integer_labels = integer_labels
+    return label_encoder
+@op("BioNeMo > Plot labels", view="visualization")
+def plot_labels(adata):
+    infer_metadata = adata.obs
+    labels = infer_metadata["cell_type"].values
+    label_counts = Counter(labels)
+    labels = list(label_counts.keys())
+    values = list(label_counts.values())
+    options = {
+        "title": {
+            "text": "Cell type counts for classification dataset",
+            "left": "center",
+        },
+        "tooltip": {"trigger": "axis", "axisPointer": {"type": "shadow"}},
+        "xAxis": {
+            "type": "category",
+            "data": labels,
+            "axisLabel": {"rotate": 45, "align": "right"},
+        },
+        "yAxis": {"type": "value"},
+        "series": [
+            {
+                "name": "Count",
+                "type": "bar",
+                "data": values,
+                "itemStyle": {"color": "#4285F4"},
+            }
+        ],
+    }
+    return options
+@op("BioNeMo > Run benchmark")
+@mem.cache(verbose=1)
+def run_benchmark(data, labels, *, use_pca: bool = False):
+    """Cross-validates a random forest classifier on `data` and reports its metrics.
+
+    data - contains the single cell expression (or whatever feature) in each row.
+    labels - object exposing `integer_labels`, the integer-encoded label of each row.
+    use_pca - if True, project the features with PCA before classification.
+    data_shape (R, C)
+    integer_labels_shape (R,)
+
+    Returns a tuple `(results_out, conf_matrix)`: `results_out` maps each
+    "test_<metric>" key to a `(mean, std)` pair over the folds, and `conf_matrix`
+    is the confusion matrix of the cross-validated predictions.
+    """
+    # Seed NumPy's global RNG. Presumably this is what makes the unseeded
+    # RandomForestClassifier below reproducible -- confirm.
+    np.random.seed(1337)
+    # Define the target dimension 'n_components'
+    n_components = 10  # for example, adjust based on your specific needs
+    # Create a pipeline with an optional PCA projection followed by a RandomForestClassifier
+    if use_pca:
+        pipeline = Pipeline(
+            [
+                ("projection", PCA(n_components=n_components)),
+                ("classifier", RandomForestClassifier(class_weight="balanced")),
+            ]
+        )
+    else:
+        pipeline = Pipeline(
+            [("classifier", RandomForestClassifier(class_weight="balanced"))]
+        )
+    # Set up StratifiedKFold to ensure each fold reflects the overall distribution of labels
+    cv = StratifiedKFold(n_splits=5)
+    # Define the scoring functions
+    scoring = {
+        "accuracy": make_scorer(accuracy_score),
+        "precision": make_scorer(
+            precision_score, average="macro"
+        ),  # 'macro' averages over classes
+        "recall": make_scorer(recall_score, average="macro"),
+        "f1_score": make_scorer(f1_score, average="macro"),
+        # 'roc_auc' requires probability or decision function; hence use multi_class if applicable
+        # NOTE(review): no response-method argument is passed to make_scorer here,
+        # so confirm that roc_auc_score actually receives probabilities.
+        "roc_auc": make_scorer(roc_auc_score, multi_class="ovr"),
+    }
+    # From here on `labels` is the integer-encoded label array.
+    labels = labels.integer_labels
+    # Perform stratified cross-validation with multiple metrics using the pipeline
+    results = cross_validate(
+        pipeline, data, labels, cv=cv, scoring=scoring, return_train_score=False
+    )
+    # Print the cross-validation results
+    print("Cross-validation metrics:")
+    results_out = {}
+    for metric, scores in results.items():
+        # Keep only the per-metric test scores (skips e.g. timing entries).
+        if metric.startswith("test_"):
+            results_out[metric] = (scores.mean(), scores.std())
+            print(f"{metric[5:]}: {scores.mean():.3f} (+/- {scores.std():.3f})")
+    # Second cross-validation pass to collect per-sample predictions.
+    predictions = cross_val_predict(pipeline, data, labels, cv=cv)
+    # Return confusion matrix and metrics.
+    conf_matrix = confusion_matrix(labels, predictions)
+    return results_out, conf_matrix
+@op("BioNeMo > Plot confusion matrix", view="visualization")
+@mem.cache(verbose=1)
+def plot_confusion_matrix(benchmark_output, labels):
+    cm = benchmark_output[1]
+    labels = labels.classes_
+    str_labels = [str(label) for label in labels]
+    norm_cm = [[float(val / sum(row)) if sum(row) else 0 for val in row] for row in cm]
+    # heatmap has the 0,0 at the bottom left corner
+    num_rows = len(str_labels)
+    heatmap_data = [
+        [j, num_rows - i - 1, norm_cm[i][j]]
+        for i in range(len(labels))
+        for j in range(len(labels))
+    ]
+    options = {
+        "title": {"text": "Confusion Matrix", "left": "center"},
+        "tooltip": {"position": "top"},
+        "xAxis": {
+            "type": "category",
+            "data": str_labels,
+            "splitArea": {"show": True},
+            "axisLabel": {"rotate": 70, "align": "right"},
+        },
+        "yAxis": {
+            "type": "category",
+            "data": list(reversed(str_labels)),
+            "splitArea": {"show": True},
+        },
+        "grid": {
+            "height": "70%",
+            "width": "70%",
+            "left": "20%",
+            "right": "10%",
+            "bottom": "10%",
+            "top": "10%",
+        },
+        "visualMap": {
+            "min": 0,
+            "max": 1,
+            "calculable": True,
+            "orient": "vertical",
+            "right": 10,
+            "top": "center",
+            "inRange": {
+                "color": ["#E0F7FA", "#81D4FA", "#29B6F6", "#0288D1", "#01579B"]
+            },
+        },
+        "series": [
+            {
+                "name": "Confusion matrix",
+                "type": "heatmap",
+                "data": heatmap_data,
+                "emphasis": {"itemStyle": {"borderColor": "#333", "borderWidth": 1}},
+                "itemStyle": {"borderColor": "#D3D3D3", "borderWidth": 2},
+            }
+        ],
+    }
+    return options
+@op("BioNeMo > Plot accuracy comparison", view="visualization")
+def accuracy_comparison(benchmark_output10m, benchmark_output100m):
+    results_10m = benchmark_output10m[0]
+    results_106M = benchmark_output100m[0]
+    data = {
+        "model": ["10M parameters", "106M parameters"],
+        "accuracy_mean": [
+            results_10m["test_accuracy"][0],
+            results_106M["test_accuracy"][0],
+        ],
+        "accuracy_std": [
+            results_10m["test_accuracy"][1],
+            results_106M["test_accuracy"][1],
+        ],
+    }
+    labels = data["model"]  # X-axis labels
+    values = data["accuracy_mean"]  # Y-axis values
+    error_bars = data["accuracy_std"]  # Standard deviation for error bars
+    options = {
+        "title": {
+            "text": "Accuracy Comparison",
+            "left": "center",
+            "textStyle": {
+                "fontSize": 20,  # Bigger font for title
+                "fontWeight": "bold",  # Make title bold
+            },
+        },
+        "grid": {
+            "height": "70%",
+            "width": "70%",
+            "left": "20%",
+            "right": "10%",
+            "bottom": "10%",
+            "top": "10%",
+        },
+        "tooltip": {"trigger": "axis", "axisPointer": {"type": "shadow"}},
+        "xAxis": {
+            "type": "category",
+            "data": labels,
+            "axisLabel": {
+                "rotate": 45,  # Rotate labels for better readability
+                "align": "right",
+                "textStyle": {
+                    "fontSize": 14,  # Bigger font for X-axis labels
+                    "fontWeight": "bold",
+                },
+            },
+        },
+        "yAxis": {
+            "type": "value",
+            "name": "Accuracy",
+            "min": 0,
+            "max": 1,
+            "interval": 0.1,  # Matches np.arange(0, 1.05, 0.05)
+            "axisLabel": {
+                "textStyle": {
+                    "fontSize": 14,  # Bigger font for X-axis labels
+                    "fontWeight": "bold",
+                }
+            },
+        },
+        "series": [
+            {
+                "name": "Accuracy",
+                "type": "bar",
+                "data": values,
+                "itemStyle": {
+                    "color": "#440154"  # Viridis color palette (dark purple)
+                },
+            },
+            {
+                "name": "Error Bars",
+                "type": "errorbar",
+                "data": [
+                    [val - err, val + err] for val, err in zip(values, error_bars)
+                ],
+                "itemStyle": {"color": "#1f77b4"},
+            },
+        ],
+    }
+    return options
+@op("BioNeMo > Plot f1 comparison", view="visualization")
+def f1_comparison(benchmark_output10m, benchmark_output100m):
+    results_10m = benchmark_output10m[0]
+    results_106M = benchmark_output100m[0]
+    data = {
+        "model": ["10M parameters", "106M parameters"],
+        "f1_score_mean": [
+            results_10m["test_f1_score"][0],
+            results_106M["test_f1_score"][0],
+        ],
+        "f1_score_std": [
+            results_10m["test_f1_score"][1],
+            results_106M["test_f1_score"][1],
+        ],
+    }
+    labels = data["model"]  # X-axis labels
+    values = data["f1_score_mean"]  # Y-axis values
+    error_bars = data["f1_score_std"]  # Standard deviation for error bars
+    options = {
+        "title": {
+            "text": "F1 Score Comparison",
+            "left": "center",
+            "textStyle": {
+                "fontSize": 20,  # Bigger font for title
+                "fontWeight": "bold",  # Make title bold
+            },
+        },
+        "grid": {
+            "height": "70%",
+            "width": "70%",
+            "left": "20%",
+            "right": "10%",
+            "bottom": "10%",
+            "top": "10%",
+        },
+        "tooltip": {"trigger": "axis", "axisPointer": {"type": "shadow"}},
+        "xAxis": {
+            "type": "category",
+            "data": labels,
+            "axisLabel": {
+                "rotate": 45,  # Rotate labels for better readability
+                "align": "right",
+                "textStyle": {
+                    "fontSize": 14,  # Bigger font for X-axis labels
+                    "fontWeight": "bold",
+                },
+            },
+        },
+        "yAxis": {
+            "type": "value",
+            "name": "F1 Score",
+            "min": 0,
+            "max": 1,
+            "interval": 0.1,  # Matches np.arange(0, 1.05, 0.05),
+            "axisLabel": {
+                "textStyle": {
+                    "fontSize": 14,  # Bigger font for X-axis labels
+                    "fontWeight": "bold",
+                }
+            },
+        },
+        "series": [
+            {
+                "name": "F1 Score",
+                "type": "bar",
+                "data": values,
+                "itemStyle": {
+                    "color": "#440154"  # Viridis color palette (dark purple)
+                },
+            },
+            {
+                "name": "Error Bars",
+                "type": "errorbar",
+                "data": [
+                    [val - err, val + err] for val, err in zip(values, error_bars)
+                ],
+                "itemStyle": {"color": "#1f77b4"},
+            },
+        ],
+    }
+    return options