Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

darabos committed on Feb 21

Commit

b95d49e

1 Parent(s): ce575c3

Graph from molecule similarity.

Browse files

Files changed (7) hide show

README.md +2 -1
lynxkite-app/src/lynxkite_app/crdt.py +0 -1
lynxkite-bio/README.md +3 -0
lynxkite-bio/pyproject.toml +24 -0
lynxkite-bio/src/lynxkite_bio/__init__.py +67 -0
lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py +1 -1
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py +4 -3

README.md CHANGED Viewed

@@ -14,6 +14,7 @@ original LynxKite. The primary goals of this rewrite are:
 - `lynxkite-graph-analytics`: Graph analytics plugin. The classical LynxKite experience!
 - `lynxkite-pillow`: A simple example plugin.
 - `lynxkite-lynxscribe`: A plugin for building and running LynxScribe applications.
 - `docs`: User-facing documentation. It's shared between all packages.
 ## Development
@@ -25,7 +26,7 @@ uv venv
 source .venv/bin/activate
 uvx pre-commit install
 # The [dev] tag is only needed if you intend on running tests
-uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-lynxscribe/ -e lynxkite-pillow-example/
 ```
 This also builds the frontend, hopefully very quickly. To run it:

 - `lynxkite-graph-analytics`: Graph analytics plugin. The classical LynxKite experience!
 - `lynxkite-pillow`: A simple example plugin.
 - `lynxkite-lynxscribe`: A plugin for building and running LynxScribe applications.
+- `lynxkite-bio`: Bioinformatics additions for LynxKite Graph Analytics.
 - `docs`: User-facing documentation. It's shared between all packages.
 ## Development
 source .venv/bin/activate
 uvx pre-commit install
 # The [dev] tag is only needed if you intend on running tests
+uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-bio -e lynxkite-lynxscribe/ -e lynxkite-pillow-example/
 ```
 This also builds the frontend, hopefully very quickly. To run it:

lynxkite-app/src/lynxkite_app/crdt.py CHANGED Viewed

@@ -3,7 +3,6 @@
 import asyncio
 import contextlib
 import enum
-import pathlib
 import fastapi
 import os.path
 import pycrdt

 import asyncio
 import contextlib
 import enum
 import fastapi
 import os.path
 import pycrdt

lynxkite-bio/README.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # LynxKite Bio
2	+
3	+ An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications.

lynxkite-bio/pyproject.toml ADDED Viewed

	@@ -0,0 +1,24 @@

+[project]
+name = "lynxkite-bio"
+version = "0.1.0"
+description = "Additional boxes for LynxKite Graph Analytics that add algorithms for biology."
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "fsspec>=2025.2.0",
+    "joblib>=1.4.2",
+    "lynxkite-core",
+    "lynxkite-graph-analytics",
+    "pandas>=2.2.3",
+    "rdkit>=2024.9.5",
+    "scipy>=1.15.2",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.3.4",
+]
+[tool.uv.sources]
+lynxkite-core = { path = "../lynxkite-core" }
+lynxkite-graph-analytics = { path = "../lynxkite-graph-analytics" }

lynxkite-bio/src/lynxkite_bio/__init__.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""Graph analytics operations. To be split into separate files when we have more."""
+from lynxkite_graph_analytics import Bundle, RelationDefinition
+from lynxkite.core import ops
+import joblib
+import numpy as np
+import pandas as pd
+import rdkit.Chem
+import rdkit.Chem.rdFingerprintGenerator
+import rdkit.Chem.Fingerprints.ClusterMols
+import scipy
+mem = joblib.Memory("../joblib-cache")
+ENV = "LynxKite Graph Analytics"
+op = ops.op_registration(ENV)
+@op("Parse SMILES")
+def parse_smiles(bundle: Bundle, *, table="df", smiles_column="SMILES", save_as="mols"):
+    """Parse SMILES strings into RDKit molecules."""
+    df = bundle.dfs[table]
+    mols = [rdkit.Chem.MolFromSmiles(smiles) for smiles in df[smiles_column].dropna()]
+    mols = [mol for mol in mols if mol is not None]
+    bundle = bundle.copy()
+    bundle.dfs[table] = df.assign(**{save_as: mols})
+    return bundle
+def _get_similarity_matrix(mols):
+    mfpgen = rdkit.Chem.rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
+    fps = [(0, mfpgen.GetFingerprint(mol)) for mol in mols]
+    similarity_matrix = rdkit.Chem.Fingerprints.ClusterMols.GetDistanceMatrix(
+        fps, metric=rdkit.Chem.DataStructs.TanimotoSimilarity, isSimilarity=1
+    )
+    return scipy.spatial.distance.squareform(similarity_matrix)
+@op("Graph from molecule similarity")
+def graph_from_similarity(
+    bundle: Bundle, *, table="df", mols_column="mols", average_degree=10
+):
+    df = bundle.dfs[table]
+    mols = df[mols_column]
+    similarity_matrix = _get_similarity_matrix(mols)
+    i_idx, j_idx = np.triu_indices_from(similarity_matrix, k=1)
+    sim_values = similarity_matrix[i_idx, j_idx]
+    N = int(average_degree * len(mols))
+    top_n_idx = np.argsort(sim_values)[-N:]
+    top_n_pairs = [(i_idx[k], j_idx[k], sim_values[k]) for k in top_n_idx]
+    edges = pd.DataFrame(top_n_pairs, columns=["source", "target", "similarity"])
+    nodes = df.copy()
+    nodes.index.name = "id"
+    bundle = Bundle(
+        dfs={"edges": edges, "nodes": nodes},
+        relations=[
+            RelationDefinition(
+                df="edges",
+                source_column="source",
+                target_column="target",
+                source_table="nodes",
+                target_table="nodes",
+                source_key="id",
+                target_key="id",
+            )
+        ],
+    )
+    return bundle

lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from . import lynxkite_ops  # noqa (imported to trigger registration)
 from . import networkx_ops  # noqa (imported to trigger registration)
 from . import pytorch_model_ops  # noqa (imported to trigger registration)

+from .lynxkite_ops import *  # noqa (imported to trigger registration)
 from . import networkx_ops  # noqa (imported to trigger registration)
 from . import pytorch_model_ops  # noqa (imported to trigger registration)

lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py CHANGED Viewed

@@ -80,9 +80,10 @@ class Bundle:
         # TODO: Use relations.
         graph = nx.DiGraph()
         if "nodes" in self.dfs:
-            graph.add_nodes_from(
-                self.dfs["nodes"].set_index("id").to_dict("index").items()
-            )
         graph.add_edges_from(
             self.dfs["edges"][["source", "target"]].itertuples(index=False, name=None)
         )

         # TODO: Use relations.
         graph = nx.DiGraph()
         if "nodes" in self.dfs:
+            df = self.dfs["nodes"]
+            if df.index.name != "id":
+                df = df.set_index("id")
+            graph.add_nodes_from(df.to_dict("index").items())
         graph.add_edges_from(
             self.dfs["edges"][["source", "target"]].itertuples(index=False, name=None)
         )