Spaces:
Running
Running
Graph from molecule similarity.
Browse files- README.md +2 -1
- lynxkite-app/src/lynxkite_app/crdt.py +0 -1
- lynxkite-bio/README.md +3 -0
- lynxkite-bio/pyproject.toml +24 -0
- lynxkite-bio/src/lynxkite_bio/__init__.py +67 -0
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py +1 -1
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py +4 -3
README.md
CHANGED
@@ -14,6 +14,7 @@ original LynxKite. The primary goals of this rewrite are:
|
|
14 |
- `lynxkite-graph-analytics`: Graph analytics plugin. The classical LynxKite experience!
|
15 |
- `lynxkite-pillow`: A simple example plugin.
|
16 |
- `lynxkite-lynxscribe`: A plugin for building and running LynxScribe applications.
|
|
|
17 |
- `docs`: User-facing documentation. It's shared between all packages.
|
18 |
|
19 |
## Development
|
@@ -25,7 +26,7 @@ uv venv
|
|
25 |
source .venv/bin/activate
|
26 |
uvx pre-commit install
|
27 |
# The [dev] tag is only needed if you intend on running tests
|
28 |
-
uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-lynxscribe/ -e lynxkite-pillow-example/
|
29 |
```
|
30 |
|
31 |
This also builds the frontend, hopefully very quickly. To run it:
|
|
|
14 |
- `lynxkite-graph-analytics`: Graph analytics plugin. The classical LynxKite experience!
|
15 |
- `lynxkite-pillow`: A simple example plugin.
|
16 |
- `lynxkite-lynxscribe`: A plugin for building and running LynxScribe applications.
|
17 |
+
- `lynxkite-bio`: Bioinformatics additions for LynxKite Graph Analytics.
|
18 |
- `docs`: User-facing documentation. It's shared between all packages.
|
19 |
|
20 |
## Development
|
|
|
26 |
source .venv/bin/activate
|
27 |
uvx pre-commit install
|
28 |
# The [dev] tag is only needed if you intend on running tests
|
29 |
+
uv pip install -e lynxkite-core/[dev] -e lynxkite-app/[dev] -e lynxkite-graph-analytics/[dev] -e lynxkite-bio -e lynxkite-lynxscribe/ -e lynxkite-pillow-example/
|
30 |
```
|
31 |
|
32 |
This also builds the frontend, hopefully very quickly. To run it:
|
lynxkite-app/src/lynxkite_app/crdt.py
CHANGED
@@ -3,7 +3,6 @@
|
|
3 |
import asyncio
|
4 |
import contextlib
|
5 |
import enum
|
6 |
-
import pathlib
|
7 |
import fastapi
|
8 |
import os.path
|
9 |
import pycrdt
|
|
|
3 |
import asyncio
|
4 |
import contextlib
|
5 |
import enum
|
|
|
6 |
import fastapi
|
7 |
import os.path
|
8 |
import pycrdt
|
lynxkite-bio/README.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# LynxKite Bio
|
2 |
+
|
3 |
+
An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications.
|
lynxkite-bio/pyproject.toml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "lynxkite-bio"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Additional boxes for LynxKite Graph Analytics that add algorithms for biology."
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.11"
|
7 |
+
dependencies = [
|
8 |
+
"fsspec>=2025.2.0",
|
9 |
+
"joblib>=1.4.2",
|
10 |
+
"lynxkite-core",
|
11 |
+
"lynxkite-graph-analytics",
|
12 |
+
"pandas>=2.2.3",
|
13 |
+
"rdkit>=2024.9.5",
|
14 |
+
"scipy>=1.15.2",
|
15 |
+
]
|
16 |
+
|
17 |
+
[project.optional-dependencies]
|
18 |
+
dev = [
|
19 |
+
"pytest>=8.3.4",
|
20 |
+
]
|
21 |
+
|
22 |
+
[tool.uv.sources]
|
23 |
+
lynxkite-core = { path = "../lynxkite-core" }
|
24 |
+
lynxkite-graph-analytics = { path = "../lynxkite-graph-analytics" }
|
lynxkite-bio/src/lynxkite_bio/__init__.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Graph analytics operations. To be split into separate files when we have more."""
|
2 |
+
|
3 |
+
from lynxkite_graph_analytics import Bundle, RelationDefinition
|
4 |
+
from lynxkite.core import ops
|
5 |
+
import joblib
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
import rdkit.Chem
|
9 |
+
import rdkit.Chem.rdFingerprintGenerator
|
10 |
+
import rdkit.Chem.Fingerprints.ClusterMols
|
11 |
+
import scipy
|
12 |
+
|
13 |
+
# Name of the LynxKite environment that the operations below are registered in.
ENV = "LynxKite Graph Analytics"
# Decorator factory: `@op("Name")` registers a function as an operation in ENV.
op = ops.op_registration(ENV)

# joblib cache object backed by the "../joblib-cache" location.
# NOTE(review): not referenced by the operations visible in this file — confirm whether still needed.
mem = joblib.Memory("../joblib-cache")
|
16 |
+
|
17 |
+
|
18 |
+
@op("Parse SMILES")
def parse_smiles(bundle: Bundle, *, table="df", smiles_column="SMILES", save_as="mols"):
    """Parse SMILES strings into RDKit molecules.

    Rows whose SMILES value is missing, or for which RDKit returns no molecule,
    are dropped from the resulting table. This keeps the new column aligned
    with the remaining rows; assigning a shorter list to the unfiltered table
    would fail with a length mismatch.

    Args:
        bundle: Input bundle. The result is built on ``bundle.copy()``.
        table: Name of the table in ``bundle.dfs`` that holds the SMILES strings.
        smiles_column: Column of ``table`` containing the SMILES strings.
        save_as: Name of the column that receives the parsed molecules.

    Returns:
        The copied bundle, where ``table`` is replaced by the filtered table
        extended with the ``save_as`` column.
    """
    df = bundle.dfs[table]
    # Rows without a SMILES string cannot be parsed at all.
    df = df.loc[df[smiles_column].notna()]
    mols = [rdkit.Chem.MolFromSmiles(smiles) for smiles in df[smiles_column]]
    # Unparsable SMILES yield None. Filter rows and molecules together so they
    # stay aligned. An explicit boolean array is used because an empty Python
    # list would not be interpreted as a row mask.
    parsed = np.array([mol is not None for mol in mols], dtype=bool)
    df = df.loc[parsed]
    mols = [mol for mol in mols if mol is not None]
    bundle = bundle.copy()
    bundle.dfs[table] = df.assign(**{save_as: mols})
    return bundle
|
27 |
+
|
28 |
+
|
29 |
+
def _get_similarity_matrix(mols):
    """Return the pairwise Tanimoto similarity matrix of the given molecules.

    Each molecule is described by a Morgan fingerprint (radius 2, 2048 bits).
    Entry ``[i, j]`` of the result is the Tanimoto similarity between the
    fingerprints of the i-th and j-th molecule (in iteration order of ``mols``).
    The matrix is symmetric and its diagonal is left at zero, so self-pairs
    never look like candidate edges.

    The matrix is filled directly instead of expanding a condensed vector with
    ``scipy.spatial.distance.squareform``: the condensed vector produced by
    RDKit's clustering helper is not ordered the way ``squareform`` expects,
    which would attach values to the wrong molecule pairs.

    Args:
        mols: Iterable of RDKit molecules.

    Returns:
        A square ``numpy`` array of floats with one row and column per molecule.
    """
    mfpgen = rdkit.Chem.rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
    fps = [mfpgen.GetFingerprint(mol) for mol in mols]
    count = len(fps)
    similarity_matrix = np.zeros((count, count), dtype=float)
    for i in range(1, count):
        # Similarities of molecule i against all earlier molecules, in one call.
        sims = rdkit.Chem.DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
        similarity_matrix[i, :i] = sims
        similarity_matrix[:i, i] = sims
    return similarity_matrix
|
36 |
+
|
37 |
+
|
38 |
+
@op("Graph from molecule similarity")
def graph_from_similarity(
    bundle: Bundle, *, table="df", mols_column="mols", average_degree=10
):
    """Build a graph whose edges connect the highest-scoring molecule pairs.

    All unordered pairs of rows are scored with ``_get_similarity_matrix`` and
    the ``int(average_degree * number_of_rows)`` pairs with the largest values
    become edges. If that number is zero or negative, no edges are created;
    if it exceeds the number of pairs, every pair becomes an edge.

    Args:
        bundle: Input bundle containing the table of molecules.
        table: Name of the table in ``bundle.dfs`` to read.
        mols_column: Column of ``table`` holding the molecules.
        average_degree: Number of edges to create per row of the table.

    Returns:
        A new bundle with two tables and one relation between them:
        ``nodes`` is a copy of the input table with its index named ``id``;
        ``edges`` has the columns ``source``, ``target`` and ``similarity``,
        where ``source``/``target`` are index labels of ``nodes``.
    """
    df = bundle.dfs[table]
    mols = df[mols_column]
    similarity_matrix = _get_similarity_matrix(mols)
    # Strict upper triangle: every unordered pair once, no self-pairs.
    i_idx, j_idx = np.triu_indices_from(similarity_matrix, k=1)
    sim_values = similarity_matrix[i_idx, j_idx]
    edge_count = int(average_degree * len(mols))
    if edge_count > 0:
        top_n_idx = np.argsort(sim_values)[-edge_count:]
    else:
        # A slice of [-0:] would select everything, so handle "no edges" explicitly.
        top_n_idx = np.array([], dtype=int)
    nodes = df.copy()
    nodes.index.name = "id"
    # The matrix is positional, but nodes are identified by their index labels.
    # Translate positions to labels so edges stay valid for non-default indexes.
    node_ids = nodes.index.to_numpy()
    edges = pd.DataFrame(
        {
            "source": node_ids[i_idx[top_n_idx]],
            "target": node_ids[j_idx[top_n_idx]],
            "similarity": sim_values[top_n_idx],
        }
    )
    bundle = Bundle(
        dfs={"edges": edges, "nodes": nodes},
        relations=[
            RelationDefinition(
                df="edges",
                source_column="source",
                target_column="target",
                source_table="nodes",
                target_table="nodes",
                source_key="id",
                target_key="id",
            )
        ],
    )
    return bundle
|
lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
-
from . import
|
2 |
from . import networkx_ops # noqa (imported to trigger registration)
|
3 |
from . import pytorch_model_ops # noqa (imported to trigger registration)
|
|
|
1 |
+
from .lynxkite_ops import * # noqa (imported to trigger registration)
|
2 |
from . import networkx_ops # noqa (imported to trigger registration)
|
3 |
from . import pytorch_model_ops # noqa (imported to trigger registration)
|
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py
CHANGED
@@ -80,9 +80,10 @@ class Bundle:
|
|
80 |
# TODO: Use relations.
|
81 |
graph = nx.DiGraph()
|
82 |
if "nodes" in self.dfs:
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
86 |
graph.add_edges_from(
|
87 |
self.dfs["edges"][["source", "target"]].itertuples(index=False, name=None)
|
88 |
)
|
|
|
80 |
# TODO: Use relations.
|
81 |
graph = nx.DiGraph()
|
82 |
if "nodes" in self.dfs:
|
83 |
+
df = self.dfs["nodes"]
|
84 |
+
if df.index.name != "id":
|
85 |
+
df = df.set_index("id")
|
86 |
+
graph.add_nodes_from(df.to_dict("index").items())
|
87 |
graph.add_edges_from(
|
88 |
self.dfs["edges"][["source", "target"]].itertuples(index=False, name=None)
|
89 |
)
|