Spaces:
Running
Running
Merge pull request #79 from biggraph/darabos-progress
Browse files- .github/workflows/test.yaml +5 -0
- lynxkite-app/src/lynxkite_app/crdt.py +7 -8
- lynxkite-app/src/lynxkite_app/main.py +0 -6
- lynxkite-app/web/src/apiTypes.ts +1 -0
- lynxkite-app/web/src/index.css +66 -5
- lynxkite-app/web/src/workspace/nodes/LynxKiteNode.tsx +4 -1
- lynxkite-core/src/lynxkite/core/executors/one_by_one.py +8 -7
- lynxkite-core/src/lynxkite/core/ops.py +13 -3
- lynxkite-core/src/lynxkite/core/workspace.py +35 -0
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py +14 -1
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/core.py +198 -0
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py +15 -209
- lynxkite-graph-analytics/tests/test_lynxkite_ops.py +5 -3
- lynxkite-lynxscribe/tests/test_llm_ops.py +1 -1
.github/workflows/test.yaml
CHANGED
@@ -56,6 +56,11 @@ jobs:
|
|
56 |
cd lynxkite-graph-analytics
|
57 |
pytest
|
58 |
|
|
|
|
|
|
|
|
|
|
|
59 |
- name: Try building the documentation
|
60 |
run: |
|
61 |
uv pip install mkdocs-material mkdocstrings[python]
|
|
|
56 |
cd lynxkite-graph-analytics
|
57 |
pytest
|
58 |
|
59 |
+
- name: Run LynxScribe tests
|
60 |
+
run: |
|
61 |
+
cd lynxkite-lynxscribe
|
62 |
+
pytest
|
63 |
+
|
64 |
- name: Try building the documentation
|
65 |
run: |
|
66 |
uv pip install mkdocs-material mkdocstrings[python]
|
lynxkite-app/src/lynxkite_app/crdt.py
CHANGED
@@ -86,6 +86,7 @@ def clean_input(ws_pyd):
|
|
86 |
for node in ws_pyd.nodes:
|
87 |
node.data.display = None
|
88 |
node.data.error = None
|
|
|
89 |
node.position.x = 0
|
90 |
node.position.y = 0
|
91 |
if node.model_extra:
|
@@ -175,7 +176,6 @@ delayed_executions = {}
|
|
175 |
async def workspace_changed(name: str, changes: pycrdt.MapEvent, ws_crdt: pycrdt.Map):
|
176 |
"""Callback to react to changes in the workspace.
|
177 |
|
178 |
-
|
179 |
Args:
|
180 |
name: Name of the workspace.
|
181 |
changes: Changes performed to the workspace.
|
@@ -197,6 +197,7 @@ async def workspace_changed(name: str, changes: pycrdt.MapEvent, ws_crdt: pycrdt
|
|
197 |
getattr(change, "keys", {}).get("__execution_delay", {}).get("newValue", 0)
|
198 |
for change in changes
|
199 |
)
|
|
|
200 |
if delay:
|
201 |
task = asyncio.create_task(execute(name, ws_crdt, ws_pyd, delay))
|
202 |
delayed_executions[name] = task
|
@@ -224,17 +225,15 @@ async def execute(
|
|
224 |
assert path.is_relative_to(config.DATA_PATH), "Provided workspace path is invalid"
|
225 |
# Save user changes before executing, in case the execution fails.
|
226 |
workspace.save(ws_pyd, path)
|
227 |
-
await workspace.execute(ws_pyd)
|
228 |
-
workspace.save(ws_pyd, path)
|
229 |
-
# Execution happened on the Python object, we need to replicate
|
230 |
-
# the results to the CRDT object.
|
231 |
with ws_crdt.doc.transaction():
|
232 |
for nc, np in zip(ws_crdt["nodes"], ws_pyd.nodes):
|
233 |
if "data" not in nc:
|
234 |
nc["data"] = pycrdt.Map()
|
235 |
-
|
236 |
-
|
237 |
-
|
|
|
|
|
238 |
|
239 |
|
240 |
@contextlib.asynccontextmanager
|
|
|
86 |
for node in ws_pyd.nodes:
|
87 |
node.data.display = None
|
88 |
node.data.error = None
|
89 |
+
node.data.status = workspace.NodeStatus.done
|
90 |
node.position.x = 0
|
91 |
node.position.y = 0
|
92 |
if node.model_extra:
|
|
|
176 |
async def workspace_changed(name: str, changes: pycrdt.MapEvent, ws_crdt: pycrdt.Map):
|
177 |
"""Callback to react to changes in the workspace.
|
178 |
|
|
|
179 |
Args:
|
180 |
name: Name of the workspace.
|
181 |
changes: Changes performed to the workspace.
|
|
|
197 |
getattr(change, "keys", {}).get("__execution_delay", {}).get("newValue", 0)
|
198 |
for change in changes
|
199 |
)
|
200 |
+
print(f"Running {name} in {ws_pyd.env}...")
|
201 |
if delay:
|
202 |
task = asyncio.create_task(execute(name, ws_crdt, ws_pyd, delay))
|
203 |
delayed_executions[name] = task
|
|
|
225 |
assert path.is_relative_to(config.DATA_PATH), "Provided workspace path is invalid"
|
226 |
# Save user changes before executing, in case the execution fails.
|
227 |
workspace.save(ws_pyd, path)
|
|
|
|
|
|
|
|
|
228 |
with ws_crdt.doc.transaction():
|
229 |
for nc, np in zip(ws_crdt["nodes"], ws_pyd.nodes):
|
230 |
if "data" not in nc:
|
231 |
nc["data"] = pycrdt.Map()
|
232 |
+
nc["data"]["status"] = "planned"
|
233 |
+
# Nodes get a reference to their CRDT maps, so they can update them as the results come in.
|
234 |
+
np._crdt = nc
|
235 |
+
await workspace.execute(ws_pyd)
|
236 |
+
workspace.save(ws_pyd, path)
|
237 |
|
238 |
|
239 |
@contextlib.asynccontextmanager
|
lynxkite-app/src/lynxkite_app/main.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
"""The FastAPI server for serving the LynxKite application."""
|
2 |
|
3 |
-
import os
|
4 |
import shutil
|
5 |
import pydantic
|
6 |
import fastapi
|
@@ -13,11 +12,6 @@ from lynxkite.core import ops
|
|
13 |
from lynxkite.core import workspace
|
14 |
from . import crdt, config
|
15 |
|
16 |
-
if os.environ.get("NX_CUGRAPH_AUTOCONFIG", "").strip().lower() == "true":
|
17 |
-
import cudf.pandas
|
18 |
-
|
19 |
-
cudf.pandas.install()
|
20 |
-
|
21 |
|
22 |
def detect_plugins():
|
23 |
plugins = {}
|
|
|
1 |
"""The FastAPI server for serving the LynxKite application."""
|
2 |
|
|
|
3 |
import shutil
|
4 |
import pydantic
|
5 |
import fastapi
|
|
|
12 |
from lynxkite.core import workspace
|
13 |
from . import crdt, config
|
14 |
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def detect_plugins():
|
17 |
plugins = {}
|
lynxkite-app/web/src/apiTypes.ts
CHANGED
@@ -41,6 +41,7 @@ export interface WorkspaceNodeData {
|
|
41 |
};
|
42 |
display?: unknown;
|
43 |
error?: string | null;
|
|
|
44 |
[k: string]: unknown;
|
45 |
}
|
46 |
export interface Position {
|
|
|
41 |
};
|
42 |
display?: unknown;
|
43 |
error?: string | null;
|
44 |
+
in_progress?: boolean;
|
45 |
[k: string]: unknown;
|
46 |
}
|
47 |
export interface Position {
|
lynxkite-app/web/src/index.css
CHANGED
@@ -90,9 +90,33 @@ body {
|
|
90 |
}
|
91 |
|
92 |
.lynxkite-node .title {
|
93 |
-
/* background: oklch(75% 0.2 55); */
|
94 |
font-weight: bold;
|
95 |
padding: 8px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
}
|
97 |
|
98 |
.handle-name {
|
@@ -322,6 +346,38 @@ body {
|
|
322 |
}
|
323 |
}
|
324 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
.react-flow__edge.selected path.react-flow__edge-path {
|
326 |
outline: var(--xy-selection-border, var(--xy-selection-border-default));
|
327 |
outline-offset: 10px;
|
@@ -379,13 +435,15 @@ body {
|
|
379 |
display: flex;
|
380 |
justify-content: space-between;
|
381 |
padding: 8px 12px;
|
382 |
-
|
|
|
383 |
}
|
384 |
|
385 |
/* Alternating background colors for table-like effect */
|
386 |
.graph-creation-view .df-head:nth-child(odd) {
|
387 |
background-color: #f9f9f9;
|
388 |
}
|
|
|
389 |
.graph-creation-view .df-head:nth-child(even) {
|
390 |
background-color: #e0e0e0;
|
391 |
}
|
@@ -393,7 +451,8 @@ body {
|
|
393 |
.graph-relation-attributes {
|
394 |
display: flex;
|
395 |
flex-direction: column;
|
396 |
-
|
|
|
397 |
width: 100%;
|
398 |
}
|
399 |
|
@@ -402,7 +461,8 @@ body {
|
|
402 |
font-weight: bold;
|
403 |
display: block;
|
404 |
margin-bottom: 2px;
|
405 |
-
|
|
|
406 |
}
|
407 |
|
408 |
.graph-relation-attributes input {
|
@@ -415,7 +475,8 @@ body {
|
|
415 |
}
|
416 |
|
417 |
.graph-relation-attributes input:focus {
|
418 |
-
|
|
|
419 |
}
|
420 |
|
421 |
.add-relationship-button {
|
|
|
90 |
}
|
91 |
|
92 |
.lynxkite-node .title {
|
|
|
93 |
font-weight: bold;
|
94 |
padding: 8px;
|
95 |
+
background-image: linear-gradient(
|
96 |
+
to right,
|
97 |
+
var(--status-color-1),
|
98 |
+
var(--status-color-2),
|
99 |
+
var(--status-color-3)
|
100 |
+
);
|
101 |
+
background-size: 180% 180%;
|
102 |
+
--status-color-1: oklch(75% 0.2 55);
|
103 |
+
--status-color-2: oklch(75% 0.2 55);
|
104 |
+
--status-color-3: oklch(75% 0.2 55);
|
105 |
+
transition: --status-color-1 0.3s, --status-color-2 0.3s, --status-color-3
|
106 |
+
0.3s;
|
107 |
+
}
|
108 |
+
|
109 |
+
.lynxkite-node .title.active {
|
110 |
+
--status-color-1: oklch(75% 0.2 55);
|
111 |
+
--status-color-2: oklch(90% 0.2 55);
|
112 |
+
--status-color-3: oklch(75% 0.1 55);
|
113 |
+
/* animation: active-node-gradient-animation 2s ease-in-out infinite; */
|
114 |
+
}
|
115 |
+
|
116 |
+
.lynxkite-node .title.planned {
|
117 |
+
--status-color-1: oklch(75% 0.1 55);
|
118 |
+
--status-color-2: oklch(75% 0.1 55);
|
119 |
+
--status-color-3: oklch(75% 0.1 55);
|
120 |
}
|
121 |
|
122 |
.handle-name {
|
|
|
346 |
}
|
347 |
}
|
348 |
|
349 |
+
@keyframes active-node-gradient-animation {
|
350 |
+
0% {
|
351 |
+
background-position-x: 100%;
|
352 |
+
}
|
353 |
+
|
354 |
+
50% {
|
355 |
+
background-position-x: 0%;
|
356 |
+
}
|
357 |
+
|
358 |
+
100% {
|
359 |
+
background-position-x: 100%;
|
360 |
+
}
|
361 |
+
}
|
362 |
+
|
363 |
+
@property --status-color-1 {
|
364 |
+
syntax: "<color>";
|
365 |
+
initial-value: red;
|
366 |
+
inherits: false;
|
367 |
+
}
|
368 |
+
|
369 |
+
@property --status-color-2 {
|
370 |
+
syntax: "<color>";
|
371 |
+
initial-value: red;
|
372 |
+
inherits: false;
|
373 |
+
}
|
374 |
+
|
375 |
+
@property --status-color-3 {
|
376 |
+
syntax: "<color>";
|
377 |
+
initial-value: red;
|
378 |
+
inherits: false;
|
379 |
+
}
|
380 |
+
|
381 |
.react-flow__edge.selected path.react-flow__edge-path {
|
382 |
outline: var(--xy-selection-border, var(--xy-selection-border-default));
|
383 |
outline-offset: 10px;
|
|
|
435 |
display: flex;
|
436 |
justify-content: space-between;
|
437 |
padding: 8px 12px;
|
438 |
+
/* Adds a separator between rows */
|
439 |
+
border-bottom: 1px solid #ccc;
|
440 |
}
|
441 |
|
442 |
/* Alternating background colors for table-like effect */
|
443 |
.graph-creation-view .df-head:nth-child(odd) {
|
444 |
background-color: #f9f9f9;
|
445 |
}
|
446 |
+
|
447 |
.graph-creation-view .df-head:nth-child(even) {
|
448 |
background-color: #e0e0e0;
|
449 |
}
|
|
|
451 |
.graph-relation-attributes {
|
452 |
display: flex;
|
453 |
flex-direction: column;
|
454 |
+
/* Adds space between each label-input pair */
|
455 |
+
gap: 10px;
|
456 |
width: 100%;
|
457 |
}
|
458 |
|
|
|
461 |
font-weight: bold;
|
462 |
display: block;
|
463 |
margin-bottom: 2px;
|
464 |
+
/* Lighter text for labels */
|
465 |
+
color: #666;
|
466 |
}
|
467 |
|
468 |
.graph-relation-attributes input {
|
|
|
475 |
}
|
476 |
|
477 |
.graph-relation-attributes input:focus {
|
478 |
+
/* Highlight input on focus */
|
479 |
+
border-color: #007bff;
|
480 |
}
|
481 |
|
482 |
.add-relationship-button {
|
lynxkite-app/web/src/workspace/nodes/LynxKiteNode.tsx
CHANGED
@@ -71,7 +71,10 @@ export default function LynxKiteNode(props: LynxKiteNodeProps) {
|
|
71 |
}}
|
72 |
>
|
73 |
<div className="lynxkite-node" style={props.nodeStyle}>
|
74 |
-
<div
|
|
|
|
|
|
|
75 |
{data.title}
|
76 |
{data.error && <span className="title-icon">⚠️</span>}
|
77 |
{expanded || <span className="title-icon">⋯</span>}
|
|
|
71 |
}}
|
72 |
>
|
73 |
<div className="lynxkite-node" style={props.nodeStyle}>
|
74 |
+
<div
|
75 |
+
className={`title bg-primary ${data.status}`}
|
76 |
+
onClick={titleClicked}
|
77 |
+
>
|
78 |
{data.title}
|
79 |
{data.error && <span className="title-icon">⚠️</span>}
|
80 |
{expanded || <span className="title-icon">⋯</span>}
|
lynxkite-core/src/lynxkite/core/executors/one_by_one.py
CHANGED
@@ -104,11 +104,11 @@ async def execute(ws: workspace.Workspace, catalog, cache=None):
|
|
104 |
tasks = {}
|
105 |
NO_INPUT = object() # Marker for initial tasks.
|
106 |
for node in ws.nodes:
|
107 |
-
node.data.error = None
|
108 |
op = catalog.get(node.data.title)
|
109 |
if op is None:
|
110 |
-
node.
|
111 |
continue
|
|
|
112 |
# Start tasks for nodes that have no non-batch inputs.
|
113 |
if all([i.position in "top or bottom" for i in op.inputs.values()]):
|
114 |
tasks[node.id] = [NO_INPUT]
|
@@ -123,12 +123,12 @@ async def execute(ws: workspace.Workspace, catalog, cache=None):
|
|
123 |
next_stage.setdefault(n, []).extend(ts)
|
124 |
continue
|
125 |
node = nodes[n]
|
126 |
-
|
127 |
-
|
128 |
-
params = {**data.params}
|
129 |
if has_ctx(op):
|
130 |
params["_ctx"] = contexts[node.id]
|
131 |
results = []
|
|
|
132 |
for task in ts:
|
133 |
try:
|
134 |
inputs = []
|
@@ -150,7 +150,7 @@ async def execute(ws: workspace.Workspace, catalog, cache=None):
|
|
150 |
output = await await_if_needed(result.output)
|
151 |
except Exception as e:
|
152 |
traceback.print_exc()
|
153 |
-
|
154 |
break
|
155 |
contexts[node.id].last_result = output
|
156 |
# Returned lists and DataFrames are considered multiple tasks.
|
@@ -161,7 +161,7 @@ async def execute(ws: workspace.Workspace, catalog, cache=None):
|
|
161 |
results.extend(output)
|
162 |
else: # Finished all tasks without errors.
|
163 |
if result.display:
|
164 |
-
|
165 |
for edge in edges[node.id]:
|
166 |
t = nodes[edge.target]
|
167 |
op = catalog[t.data.title]
|
@@ -172,5 +172,6 @@ async def execute(ws: workspace.Workspace, catalog, cache=None):
|
|
172 |
).extend(results)
|
173 |
else:
|
174 |
tasks.setdefault(edge.target, []).extend(results)
|
|
|
175 |
tasks = next_stage
|
176 |
return contexts
|
|
|
104 |
tasks = {}
|
105 |
NO_INPUT = object() # Marker for initial tasks.
|
106 |
for node in ws.nodes:
|
|
|
107 |
op = catalog.get(node.data.title)
|
108 |
if op is None:
|
109 |
+
node.publish_error(f'Operation "{node.data.title}" not found.')
|
110 |
continue
|
111 |
+
node.publish_error(None)
|
112 |
# Start tasks for nodes that have no non-batch inputs.
|
113 |
if all([i.position in "top or bottom" for i in op.inputs.values()]):
|
114 |
tasks[node.id] = [NO_INPUT]
|
|
|
123 |
next_stage.setdefault(n, []).extend(ts)
|
124 |
continue
|
125 |
node = nodes[n]
|
126 |
+
op = catalog[node.data.title]
|
127 |
+
params = {**node.data.params}
|
|
|
128 |
if has_ctx(op):
|
129 |
params["_ctx"] = contexts[node.id]
|
130 |
results = []
|
131 |
+
node.publish_started()
|
132 |
for task in ts:
|
133 |
try:
|
134 |
inputs = []
|
|
|
150 |
output = await await_if_needed(result.output)
|
151 |
except Exception as e:
|
152 |
traceback.print_exc()
|
153 |
+
node.publish_error(e)
|
154 |
break
|
155 |
contexts[node.id].last_result = output
|
156 |
# Returned lists and DataFrames are considered multiple tasks.
|
|
|
161 |
results.extend(output)
|
162 |
else: # Finished all tasks without errors.
|
163 |
if result.display:
|
164 |
+
result.display = await await_if_needed(result.display)
|
165 |
for edge in edges[node.id]:
|
166 |
t = nodes[edge.target]
|
167 |
op = catalog[t.data.title]
|
|
|
172 |
).extend(results)
|
173 |
else:
|
174 |
tasks.setdefault(edge.target, []).extend(results)
|
175 |
+
node.publish_result(result)
|
176 |
tasks = next_stage
|
177 |
return contexts
|
lynxkite-core/src/lynxkite/core/ops.py
CHANGED
@@ -9,6 +9,9 @@ import typing
|
|
9 |
from dataclasses import dataclass
|
10 |
from typing_extensions import Annotated
|
11 |
|
|
|
|
|
|
|
12 |
CATALOGS = {}
|
13 |
EXECUTORS = {}
|
14 |
|
@@ -94,8 +97,9 @@ class Result:
|
|
94 |
JSON-serializable.
|
95 |
"""
|
96 |
|
97 |
-
output: typing.Any
|
98 |
display: ReadOnlyJSON | None = None
|
|
|
99 |
|
100 |
|
101 |
MULTI_INPUT = Input(name="multi", type="*")
|
@@ -232,9 +236,15 @@ def register_passive_op(env: str, name: str, inputs=[], outputs=["output"], para
|
|
232 |
|
233 |
|
234 |
def register_executor(env: str):
|
235 |
-
"""Decorator for registering an executor.
|
236 |
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
238 |
EXECUTORS[env] = func
|
239 |
return func
|
240 |
|
|
|
9 |
from dataclasses import dataclass
|
10 |
from typing_extensions import Annotated
|
11 |
|
12 |
+
if typing.TYPE_CHECKING:
|
13 |
+
from . import workspace
|
14 |
+
|
15 |
CATALOGS = {}
|
16 |
EXECUTORS = {}
|
17 |
|
|
|
97 |
JSON-serializable.
|
98 |
"""
|
99 |
|
100 |
+
output: typing.Any = None
|
101 |
display: ReadOnlyJSON | None = None
|
102 |
+
error: str | None = None
|
103 |
|
104 |
|
105 |
MULTI_INPUT = Input(name="multi", type="*")
|
|
|
236 |
|
237 |
|
238 |
def register_executor(env: str):
|
239 |
+
"""Decorator for registering an executor.
|
240 |
|
241 |
+
The executor is a function that takes a workspace and executes the operations in it.
|
242 |
+
When it starts executing an operation, it should call `node.publish_started()` to indicate
|
243 |
+
the status on the UI. When the execution is finished, it should call `node.publish_result()`.
|
244 |
+
This will update the UI with the result of the operation.
|
245 |
+
"""
|
246 |
+
|
247 |
+
def decorator(func: typing.Callable[[workspace.Workspace], typing.Any]):
|
248 |
EXECUTORS[env] = func
|
249 |
return func
|
250 |
|
lynxkite-core/src/lynxkite/core/workspace.py
CHANGED
@@ -3,7 +3,9 @@
|
|
3 |
import json
|
4 |
from typing import Optional
|
5 |
import dataclasses
|
|
|
6 |
import os
|
|
|
7 |
import pydantic
|
8 |
import tempfile
|
9 |
from . import ops
|
@@ -20,11 +22,18 @@ class Position(BaseConfig):
|
|
20 |
y: float
|
21 |
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
class WorkspaceNodeData(BaseConfig):
|
24 |
title: str
|
25 |
params: dict
|
26 |
display: Optional[object] = None
|
27 |
error: Optional[str] = None
|
|
|
28 |
# Also contains a "meta" field when going out.
|
29 |
# This is ignored when coming back from the frontend.
|
30 |
|
@@ -36,6 +45,32 @@ class WorkspaceNode(BaseConfig):
|
|
36 |
type: str
|
37 |
data: WorkspaceNodeData
|
38 |
position: Position
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
class WorkspaceEdge(BaseConfig):
|
|
|
3 |
import json
|
4 |
from typing import Optional
|
5 |
import dataclasses
|
6 |
+
import enum
|
7 |
import os
|
8 |
+
import pycrdt
|
9 |
import pydantic
|
10 |
import tempfile
|
11 |
from . import ops
|
|
|
22 |
y: float
|
23 |
|
24 |
|
25 |
+
class NodeStatus(str, enum.Enum):
|
26 |
+
planned = "planned"
|
27 |
+
active = "active"
|
28 |
+
done = "done"
|
29 |
+
|
30 |
+
|
31 |
class WorkspaceNodeData(BaseConfig):
|
32 |
title: str
|
33 |
params: dict
|
34 |
display: Optional[object] = None
|
35 |
error: Optional[str] = None
|
36 |
+
status: NodeStatus = NodeStatus.done
|
37 |
# Also contains a "meta" field when going out.
|
38 |
# This is ignored when coming back from the frontend.
|
39 |
|
|
|
45 |
type: str
|
46 |
data: WorkspaceNodeData
|
47 |
position: Position
|
48 |
+
_crdt: pycrdt.Map
|
49 |
+
|
50 |
+
def publish_started(self):
|
51 |
+
"""Notifies the frontend that work has started on this node."""
|
52 |
+
self.data.error = None
|
53 |
+
self.data.status = NodeStatus.active
|
54 |
+
if hasattr(self, "_crdt"):
|
55 |
+
with self._crdt.doc.transaction():
|
56 |
+
self._crdt["data"]["error"] = None
|
57 |
+
self._crdt["data"]["status"] = NodeStatus.active
|
58 |
+
|
59 |
+
def publish_result(self, result: ops.Result):
|
60 |
+
"""Sends the result to the frontend. Call this in an executor when the result is available."""
|
61 |
+
self.data.display = result.display
|
62 |
+
self.data.error = result.error
|
63 |
+
self.data.status = NodeStatus.done
|
64 |
+
if hasattr(self, "_crdt"):
|
65 |
+
with self._crdt.doc.transaction():
|
66 |
+
self._crdt["data"]["display"] = result.display
|
67 |
+
self._crdt["data"]["error"] = result.error
|
68 |
+
self._crdt["data"]["status"] = NodeStatus.done
|
69 |
+
|
70 |
+
def publish_error(self, error: Exception | str | None):
|
71 |
+
"""Can be called with None to clear the error state."""
|
72 |
+
result = ops.Result(error=str(error) if error else None)
|
73 |
+
self.publish_result(result)
|
74 |
|
75 |
|
76 |
class WorkspaceEdge(BaseConfig):
|
lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py
CHANGED
@@ -1,3 +1,16 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from . import networkx_ops # noqa (imported to trigger registration)
|
3 |
from . import pytorch_model_ops # noqa (imported to trigger registration)
|
|
|
1 |
+
"""Graph analytics environment for LynxKite. The core types and functions are imported here for easy access."""
|
2 |
+
|
3 |
+
import os
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
if os.environ.get("NX_CUGRAPH_AUTOCONFIG", "").strip().lower() == "true":
|
7 |
+
import cudf.pandas
|
8 |
+
|
9 |
+
cudf.pandas.install()
|
10 |
+
|
11 |
+
pd.options.mode.copy_on_write = True # Prepare for Pandas 3.0.
|
12 |
+
|
13 |
+
from .core import * # noqa (easier access for core classes)
|
14 |
+
from . import lynxkite_ops # noqa (imported to trigger registration)
|
15 |
from . import networkx_ops # noqa (imported to trigger registration)
|
16 |
from . import pytorch_model_ops # noqa (imported to trigger registration)
|
lynxkite-graph-analytics/src/lynxkite_graph_analytics/core.py
ADDED
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Graph analytics executor and data types."""
|
2 |
+
|
3 |
+
from lynxkite.core import ops
|
4 |
+
import dataclasses
|
5 |
+
import functools
|
6 |
+
import networkx as nx
|
7 |
+
import pandas as pd
|
8 |
+
import polars as pl
|
9 |
+
import traceback
|
10 |
+
import typing
|
11 |
+
|
12 |
+
|
13 |
+
ENV = "LynxKite Graph Analytics"
|
14 |
+
|
15 |
+
|
16 |
+
@dataclasses.dataclass
|
17 |
+
class RelationDefinition:
|
18 |
+
"""Defines a set of edges."""
|
19 |
+
|
20 |
+
df: str # The DataFrame that contains the edges.
|
21 |
+
source_column: (
|
22 |
+
str # The column in the edge DataFrame that contains the source node ID.
|
23 |
+
)
|
24 |
+
target_column: (
|
25 |
+
str # The column in the edge DataFrame that contains the target node ID.
|
26 |
+
)
|
27 |
+
source_table: str # The DataFrame that contains the source nodes.
|
28 |
+
target_table: str # The DataFrame that contains the target nodes.
|
29 |
+
source_key: str # The column in the source table that contains the node ID.
|
30 |
+
target_key: str # The column in the target table that contains the node ID.
|
31 |
+
name: str | None = None # Descriptive name for the relation.
|
32 |
+
|
33 |
+
|
34 |
+
@dataclasses.dataclass
|
35 |
+
class Bundle:
|
36 |
+
"""A collection of DataFrames and other data.
|
37 |
+
|
38 |
+
Can efficiently represent a knowledge graph (homogeneous or heterogeneous) or tabular data.
|
39 |
+
It can also carry other data, such as a trained model.
|
40 |
+
"""
|
41 |
+
|
42 |
+
dfs: dict[str, pd.DataFrame] = dataclasses.field(default_factory=dict)
|
43 |
+
relations: list[RelationDefinition] = dataclasses.field(default_factory=list)
|
44 |
+
other: dict[str, typing.Any] = None
|
45 |
+
|
46 |
+
@classmethod
|
47 |
+
def from_nx(cls, graph: nx.Graph):
|
48 |
+
edges = nx.to_pandas_edgelist(graph)
|
49 |
+
d = dict(graph.nodes(data=True))
|
50 |
+
nodes = pd.DataFrame(d.values(), index=d.keys())
|
51 |
+
nodes["id"] = nodes.index
|
52 |
+
if "index" in nodes.columns:
|
53 |
+
nodes.drop(columns=["index"], inplace=True)
|
54 |
+
return cls(
|
55 |
+
dfs={"edges": edges, "nodes": nodes},
|
56 |
+
relations=[
|
57 |
+
RelationDefinition(
|
58 |
+
df="edges",
|
59 |
+
source_column="source",
|
60 |
+
target_column="target",
|
61 |
+
source_table="nodes",
|
62 |
+
target_table="nodes",
|
63 |
+
source_key="id",
|
64 |
+
target_key="id",
|
65 |
+
)
|
66 |
+
],
|
67 |
+
)
|
68 |
+
|
69 |
+
@classmethod
|
70 |
+
def from_df(cls, df: pd.DataFrame):
|
71 |
+
return cls(dfs={"df": df})
|
72 |
+
|
73 |
+
def to_nx(self):
|
74 |
+
# TODO: Use relations.
|
75 |
+
graph = nx.DiGraph()
|
76 |
+
if "nodes" in self.dfs:
|
77 |
+
df = self.dfs["nodes"]
|
78 |
+
if df.index.name != "id":
|
79 |
+
df = df.set_index("id")
|
80 |
+
graph.add_nodes_from(df.to_dict("index").items())
|
81 |
+
if "edges" in self.dfs:
|
82 |
+
edges = self.dfs["edges"]
|
83 |
+
graph.add_edges_from(
|
84 |
+
[
|
85 |
+
(
|
86 |
+
e["source"],
|
87 |
+
e["target"],
|
88 |
+
{
|
89 |
+
k: e[k]
|
90 |
+
for k in edges.columns
|
91 |
+
if k not in ["source", "target"]
|
92 |
+
},
|
93 |
+
)
|
94 |
+
for e in edges.to_records()
|
95 |
+
]
|
96 |
+
)
|
97 |
+
return graph
|
98 |
+
|
99 |
+
def copy(self):
|
100 |
+
"""Returns a medium depth copy of the bundle. The Bundle is completely new, but the DataFrames and RelationDefinitions are shared."""
|
101 |
+
return Bundle(
|
102 |
+
dfs=dict(self.dfs),
|
103 |
+
relations=list(self.relations),
|
104 |
+
other=dict(self.other) if self.other else None,
|
105 |
+
)
|
106 |
+
|
107 |
+
def to_dict(self, limit: int = 100):
|
108 |
+
return {
|
109 |
+
"dataframes": {
|
110 |
+
name: {
|
111 |
+
"columns": [str(c) for c in df.columns],
|
112 |
+
"data": df_for_frontend(df, limit).values.tolist(),
|
113 |
+
}
|
114 |
+
for name, df in self.dfs.items()
|
115 |
+
},
|
116 |
+
"relations": [dataclasses.asdict(relation) for relation in self.relations],
|
117 |
+
"other": self.other,
|
118 |
+
}
|
119 |
+
|
120 |
+
|
121 |
+
def nx_node_attribute_func(name):
|
122 |
+
"""Decorator for wrapping a function that adds a NetworkX node attribute."""
|
123 |
+
|
124 |
+
def decorator(func):
|
125 |
+
@functools.wraps(func)
|
126 |
+
def wrapper(graph: nx.Graph, **kwargs):
|
127 |
+
graph = graph.copy()
|
128 |
+
attr = func(graph, **kwargs)
|
129 |
+
nx.set_node_attributes(graph, attr, name)
|
130 |
+
return graph
|
131 |
+
|
132 |
+
return wrapper
|
133 |
+
|
134 |
+
return decorator
|
135 |
+
|
136 |
+
|
137 |
+
def disambiguate_edges(ws):
|
138 |
+
"""If an input plug is connected to multiple edges, keep only the last edge."""
|
139 |
+
seen = set()
|
140 |
+
for edge in reversed(ws.edges):
|
141 |
+
if (edge.target, edge.targetHandle) in seen:
|
142 |
+
ws.edges.remove(edge)
|
143 |
+
seen.add((edge.target, edge.targetHandle))
|
144 |
+
|
145 |
+
|
146 |
+
@ops.register_executor(ENV)
|
147 |
+
async def execute(ws):
|
148 |
+
catalog: dict[str, ops.Op] = ops.CATALOGS[ws.env]
|
149 |
+
disambiguate_edges(ws)
|
150 |
+
outputs = {}
|
151 |
+
failed = 0
|
152 |
+
while len(outputs) + failed < len(ws.nodes):
|
153 |
+
for node in ws.nodes:
|
154 |
+
if node.id in outputs:
|
155 |
+
continue
|
156 |
+
# TODO: Take the input/output handles into account.
|
157 |
+
inputs = [edge.source for edge in ws.edges if edge.target == node.id]
|
158 |
+
if all(input in outputs for input in inputs):
|
159 |
+
# All inputs for this node are ready, we can compute the output.
|
160 |
+
inputs = [outputs[input] for input in inputs]
|
161 |
+
params = {**node.data.params}
|
162 |
+
op = catalog.get(node.data.title)
|
163 |
+
if not op:
|
164 |
+
node.publish_error("Operation not found in catalog")
|
165 |
+
failed += 1
|
166 |
+
continue
|
167 |
+
node.publish_started()
|
168 |
+
try:
|
169 |
+
# Convert inputs types to match operation signature.
|
170 |
+
for i, (x, p) in enumerate(zip(inputs, op.inputs.values())):
|
171 |
+
if p.type == nx.Graph and isinstance(x, Bundle):
|
172 |
+
inputs[i] = x.to_nx()
|
173 |
+
elif p.type == Bundle and isinstance(x, nx.Graph):
|
174 |
+
inputs[i] = Bundle.from_nx(x)
|
175 |
+
elif p.type == Bundle and isinstance(x, pd.DataFrame):
|
176 |
+
inputs[i] = Bundle.from_df(x)
|
177 |
+
result = op(*inputs, **params)
|
178 |
+
except Exception as e:
|
179 |
+
traceback.print_exc()
|
180 |
+
node.publish_error(e)
|
181 |
+
failed += 1
|
182 |
+
continue
|
183 |
+
outputs[node.id] = result.output
|
184 |
+
node.publish_result(result)
|
185 |
+
|
186 |
+
|
187 |
+
def df_for_frontend(df: pd.DataFrame, limit: int) -> pd.DataFrame:
|
188 |
+
"""Returns a DataFrame with values that are safe to send to the frontend."""
|
189 |
+
df = df[:limit]
|
190 |
+
if isinstance(df, pl.LazyFrame):
|
191 |
+
df = df.collect()
|
192 |
+
if isinstance(df, pl.DataFrame):
|
193 |
+
df = df.to_pandas()
|
194 |
+
# Convert non-numeric columns to strings.
|
195 |
+
for c in df.columns:
|
196 |
+
if not pd.api.types.is_numeric_dtype(df[c]):
|
197 |
+
df[c] = df[c].astype(str)
|
198 |
+
return df
|
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py
CHANGED
@@ -1,201 +1,21 @@
|
|
1 |
-
"""Graph analytics operations.
|
2 |
|
3 |
import os
|
4 |
import fsspec
|
5 |
from lynxkite.core import ops
|
6 |
from collections import deque
|
7 |
-
import
|
8 |
-
import functools
|
9 |
import grandcypher
|
10 |
import joblib
|
11 |
import matplotlib
|
12 |
import networkx as nx
|
13 |
import pandas as pd
|
14 |
import polars as pl
|
15 |
-
import traceback
|
16 |
-
import typing
|
17 |
import json
|
18 |
|
19 |
|
20 |
mem = joblib.Memory("../joblib-cache")
|
21 |
-
|
22 |
-
op = ops.op_registration(ENV)
|
23 |
-
|
24 |
-
|
25 |
-
@dataclasses.dataclass
|
26 |
-
class RelationDefinition:
|
27 |
-
"""Defines a set of edges."""
|
28 |
-
|
29 |
-
df: str # The DataFrame that contains the edges.
|
30 |
-
source_column: (
|
31 |
-
str # The column in the edge DataFrame that contains the source node ID.
|
32 |
-
)
|
33 |
-
target_column: (
|
34 |
-
str # The column in the edge DataFrame that contains the target node ID.
|
35 |
-
)
|
36 |
-
source_table: str # The DataFrame that contains the source nodes.
|
37 |
-
target_table: str # The DataFrame that contains the target nodes.
|
38 |
-
source_key: str # The column in the source table that contains the node ID.
|
39 |
-
target_key: str # The column in the target table that contains the node ID.
|
40 |
-
name: str | None = None # Descriptive name for the relation.
|
41 |
-
|
42 |
-
|
43 |
-
@dataclasses.dataclass
class Bundle:
    """A collection of DataFrames and other data.

    Can efficiently represent a knowledge graph (homogeneous or heterogeneous) or tabular data.
    It can also carry other data, such as a trained model.
    """

    # Named DataFrames, e.g. "nodes" and "edges" for a graph.
    dfs: dict[str, pd.DataFrame] = dataclasses.field(default_factory=dict)
    # Foreign-key style links describing how the DataFrames relate to each other.
    relations: list[RelationDefinition] = dataclasses.field(default_factory=list)
    # Arbitrary extra payload (e.g. a trained model); None when absent.
    other: dict[str, typing.Any] = None

    @classmethod
    def from_nx(cls, graph: nx.Graph):
        """Builds a Bundle with "nodes" and "edges" DataFrames from a NetworkX graph."""
        edges = nx.to_pandas_edgelist(graph)
        d = dict(graph.nodes(data=True))
        nodes = pd.DataFrame(d.values(), index=d.keys())
        nodes["id"] = nodes.index
        # Node attributes may already carry an "index" column; drop it so it
        # doesn't clash with the DataFrame index.
        if "index" in nodes.columns:
            nodes.drop(columns=["index"], inplace=True)
        return cls(
            dfs={"edges": edges, "nodes": nodes},
            relations=[
                RelationDefinition(
                    df="edges",
                    source_column="source",
                    target_column="target",
                    source_table="nodes",
                    target_table="nodes",
                    source_key="id",
                    target_key="id",
                )
            ],
        )

    @classmethod
    def from_df(cls, df: pd.DataFrame):
        """Wraps a single DataFrame in a Bundle under the name "df"."""
        return cls(dfs={"df": df})

    def to_nx(self):
        """Builds a directed NetworkX graph from the "nodes" and "edges" DataFrames."""
        # TODO: Use relations.
        graph = nx.DiGraph()
        if "nodes" in self.dfs:
            df = self.dfs["nodes"]
            if df.index.name != "id":
                df = df.set_index("id")
            graph.add_nodes_from(df.to_dict("index").items())
        if "edges" in self.dfs:
            edges = self.dfs["edges"]
            graph.add_edges_from(
                [
                    (
                        e["source"],
                        e["target"],
                        # All columns besides source/target become edge attributes.
                        {
                            k: e[k]
                            for k in edges.columns
                            if k not in ["source", "target"]
                        },
                    )
                    for e in edges.to_records()
                ]
            )
        return graph

    def copy(self):
        """Returns a medium depth copy of the bundle. The Bundle is completely new, but the DataFrames and RelationDefinitions are shared."""
        return Bundle(
            dfs=dict(self.dfs),
            relations=list(self.relations),
            other=dict(self.other) if self.other else None,
        )

    def to_dict(self, limit: int = 100):
        """Returns a JSON-serializable preview of the bundle.

        Each DataFrame is truncated to at most `limit` rows via df_for_frontend.
        """
        return {
            "dataframes": {
                name: {
                    "columns": [str(c) for c in df.columns],
                    "data": df_for_frontend(df, limit).values.tolist(),
                }
                for name, df in self.dfs.items()
            },
            "relations": [dataclasses.asdict(relation) for relation in self.relations],
            "other": self.other,
        }
|
128 |
-
|
129 |
-
|
130 |
-
def nx_node_attribute_func(name):
    """Decorator for wrapping a function that adds a NetworkX node attribute.

    The wrapped function receives a copy of the input graph and returns a
    node -> value mapping. The decorator stores that mapping on the copy
    under `name` and returns the copy, leaving the caller's graph untouched.
    """

    def decorator(compute_attribute):
        @functools.wraps(compute_attribute)
        def wrapper(graph: nx.Graph, **kwargs):
            # Work on a copy so the original graph is never mutated.
            annotated = graph.copy()
            values = compute_attribute(annotated, **kwargs)
            nx.set_node_attributes(annotated, values, name)
            return annotated

        return wrapper

    return decorator
|
144 |
-
|
145 |
-
|
146 |
-
def disambiguate_edges(ws):
    """If an input plug is connected to multiple edges, keep only the last edge.

    Mutates `ws.edges` in place. "Last" means the edge appearing latest in
    `ws.edges`, i.e. the most recently added connection.

    (The previous implementation removed elements from the list while
    iterating over `reversed()` of it, and `list.remove` deletes the *first*
    value-equal element — with duplicate edges it could drop the wrong one,
    and each removal was O(n).)
    """
    seen = set()
    kept = []
    # Walk backwards so the last edge for each (target, targetHandle) wins.
    for edge in reversed(ws.edges):
        key = (edge.target, edge.targetHandle)
        if key not in seen:
            seen.add(key)
            kept.append(edge)
    kept.reverse()
    # Assign in place so other references to the list stay valid.
    ws.edges[:] = kept
|
153 |
-
|
154 |
-
|
155 |
-
@ops.register_executor(ENV)
async def execute(ws):
    """Executes the operations of a workspace in dependency order.

    Repeatedly sweeps the nodes, computing every node whose inputs are all
    available. Errors are recorded on `node.data.error`; successful results
    go to the node's output (and optionally `node.data.display`).

    Fixes over the previous version:
    - Failed nodes are tracked by id. Before, a failed node was retried on
      every sweep and the failure counter was incremented each time, which
      could terminate the loop before deep dependency chains were computed.
    - If a sweep makes no progress (a dependency cycle), the remaining nodes
      are marked as failed instead of looping forever.
    """
    catalog: dict[str, ops.Op] = ops.CATALOGS[ENV]
    disambiguate_edges(ws)
    outputs = {}
    failed = set()  # Ids of nodes whose operation raised or was not found.
    while len(outputs) + len(failed) < len(ws.nodes):
        progressed = False
        for node in ws.nodes:
            if node.id in outputs or node.id in failed:
                continue
            # TODO: Take the input/output handles into account.
            inputs = [edge.source for edge in ws.edges if edge.target == node.id]
            if not all(input in outputs for input in inputs):
                # Not all inputs are ready yet. Try again on a later sweep.
                continue
            # All inputs for this node are ready, we can compute the output.
            inputs = [outputs[input] for input in inputs]
            data = node.data
            params = {**data.params}
            op = catalog.get(data.title)
            if not op:
                data.error = "Operation not found in catalog"
                failed.add(node.id)
                progressed = True
                continue
            try:
                # Convert inputs types to match operation signature.
                for i, (x, p) in enumerate(zip(inputs, op.inputs.values())):
                    if p.type == nx.Graph and isinstance(x, Bundle):
                        inputs[i] = x.to_nx()
                    elif p.type == Bundle and isinstance(x, nx.Graph):
                        inputs[i] = Bundle.from_nx(x)
                    elif p.type == Bundle and isinstance(x, pd.DataFrame):
                        inputs[i] = Bundle.from_df(x)
                result = op(*inputs, **params)
            except Exception as e:
                traceback.print_exc()
                data.error = str(e)
                failed.add(node.id)
                progressed = True
                continue
            if len(op.inputs) == 1 and op.inputs.get("multi") == "*":
                # It's a flexible input. Create n+1 handles.
                data.inputs = {f"input{i}": None for i in range(len(inputs) + 1)}
            data.error = None
            outputs[node.id] = result.output
            progressed = True
            if result.display:
                data.display = result.display
        if not progressed:
            # The remaining nodes form a cycle (or depend on one): fail them.
            for node in ws.nodes:
                if node.id not in outputs and node.id not in failed:
                    node.data.error = "The workspace contains a cycle."
                    failed.add(node.id)
|
199 |
|
200 |
|
201 |
@op("Import Parquet")
|
@@ -246,14 +66,14 @@ def create_scale_free_graph(*, nodes: int = 10):
|
|
246 |
|
247 |
|
248 |
@op("Compute PageRank")
@nx_node_attribute_func("pagerank")
def compute_pagerank(graph: nx.Graph, *, damping=0.85, iterations=100):
    """Computes PageRank and stores it as the "pagerank" node attribute on a copy of the graph."""
    # TODO: This requires scipy to be installed.
    return nx.pagerank(graph, alpha=damping, max_iter=iterations)
|
253 |
|
254 |
|
255 |
@op("Compute betweenness centrality")
@nx_node_attribute_func("betweenness_centrality")
def compute_betweenness_centrality(graph: nx.Graph, *, k=10):
    """Estimates betweenness centrality from `k` sample nodes and stores it as the "betweenness_centrality" node attribute."""
    return nx.betweenness_centrality(graph, k=k)
|
259 |
|
@@ -271,7 +91,7 @@ def discard_parallel_edges(graph: nx.Graph):
|
|
271 |
|
272 |
|
273 |
@op("SQL")
|
274 |
-
def sql(bundle: Bundle, *, query: ops.LongStr, save_as: str = "result"):
|
275 |
"""Run a SQL query on the DataFrames in the bundle. Save the results as a new DataFrame."""
|
276 |
bundle = bundle.copy()
|
277 |
if os.environ.get("NX_CUGRAPH_AUTOCONFIG", "").strip().lower() == "true":
|
@@ -292,7 +112,7 @@ def sql(bundle: Bundle, *, query: ops.LongStr, save_as: str = "result"):
|
|
292 |
|
293 |
|
294 |
@op("Cypher")
|
295 |
-
def cypher(bundle: Bundle, *, query: ops.LongStr, save_as: str = "result"):
|
296 |
"""Run a Cypher query on the graph in the bundle. Save the results as a new DataFrame."""
|
297 |
bundle = bundle.copy()
|
298 |
graph = bundle.to_nx()
|
@@ -302,7 +122,7 @@ def cypher(bundle: Bundle, *, query: ops.LongStr, save_as: str = "result"):
|
|
302 |
|
303 |
|
304 |
@op("Organize bundle")
|
305 |
-
def organize_bundle(bundle: Bundle, *, code: ops.LongStr):
|
306 |
"""Lets you rename/copy/delete DataFrames, and modify relations.
|
307 |
|
308 |
TODO: Use a declarative solution instead of Python code. Add UI.
|
@@ -332,7 +152,7 @@ def _map_color(value):
|
|
332 |
if pd.api.types.is_numeric_dtype(value):
|
333 |
cmap = matplotlib.cm.get_cmap("viridis")
|
334 |
value = (value - value.min()) / (value.max() - value.min())
|
335 |
-
rgba = cmap(value)
|
336 |
return [
|
337 |
"#{:02x}{:02x}{:02x}".format(int(r * 255), int(g * 255), int(b * 255))
|
338 |
for r, g, b in rgba[:, :3]
|
@@ -351,13 +171,13 @@ def _map_color(value):
|
|
351 |
|
352 |
@op("Visualize graph", view="visualization")
|
353 |
def visualize_graph(
|
354 |
-
graph: Bundle,
|
355 |
*,
|
356 |
color_nodes_by: ops.NodeAttribute = None,
|
357 |
label_by: ops.NodeAttribute = None,
|
358 |
color_edges_by: ops.EdgeAttribute = None,
|
359 |
):
|
360 |
-
nodes = df_for_frontend(graph.dfs["nodes"], 10_000)
|
361 |
if color_nodes_by:
|
362 |
nodes["color"] = _map_color(nodes[color_nodes_by])
|
363 |
for cols in ["x y", "long lat"]:
|
@@ -387,7 +207,7 @@ def visualize_graph(
|
|
387 |
)
|
388 |
curveness = 0.3
|
389 |
nodes = nodes.to_records()
|
390 |
-
edges = df_for_frontend(
|
391 |
graph.dfs["edges"].drop_duplicates(["source", "target"]), 10_000
|
392 |
)
|
393 |
if color_edges_by:
|
@@ -446,22 +266,8 @@ def visualize_graph(
|
|
446 |
return v
|
447 |
|
448 |
|
449 |
-
def df_for_frontend(df: pd.DataFrame, limit: int) -> pd.DataFrame:
    """Returns a DataFrame with values that are safe to send to the frontend.

    Accepts a pandas DataFrame, a Polars DataFrame, or a Polars LazyFrame.
    The result is a pandas DataFrame with at most `limit` rows, where every
    non-numeric column has been converted to strings (JSON-serializable).

    (The previous version sliced with `df[:limit]` *before* the Polars
    conversion — a `pl.LazyFrame` does not support slicing — and mutated the
    columns of a pandas slice, triggering SettingWithCopy behavior.)
    """
    # Convert Polars inputs to pandas first: a LazyFrame must be collected
    # before it can be sliced or inspected.
    if isinstance(df, pl.LazyFrame):
        df = df.collect()
    if isinstance(df, pl.DataFrame):
        df = df.to_pandas()
    # Copy the slice so the string conversion below cannot affect the caller's DataFrame.
    df = df[:limit].copy()
    # Convert non-numeric columns to strings.
    for c in df.columns:
        if not pd.api.types.is_numeric_dtype(df[c]):
            df[c] = df[c].astype(str)
    return df
|
461 |
-
|
462 |
-
|
463 |
@op("View tables", view="table_view")
def view_tables(bundle: Bundle, *, limit: int = 100):
    """Displays the DataFrames in the bundle as tables, showing at most `limit` rows each."""
    return bundle.to_dict(limit=limit)
|
466 |
|
467 |
|
@@ -470,7 +276,7 @@ def view_tables(bundle: Bundle, *, limit: int = 100):
|
|
470 |
view="graph_creation_view",
|
471 |
outputs=["output"],
|
472 |
)
|
473 |
-
def create_graph(bundle: Bundle, *, relations: str = None) -> Bundle:
|
474 |
"""Replace relations of the given bundle
|
475 |
|
476 |
relations is a stringified JSON, instead of a dict, because complex Yjs types (arrays, maps)
|
@@ -489,6 +295,6 @@ def create_graph(bundle: Bundle, *, relations: str = None) -> Bundle:
|
|
489 |
bundle = bundle.copy()
|
490 |
if not (relations is None or relations.strip() == ""):
|
491 |
bundle.relations = [
|
492 |
-
RelationDefinition(**r) for r in json.loads(relations).values()
|
493 |
]
|
494 |
return ops.Result(output=bundle, display=bundle.to_dict(limit=100))
|
|
|
1 |
+
"""Graph analytics operations."""
|
2 |
|
3 |
import os
|
4 |
import fsspec
|
5 |
from lynxkite.core import ops
|
6 |
from collections import deque
|
7 |
+
from . import core
|
|
|
8 |
import grandcypher
|
9 |
import joblib
|
10 |
import matplotlib
|
11 |
import networkx as nx
|
12 |
import pandas as pd
|
13 |
import polars as pl
|
|
|
|
|
14 |
import json
|
15 |
|
16 |
|
17 |
mem = joblib.Memory("../joblib-cache")
|
18 |
+
op = ops.op_registration(core.ENV)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
|
21 |
@op("Import Parquet")
|
|
|
66 |
|
67 |
|
68 |
@op("Compute PageRank")
|
69 |
+
@core.nx_node_attribute_func("pagerank")
|
70 |
def compute_pagerank(graph: nx.Graph, *, damping=0.85, iterations=100):
|
71 |
# TODO: This requires scipy to be installed.
|
72 |
return nx.pagerank(graph, alpha=damping, max_iter=iterations)
|
73 |
|
74 |
|
75 |
@op("Compute betweenness centrality")
|
76 |
+
@core.nx_node_attribute_func("betweenness_centrality")
|
77 |
def compute_betweenness_centrality(graph: nx.Graph, *, k=10):
|
78 |
return nx.betweenness_centrality(graph, k=k)
|
79 |
|
|
|
91 |
|
92 |
|
93 |
@op("SQL")
|
94 |
+
def sql(bundle: core.Bundle, *, query: ops.LongStr, save_as: str = "result"):
|
95 |
"""Run a SQL query on the DataFrames in the bundle. Save the results as a new DataFrame."""
|
96 |
bundle = bundle.copy()
|
97 |
if os.environ.get("NX_CUGRAPH_AUTOCONFIG", "").strip().lower() == "true":
|
|
|
112 |
|
113 |
|
114 |
@op("Cypher")
|
115 |
+
def cypher(bundle: core.Bundle, *, query: ops.LongStr, save_as: str = "result"):
|
116 |
"""Run a Cypher query on the graph in the bundle. Save the results as a new DataFrame."""
|
117 |
bundle = bundle.copy()
|
118 |
graph = bundle.to_nx()
|
|
|
122 |
|
123 |
|
124 |
@op("Organize bundle")
|
125 |
+
def organize_bundle(bundle: core.Bundle, *, code: ops.LongStr):
|
126 |
"""Lets you rename/copy/delete DataFrames, and modify relations.
|
127 |
|
128 |
TODO: Use a declarative solution instead of Python code. Add UI.
|
|
|
152 |
if pd.api.types.is_numeric_dtype(value):
|
153 |
cmap = matplotlib.cm.get_cmap("viridis")
|
154 |
value = (value - value.min()) / (value.max() - value.min())
|
155 |
+
rgba = cmap(value.values)
|
156 |
return [
|
157 |
"#{:02x}{:02x}{:02x}".format(int(r * 255), int(g * 255), int(b * 255))
|
158 |
for r, g, b in rgba[:, :3]
|
|
|
171 |
|
172 |
@op("Visualize graph", view="visualization")
|
173 |
def visualize_graph(
|
174 |
+
graph: core.Bundle,
|
175 |
*,
|
176 |
color_nodes_by: ops.NodeAttribute = None,
|
177 |
label_by: ops.NodeAttribute = None,
|
178 |
color_edges_by: ops.EdgeAttribute = None,
|
179 |
):
|
180 |
+
nodes = core.df_for_frontend(graph.dfs["nodes"], 10_000)
|
181 |
if color_nodes_by:
|
182 |
nodes["color"] = _map_color(nodes[color_nodes_by])
|
183 |
for cols in ["x y", "long lat"]:
|
|
|
207 |
)
|
208 |
curveness = 0.3
|
209 |
nodes = nodes.to_records()
|
210 |
+
edges = core.df_for_frontend(
|
211 |
graph.dfs["edges"].drop_duplicates(["source", "target"]), 10_000
|
212 |
)
|
213 |
if color_edges_by:
|
|
|
266 |
return v
|
267 |
|
268 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
@op("View tables", view="table_view")
|
270 |
+
def view_tables(bundle: core.Bundle, *, limit: int = 100):
|
271 |
return bundle.to_dict(limit=limit)
|
272 |
|
273 |
|
|
|
276 |
view="graph_creation_view",
|
277 |
outputs=["output"],
|
278 |
)
|
279 |
+
def create_graph(bundle: core.Bundle, *, relations: str = None) -> core.Bundle:
|
280 |
"""Replace relations of the given bundle
|
281 |
|
282 |
relations is a stringified JSON, instead of a dict, because complex Yjs types (arrays, maps)
|
|
|
295 |
bundle = bundle.copy()
|
296 |
if not (relations is None or relations.strip() == ""):
|
297 |
bundle.relations = [
|
298 |
+
core.RelationDefinition(**r) for r in json.loads(relations).values()
|
299 |
]
|
300 |
return ops.Result(output=bundle, display=bundle.to_dict(limit=100))
|
lynxkite-graph-analytics/tests/test_lynxkite_ops.py
CHANGED
@@ -2,12 +2,12 @@ import pandas as pd
|
|
2 |
import pytest
|
3 |
import networkx as nx
|
4 |
|
5 |
-
from lynxkite.core import workspace
|
6 |
-
from lynxkite_graph_analytics.
|
7 |
|
8 |
|
9 |
async def test_execute_operation_not_in_catalog():
|
10 |
-
ws = workspace.Workspace(env=
|
11 |
ws.nodes.append(
|
12 |
workspace.WorkspaceNode(
|
13 |
id="1",
|
@@ -23,6 +23,8 @@ async def test_execute_operation_not_in_catalog():
|
|
23 |
async def test_execute_operation_inputs_correct_cast():
|
24 |
# Test that the automatic casting of operation inputs works correctly.
|
25 |
|
|
|
|
|
26 |
@op("Create Bundle")
|
27 |
def create_bundle() -> Bundle:
|
28 |
df = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
|
|
|
2 |
import pytest
|
3 |
import networkx as nx
|
4 |
|
5 |
+
from lynxkite.core import workspace, ops
|
6 |
+
from lynxkite_graph_analytics.core import Bundle, execute, ENV
|
7 |
|
8 |
|
9 |
async def test_execute_operation_not_in_catalog():
|
10 |
+
ws = workspace.Workspace(env=ENV)
|
11 |
ws.nodes.append(
|
12 |
workspace.WorkspaceNode(
|
13 |
id="1",
|
|
|
23 |
async def test_execute_operation_inputs_correct_cast():
|
24 |
# Test that the automatic casting of operation inputs works correctly.
|
25 |
|
26 |
+
op = ops.op_registration("test")
|
27 |
+
|
28 |
@op("Create Bundle")
|
29 |
def create_bundle() -> Bundle:
|
30 |
df = pd.DataFrame({"source": [1, 2, 3], "target": [4, 5, 6]})
|
lynxkite-lynxscribe/tests/test_llm_ops.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import unittest
|
2 |
-
from
|
3 |
from lynxkite.core.executors import one_by_one
|
4 |
from lynxkite.core import ops, workspace
|
5 |
|
|
|
1 |
import unittest
|
2 |
+
from lynxkite_lynxscribe import llm_ops # noqa: F401
|
3 |
from lynxkite.core.executors import one_by_one
|
4 |
from lynxkite.core import ops, workspace
|
5 |
|