Merge branch 'main' into darabos-model-designer

Files changed:

- Dockerfile +6 -2
- examples/Graph RAG +1 -9
- lynxkite-bio/src/lynxkite_bio/__init__.py +1 -1
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py +1 -1
- lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py +4 -4
- lynxkite-lynxscribe/README.md +6 -1
- lynxkite-lynxscribe/src/lynxkite_lynxscribe/llm_ops.py +23 -10
Dockerfile
CHANGED

@@ -5,12 +5,16 @@ USER node
 ENV HOME=/home/node PATH=/home/node/.local/bin:$PATH
 WORKDIR $HOME/app
 COPY --chown=node . $HOME/app
-
+ENV GIT_SSH_COMMAND="ssh -i /run/secrets/LYNXSCRIBE_DEPLOY_KEY -o StrictHostKeyChecking=no"
+RUN --mount=type=secret,id=LYNXSCRIBE_DEPLOY_KEY,mode=0444,required=true \
+    uv venv && uv pip install \
     -e lynxkite-core \
     -e lynxkite-app \
     -e lynxkite-graph-analytics \
     -e lynxkite-bio \
-    -e lynxkite-
+    -e lynxkite-lynxscribe \
+    -e lynxkite-pillow-example \
+    chromadb openai
 WORKDIR $HOME/app/examples
 ENV PORT=7860
 CMD ["uv", "run", "lynxkite"]
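
The new `RUN --mount=type=secret` line expects the LynxScribe deploy key to be supplied at build time. A minimal sketch of a BuildKit invocation, assuming the key lives at `~/.ssh/lynxscribe_deploy_key` (the key path and image tag are placeholders, not part of the repository):

```bash
# Build with BuildKit so --mount=type=secret is available inside the Dockerfile.
DOCKER_BUILDKIT=1 docker build \
  --secret id=LYNXSCRIBE_DEPLOY_KEY,src=$HOME/.ssh/lynxscribe_deploy_key \
  -t lynxkite-space .
```
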
examples/Graph RAG
CHANGED

@@ -510,8 +510,7 @@
         "title": "Ask LLM",
         "params": {
           "max_tokens": 100.0,
-          "accepted_regex": "",
-          "model": "SultanR/SmolTulu-1.7b-Instruct"
+          "accepted_regex": ""
         },
         "display": null,
         "error": null,
@@ -541,13 +540,6 @@
             "type": {
               "type": "<class 'int'>"
             }
-          },
-          "model": {
-            "type": {
-              "type": "<class 'str'>"
-            },
-            "default": null,
-            "name": "model"
           }
         },
         "outputs": {
lynxkite-bio/src/lynxkite_bio/__init__.py
CHANGED

@@ -10,7 +10,7 @@ import rdkit.Chem.rdFingerprintGenerator
 import rdkit.Chem.Fingerprints.ClusterMols
 import scipy

-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 ENV = "LynxKite Graph Analytics"
 op = ops.op_registration(ENV)
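
For context, `joblib.Memory` persists function results on disk under the given directory, so the rename to `.joblib-cache` simply makes the cache folder hidden. A minimal sketch of the pattern (the decorated function is a made-up example, not from the repo):

```python
import joblib

mem = joblib.Memory(".joblib-cache")  # results are persisted under ./.joblib-cache/

@mem.cache
def slow_square(x):  # hypothetical function, only to illustrate the pattern
    print("computing", x)
    return x * x

slow_square(4)  # computed once and written to disk
slow_square(4)  # served from the cache; "computing" is not printed again
```
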
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py
CHANGED

@@ -40,7 +40,7 @@ from bionemo.scdl.io.single_cell_collection import SingleCellCollection
 import scanpy


-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)
 DATA_PATH = Path("/workspace")
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py
CHANGED

@@ -15,7 +15,7 @@ import polars as pl
 import json


-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)


@@ -87,8 +87,8 @@ def import_parquet(*, filename: str):
     return pd.read_parquet(filename)


-@mem.cache
 @op("Import CSV")
+@mem.cache
 def import_csv(
     *, filename: str, columns: str = "<from file>", separator: str = "<auto>"
 ):
@@ -102,8 +102,8 @@ def import_csv(
     )


-@mem.cache
 @op("Import GraphML")
+@mem.cache
 def import_graphml(*, filename: str):
     """Imports a GraphML file."""
     files = fsspec.open_files(filename, compression="infer")
@@ -114,8 +114,8 @@ def import_graphml(*, filename: str):
     raise ValueError(f"No .graphml file found at {filename}")


-@mem.cache
 @op("Graph from OSM")
+@mem.cache
 def import_osm(*, location: str):
     import osmnx as ox

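
On the decorator reordering: Python applies decorators bottom-up, so with `@op(...)` listed above `@mem.cache` the raw function is first wrapped by joblib's disk cache and the cached callable is what gets registered as the operation. A rough sketch of the mechanics, with a stand-in for `ops.op_registration` (the real registry's return value and internals are not shown in this diff):

```python
import joblib

mem = joblib.Memory(".joblib-cache")

def op(name):
    """Stand-in for ops.op_registration(ENV); it just records the callable it receives."""
    def decorator(func):
        print("registering", name, "->", func)
        return func
    return decorator

@op("Import CSV")  # applied second: the registry sees the joblib-cached wrapper
@mem.cache         # applied first: wraps the raw function with on-disk caching
def import_csv(*, filename: str):
    return f"rows of {filename}"  # placeholder body
```

With the previous order, joblib would instead have cached whatever object the registry decorator returned.
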
lynxkite-lynxscribe/README.md
CHANGED

@@ -16,11 +16,16 @@ Run tests with:
 uv run pytest
 ```

-The LLM agent flow examples use local models.
+The LLM agent flow examples can use local models.

 ```bash
 uv pip install infinity-emb[all]
 infinity_emb v2 --model-id michaelfeil/bge-small-en-v1.5
 uv pip install "sglang[all]>=0.4.2.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
 python -m sglang.launch_server --model-path SultanR/SmolTulu-1.7b-Instruct --port 8080
+export LLM_BASE_URL='http://localhost:8080/v1'
+export LLM_MODEL='SultanR/SmolTulu-1.7b-Instruct'
+export EMBEDDING_BASE_URL='http://localhost:7997/'
+export EMBEDDING_MODEL='michaelfeil/bge-small-en-v1.5'
+lynxkite
 ```
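
Once the servers from that snippet are running, one quick way to confirm the chat endpoint responds is to point the plain OpenAI client at it. This is only an illustrative check, not part of the repo; the `api_key` value is a dummy, which local OpenAI-compatible servers typically ignore:

```python
import openai

# Base URL and model match the exports in the README block above.
client = openai.OpenAI(base_url="http://localhost:8080/v1", api_key="unused")
resp = client.chat.completions.create(
    model="SultanR/SmolTulu-1.7b-Instruct",
    max_tokens=20,
    messages=[{"role": "user", "content": "Say hello."}],
)
print(resp.choices[0].message.content)
```
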
lynxkite-lynxscribe/src/lynxkite_lynxscribe/llm_ops.py
CHANGED

@@ -6,6 +6,7 @@ as an "agentic logic flow". It might just get deleted.
 (This is why the dependencies are left hanging.)
 """

+import os
 from lynxkite.core import ops
 import enum
 import jinja2
@@ -20,13 +21,20 @@ LLM_CACHE = {}
 ENV = "LLM logic"
 one_by_one.register(ENV)
 op = ops.op_registration(ENV)
+LLM_BASE_URL = os.environ.get("LLM_BASE_URL", None)
+EMBEDDING_BASE_URL = os.environ.get("EMBEDDING_BASE_URL", None)
+LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini-2024-07-18")
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small")


 def chat(*args, **kwargs):
     import openai

-    chat_client = openai.OpenAI(base_url=
-
+    chat_client = openai.OpenAI(base_url=LLM_BASE_URL)
+    kwargs.setdefault("model", LLM_MODEL)
+    key = json.dumps(
+        {"method": "chat", "base_url": LLM_BASE_URL, "args": args, "kwargs": kwargs}
+    )
     if key not in LLM_CACHE:
         completion = chat_client.chat.completions.create(*args, **kwargs)
         LLM_CACHE[key] = [c.message.content for c in completion.choices]
@@ -36,8 +44,16 @@ def chat(*args, **kwargs):
 def embedding(*args, **kwargs):
     import openai

-    embedding_client = openai.OpenAI(base_url=
-
+    embedding_client = openai.OpenAI(base_url=EMBEDDING_BASE_URL)
+    kwargs.setdefault("model", EMBEDDING_MODEL)
+    key = json.dumps(
+        {
+            "method": "embedding",
+            "base_url": EMBEDDING_BASE_URL,
+            "args": args,
+            "kwargs": kwargs,
+        }
+    )
     if key not in LLM_CACHE:
         res = embedding_client.embeddings.create(*args, **kwargs)
         [data] = res.data
@@ -114,8 +130,7 @@ def create_prompt(input, *, save_as="prompt", template: ops.LongStr):


 @op("Ask LLM")
-def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int = 100):
-    assert model, "Please specify the model."
+def ask_llm(input, *, accepted_regex: str = None, max_tokens: int = 100):
     assert "prompt" in input, "Please create the prompt first."
     options = {}
     if accepted_regex:
@@ -123,7 +138,6 @@ def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int =
             "regex": accepted_regex,
         }
     results = chat(
-        model=model,
         max_tokens=max_tokens,
         messages=[
             {"role": "user", "content": input["prompt"]},
@@ -212,10 +226,9 @@ def rag(
         results = [db[int(r)] for r in results["ids"][0]]
         return {**input, "rag": results, "_collection": collection}
     if engine == RagEngine.Custom:
-        model = "michaelfeil/bge-small-en-v1.5"
         chat = input[input_field]
-        embeddings = [embedding(input=[r[db_field]]
-        q = embedding(input=[chat]
+        embeddings = [embedding(input=[r[db_field]]) for r in db]
+        q = embedding(input=[chat])

         def cosine_similarity(a, b):
             return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
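
With these defaults in place, operations no longer pass a model explicitly. A rough usage sketch of the updated helper, assuming `chat()` returns the cached list of completion texts (which is how `ask_llm` consumes it):

```python
# Illustrative call only: the model falls back to LLM_MODEL via setdefault,
# and an identical repeated call is answered from LLM_CACHE without hitting the API.
answers = chat(
    max_tokens=50,
    messages=[{"role": "user", "content": "Summarize LynxKite in one sentence."}],
)
print(answers[0])
```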