darabos committed
Commit 5426f27 · 2 parents: 6568046 814aca7

Merge branch 'main' into darabos-model-designer

Dockerfile CHANGED
@@ -5,12 +5,16 @@ USER node
 ENV HOME=/home/node PATH=/home/node/.local/bin:$PATH
 WORKDIR $HOME/app
 COPY --chown=node . $HOME/app
-RUN uv venv && uv pip install \
+ENV GIT_SSH_COMMAND="ssh -i /run/secrets/LYNXSCRIBE_DEPLOY_KEY -o StrictHostKeyChecking=no"
+RUN --mount=type=secret,id=LYNXSCRIBE_DEPLOY_KEY,mode=0444,required=true \
+  uv venv && uv pip install \
   -e lynxkite-core \
   -e lynxkite-app \
   -e lynxkite-graph-analytics \
   -e lynxkite-bio \
-  -e lynxkite-pillow-example
+  -e lynxkite-lynxscribe \
+  -e lynxkite-pillow-example \
+  chromadb openai
 WORKDIR $HOME/app/examples
 ENV PORT=7860
 CMD ["uv", "run", "lynxkite"]
examples/Graph RAG CHANGED
@@ -510,8 +510,7 @@
   "title": "Ask LLM",
   "params": {
     "max_tokens": 100.0,
-    "accepted_regex": "",
-    "model": "SultanR/SmolTulu-1.7b-Instruct"
+    "accepted_regex": ""
   },
   "display": null,
   "error": null,
@@ -541,13 +540,6 @@
       "type": {
         "type": "<class 'int'>"
       }
-    },
-    "model": {
-      "type": {
-        "type": "<class 'str'>"
-      },
-      "default": null,
-      "name": "model"
     }
   },
   "outputs": {
lynxkite-bio/src/lynxkite_bio/__init__.py CHANGED
@@ -10,7 +10,7 @@ import rdkit.Chem.rdFingerprintGenerator
 import rdkit.Chem.Fingerprints.ClusterMols
 import scipy
 
-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 ENV = "LynxKite Graph Analytics"
 op = ops.op_registration(ENV)
 
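The only change is the cache directory moving to a hidden `.joblib-cache`, which keeps it out of casual directory listings. For reference, a minimal sketch of what `joblib.Memory` provides (independent of LynxKite):

```python
import joblib

mem = joblib.Memory(".joblib-cache")  # results are persisted under this directory

@mem.cache
def slow_square(x):
    print("computing...")  # only runs on a cache miss
    return x * x

slow_square(4)  # computes, prints, and writes the result to .joblib-cache
slow_square(4)  # returned straight from the on-disk cache
```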
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py CHANGED
@@ -40,7 +40,7 @@ from bionemo.scdl.io.single_cell_collection import SingleCellCollection
 import scanpy
 
 
-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)
 DATA_PATH = Path("/workspace")
 
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py CHANGED
@@ -15,7 +15,7 @@ import polars as pl
 import json
 
 
-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)
 
 
@@ -87,8 +87,8 @@ def import_parquet(*, filename: str):
     return pd.read_parquet(filename)
 
 
-@mem.cache
 @op("Import CSV")
+@mem.cache
 def import_csv(
     *, filename: str, columns: str = "<from file>", separator: str = "<auto>"
 ):
@@ -102,8 +102,8 @@ def import_csv(
     )
 
 
-@mem.cache
 @op("Import GraphML")
+@mem.cache
 def import_graphml(*, filename: str):
     """Imports a GraphML file."""
     files = fsspec.open_files(filename, compression="infer")
@@ -114,8 +114,8 @@ def import_graphml(*, filename: str):
         raise ValueError(f"No .graphml file found at {filename}")
 
 
-@mem.cache
 @op("Graph from OSM")
+@mem.cache
 def import_osm(*, location: str):
     import osmnx as ox
 
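Besides the directory rename, note the decorator reordering. Decorators apply bottom-up, so with `@op(...)` outermost the registry now receives the memoized wrapper, and calls dispatched through the registry hit the joblib cache. A small sketch of the ordering semantics, using a toy `op` stand-in rather than LynxKite's real `op_registration`:

```python
import joblib

mem = joblib.Memory(".joblib-cache")
REGISTRY = {}

def op(name):
    # Toy registry: records whatever callable the decorator receives.
    def register(func):
        REGISTRY[name] = func
        return func
    return register

@op("Import CSV")  # applied second: registers the cached wrapper
@mem.cache         # applied first: wraps the raw function with disk caching
def import_csv(*, filename: str):
    return open(filename).read()

# The old order (@mem.cache above @op) would have registered the raw
# function, so REGISTRY["Import CSV"](...) would bypass the cache entirely.
```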
lynxkite-lynxscribe/README.md CHANGED
@@ -16,11 +16,16 @@ Run tests with:
 uv run pytest
 ```
 
-The LLM agent flow examples use local models.
+The LLM agent flow examples can use local models.
 
 ```bash
 uv pip install infinity-emb[all]
 infinity_emb v2 --model-id michaelfeil/bge-small-en-v1.5
 uv pip install "sglang[all]>=0.4.2.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
 python -m sglang.launch_server --model-path SultanR/SmolTulu-1.7b-Instruct --port 8080
+export LLM_BASE_URL='http://localhost:8080/v1'
+export LLM_MODEL='SultanR/SmolTulu-1.7b-Instruct'
+export EMBEDDING_BASE_URL='http://localhost:7997/'
+export EMBEDDING_MODEL='michaelfeil/bge-small-en-v1.5'
+lynxkite
 ```
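These exports are what `llm_ops.py` (below) reads to configure its OpenAI-compatible clients. A quick sanity check of both endpoints before launching LynxKite (a sketch; it assumes the two servers above are running, and passes a placeholder API key because the client requires one even when local servers ignore it):

```python
import os
import openai

# Chat endpoint served by sglang on port 8080.
chat_client = openai.OpenAI(base_url=os.environ["LLM_BASE_URL"], api_key="unused")
reply = chat_client.chat.completions.create(
    model=os.environ["LLM_MODEL"],
    messages=[{"role": "user", "content": "Say hi."}],
    max_tokens=5,
)
print(reply.choices[0].message.content)

# Embedding endpoint served by infinity_emb on port 7997.
emb_client = openai.OpenAI(base_url=os.environ["EMBEDDING_BASE_URL"], api_key="unused")
res = emb_client.embeddings.create(model=os.environ["EMBEDDING_MODEL"], input=["hi"])
print(len(res.data[0].embedding))  # dimensionality of the embedding vectors
```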
lynxkite-lynxscribe/src/lynxkite_lynxscribe/llm_ops.py CHANGED
@@ -6,6 +6,7 @@ as an "agentic logic flow". It might just get deleted.
 (This is why the dependencies are left hanging.)
 """
 
+import os
 from lynxkite.core import ops
 import enum
 import jinja2
@@ -20,13 +21,20 @@ LLM_CACHE = {}
 ENV = "LLM logic"
 one_by_one.register(ENV)
 op = ops.op_registration(ENV)
+LLM_BASE_URL = os.environ.get("LLM_BASE_URL", None)
+EMBEDDING_BASE_URL = os.environ.get("EMBEDDING_BASE_URL", None)
+LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini-2024-07-18")
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small")
 
 
 def chat(*args, **kwargs):
     import openai
 
-    chat_client = openai.OpenAI(base_url="http://localhost:8080/v1")
-    key = json.dumps({"method": "chat", "args": args, "kwargs": kwargs})
+    chat_client = openai.OpenAI(base_url=LLM_BASE_URL)
+    kwargs.setdefault("model", LLM_MODEL)
+    key = json.dumps(
+        {"method": "chat", "base_url": LLM_BASE_URL, "args": args, "kwargs": kwargs}
+    )
     if key not in LLM_CACHE:
         completion = chat_client.chat.completions.create(*args, **kwargs)
         LLM_CACHE[key] = [c.message.content for c in completion.choices]
@@ -36,8 +44,16 @@ def chat(*args, **kwargs):
 def embedding(*args, **kwargs):
     import openai
 
-    embedding_client = openai.OpenAI(base_url="http://localhost:7997/")
-    key = json.dumps({"method": "embedding", "args": args, "kwargs": kwargs})
+    embedding_client = openai.OpenAI(base_url=EMBEDDING_BASE_URL)
+    kwargs.setdefault("model", EMBEDDING_MODEL)
+    key = json.dumps(
+        {
+            "method": "embedding",
+            "base_url": EMBEDDING_BASE_URL,
+            "args": args,
+            "kwargs": kwargs,
+        }
+    )
     if key not in LLM_CACHE:
         res = embedding_client.embeddings.create(*args, **kwargs)
         [data] = res.data
@@ -114,8 +130,7 @@ def create_prompt(input, *, save_as="prompt", template: ops.LongStr):
 
 
 @op("Ask LLM")
-def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int = 100):
-    assert model, "Please specify the model."
+def ask_llm(input, *, accepted_regex: str = None, max_tokens: int = 100):
     assert "prompt" in input, "Please create the prompt first."
     options = {}
     if accepted_regex:
@@ -123,7 +138,6 @@ def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int =
             "regex": accepted_regex,
         }
     results = chat(
-        model=model,
         max_tokens=max_tokens,
         messages=[
             {"role": "user", "content": input["prompt"]},
@@ -212,10 +226,9 @@ def rag(
         results = [db[int(r)] for r in results["ids"][0]]
         return {**input, "rag": results, "_collection": collection}
     if engine == RagEngine.Custom:
-        model = "michaelfeil/bge-small-en-v1.5"
         chat = input[input_field]
-        embeddings = [embedding(input=[r[db_field]], model=model) for r in db]
-        q = embedding(input=[chat], model=model)
+        embeddings = [embedding(input=[r[db_field]]) for r in db]
+        q = embedding(input=[chat])
 
         def cosine_similarity(a, b):
             return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
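Two details worth noting in this diff: the chat and embedding cache keys now include `base_url`, so switching endpoints cannot return stale cached responses; and the `Custom` RAG engine keeps retrieval deliberately simple: embed every database row, embed the query, and rank by cosine similarity. A self-contained sketch of that retrieval step, with stubbed random vectors standing in for the `embedding(...)` calls:

```python
import numpy as np

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def top_k(query_vec, doc_vecs, docs, k=2):
    # Brute-force scan: fine for the small in-memory databases this op targets.
    scores = [cosine_similarity(query_vec, d) for d in doc_vecs]
    best = np.argsort(scores)[::-1][:k]
    return [docs[i] for i in best]

# Stub embeddings; in llm_ops.py these come from embedding(input=[...]).
docs = ["graphs", "molecules", "agents"]
rng = np.random.default_rng(0)
doc_vecs = [rng.normal(size=8) for _ in docs]
query_vec = doc_vecs[1] + 0.01 * rng.normal(size=8)  # near the second doc
print(top_k(query_vec, doc_vecs, docs, k=1))  # ['molecules']
```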