darabos commited on
Commit
896d563
·
1 Parent(s): 98d901e

Hide joblib-cache, fix decorator ordering.

Browse files
lynxkite-bio/src/lynxkite_bio/__init__.py CHANGED
@@ -10,7 +10,7 @@ import rdkit.Chem.rdFingerprintGenerator
10
  import rdkit.Chem.Fingerprints.ClusterMols
11
  import scipy
12
 
13
- mem = joblib.Memory("joblib-cache")
14
  ENV = "LynxKite Graph Analytics"
15
  op = ops.op_registration(ENV)
16
 
 
10
  import rdkit.Chem.Fingerprints.ClusterMols
11
  import scipy
12
 
13
+ mem = joblib.Memory(".joblib-cache")
14
  ENV = "LynxKite Graph Analytics"
15
  op = ops.op_registration(ENV)
16
 
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py CHANGED
@@ -40,7 +40,7 @@ from bionemo.scdl.io.single_cell_collection import SingleCellCollection
40
  import scanpy
41
 
42
 
43
- mem = joblib.Memory("joblib-cache")
44
  op = ops.op_registration(core.ENV)
45
  DATA_PATH = Path("/workspace")
46
 
 
40
  import scanpy
41
 
42
 
43
+ mem = joblib.Memory(".joblib-cache")
44
  op = ops.op_registration(core.ENV)
45
  DATA_PATH = Path("/workspace")
46
 
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py CHANGED
@@ -15,7 +15,7 @@ import polars as pl
15
  import json
16
 
17
 
18
- mem = joblib.Memory("joblib-cache")
19
  op = ops.op_registration(core.ENV)
20
 
21
 
@@ -87,8 +87,8 @@ def import_parquet(*, filename: str):
87
  return pd.read_parquet(filename)
88
 
89
 
90
- @mem.cache
91
  @op("Import CSV")
 
92
  def import_csv(
93
  *, filename: str, columns: str = "<from file>", separator: str = "<auto>"
94
  ):
@@ -102,8 +102,8 @@ def import_csv(
102
  )
103
 
104
 
105
- @mem.cache
106
  @op("Import GraphML")
 
107
  def import_graphml(*, filename: str):
108
  """Imports a GraphML file."""
109
  files = fsspec.open_files(filename, compression="infer")
@@ -114,8 +114,8 @@ def import_graphml(*, filename: str):
114
  raise ValueError(f"No .graphml file found at {filename}")
115
 
116
 
117
- @mem.cache
118
  @op("Graph from OSM")
 
119
  def import_osm(*, location: str):
120
  import osmnx as ox
121
 
 
15
  import json
16
 
17
 
18
+ mem = joblib.Memory(".joblib-cache")
19
  op = ops.op_registration(core.ENV)
20
 
21
 
 
87
  return pd.read_parquet(filename)
88
 
89
 
 
90
  @op("Import CSV")
91
+ @mem.cache
92
  def import_csv(
93
  *, filename: str, columns: str = "<from file>", separator: str = "<auto>"
94
  ):
 
102
  )
103
 
104
 
 
105
  @op("Import GraphML")
106
+ @mem.cache
107
  def import_graphml(*, filename: str):
108
  """Imports a GraphML file."""
109
  files = fsspec.open_files(filename, compression="infer")
 
114
  raise ValueError(f"No .graphml file found at {filename}")
115
 
116
 
 
117
  @op("Graph from OSM")
118
+ @mem.cache
119
  def import_osm(*, location: str):
120
  import osmnx as ox
121
 
lynxkite-lynxscribe/README.md CHANGED
@@ -16,11 +16,12 @@ Run tests with:
16
  uv run pytest
17
  ```
18
 
19
- The LLM agent flow examples use local models.
20
 
21
  ```bash
22
  uv pip install infinity-emb[all]
23
  infinity_emb v2 --model-id michaelfeil/bge-small-en-v1.5
24
  uv pip install "sglang[all]>=0.4.2.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
25
  python -m sglang.launch_server --model-path SultanR/SmolTulu-1.7b-Instruct --port 8080
 
26
  ```
 
16
  uv run pytest
17
  ```
18
 
19
+ The LLM agent flow examples can use local models.
20
 
21
  ```bash
22
  uv pip install infinity-emb[all]
23
  infinity_emb v2 --model-id michaelfeil/bge-small-en-v1.5
24
  uv pip install "sglang[all]>=0.4.2.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
25
  python -m sglang.launch_server --model-path SultanR/SmolTulu-1.7b-Instruct --port 8080
26
+ LLM_BASE_URL='http://localhost:8080/v1' EMBEDDING_BASE_URL='http://localhost:7997/' lynxkite
27
  ```
lynxkite-lynxscribe/src/lynxkite_lynxscribe/llm_ops.py CHANGED
@@ -6,6 +6,7 @@ as an "agentic logic flow". It might just get deleted.
6
  (This is why the dependencies are left hanging.)
7
  """
8
 
 
9
  from lynxkite.core import ops
10
  import enum
11
  import jinja2
@@ -20,13 +21,20 @@ LLM_CACHE = {}
20
  ENV = "LLM logic"
21
  one_by_one.register(ENV)
22
  op = ops.op_registration(ENV)
 
 
 
 
23
 
24
 
25
  def chat(*args, **kwargs):
26
  import openai
27
 
28
- chat_client = openai.OpenAI()
29
- key = json.dumps({"method": "chat", "args": args, "kwargs": kwargs})
 
 
 
30
  if key not in LLM_CACHE:
31
  completion = chat_client.chat.completions.create(*args, **kwargs)
32
  LLM_CACHE[key] = [c.message.content for c in completion.choices]
@@ -36,8 +44,16 @@ def chat(*args, **kwargs):
36
  def embedding(*args, **kwargs):
37
  import openai
38
 
39
- embedding_client = openai.OpenAI()
40
- key = json.dumps({"method": "embedding", "args": args, "kwargs": kwargs})
 
 
 
 
 
 
 
 
41
  if key not in LLM_CACHE:
42
  res = embedding_client.embeddings.create(*args, **kwargs)
43
  [data] = res.data
 
6
  (This is why the dependencies are left hanging.)
7
  """
8
 
9
+ import os
10
  from lynxkite.core import ops
11
  import enum
12
  import jinja2
 
21
  ENV = "LLM logic"
22
  one_by_one.register(ENV)
23
  op = ops.op_registration(ENV)
24
+ LLM_BASE_URL = os.environ.get("LLM_BASE_URL", None)
25
+ EMBEDDING_BASE_URL = os.environ.get("EMBEDDING_BASE_URL", None)
26
+ LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini-2024-07-18")
27
+ EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small")
28
 
29
 
30
  def chat(*args, **kwargs):
31
  import openai
32
 
33
+ chat_client = openai.OpenAI(base_url=LLM_BASE_URL)
34
+ kwargs.setdefault("model", LLM_MODEL)
35
+ key = json.dumps(
36
+ {"method": "chat", "base_url": LLM_BASE_URL, "args": args, "kwargs": kwargs}
37
+ )
38
  if key not in LLM_CACHE:
39
  completion = chat_client.chat.completions.create(*args, **kwargs)
40
  LLM_CACHE[key] = [c.message.content for c in completion.choices]
 
44
  def embedding(*args, **kwargs):
45
  import openai
46
 
47
+ embedding_client = openai.OpenAI(base_url=EMBEDDING_BASE_URL)
48
+ kwargs.setdefault("model", EMBEDDING_MODEL)
49
+ key = json.dumps(
50
+ {
51
+ "method": "embedding",
52
+ "base_url": EMBEDDING_BASE_URL,
53
+ "args": args,
54
+ "kwargs": kwargs,
55
+ }
56
+ )
57
  if key not in LLM_CACHE:
58
  res = embedding_client.embeddings.create(*args, **kwargs)
59
  [data] = res.data