darabos commited on
Commit
896d563
·
1 Parent(s): 98d901e

Hide joblib-cache, fix decorator ordering.

Browse files
lynxkite-bio/src/lynxkite_bio/__init__.py CHANGED
@@ -10,7 +10,7 @@ import rdkit.Chem.rdFingerprintGenerator
10
  import rdkit.Chem.Fingerprints.ClusterMols
11
  import scipy
12
 
13
- mem = joblib.Memory("joblib-cache")
14
  ENV = "LynxKite Graph Analytics"
15
  op = ops.op_registration(ENV)
16
 
 
10
  import rdkit.Chem.Fingerprints.ClusterMols
11
  import scipy
12
 
13
+ mem = joblib.Memory(".joblib-cache")
14
  ENV = "LynxKite Graph Analytics"
15
  op = ops.op_registration(ENV)
16
 
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py CHANGED
@@ -40,7 +40,7 @@ from bionemo.scdl.io.single_cell_collection import SingleCellCollection
40
  import scanpy
41
 
42
 
43
- mem = joblib.Memory("joblib-cache")
44
  op = ops.op_registration(core.ENV)
45
  DATA_PATH = Path("/workspace")
46
 
 
40
  import scanpy
41
 
42
 
43
+ mem = joblib.Memory(".joblib-cache")
44
  op = ops.op_registration(core.ENV)
45
  DATA_PATH = Path("/workspace")
46
 
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py CHANGED
@@ -15,7 +15,7 @@ import polars as pl
15
  import json
16
 
17
 
18
- mem = joblib.Memory("joblib-cache")
19
  op = ops.op_registration(core.ENV)
20
 
21
 
@@ -87,8 +87,8 @@ def import_parquet(*, filename: str):
87
  return pd.read_parquet(filename)
88
 
89
 
90
- @mem.cache
91
  @op("Import CSV")
 
92
  def import_csv(
93
  *, filename: str, columns: str = "<from file>", separator: str = "<auto>"
94
  ):
@@ -102,8 +102,8 @@ def import_csv(
102
  )
103
 
104
 
105
- @mem.cache
106
  @op("Import GraphML")
 
107
  def import_graphml(*, filename: str):
108
  """Imports a GraphML file."""
109
  files = fsspec.open_files(filename, compression="infer")
@@ -114,8 +114,8 @@ def import_graphml(*, filename: str):
114
  raise ValueError(f"No .graphml file found at {filename}")
115
 
116
 
117
- @mem.cache
118
  @op("Graph from OSM")
 
119
  def import_osm(*, location: str):
120
  import osmnx as ox
121
 
 
15
  import json
16
 
17
 
18
+ mem = joblib.Memory(".joblib-cache")
19
  op = ops.op_registration(core.ENV)
20
 
21
 
 
87
  return pd.read_parquet(filename)
88
 
89
 
 
90
  @op("Import CSV")
91
+ @mem.cache
92
  def import_csv(
93
  *, filename: str, columns: str = "<from file>", separator: str = "<auto>"
94
  ):
 
102
  )
103
 
104
 
 
105
  @op("Import GraphML")
106
+ @mem.cache
107
  def import_graphml(*, filename: str):
108
  """Imports a GraphML file."""
109
  files = fsspec.open_files(filename, compression="infer")
 
114
  raise ValueError(f"No .graphml file found at {filename}")
115
 
116
 
 
117
  @op("Graph from OSM")
118
+ @mem.cache
119
  def import_osm(*, location: str):
120
  import osmnx as ox
121
 
lynxkite-lynxscribe/README.md CHANGED
@@ -16,11 +16,12 @@ Run tests with:
16
  uv run pytest
17
  ```
18
 
19
- The LLM agent flow examples use local models.
20
 
21
  ```bash
22
  uv pip install infinity-emb[all]
23
  infinity_emb v2 --model-id michaelfeil/bge-small-en-v1.5
24
  uv pip install "sglang[all]>=0.4.2.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
25
  python -m sglang.launch_server --model-path SultanR/SmolTulu-1.7b-Instruct --port 8080
 
26
  ```
 
16
  uv run pytest
17
  ```
18
 
19
+ The LLM agent flow examples can use local models.
20
 
21
  ```bash
22
  uv pip install infinity-emb[all]
23
  infinity_emb v2 --model-id michaelfeil/bge-small-en-v1.5
24
  uv pip install "sglang[all]>=0.4.2.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
25
  python -m sglang.launch_server --model-path SultanR/SmolTulu-1.7b-Instruct --port 8080
26
+ LLM_BASE_URL='http://localhost:8080/v1' EMBEDDING_BASE_URL='http://localhost:7997/' lynxkite
27
  ```
lynxkite-lynxscribe/src/lynxkite_lynxscribe/llm_ops.py CHANGED
@@ -6,6 +6,7 @@ as an "agentic logic flow". It might just get deleted.
6
  (This is why the dependencies are left hanging.)
7
  """
8
 
 
9
  from lynxkite.core import ops
10
  import enum
11
  import jinja2
@@ -20,13 +21,20 @@ LLM_CACHE = {}
20
  ENV = "LLM logic"
21
  one_by_one.register(ENV)
22
  op = ops.op_registration(ENV)
 
 
 
 
23
 
24
 
25
  def chat(*args, **kwargs):
26
  import openai
27
 
28
- chat_client = openai.OpenAI()
29
- key = json.dumps({"method": "chat", "args": args, "kwargs": kwargs})
 
 
 
30
  if key not in LLM_CACHE:
31
  completion = chat_client.chat.completions.create(*args, **kwargs)
32
  LLM_CACHE[key] = [c.message.content for c in completion.choices]
@@ -36,8 +44,16 @@ def chat(*args, **kwargs):
36
  def embedding(*args, **kwargs):
37
  import openai
38
 
39
- embedding_client = openai.OpenAI()
40
- key = json.dumps({"method": "embedding", "args": args, "kwargs": kwargs})
 
 
 
 
 
 
 
 
41
  if key not in LLM_CACHE:
42
  res = embedding_client.embeddings.create(*args, **kwargs)
43
  [data] = res.data
 
6
  (This is why the dependencies are left hanging.)
7
  """
8
 
9
+ import os
10
  from lynxkite.core import ops
11
  import enum
12
  import jinja2
 
21
  ENV = "LLM logic"
22
  one_by_one.register(ENV)
23
  op = ops.op_registration(ENV)
24
+ LLM_BASE_URL = os.environ.get("LLM_BASE_URL", None)
25
+ EMBEDDING_BASE_URL = os.environ.get("EMBEDDING_BASE_URL", None)
26
+ LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini-2024-07-18")
27
+ EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small")
28
 
29
 
30
  def chat(*args, **kwargs):
31
  import openai
32
 
33
+ chat_client = openai.OpenAI(base_url=LLM_BASE_URL)
34
+ kwargs.setdefault("model", LLM_MODEL)
35
+ key = json.dumps(
36
+ {"method": "chat", "base_url": LLM_BASE_URL, "args": args, "kwargs": kwargs}
37
+ )
38
  if key not in LLM_CACHE:
39
  completion = chat_client.chat.completions.create(*args, **kwargs)
40
  LLM_CACHE[key] = [c.message.content for c in completion.choices]
 
44
  def embedding(*args, **kwargs):
45
  import openai
46
 
47
+ embedding_client = openai.OpenAI(base_url=EMBEDDING_BASE_URL)
48
+ kwargs.setdefault("model", EMBEDDING_MODEL)
49
+ key = json.dumps(
50
+ {
51
+ "method": "embedding",
52
+ "base_url": EMBEDDING_BASE_URL,
53
+ "args": args,
54
+ "kwargs": kwargs,
55
+ }
56
+ )
57
  if key not in LLM_CACHE:
58
  res = embedding_client.embeddings.create(*args, **kwargs)
59
  [data] = res.data