darabos committed
Commit 5426f27 · 2 parents: 6568046 814aca7

Merge branch 'main' into darabos-model-designer

Dockerfile CHANGED
@@ -5,12 +5,16 @@ USER node
 ENV HOME=/home/node PATH=/home/node/.local/bin:$PATH
 WORKDIR $HOME/app
 COPY --chown=node . $HOME/app
-RUN uv venv && uv pip install \
+ENV GIT_SSH_COMMAND="ssh -i /run/secrets/LYNXSCRIBE_DEPLOY_KEY -o StrictHostKeyChecking=no"
+RUN --mount=type=secret,id=LYNXSCRIBE_DEPLOY_KEY,mode=0444,required=true \
+  uv venv && uv pip install \
   -e lynxkite-core \
   -e lynxkite-app \
   -e lynxkite-graph-analytics \
   -e lynxkite-bio \
-  -e lynxkite-pillow-example
+  -e lynxkite-lynxscribe \
+  -e lynxkite-pillow-example \
+  chromadb openai
 WORKDIR $HOME/app/examples
 ENV PORT=7860
 CMD ["uv", "run", "lynxkite"]
examples/Graph RAG CHANGED
@@ -510,8 +510,7 @@
   "title": "Ask LLM",
   "params": {
     "max_tokens": 100.0,
-    "accepted_regex": "",
-    "model": "SultanR/SmolTulu-1.7b-Instruct"
+    "accepted_regex": ""
   },
   "display": null,
   "error": null,
@@ -541,13 +540,6 @@
       "type": {
         "type": "<class 'int'>"
       }
-    },
-    "model": {
-      "type": {
-        "type": "<class 'str'>"
-      },
-      "default": null,
-      "name": "model"
     }
   },
   "outputs": {
lynxkite-bio/src/lynxkite_bio/__init__.py CHANGED
@@ -10,7 +10,7 @@ import rdkit.Chem.rdFingerprintGenerator
 import rdkit.Chem.Fingerprints.ClusterMols
 import scipy
 
-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 ENV = "LynxKite Graph Analytics"
 op = ops.op_registration(ENV)
 
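The only change is the cache directory moving to a hidden `.joblib-cache`, which keeps it out of casual directory listings. For reference, a minimal sketch of what `joblib.Memory` provides (independent of LynxKite):

```python
import joblib

mem = joblib.Memory(".joblib-cache")  # results are persisted under this directory

@mem.cache
def slow_square(x):
    print("computing...")  # only runs on a cache miss
    return x * x

slow_square(4)  # computes, prints, and writes the result to .joblib-cache
slow_square(4)  # returned straight from the on-disk cache
```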
lynxkite-graph-analytics/src/lynxkite_graph_analytics/bionemo_ops.py CHANGED
@@ -40,7 +40,7 @@ from bionemo.scdl.io.single_cell_collection import SingleCellCollection
 import scanpy
 
 
-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)
 DATA_PATH = Path("/workspace")
 
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py CHANGED
@@ -15,7 +15,7 @@ import polars as pl
 import json
 
 
-mem = joblib.Memory("joblib-cache")
+mem = joblib.Memory(".joblib-cache")
 op = ops.op_registration(core.ENV)
 
 
@@ -87,8 +87,8 @@ def import_parquet(*, filename: str):
     return pd.read_parquet(filename)
 
 
-@mem.cache
 @op("Import CSV")
+@mem.cache
 def import_csv(
     *, filename: str, columns: str = "<from file>", separator: str = "<auto>"
 ):
@@ -102,8 +102,8 @@ def import_csv(
     )
 
 
-@mem.cache
 @op("Import GraphML")
+@mem.cache
 def import_graphml(*, filename: str):
     """Imports a GraphML file."""
     files = fsspec.open_files(filename, compression="infer")
@@ -114,8 +114,8 @@ def import_graphml(*, filename: str):
         raise ValueError(f"No .graphml file found at {filename}")
 
 
-@mem.cache
 @op("Graph from OSM")
+@mem.cache
 def import_osm(*, location: str):
     import osmnx as ox
 
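Besides the directory rename, note the decorator reordering. Decorators apply bottom-up, so with `@op(...)` outermost the registry now receives the memoized wrapper, and calls dispatched through the registry hit the joblib cache. A small sketch of the ordering semantics, using a toy `op` stand-in rather than LynxKite's real `op_registration`:

```python
import joblib

mem = joblib.Memory(".joblib-cache")
REGISTRY = {}

def op(name):
    # Toy registry: records whatever callable the decorator receives.
    def register(func):
        REGISTRY[name] = func
        return func
    return register

@op("Import CSV")  # applied second: registers the cached wrapper
@mem.cache         # applied first: wraps the raw function with disk caching
def import_csv(*, filename: str):
    return open(filename).read()

# The old order (@mem.cache above @op) would have registered the raw
# function, so REGISTRY["Import CSV"](...) would bypass the cache entirely.
```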
lynxkite-lynxscribe/README.md CHANGED
@@ -16,11 +16,16 @@ Run tests with:
 uv run pytest
 ```
 
-The LLM agent flow examples use local models.
+The LLM agent flow examples can use local models.
 
 ```bash
 uv pip install infinity-emb[all]
 infinity_emb v2 --model-id michaelfeil/bge-small-en-v1.5
 uv pip install "sglang[all]>=0.4.2.post2" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer/
 python -m sglang.launch_server --model-path SultanR/SmolTulu-1.7b-Instruct --port 8080
+export LLM_BASE_URL='http://localhost:8080/v1'
+export LLM_MODEL='SultanR/SmolTulu-1.7b-Instruct'
+export EMBEDDING_BASE_URL='http://localhost:7997/'
+export EMBEDDING_MODEL='michaelfeil/bge-small-en-v1.5'
+lynxkite
 ```
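These exports are what `llm_ops.py` (below) reads to configure its OpenAI-compatible clients. A quick sanity check of both endpoints before launching LynxKite (a sketch; it assumes the two servers above are running, and passes a placeholder API key because the client requires one even when local servers ignore it):

```python
import os
import openai

# Chat endpoint served by sglang on port 8080.
chat_client = openai.OpenAI(base_url=os.environ["LLM_BASE_URL"], api_key="unused")
reply = chat_client.chat.completions.create(
    model=os.environ["LLM_MODEL"],
    messages=[{"role": "user", "content": "Say hi."}],
    max_tokens=5,
)
print(reply.choices[0].message.content)

# Embedding endpoint served by infinity_emb on port 7997.
emb_client = openai.OpenAI(base_url=os.environ["EMBEDDING_BASE_URL"], api_key="unused")
res = emb_client.embeddings.create(model=os.environ["EMBEDDING_MODEL"], input=["hi"])
print(len(res.data[0].embedding))  # dimensionality of the embedding vectors
```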
lynxkite-lynxscribe/src/lynxkite_lynxscribe/llm_ops.py CHANGED
@@ -6,6 +6,7 @@ as an "agentic logic flow". It might just get deleted.
 (This is why the dependencies are left hanging.)
 """
 
+import os
 from lynxkite.core import ops
 import enum
 import jinja2
@@ -20,13 +21,20 @@ LLM_CACHE = {}
 ENV = "LLM logic"
 one_by_one.register(ENV)
 op = ops.op_registration(ENV)
+LLM_BASE_URL = os.environ.get("LLM_BASE_URL", None)
+EMBEDDING_BASE_URL = os.environ.get("EMBEDDING_BASE_URL", None)
+LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini-2024-07-18")
+EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-small")
 
 
 def chat(*args, **kwargs):
     import openai
 
-    chat_client = openai.OpenAI(base_url="http://localhost:8080/v1")
-    key = json.dumps({"method": "chat", "args": args, "kwargs": kwargs})
+    chat_client = openai.OpenAI(base_url=LLM_BASE_URL)
+    kwargs.setdefault("model", LLM_MODEL)
+    key = json.dumps(
+        {"method": "chat", "base_url": LLM_BASE_URL, "args": args, "kwargs": kwargs}
+    )
     if key not in LLM_CACHE:
         completion = chat_client.chat.completions.create(*args, **kwargs)
         LLM_CACHE[key] = [c.message.content for c in completion.choices]
@@ -36,8 +44,16 @@ def chat(*args, **kwargs):
 def embedding(*args, **kwargs):
     import openai
 
-    embedding_client = openai.OpenAI(base_url="http://localhost:7997/")
-    key = json.dumps({"method": "embedding", "args": args, "kwargs": kwargs})
+    embedding_client = openai.OpenAI(base_url=EMBEDDING_BASE_URL)
+    kwargs.setdefault("model", EMBEDDING_MODEL)
+    key = json.dumps(
+        {
+            "method": "embedding",
+            "base_url": EMBEDDING_BASE_URL,
+            "args": args,
+            "kwargs": kwargs,
+        }
+    )
     if key not in LLM_CACHE:
         res = embedding_client.embeddings.create(*args, **kwargs)
         [data] = res.data
@@ -114,8 +130,7 @@ def create_prompt(input, *, save_as="prompt", template: ops.LongStr):
 
 
 @op("Ask LLM")
-def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int = 100):
-    assert model, "Please specify the model."
+def ask_llm(input, *, accepted_regex: str = None, max_tokens: int = 100):
     assert "prompt" in input, "Please create the prompt first."
     options = {}
     if accepted_regex:
@@ -123,7 +138,6 @@ def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int =
             "regex": accepted_regex,
         }
     results = chat(
-        model=model,
         max_tokens=max_tokens,
         messages=[
             {"role": "user", "content": input["prompt"]},
@@ -212,10 +226,9 @@ def rag(
         results = [db[int(r)] for r in results["ids"][0]]
         return {**input, "rag": results, "_collection": collection}
     if engine == RagEngine.Custom:
-        model = "michaelfeil/bge-small-en-v1.5"
         chat = input[input_field]
-        embeddings = [embedding(input=[r[db_field]], model=model) for r in db]
-        q = embedding(input=[chat], model=model)
+        embeddings = [embedding(input=[r[db_field]]) for r in db]
+        q = embedding(input=[chat])
 
         def cosine_similarity(a, b):
             return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
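Two details worth noting in this diff: the chat and embedding cache keys now include `base_url`, so switching endpoints cannot return stale cached responses; and the `Custom` RAG engine keeps retrieval deliberately simple: embed every database row, embed the query, and rank by cosine similarity. A self-contained sketch of that retrieval step, with stubbed random vectors standing in for the `embedding(...)` calls:

```python
import numpy as np

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def top_k(query_vec, doc_vecs, docs, k=2):
    # Brute-force scan: fine for the small in-memory databases this op targets.
    scores = [cosine_similarity(query_vec, d) for d in doc_vecs]
    best = np.argsort(scores)[::-1][:k]
    return [docs[i] for i in best]

# Stub embeddings; in llm_ops.py these come from embedding(input=[...]).
docs = ["graphs", "molecules", "agents"]
rng = np.random.default_rng(0)
doc_vecs = [rng.normal(size=8) for _ in docs]
query_vec = doc_vecs[1] + 0.01 * rng.normal(size=8)  # near the second doc
print(top_k(query_vec, doc_vecs, docs, k=1))  # ['molecules']
```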