Spaces:
Running
Running
Start/stop containers on Kubernetes.
Browse files
lynxkite-bio/src/lynxkite_bio/__init__.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
"""An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications."""
|
| 2 |
|
|
|
|
| 3 |
from . import nims # noqa (imported to trigger registration)
|
| 4 |
from . import rdkit # noqa (imported to trigger registration)
|
|
|
|
| 1 |
"""An expansion for `lynxkite-graph-analytics` that provides algorithms for biological applications."""
|
| 2 |
|
| 3 |
+
from . import llm # noqa (imported to trigger registration)
|
| 4 |
from . import nims # noqa (imported to trigger registration)
|
| 5 |
from . import rdkit # noqa (imported to trigger registration)
|
lynxkite-bio/src/lynxkite_bio/k8s.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tools for starting and stopping Docker containers on Kubernetes.
|
| 2 |
+
|
| 3 |
+
A test setup for this feature:
|
| 4 |
+
|
| 5 |
+
```bash
|
| 6 |
+
# Start minikube with GPU support:
|
| 7 |
+
minikube start --driver docker --container-runtime docker --gpus all
|
| 8 |
+
# Make the services accessible:
|
| 9 |
+
minikube tunnel
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
Use `k8s.needs()` to declare a Kubernetes dependency for an operation. For example:
|
| 13 |
+
|
| 14 |
+
```python
|
| 15 |
+
@op("Ask LLM", slow=True)
|
| 16 |
+
@k8s.needs(
|
| 17 |
+
name="vllm-for-ask-llm-op",
|
| 18 |
+
image="vllm/vllm-openai:latest",
|
| 19 |
+
port=8000,
|
| 20 |
+
args=["--model", "google/gemma-3-1b-it"],
|
| 21 |
+
health_probe="/health",
|
| 22 |
+
env=k8s.env_vars("HUGGING_FACE_HUB_TOKEN"),
|
| 23 |
+
storage_path="/root/.cache/huggingface",
|
| 24 |
+
storage_size="10Gi",
|
| 25 |
+
)
|
| 26 |
+
def ask_llm(df: pd.DataFrame, *, question: ops.LongStr):
|
| 27 |
+
ip = k8s.get_ip("vllm-for-ask-llm-op")
|
| 28 |
+
client = openai.OpenAI(api_key="EMPTY", base_url=f"http://{ip}/v1")
|
| 29 |
+
# ...
|
| 30 |
+
```
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
import functools
|
| 34 |
+
import os
|
| 35 |
+
import queue
|
| 36 |
+
import threading
|
| 37 |
+
import time
|
| 38 |
+
import httpx
|
| 39 |
+
from kubernetes import client, config
|
| 40 |
+
from kubernetes.client.rest import ApiException
|
| 41 |
+
|
| 42 |
+
# NOTE(review): loading the kube config at import time means importing this
# module fails outside a configured Kubernetes environment — consider lazy
# loading on first use. TODO confirm whether eager loading is intentional.
config.load_kube_config()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _run(
    *,
    name,
    image,
    port,
    namespace,
    storage_size,
    storage_path,
    health_probe,
    **kwargs,
):
    """Create a single-replica Deployment and a LoadBalancer Service for it.

    Any extra keyword arguments (e.g. ``args``, ``env``) are forwarded to
    ``client.V1Container`` unchanged.

    Args:
        name: Used as the Deployment name, the app label, and the container name.
        image: Docker image to run.
        port: Container port the Service forwards to (exposed as port 80).
        namespace: Kubernetes namespace to create the objects in.
        storage_size: If truthy, a PVC of this size is mounted at ``storage_path``.
        storage_path: Mount path for the optional persistent volume.
        health_probe: Optional HTTP path used as a readiness probe.
    """
    print(f"Starting {name} in namespace {namespace}...")
    volume_mounts = []
    volumes = []
    if storage_size:
        # Persistent storage is optional. The PVC is reused across restarts,
        # so e.g. model caches survive the container being stopped.
        pvc_name = f"{name}-data-volume"
        if not _pvc_exists(pvc_name, namespace):
            _create_pvc(pvc_name, size=storage_size, namespace=namespace)
        volume_mounts.append(
            client.V1VolumeMount(
                name=pvc_name,
                mount_path=storage_path,
            )
        )
        volumes.append(
            client.V1Volume(
                name=pvc_name,
                persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                    claim_name=pvc_name,
                ),
            )
        )
    container = client.V1Container(
        name=name,
        image=image,
        ports=[client.V1ContainerPort(container_port=port)],
        volume_mounts=volume_mounts,
        **kwargs,
    )
    if health_probe:
        # The readiness probe gates the Service endpoints on the health check,
        # so `get_ip` only connects once the container reports healthy.
        container.readiness_probe = client.V1Probe(
            http_get=client.V1HTTPGetAction(path=health_probe, port=port, scheme="HTTP"),
        )
    deployment = client.V1Deployment(
        metadata=client.V1ObjectMeta(name=name),
        spec=client.V1DeploymentSpec(
            replicas=1,
            selector=client.V1LabelSelector(match_labels={"app": name}),
            template=client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(labels={"app": name}),
                spec=client.V1PodSpec(
                    volumes=volumes,
                    containers=[container],
                ),
            ),
        ),
    )
    apps_v1 = client.AppsV1Api()
    apps_v1.create_namespaced_deployment(namespace=namespace, body=deployment)

    # Expose the Deployment through a LoadBalancer Service on port 80.
    service_name = f"{name}-service"
    service = client.V1Service(
        metadata=client.V1ObjectMeta(name=service_name, labels={"app": name}),
        spec=client.V1ServiceSpec(
            selector={"app": name},
            ports=[client.V1ServicePort(protocol="TCP", port=80, target_port=port)],
            type="LoadBalancer",
        ),
    )
    core_v1 = client.CoreV1Api()
    core_v1.create_namespaced_service(namespace=namespace, body=service)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _stop(name, namespace="default"):
    """Delete the Deployment and the Service that `_run` created."""
    print(f"Stopping {name} in namespace {namespace}...")
    client.AppsV1Api().delete_namespaced_deployment(name, namespace)
    client.CoreV1Api().delete_namespaced_service(f"{name}-service", namespace)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def get_ip(name: str, namespace: str = "default", timeout: int = 3600, interval: int = 1) -> str:
    """Look up the IP address where the operation can access the service.

    Polls until the LoadBalancer Service has an external IP (or hostname) and
    the service accepts HTTP connections.

    Args:
        name: The service name as passed to `k8s.needs()`.
        namespace: Kubernetes namespace of the service.
        timeout: Maximum seconds to wait before raising `TimeoutError`.
        interval: Seconds to sleep between polls.

    Raises:
        TimeoutError: If no reachable address appears within `timeout` seconds.
        ApiException: For any Kubernetes API error other than 404.
    """
    service_name = f"{name}-service"
    core_v1 = client.CoreV1Api()
    # Use a monotonic clock for the deadline so wall-clock adjustments
    # (NTP jumps, DST) cannot shorten or extend the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            svc = core_v1.read_namespaced_service(service_name, namespace)
            ingress = svc.status.load_balancer.ingress
            if ingress:
                ip = ingress[0].ip or ingress[0].hostname
                # The address can be assigned before the pod is ready; only
                # return it once we can actually connect.
                if ip and _can_connect(ip):
                    return ip
        except ApiException as e:
            # 404 just means the service is not created yet; keep waiting.
            if e.status != 404:
                raise
        time.sleep(interval)
    raise TimeoutError(f"Timed out waiting for external IP of service '{service_name}'")
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def _can_connect(ip: str) -> bool:
    """Return True if an HTTP request to the address succeeds (any status code)."""
    try:
        httpx.get(f"http://{ip}/")
    except httpx.RequestError:
        return False
    return True
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def _is_running(name: str, namespace: str = "default") -> bool:
    """Return True if a Deployment called `name` exists in the namespace."""
    try:
        client.AppsV1Api().read_namespaced_deployment(name, namespace)
    except ApiException as e:
        if e.status == 404:
            return False
        raise
    return True
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def _stop_if_running(name, namespace="default"):
    """Stop the service, tolerating it already being stopped."""
    running = _is_running(name, namespace)
    if running:
        _stop(name, namespace)
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def _create_pvc(name, size="1Gi", namespace="default"):
    """Create a ReadWriteOnce PersistentVolumeClaim of the given size."""
    claim_spec = client.V1PersistentVolumeClaimSpec(
        access_modes=["ReadWriteOnce"],
        resources=client.V1ResourceRequirements(requests={"storage": size}),
    )
    claim = client.V1PersistentVolumeClaim(
        metadata=client.V1ObjectMeta(name=name),
        spec=claim_spec,
    )
    client.CoreV1Api().create_namespaced_persistent_volume_claim(namespace=namespace, body=claim)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _pvc_exists(name: str, namespace: str = "default") -> bool:
    """Return True if a PersistentVolumeClaim with this name already exists."""
    api = client.CoreV1Api()
    try:
        api.read_namespaced_persistent_volume_claim(name=name, namespace=namespace)
    except ApiException as e:
        if e.status == 404:
            return False
        raise
    return True
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def env_vars(*names: str) -> list:
    """A convenient way to pass local environment variables to the microservice.

    Returns a list of ``{"name": ..., "value": ...}`` dicts suitable for the
    ``env`` parameter of `needs()`.

    Raises:
        KeyError: Naming *all* missing environment variables at once, rather
            than failing opaquely on the first one.
    """
    missing = [name for name in names if name not in os.environ]
    if missing:
        raise KeyError(f"Missing environment variable(s): {', '.join(missing)}")
    return [{"name": name, "value": os.environ[name]} for name in names]
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def needs(
    name: str,
    image: str,
    port: int,
    args: list = None,
    env: list = None,
    health_probe: str = None,
    storage_size: str = None,
    storage_path: str = "/data",
    namespace: str = "default",
):
    """Use this decorator to configure a microservice that the operation depends on.

    LynxKite will manage the lifecycle of the microservice for you.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*call_args, **call_kwargs):
            # Spin up (or join) the shared service before running the operation.
            _using(
                name=name,
                image=image,
                port=port,
                args=args or [],
                env=env or [],
                health_probe=health_probe,
                storage_size=storage_size,
                storage_path=storage_path,
                namespace=namespace,
            )
            try:
                return func(*call_args, **call_kwargs)
            finally:
                # Always release our claim on the service, even if the
                # operation raised.
                _stop_using(name, namespace)

        return wrapper

    return decorator
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
# Maps service name -> Queue used as a thread-safe reference counter: each
# active user of a service holds one token in its queue.
_USER_COUNTERS = {}
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _using(name, **kwargs):
    """Register a user of the service, starting the service if needed.

    Each concurrent user pushes a token into the service's counter queue;
    `_stop_using` pops it. If startup fails, the token is released before
    re-raising so the counter does not leak.
    """
    q = _USER_COUNTERS.setdefault(name, queue.Queue(-1))
    q.put(1)
    try:
        # Bug fix: check for the deployment in the namespace we deploy to,
        # not unconditionally in "default".
        if not _is_running(name, kwargs.get("namespace", "default")):
            _run(name=name, **kwargs)
    except Exception:
        q.get()
        raise
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def _stop_using(name, namespace):
    """Deregister one user of the service; schedule shutdown when none remain."""
    counter = _USER_COUNTERS[name]
    counter.get()
    if counter.empty():
        _stop_later(name, namespace)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def _stop_later(name, namespace):
    """Stop the service after a grace period, unless it gains new users meanwhile."""
    q = _USER_COUNTERS[name]

    def stop():
        time.sleep(6000)
        if q.empty():
            # Nobody started the service in the meantime.
            _stop(name, namespace)

    # Daemon thread: a non-daemon thread sleeping 6000 s would keep the
    # interpreter alive for up to 100 minutes after the program finishes.
    # Trade-off: an idle service may outlive a short-lived process.
    t = threading.Thread(target=stop, daemon=True)
    t.start()
|
lynxkite-bio/src/lynxkite_bio/llm.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM on Kubernetes, for use in bio projects or otherwise.
|
| 2 |
+
|
| 3 |
+
The BioNeMo NIMs are large. This module provides a small LLM that can be used
|
| 4 |
+
for demonstrating the Kubernetes lifecycle management without huge hardware
|
| 5 |
+
requirements.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import openai
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from lynxkite.core import ops
|
| 11 |
+
|
| 12 |
+
from . import k8s
|
| 13 |
+
|
| 14 |
+
# All ops in this module are registered in the LynxKite Graph Analytics environment.
ENV = "LynxKite Graph Analytics"
op = ops.op_registration(ENV)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@op("Ask LLM", slow=True)
|
| 19 |
+
@k8s.needs(
|
| 20 |
+
name="lynxkite-bio-small-llm",
|
| 21 |
+
image="vllm/vllm-openai:latest",
|
| 22 |
+
port=8000,
|
| 23 |
+
args=["--model", "google/gemma-3-1b-it"],
|
| 24 |
+
health_probe="/health",
|
| 25 |
+
env=k8s.env_vars("HUGGING_FACE_HUB_TOKEN"),
|
| 26 |
+
storage_path="/root/.cache/huggingface",
|
| 27 |
+
storage_size="10Gi",
|
| 28 |
+
)
|
| 29 |
+
def ask_llm(df: pd.DataFrame, *, question: ops.LongStr, include_columns="<all>"):
|
| 30 |
+
if not question:
|
| 31 |
+
return df
|
| 32 |
+
ip = k8s.get_ip("lynxkite-bio-small-llm")
|
| 33 |
+
print(f"LLM is running at {ip}")
|
| 34 |
+
client = openai.OpenAI(api_key="EMPTY", base_url=f"http://{ip}/v1")
|
| 35 |
+
responses = []
|
| 36 |
+
for row in df.iterrows():
|
| 37 |
+
data = row[1].to_dict()
|
| 38 |
+
if include_columns != "<all>":
|
| 39 |
+
data = {k: v for k, v in data.items() if k in include_columns}
|
| 40 |
+
prompt = (
|
| 41 |
+
f"Answer the question based on the following data:\n\n{data}\n\nQuestion: {question}"
|
| 42 |
+
)
|
| 43 |
+
response = client.chat.completions.create(
|
| 44 |
+
model="google/gemma-3-1b-it",
|
| 45 |
+
messages=[
|
| 46 |
+
{
|
| 47 |
+
"role": "user",
|
| 48 |
+
"content": prompt,
|
| 49 |
+
},
|
| 50 |
+
],
|
| 51 |
+
)
|
| 52 |
+
responses.append(response.choices[0].message.content)
|
| 53 |
+
df = df.copy()
|
| 54 |
+
df["response"] = responses
|
| 55 |
+
return df
|