darabos committed on
Commit
e8a8341
·
1 Parent(s): 5826642

Split one-by-one executor into separate module.

Browse files
Files changed (2) hide show
  1. server/executors/one_by_one.py +125 -0
  2. server/llm_ops.py +6 -119
server/executors/one_by_one.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .. import ops
2
+ from .. import workspace
3
+ import fastapi
4
+ import json
5
+ import pandas as pd
6
+ import traceback
7
+ import inspect
8
+ import typing
9
+
10
class Context(ops.BaseConfig):
    '''Passed to operation functions as "_ctx" if they have such a parameter.

    Carries per-node execution state for the one-by-one executor.
    '''
    # The workspace node currently being executed.
    node: workspace.WorkspaceNode
    # The most recent value produced by this node, or None before the first run.
    last_result: typing.Any = None
14
+
15
class Output(ops.BaseConfig):
    '''Return this to send values to specific outputs of a node.'''
    # Name of the output handle the value is routed to.
    output_handle: str
    # The task payload forwarded along that handle.
    value: dict
19
+
20
+
21
def df_to_list(df):
    '''Converts a DataFrame into a list of one dict per row.

    Uses DataFrame.to_dict instead of iterating over ``df.values``:
    ``.values`` upcasts a mixed-dtype frame to a single common dtype
    (e.g. ints become floats when a float column is present), which would
    silently corrupt the task payloads.
    '''
    return df.to_dict(orient='records')
23
+
24
def has_ctx(op):
    '''True if the operation's function declares a "_ctx" parameter.'''
    return '_ctx' in inspect.signature(op.func).parameters
27
+
28
def register(env: str):
    '''Registers the one-by-one executor as the executor for the given environment.

    env: the environment name used as the key in ops.EXECUTORS.
    '''
    ops.EXECUTORS[env] = execute
31
+
32
def get_stages(ws, catalog):
    '''Inputs on top are batch inputs. We decompose the graph into a DAG of
    components along these edges.

    Returns a list of sets of node IDs. Each set is a "stage": every task in
    a stage must finish before later stages run, because a later stage
    consumes an earlier stage's complete output through a batch ("top")
    input. The final stage contains all nodes, so nodes outside any batch
    component still execute.
    '''
    nodes = {n.id: n for n in ws.nodes}
    batch_inputs = {}
    inputs = {}
    for edge in ws.edges:
        inputs.setdefault(edge.target, []).append(edge.source)
        node = nodes[edge.target]
        op = catalog[node.data.title]
        i = op.inputs[edge.targetHandle]
        if i.position == 'top':
            batch_inputs.setdefault(edge.target, []).append(edge.source)
    stages = []
    # (The batch target itself is not needed here, only its sources.)
    for bss in batch_inputs.values():
        # Collect the full upstream closure feeding this batch input.
        upstream = set(bss)
        new = set(bss)
        while new:
            n = new.pop()
            for i in inputs.get(n, []):
                if i not in upstream:
                    upstream.add(i)
                    new.add(i)
        stages.append(upstream)
    # Smaller (inner) components run first.
    stages.sort(key=len)
    stages.append(set(nodes))
    return stages
58
+
59
# NOTE(review): not referenced in this module — presumably passed by callers
# as execute()'s "cache" argument to persist results across runs; confirm.
EXECUTOR_OUTPUT_CACHE = {}
60
+
61
def execute(ws, catalog, cache=None):
    '''Executes the workspace one task at a time.

    Each node's function is called once per incoming task. A result that is a
    list or a DataFrame fans out into multiple downstream tasks. Nodes run
    stage by stage (see get_stages) so that batch ("top") inputs receive the
    complete output of their upstream component before the consumer starts.

    ws: the workspace to run; errors are recorded on node.data.error.
    catalog: maps operation titles to operation definitions.
    cache: optional dict memoizing results keyed by the JSON-encoded
        (inputs, params). Pass a dict — even an empty one — to enable caching.
    '''
    nodes = {n.id: n for n in ws.nodes}
    contexts = {n.id: Context(node=n) for n in ws.nodes}
    edges = {n.id: [] for n in ws.nodes}
    for e in ws.edges:
        edges[e.source].append(e)
    tasks = {}
    NO_INPUT = object()  # Marker for initial tasks.
    for node in ws.nodes:
        node.data.error = None
        op = catalog[node.data.title]
        # Start tasks for nodes that have no inputs.
        if not op.inputs:
            tasks[node.id] = [NO_INPUT]
    batch_inputs = {}
    # Run the rest until we run out of tasks.
    for stage in get_stages(ws, catalog):
        next_stage = {}
        while tasks:
            n, ts = tasks.popitem()
            if n not in stage:
                # Not runnable in this stage; defer to a later one.
                next_stage.setdefault(n, []).extend(ts)
                continue
            node = nodes[n]
            data = node.data
            op = catalog[data.title]
            params = {**data.params}
            if has_ctx(op):
                params['_ctx'] = contexts[node.id]
            results = []
            for task in ts:
                try:
                    inputs = [
                        batch_inputs[(n, i.name)] if i.position == 'top' else task
                        for i in op.inputs.values()]
                    key = json.dumps(fastapi.encoders.jsonable_encoder((inputs, params)))
                    # Was "if cache:", which silently disabled caching when an
                    # empty dict was passed in.
                    if cache is not None:
                        if key not in cache:
                            cache[key] = op.func(*inputs, **params)
                        result = cache[key]
                    else:
                        result = op.func(*inputs, **params)
                except Exception as e:
                    traceback.print_exc()
                    data.error = str(e)
                    break
                contexts[node.id].last_result = result
                # Returned lists and DataFrames are considered multiple tasks.
                if isinstance(result, pd.DataFrame):
                    result = df_to_list(result)
                elif not isinstance(result, list):
                    result = [result]
                results.extend(result)
            else:  # Finished all tasks without errors.
                # Guard against empty results so an empty task list can't
                # crash the whole run with an IndexError.
                if results and op.type in ('visualization', 'table_view'):
                    data.display = results[0]
                for edge in edges[node.id]:
                    t = nodes[edge.target]
                    op = catalog[t.data.title]
                    i = op.inputs[edge.targetHandle]
                    if i.position == 'top':
                        batch_inputs.setdefault((edge.target, edge.targetHandle), []).extend(results)
                    else:
                        tasks.setdefault(edge.target, []).extend(results)
        tasks = next_stage
server/llm_ops.py CHANGED
@@ -1,33 +1,20 @@
1
  '''For specifying an LLM agent logic flow.'''
2
  from . import ops
3
  import chromadb
4
- import fastapi.encoders
5
- import inspect
6
  import jinja2
7
  import json
8
  import openai
9
  import pandas as pd
10
- import traceback
11
- import typing
12
- from . import workspace
13
 
14
  client = openai.OpenAI(base_url="http://localhost:11434/v1")
15
  jinja = jinja2.Environment()
16
  chroma_client = chromadb.Client()
17
  LLM_CACHE = {}
18
  ENV = 'LLM logic'
 
19
  op = ops.op_registration(ENV)
20
 
21
- class Context(ops.BaseConfig):
22
- '''Passed to operation functions as "_ctx" if they have such a parameter.'''
23
- node: workspace.WorkspaceNode
24
- last_result: typing.Any = None
25
-
26
- class Output(ops.BaseConfig):
27
- '''Return this to send values to specific outputs of a node.'''
28
- output_handle: str
29
- value: dict
30
-
31
  def chat(*args, **kwargs):
32
  key = json.dumps({'args': args, 'kwargs': kwargs})
33
  if key not in LLM_CACHE:
@@ -66,7 +53,7 @@ def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int =
66
  return [{**input, 'response': r} for r in results]
67
 
68
  @op("View", view="table_view")
69
- def view(input, *, _ctx: Context):
70
  v = _ctx.last_result
71
  if v:
72
  columns = v['dataframes']['df']['columns']
@@ -84,7 +71,7 @@ def view(input, *, _ctx: Context):
84
  @ops.input_position(input="right")
85
  @ops.output_position(output="left")
86
  @op("Loop")
87
- def loop(input, *, max_iterations: int = 3, _ctx: Context):
88
  '''Data can flow back here max_iterations-1 times.'''
89
  key = f'iterations-{_ctx.node.id}'
90
  input[key] = input.get(key, 0) + 1
@@ -94,11 +81,11 @@ def loop(input, *, max_iterations: int = 3, _ctx: Context):
94
  @op('Branch', outputs=['true', 'false'])
95
  def branch(input, *, expression: str):
96
  res = eval(expression, input)
97
- return Output(output_handle=str(bool(res)).lower(), value=input)
98
 
99
  @ops.input_position(db="top")
100
  @op('RAG')
101
- def rag(input, db, *, input_field='text', db_field='text', num_matches: int=10, _ctx: Context):
102
  last = _ctx.last_result
103
  if last:
104
  collection = last['_collection']
@@ -127,104 +114,4 @@ def run_python(input, *, template: str):
127
  p = p.replace(k.upper(), str(v))
128
  return p
129
 
130
- EXECUTOR_OUTPUT_CACHE = {}
131
-
132
- @ops.register_executor(ENV)
133
- def execute(ws):
134
- catalog = ops.CATALOGS[ENV]
135
- nodes = {n.id: n for n in ws.nodes}
136
- contexts = {n.id: Context(node=n) for n in ws.nodes}
137
- edges = {n.id: [] for n in ws.nodes}
138
- for e in ws.edges:
139
- edges[e.source].append(e)
140
- tasks = {}
141
- NO_INPUT = object() # Marker for initial tasks.
142
- for node in ws.nodes:
143
- node.data.error = None
144
- op = catalog[node.data.title]
145
- # Start tasks for nodes that have no inputs.
146
- if not op.inputs:
147
- tasks[node.id] = [NO_INPUT]
148
- batch_inputs = {}
149
- # Run the rest until we run out of tasks.
150
- for stage in get_stages(ws):
151
- next_stage = {}
152
- while tasks:
153
- n, ts = tasks.popitem()
154
- if n not in stage:
155
- next_stage.setdefault(n, []).extend(ts)
156
- continue
157
- node = nodes[n]
158
- data = node.data
159
- op = catalog[data.title]
160
- params = {**data.params}
161
- if has_ctx(op):
162
- params['_ctx'] = contexts[node.id]
163
- results = []
164
- for task in ts:
165
- try:
166
- inputs = [
167
- batch_inputs[(n, i.name)] if i.position == 'top' else task
168
- for i in op.inputs.values()]
169
- key = json.dumps(fastapi.encoders.jsonable_encoder((inputs, params)))
170
- if key not in EXECUTOR_OUTPUT_CACHE:
171
- EXECUTOR_OUTPUT_CACHE[key] = op.func(*inputs, **params)
172
- result = EXECUTOR_OUTPUT_CACHE[key]
173
- except Exception as e:
174
- traceback.print_exc()
175
- data.error = str(e)
176
- break
177
- contexts[node.id].last_result = result
178
- # Returned lists and DataFrames are considered multiple tasks.
179
- if isinstance(result, pd.DataFrame):
180
- result = df_to_list(result)
181
- elif not isinstance(result, list):
182
- result = [result]
183
- results.extend(result)
184
- else: # Finished all tasks without errors.
185
- if op.type == 'visualization' or op.type == 'table_view':
186
- data.display = results[0]
187
- for edge in edges[node.id]:
188
- t = nodes[edge.target]
189
- op = catalog[t.data.title]
190
- i = op.inputs[edge.targetHandle]
191
- if i.position == 'top':
192
- batch_inputs.setdefault((edge.target, edge.targetHandle), []).extend(results)
193
- else:
194
- tasks.setdefault(edge.target, []).extend(results)
195
- tasks = next_stage
196
-
197
- def df_to_list(df):
198
- return [dict(zip(df.columns, row)) for row in df.values]
199
-
200
- def has_ctx(op):
201
- sig = inspect.signature(op.func)
202
- return '_ctx' in sig.parameters
203
 
204
- def get_stages(ws):
205
- '''Inputs on top are batch inputs. We decompose the graph into a DAG of components along these edges.'''
206
- catalog = ops.CATALOGS[ENV]
207
- nodes = {n.id: n for n in ws.nodes}
208
- batch_inputs = {}
209
- inputs = {}
210
- for edge in ws.edges:
211
- inputs.setdefault(edge.target, []).append(edge.source)
212
- node = nodes[edge.target]
213
- op = catalog[node.data.title]
214
- i = op.inputs[edge.targetHandle]
215
- if i.position == 'top':
216
- batch_inputs.setdefault(edge.target, []).append(edge.source)
217
- stages = []
218
- for bt, bss in batch_inputs.items():
219
- upstream = set(bss)
220
- new = set(bss)
221
- while new:
222
- n = new.pop()
223
- for i in inputs.get(n, []):
224
- if i not in upstream:
225
- upstream.add(i)
226
- new.add(i)
227
- stages.append(upstream)
228
- stages.sort(key=lambda s: len(s))
229
- stages.append(set(nodes))
230
- return stages
 
1
  '''For specifying an LLM agent logic flow.'''
2
  from . import ops
3
  import chromadb
 
 
4
  import jinja2
5
  import json
6
  import openai
7
  import pandas as pd
8
+ from .executors import one_by_one
 
 
9
 
10
  client = openai.OpenAI(base_url="http://localhost:11434/v1")
11
  jinja = jinja2.Environment()
12
  chroma_client = chromadb.Client()
13
  LLM_CACHE = {}
14
  ENV = 'LLM logic'
15
+ one_by_one.register(ENV)
16
  op = ops.op_registration(ENV)
17
 
 
 
 
 
 
 
 
 
 
 
18
  def chat(*args, **kwargs):
19
  key = json.dumps({'args': args, 'kwargs': kwargs})
20
  if key not in LLM_CACHE:
 
53
  return [{**input, 'response': r} for r in results]
54
 
55
  @op("View", view="table_view")
56
+ def view(input, *, _ctx: one_by_one.Context):
57
  v = _ctx.last_result
58
  if v:
59
  columns = v['dataframes']['df']['columns']
 
71
  @ops.input_position(input="right")
72
  @ops.output_position(output="left")
73
  @op("Loop")
74
+ def loop(input, *, max_iterations: int = 3, _ctx: one_by_one.Context):
75
  '''Data can flow back here max_iterations-1 times.'''
76
  key = f'iterations-{_ctx.node.id}'
77
  input[key] = input.get(key, 0) + 1
 
81
@op('Branch', outputs=['true', 'false'])
def branch(input, *, expression: str):
    '''Routes the input to the "true" or "false" output based on the expression.'''
    # SECURITY: eval() executes arbitrary code with the task dict as its
    # globals — only safe if "expression" comes from a trusted workspace author.
    res = eval(expression, input)
    return one_by_one.Output(output_handle=str(bool(res)).lower(), value=input)
85
 
86
  @ops.input_position(db="top")
87
  @op('RAG')
88
+ def rag(input, db, *, input_field='text', db_field='text', num_matches: int=10, _ctx: one_by_one.Context):
89
  last = _ctx.last_result
90
  if last:
91
  collection = last['_collection']
 
114
  p = p.replace(k.upper(), str(v))
115
  return p
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117