Spaces:

lynx-analytics
/

lynxkite

Running

App Files Files Community

darabos committed on Sep 11, 2024

Commit

eda8f97

1 Parent(s): 03a6805

Retrieval in RAG demo working.

Browse files

Files changed (5) hide show

data/Graph RAG +41 -19
server/executors/one_by_one.py +4 -3
server/llm_ops.py +8 -8
server/test_llm_ops.py +31 -6
web/src/NodeWithTableView.svelte +13 -2

data/Graph RAG CHANGED Viewed

@@ -56,10 +56,7 @@
               ],
               "data": [
                 [
-                  "**Date**: September 11, 2024\n**Prepared by**: Operations Team\n**Document Type**: Pricing and Menu Structure Overview"
-                ],
-                [
-                  "---"
                 ],
                 [
                   "### 1. **Overview**"
@@ -345,8 +342,8 @@
         }
       },
       "position": {
-        "x": -87.76278050287166,
-        "y": -166.8483642187531
       },
       "parentId": null
     },
@@ -357,7 +354,7 @@
         "title": "Add neighbors",
         "params": {},
         "display": null,
-        "error": "('Add neighbors 1', 'edges')",
         "meta": {
           "name": "Add neighbors",
           "params": {},
@@ -395,11 +392,12 @@
           },
           "type": "basic",
           "sub_nodes": null
-        }
       },
       "position": {
-        "x": -128.04108014008284,
-        "y": 132.7214893330056
       },
       "parentId": null
     },
@@ -454,7 +452,7 @@
         "title": "Create prompt",
         "params": {
           "save_as": "prompt",
-          "template": null
         },
         "display": null,
         "error": null,
@@ -499,8 +497,8 @@
         }
       },
       "position": {
-        "x": 208.45977258554205,
-        "y": 97.14836709561501
       },
       "parentId": null
     },
@@ -510,7 +508,28 @@
       "data": {
         "title": "View",
         "params": {},
-        "display": null,
         "error": null,
         "meta": {
           "name": "View",
@@ -527,13 +546,16 @@
           "outputs": {},
           "type": "table_view",
           "sub_nodes": null
-        }
       },
       "position": {
-        "x": 605.0792427265845,
-        "y": 293.3800944106007
       },
-      "parentId": null
     },
     {
       "id": "RAG 1",
@@ -613,7 +635,7 @@
         }
       },
       "position": {
-        "x": -481.37046820744615,
         "y": 56.69054032617606
       },
       "parentId": null

               ],
               "data": [
                 [
+                  "hello"
                 ],
                 [
                   "### 1. **Overview**"
         }
       },
       "position": {
+        "x": -92.52685728742009,
+        "y": -177.9645433826994
       },
       "parentId": null
     },
         "title": "Add neighbors",
         "params": {},
         "display": null,
+        "error": null,
         "meta": {
           "name": "Add neighbors",
           "params": {},
           },
           "type": "basic",
           "sub_nodes": null
+        },
+        "collapsed": false
       },
       "position": {
+        "x": -113.7488497864376,
+        "y": 145.42569409180135
       },
       "parentId": null
     },
         "title": "Create prompt",
         "params": {
           "save_as": "prompt",
+          "template": "{text}"
         },
         "display": null,
         "error": null,
         }
       },
       "position": {
+        "x": 324.81988008998496,
+        "y": -9.071826950189632
       },
       "parentId": null
     },
       "data": {
         "title": "View",
         "params": {},
+        "display": {
+          "dataframes": {
+            "df": {
+              "columns": [
+                "text",
+                "rag",
+                "prompt"
+              ],
+              "data": [
+                [
+                  "What's your cheapest drink?",
+                  [
+                    "### 6. **Drinks**",
+                    "| Size               | Price         |\n|--------------------|---------------|\n| 20 oz Bottle       | $1.99         |\n| 2-Liter Bottle     | $3.50         |",
+                    "Available options: Coke, Diet Coke, Sprite, Root Beer, Lemonade."
+                  ],
+                  "{text}"
+                ]
+              ]
+            }
+          }
+        },
         "error": null,
         "meta": {
           "name": "View",
           "outputs": {},
           "type": "table_view",
           "sub_nodes": null
+        },
+        "beingResized": false
       },
       "position": {
+        "x": 659.7852850905575,
+        "y": -41.48719521129472
       },
+      "parentId": null,
+      "width": 492,
+      "height": 391
     },
     {
       "id": "RAG 1",
         }
       },
       "position": {
+        "x": -449.6099563104567,
         "y": 56.69054032617606
       },
       "parentId": null

server/executors/one_by_one.py CHANGED Viewed

@@ -76,12 +76,13 @@ def execute(ws, catalog, cache=None):
   for node in ws.nodes:
     node.data.error = None
     op = catalog[node.data.title]
-    # Start tasks for nodes that have no inputs.
-    if not op.inputs:
       tasks[node.id] = [NO_INPUT]
   batch_inputs = {}
   # Run the rest until we run out of tasks.
-  for stage in get_stages(ws, catalog):
     next_stage = {}
     while tasks:
       n, ts = tasks.popitem()

   for node in ws.nodes:
     node.data.error = None
     op = catalog[node.data.title]
+    # Start tasks for nodes that have no non-batch inputs.
+    if all([i.position == 'top' for i in op.inputs.values()]):
       tasks[node.id] = [NO_INPUT]
   batch_inputs = {}
   # Run the rest until we run out of tasks.
+  stages = get_stages(ws, catalog)
+  for stage in stages:
     next_stage = {}
     while tasks:
       n, ts = tasks.popitem()

server/llm_ops.py CHANGED Viewed

@@ -54,32 +54,32 @@ def split_document(input, *, delimiter: str = '\\n\\n'):
 @ops.input_position(input="top")
 @op("Build document graph")
 def build_document_graph(input):
-  chunks = input['text']
-  return pd.DataFrame([{'source': i, 'target': i+1} for i in range(len(chunks)-1)]),
 @ops.input_position(nodes="top", edges="top")
 @op("Predict links")
 def predict_links(nodes, edges):
   '''A placeholder for a real algorithm. For now just adds 2-hop neighbors.'''
-  edges = edges.to_dict(orient='records')
   edge_map = {} # Source -> [Targets]
   for edge in edges:
     edge_map.setdefault(edge['source'], [])
     edge_map[edge['source']].append(edge['target'])
   new_edges = []
-  for source, target in edges.items():
-    for t in edge_map.get(target, []):
-      new_edges.append({'source': source, 'target': t})
-  return pd.DataFrame(edges.append(new_edges))
 @ops.input_position(nodes="top", edges="top")
 @op("Add neighbors")
 def add_neighbors(nodes, edges, item):
   matches = item['rag']
   additional_matches = []
   for m in matches:
     node = nodes[nodes['text'] == m].index[0]
-    neighbors = edges[edges['source'] == node]['target']
     additional_matches.extend(nodes.loc[neighbors, 'text'])
   return {**item, 'rag': matches + additional_matches}

 @ops.input_position(input="top")
 @op("Build document graph")
 def build_document_graph(input):
+  return [{'source': i, 'target': i+1} for i in range(len(input)-1)]
 @ops.input_position(nodes="top", edges="top")
 @op("Predict links")
 def predict_links(nodes, edges):
   '''A placeholder for a real algorithm. For now just adds 2-hop neighbors.'''
   edge_map = {} # Source -> [Targets]
   for edge in edges:
     edge_map.setdefault(edge['source'], [])
     edge_map[edge['source']].append(edge['target'])
   new_edges = []
+  for edge in edges:
+    for t in edge_map.get(edge['target'], []):
+      new_edges.append({'source': edge['source'], 'target': t})
+  return edges + new_edges
 @ops.input_position(nodes="top", edges="top")
 @op("Add neighbors")
 def add_neighbors(nodes, edges, item):
+  nodes = pd.DataFrame(nodes)
+  edges = pd.DataFrame(edges)
   matches = item['rag']
   additional_matches = []
   for m in matches:
     node = nodes[nodes['text'] == m].index[0]
+    neighbors = edges[edges['source'] == node]['target'].to_list()
     additional_matches.extend(nodes.loc[neighbors, 'text'])
   return {**item, 'rag': matches + additional_matches}

server/test_llm_ops.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import unittest
 from . import llm_ops
 from . import workspace
 def make_node(id, op, type='basic', **params):
@@ -11,7 +13,7 @@ def make_node(id, op, type='basic', **params):
   )
 def make_input(id):
   return make_node(
-    id, 'Input',
     filename='/Users/danieldarabos/Downloads/aimo-train.csv',
     key='problem')
 def make_edge(source, target, targetHandle='input'):
@@ -22,7 +24,7 @@ class LLMOpsTest(unittest.TestCase):
   def testExecute(self):
     ws = workspace.Workspace(env='LLM logic', nodes=[
       make_node(
-        '0', 'Input',
         filename='/Users/danieldarabos/Downloads/aimo-train.csv',
         key='problem'),
       make_node(
@@ -30,8 +32,9 @@ class LLMOpsTest(unittest.TestCase):
     ], edges=[
       make_edge('0', '1')
     ])
-    llm_ops.execute(ws)
-    self.assertEqual('', ws.nodes[1].data.display)
   def testStages(self):
     ws = workspace.Workspace(env='LLM logic', nodes=[
@@ -43,8 +46,30 @@ class LLMOpsTest(unittest.TestCase):
       make_edge('rag1', 'p1'), make_edge('p1', 'rag2', 'db'),
       make_edge('in3', 'p2'), make_edge('p3', 'rag2'),
     ])
-    stages = llm_ops.get_stages(ws)
-    self.assertEqual('', stages)
 if __name__ == '__main__':
   unittest.main()

 import unittest
+from . import ops
 from . import llm_ops
+from .executors import one_by_one
 from . import workspace
 def make_node(id, op, type='basic', **params):
   )
 def make_input(id):
   return make_node(
+    id, 'Input CSV',
     filename='/Users/danieldarabos/Downloads/aimo-train.csv',
     key='problem')
 def make_edge(source, target, targetHandle='input'):
   def testExecute(self):
     ws = workspace.Workspace(env='LLM logic', nodes=[
       make_node(
+        '0', 'Input CSV',
         filename='/Users/danieldarabos/Downloads/aimo-train.csv',
         key='problem'),
       make_node(
     ], edges=[
       make_edge('0', '1')
     ])
+    catalog = ops.CATALOGS[ws.env]
+    one_by_one.execute(ws, catalog)
+    # self.assertEqual('', ws.nodes[1].data.display)
   def testStages(self):
     ws = workspace.Workspace(env='LLM logic', nodes=[
       make_edge('rag1', 'p1'), make_edge('p1', 'rag2', 'db'),
       make_edge('in3', 'p2'), make_edge('p3', 'rag2'),
     ])
+    catalog = ops.CATALOGS[ws.env]
+    stages = one_by_one.get_stages(ws, catalog)
+    print(stages)
+    # self.assertEqual('', stages)
+  def testStagesMultiInput(self):
+    ws = workspace.Workspace(env='LLM logic', nodes=[
+      make_node('doc', 'Input document'),
+      make_node('split', 'Split document'),
+      make_node('graph', 'Build document graph'),
+      make_node('chat', 'Input chat'),
+      make_node('rag', 'RAG'),
+      make_node('neighbors', 'Add neighbors'),
+    ], edges=[
+      make_edge('doc', 'split'), make_edge('split', 'graph'),
+      make_edge('split', 'rag', 'db'), make_edge('chat', 'rag', 'input'),
+      make_edge('split', 'neighbors', 'nodes'),
+      make_edge('graph', 'neighbors', 'edges'),
+      make_edge('rag', 'neighbors', 'item'),
+    ])
+    catalog = ops.CATALOGS[ws.env]
+    stages = one_by_one.get_stages(ws, catalog)
+    print(stages)
+    # self.assertEqual('', stages)
 if __name__ == '__main__':
   unittest.main()

web/src/NodeWithTableView.svelte CHANGED Viewed

@@ -1,6 +1,5 @@
 <script lang="ts">
   import { type NodeProps } from '@xyflow/svelte';
-  import { Tabulator } from 'tabulator-tables';
   import LynxKiteNode from './LynxKiteNode.svelte';
   import Table from './Table.svelte';
   type $$Props = NodeProps;
@@ -14,7 +13,16 @@
     {#each Object.entries(data.display.dataframes || {}) as [name, df]}
       {#if !single}<div class="df-head" on:click={() => open[name] = !open[name]}>{name}</div>{/if}
       {#if single || open[name]}
-        <Table columns={df.columns} data={df.data} />
       {/if}
     {/each}
     {#each Object.entries(data.display.others || {}) as [name, o]}
@@ -35,4 +43,7 @@
   table {
     table-layout: fixed;
   }
 </style>

 <script lang="ts">
   import { type NodeProps } from '@xyflow/svelte';
   import LynxKiteNode from './LynxKiteNode.svelte';
   import Table from './Table.svelte';
   type $$Props = NodeProps;
     {#each Object.entries(data.display.dataframes || {}) as [name, df]}
       {#if !single}<div class="df-head" on:click={() => open[name] = !open[name]}>{name}</div>{/if}
       {#if single || open[name]}
+        {#if df.data.length > 1}
+          <Table columns={df.columns} data={df.data} />
+        {:else}
+          <dl>
+          {#each df.columns as c, i}
+            <dt>{c}</dt>
+            <dd>{df.data[0][i]}</dd>
+          {/each}
+          </dl>
+        {/if}
       {/if}
     {/each}
     {#each Object.entries(data.display.others || {}) as [name, o]}
   table {
     table-layout: fixed;
   }
+  dl {
+    margin: 10px;
+  }
 </style>