Spaces:
Running
Running
Retrieval in RAG demo working.
Browse files- data/Graph RAG +41 -19
- server/executors/one_by_one.py +4 -3
- server/llm_ops.py +8 -8
- server/test_llm_ops.py +31 -6
- web/src/NodeWithTableView.svelte +13 -2
data/Graph RAG
CHANGED
@@ -56,10 +56,7 @@
|
|
56 |
],
|
57 |
"data": [
|
58 |
[
|
59 |
-
"
|
60 |
-
],
|
61 |
-
[
|
62 |
-
"---"
|
63 |
],
|
64 |
[
|
65 |
"### 1. **Overview**"
|
@@ -345,8 +342,8 @@
|
|
345 |
}
|
346 |
},
|
347 |
"position": {
|
348 |
-
"x": -
|
349 |
-
"y": -
|
350 |
},
|
351 |
"parentId": null
|
352 |
},
|
@@ -357,7 +354,7 @@
|
|
357 |
"title": "Add neighbors",
|
358 |
"params": {},
|
359 |
"display": null,
|
360 |
-
"error":
|
361 |
"meta": {
|
362 |
"name": "Add neighbors",
|
363 |
"params": {},
|
@@ -395,11 +392,12 @@
|
|
395 |
},
|
396 |
"type": "basic",
|
397 |
"sub_nodes": null
|
398 |
-
}
|
|
|
399 |
},
|
400 |
"position": {
|
401 |
-
"x": -
|
402 |
-
"y":
|
403 |
},
|
404 |
"parentId": null
|
405 |
},
|
@@ -454,7 +452,7 @@
|
|
454 |
"title": "Create prompt",
|
455 |
"params": {
|
456 |
"save_as": "prompt",
|
457 |
-
"template":
|
458 |
},
|
459 |
"display": null,
|
460 |
"error": null,
|
@@ -499,8 +497,8 @@
|
|
499 |
}
|
500 |
},
|
501 |
"position": {
|
502 |
-
"x":
|
503 |
-
"y":
|
504 |
},
|
505 |
"parentId": null
|
506 |
},
|
@@ -510,7 +508,28 @@
|
|
510 |
"data": {
|
511 |
"title": "View",
|
512 |
"params": {},
|
513 |
-
"display":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
514 |
"error": null,
|
515 |
"meta": {
|
516 |
"name": "View",
|
@@ -527,13 +546,16 @@
|
|
527 |
"outputs": {},
|
528 |
"type": "table_view",
|
529 |
"sub_nodes": null
|
530 |
-
}
|
|
|
531 |
},
|
532 |
"position": {
|
533 |
-
"x":
|
534 |
-
"y":
|
535 |
},
|
536 |
-
"parentId": null
|
|
|
|
|
537 |
},
|
538 |
{
|
539 |
"id": "RAG 1",
|
@@ -613,7 +635,7 @@
|
|
613 |
}
|
614 |
},
|
615 |
"position": {
|
616 |
-
"x": -
|
617 |
"y": 56.69054032617606
|
618 |
},
|
619 |
"parentId": null
|
|
|
56 |
],
|
57 |
"data": [
|
58 |
[
|
59 |
+
"hello"
|
|
|
|
|
|
|
60 |
],
|
61 |
[
|
62 |
"### 1. **Overview**"
|
|
|
342 |
}
|
343 |
},
|
344 |
"position": {
|
345 |
+
"x": -92.52685728742009,
|
346 |
+
"y": -177.9645433826994
|
347 |
},
|
348 |
"parentId": null
|
349 |
},
|
|
|
354 |
"title": "Add neighbors",
|
355 |
"params": {},
|
356 |
"display": null,
|
357 |
+
"error": null,
|
358 |
"meta": {
|
359 |
"name": "Add neighbors",
|
360 |
"params": {},
|
|
|
392 |
},
|
393 |
"type": "basic",
|
394 |
"sub_nodes": null
|
395 |
+
},
|
396 |
+
"collapsed": false
|
397 |
},
|
398 |
"position": {
|
399 |
+
"x": -113.7488497864376,
|
400 |
+
"y": 145.42569409180135
|
401 |
},
|
402 |
"parentId": null
|
403 |
},
|
|
|
452 |
"title": "Create prompt",
|
453 |
"params": {
|
454 |
"save_as": "prompt",
|
455 |
+
"template": "{text}"
|
456 |
},
|
457 |
"display": null,
|
458 |
"error": null,
|
|
|
497 |
}
|
498 |
},
|
499 |
"position": {
|
500 |
+
"x": 324.81988008998496,
|
501 |
+
"y": -9.071826950189632
|
502 |
},
|
503 |
"parentId": null
|
504 |
},
|
|
|
508 |
"data": {
|
509 |
"title": "View",
|
510 |
"params": {},
|
511 |
+
"display": {
|
512 |
+
"dataframes": {
|
513 |
+
"df": {
|
514 |
+
"columns": [
|
515 |
+
"text",
|
516 |
+
"rag",
|
517 |
+
"prompt"
|
518 |
+
],
|
519 |
+
"data": [
|
520 |
+
[
|
521 |
+
"What's your cheapest drink?",
|
522 |
+
[
|
523 |
+
"### 6. **Drinks**",
|
524 |
+
"| Size | Price |\n|--------------------|---------------|\n| 20 oz Bottle | $1.99 |\n| 2-Liter Bottle | $3.50 |",
|
525 |
+
"Available options: Coke, Diet Coke, Sprite, Root Beer, Lemonade."
|
526 |
+
],
|
527 |
+
"{text}"
|
528 |
+
]
|
529 |
+
]
|
530 |
+
}
|
531 |
+
}
|
532 |
+
},
|
533 |
"error": null,
|
534 |
"meta": {
|
535 |
"name": "View",
|
|
|
546 |
"outputs": {},
|
547 |
"type": "table_view",
|
548 |
"sub_nodes": null
|
549 |
+
},
|
550 |
+
"beingResized": false
|
551 |
},
|
552 |
"position": {
|
553 |
+
"x": 659.7852850905575,
|
554 |
+
"y": -41.48719521129472
|
555 |
},
|
556 |
+
"parentId": null,
|
557 |
+
"width": 492,
|
558 |
+
"height": 391
|
559 |
},
|
560 |
{
|
561 |
"id": "RAG 1",
|
|
|
635 |
}
|
636 |
},
|
637 |
"position": {
|
638 |
+
"x": -449.6099563104567,
|
639 |
"y": 56.69054032617606
|
640 |
},
|
641 |
"parentId": null
|
server/executors/one_by_one.py
CHANGED
@@ -76,12 +76,13 @@ def execute(ws, catalog, cache=None):
|
|
76 |
for node in ws.nodes:
|
77 |
node.data.error = None
|
78 |
op = catalog[node.data.title]
|
79 |
-
# Start tasks for nodes that have no inputs.
|
80 |
-
if
|
81 |
tasks[node.id] = [NO_INPUT]
|
82 |
batch_inputs = {}
|
83 |
# Run the rest until we run out of tasks.
|
84 |
-
|
|
|
85 |
next_stage = {}
|
86 |
while tasks:
|
87 |
n, ts = tasks.popitem()
|
|
|
76 |
for node in ws.nodes:
|
77 |
node.data.error = None
|
78 |
op = catalog[node.data.title]
|
79 |
+
# Start tasks for nodes that have no non-batch inputs.
|
80 |
+
if all([i.position == 'top' for i in op.inputs.values()]):
|
81 |
tasks[node.id] = [NO_INPUT]
|
82 |
batch_inputs = {}
|
83 |
# Run the rest until we run out of tasks.
|
84 |
+
stages = get_stages(ws, catalog)
|
85 |
+
for stage in stages:
|
86 |
next_stage = {}
|
87 |
while tasks:
|
88 |
n, ts = tasks.popitem()
|
server/llm_ops.py
CHANGED
@@ -54,32 +54,32 @@ def split_document(input, *, delimiter: str = '\\n\\n'):
|
|
54 |
@ops.input_position(input="top")
|
55 |
@op("Build document graph")
|
56 |
def build_document_graph(input):
|
57 |
-
|
58 |
-
return pd.DataFrame([{'source': i, 'target': i+1} for i in range(len(chunks)-1)]),
|
59 |
|
60 |
@ops.input_position(nodes="top", edges="top")
|
61 |
@op("Predict links")
|
62 |
def predict_links(nodes, edges):
|
63 |
'''A placeholder for a real algorithm. For now just adds 2-hop neighbors.'''
|
64 |
-
edges = edges.to_dict(orient='records')
|
65 |
edge_map = {} # Source -> [Targets]
|
66 |
for edge in edges:
|
67 |
edge_map.setdefault(edge['source'], [])
|
68 |
edge_map[edge['source']].append(edge['target'])
|
69 |
new_edges = []
|
70 |
-
for
|
71 |
-
for t in edge_map.get(target, []):
|
72 |
-
new_edges.append({'source': source, 'target': t})
|
73 |
-
return
|
74 |
|
75 |
@ops.input_position(nodes="top", edges="top")
|
76 |
@op("Add neighbors")
|
77 |
def add_neighbors(nodes, edges, item):
|
|
|
|
|
78 |
matches = item['rag']
|
79 |
additional_matches = []
|
80 |
for m in matches:
|
81 |
node = nodes[nodes['text'] == m].index[0]
|
82 |
-
neighbors = edges[edges['source'] == node]['target']
|
83 |
additional_matches.extend(nodes.loc[neighbors, 'text'])
|
84 |
return {**item, 'rag': matches + additional_matches}
|
85 |
|
|
|
54 |
@ops.input_position(input="top")
|
55 |
@op("Build document graph")
|
56 |
def build_document_graph(input):
|
57 |
+
return [{'source': i, 'target': i+1} for i in range(len(input)-1)]
|
|
|
58 |
|
59 |
@ops.input_position(nodes="top", edges="top")
|
60 |
@op("Predict links")
|
61 |
def predict_links(nodes, edges):
|
62 |
'''A placeholder for a real algorithm. For now just adds 2-hop neighbors.'''
|
|
|
63 |
edge_map = {} # Source -> [Targets]
|
64 |
for edge in edges:
|
65 |
edge_map.setdefault(edge['source'], [])
|
66 |
edge_map[edge['source']].append(edge['target'])
|
67 |
new_edges = []
|
68 |
+
for edge in edges:
|
69 |
+
for t in edge_map.get(edge['target'], []):
|
70 |
+
new_edges.append({'source': edge['source'], 'target': t})
|
71 |
+
return edges + new_edges
|
72 |
|
73 |
@ops.input_position(nodes="top", edges="top")
|
74 |
@op("Add neighbors")
|
75 |
def add_neighbors(nodes, edges, item):
|
76 |
+
nodes = pd.DataFrame(nodes)
|
77 |
+
edges = pd.DataFrame(edges)
|
78 |
matches = item['rag']
|
79 |
additional_matches = []
|
80 |
for m in matches:
|
81 |
node = nodes[nodes['text'] == m].index[0]
|
82 |
+
neighbors = edges[edges['source'] == node]['target'].to_list()
|
83 |
additional_matches.extend(nodes.loc[neighbors, 'text'])
|
84 |
return {**item, 'rag': matches + additional_matches}
|
85 |
|
server/test_llm_ops.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import unittest
|
|
|
2 |
from . import llm_ops
|
|
|
3 |
from . import workspace
|
4 |
|
5 |
def make_node(id, op, type='basic', **params):
|
@@ -11,7 +13,7 @@ def make_node(id, op, type='basic', **params):
|
|
11 |
)
|
12 |
def make_input(id):
|
13 |
return make_node(
|
14 |
-
id, 'Input',
|
15 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
16 |
key='problem')
|
17 |
def make_edge(source, target, targetHandle='input'):
|
@@ -22,7 +24,7 @@ class LLMOpsTest(unittest.TestCase):
|
|
22 |
def testExecute(self):
|
23 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
24 |
make_node(
|
25 |
-
'0', 'Input',
|
26 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
27 |
key='problem'),
|
28 |
make_node(
|
@@ -30,8 +32,9 @@ class LLMOpsTest(unittest.TestCase):
|
|
30 |
], edges=[
|
31 |
make_edge('0', '1')
|
32 |
])
|
33 |
-
|
34 |
-
|
|
|
35 |
|
36 |
def testStages(self):
|
37 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
@@ -43,8 +46,30 @@ class LLMOpsTest(unittest.TestCase):
|
|
43 |
make_edge('rag1', 'p1'), make_edge('p1', 'rag2', 'db'),
|
44 |
make_edge('in3', 'p2'), make_edge('p3', 'rag2'),
|
45 |
])
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
if __name__ == '__main__':
|
50 |
unittest.main()
|
|
|
1 |
import unittest
|
2 |
+
from . import ops
|
3 |
from . import llm_ops
|
4 |
+
from .executors import one_by_one
|
5 |
from . import workspace
|
6 |
|
7 |
def make_node(id, op, type='basic', **params):
|
|
|
13 |
)
|
14 |
def make_input(id):
|
15 |
return make_node(
|
16 |
+
id, 'Input CSV',
|
17 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
18 |
key='problem')
|
19 |
def make_edge(source, target, targetHandle='input'):
|
|
|
24 |
def testExecute(self):
|
25 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
26 |
make_node(
|
27 |
+
'0', 'Input CSV',
|
28 |
filename='/Users/danieldarabos/Downloads/aimo-train.csv',
|
29 |
key='problem'),
|
30 |
make_node(
|
|
|
32 |
], edges=[
|
33 |
make_edge('0', '1')
|
34 |
])
|
35 |
+
catalog = ops.CATALOGS[ws.env]
|
36 |
+
one_by_one.execute(ws, catalog)
|
37 |
+
# self.assertEqual('', ws.nodes[1].data.display)
|
38 |
|
39 |
def testStages(self):
|
40 |
ws = workspace.Workspace(env='LLM logic', nodes=[
|
|
|
46 |
make_edge('rag1', 'p1'), make_edge('p1', 'rag2', 'db'),
|
47 |
make_edge('in3', 'p2'), make_edge('p3', 'rag2'),
|
48 |
])
|
49 |
+
catalog = ops.CATALOGS[ws.env]
|
50 |
+
stages = one_by_one.get_stages(ws, catalog)
|
51 |
+
print(stages)
|
52 |
+
# self.assertEqual('', stages)
|
53 |
+
|
54 |
+
def testStagesMultiInput(self):
|
55 |
+
ws = workspace.Workspace(env='LLM logic', nodes=[
|
56 |
+
make_node('doc', 'Input document'),
|
57 |
+
make_node('split', 'Split document'),
|
58 |
+
make_node('graph', 'Build document graph'),
|
59 |
+
make_node('chat', 'Input chat'),
|
60 |
+
make_node('rag', 'RAG'),
|
61 |
+
make_node('neighbors', 'Add neighbors'),
|
62 |
+
], edges=[
|
63 |
+
make_edge('doc', 'split'), make_edge('split', 'graph'),
|
64 |
+
make_edge('split', 'rag', 'db'), make_edge('chat', 'rag', 'input'),
|
65 |
+
make_edge('split', 'neighbors', 'nodes'),
|
66 |
+
make_edge('graph', 'neighbors', 'edges'),
|
67 |
+
make_edge('rag', 'neighbors', 'item'),
|
68 |
+
])
|
69 |
+
catalog = ops.CATALOGS[ws.env]
|
70 |
+
stages = one_by_one.get_stages(ws, catalog)
|
71 |
+
print(stages)
|
72 |
+
# self.assertEqual('', stages)
|
73 |
|
74 |
if __name__ == '__main__':
|
75 |
unittest.main()
|
web/src/NodeWithTableView.svelte
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
<script lang="ts">
|
2 |
import { type NodeProps } from '@xyflow/svelte';
|
3 |
-
import { Tabulator } from 'tabulator-tables';
|
4 |
import LynxKiteNode from './LynxKiteNode.svelte';
|
5 |
import Table from './Table.svelte';
|
6 |
type $$Props = NodeProps;
|
@@ -14,7 +13,16 @@
|
|
14 |
{#each Object.entries(data.display.dataframes || {}) as [name, df]}
|
15 |
{#if !single}<div class="df-head" on:click={() => open[name] = !open[name]}>{name}</div>{/if}
|
16 |
{#if single || open[name]}
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
{/if}
|
19 |
{/each}
|
20 |
{#each Object.entries(data.display.others || {}) as [name, o]}
|
@@ -35,4 +43,7 @@
|
|
35 |
table {
|
36 |
table-layout: fixed;
|
37 |
}
|
|
|
|
|
|
|
38 |
</style>
|
|
|
1 |
<script lang="ts">
|
2 |
import { type NodeProps } from '@xyflow/svelte';
|
|
|
3 |
import LynxKiteNode from './LynxKiteNode.svelte';
|
4 |
import Table from './Table.svelte';
|
5 |
type $$Props = NodeProps;
|
|
|
13 |
{#each Object.entries(data.display.dataframes || {}) as [name, df]}
|
14 |
{#if !single}<div class="df-head" on:click={() => open[name] = !open[name]}>{name}</div>{/if}
|
15 |
{#if single || open[name]}
|
16 |
+
{#if df.data.length > 1}
|
17 |
+
<Table columns={df.columns} data={df.data} />
|
18 |
+
{:else}
|
19 |
+
<dl>
|
20 |
+
{#each df.columns as c, i}
|
21 |
+
<dt>{c}</dt>
|
22 |
+
<dd>{df.data[0][i]}</dd>
|
23 |
+
{/each}
|
24 |
+
</dl>
|
25 |
+
{/if}
|
26 |
{/if}
|
27 |
{/each}
|
28 |
{#each Object.entries(data.display.others || {}) as [name, o]}
|
|
|
43 |
table {
|
44 |
table-layout: fixed;
|
45 |
}
|
46 |
+
dl {
|
47 |
+
margin: 10px;
|
48 |
+
}
|
49 |
</style>
|