Format/lint changes.

Files changed:
- server/llm_ops.py (+154 -125)
- web/src/workspace/nodes/LynxKiteNode.tsx (+0 -3)
server/llm_ops.py
CHANGED
@@ -1,4 +1,5 @@
-
+"""For specifying an LLM agent logic flow."""
+
 from . import ops
 import chromadb
 import enum
@@ -14,177 +15,205 @@ embedding_client = openai.OpenAI(base_url="http://localhost:7997/")
 jinja = jinja2.Environment()
 chroma_client = chromadb.Client()
 LLM_CACHE = {}
-ENV =
+ENV = "LLM logic"
 one_by_one.register(ENV)
 op = ops.op_registration(ENV)
 
+
 def chat(*args, **kwargs):
-
-
-
-
-
+    key = json.dumps({"method": "chat", "args": args, "kwargs": kwargs})
+    if key not in LLM_CACHE:
+        completion = chat_client.chat.completions.create(*args, **kwargs)
+        LLM_CACHE[key] = [c.message.content for c in completion.choices]
+    return LLM_CACHE[key]
+
 
 def embedding(*args, **kwargs):
-
-
-
-
-
-
+    key = json.dumps({"method": "embedding", "args": args, "kwargs": kwargs})
+    if key not in LLM_CACHE:
+        res = embedding_client.embeddings.create(*args, **kwargs)
+        [data] = res.data
+        LLM_CACHE[key] = data.embedding
+    return LLM_CACHE[key]
+
 
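Note: both wrappers memoize on a json.dumps of their arguments, so re-running a workspace replays cached responses instead of hitting the model servers again. A minimal standalone sketch of the same pattern, where expensive_call is a hypothetical stand-in for the OpenAI client:

    import json

    LLM_CACHE = {}

    def expensive_call(**kwargs):  # hypothetical stand-in for the API call
        return {"echo": kwargs}

    def cached(**kwargs):
        # Keyword order affects the key; json.dumps(..., sort_keys=True)
        # would normalize it.
        key = json.dumps({"method": "chat", "kwargs": kwargs})
        if key not in LLM_CACHE:
            LLM_CACHE[key] = expensive_call(**kwargs)
        return LLM_CACHE[key]

    assert cached(model="m", prompt="hi") is cached(model="m", prompt="hi")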
 @op("Input CSV")
 def input_csv(*, filename: ops.PathStr, key: str):
-
+    return pd.read_csv(filename).rename(columns={key: "text"})
+
 
 @op("Input document")
 def input_document(*, filename: ops.PathStr):
-
-
+    with open(filename) as f:
+        return {"text": f.read()}
+
 
 @op("Input chat")
 def input_chat(*, chat: str):
-
+    return {"text": chat}
+
 
 @op("Split document")
-def split_document(input, *, delimiter: str =
-
-
-
+def split_document(input, *, delimiter: str = "\\n\\n"):
+    delimiter = delimiter.encode().decode("unicode_escape")
+    chunks = input["text"].split(delimiter)
+    return pd.DataFrame(chunks, columns=["text"])
+
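The default delimiter arrives from the UI as the literal characters backslash-n, not real newlines, which is why split_document decodes escape sequences before splitting. For example:

    # "\\n\\n" is a 4-character string until unicode_escape decodes it.
    d = "\\n\\n".encode().decode("unicode_escape")
    assert d == "\n\n"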
 
 @ops.input_position(input="top")
 @op("Build document graph")
 def build_document_graph(input):
-
+    return [{"source": i, "target": i + 1} for i in range(len(input) - 1)]
+
 
 @ops.input_position(nodes="top", edges="top")
 @op("Predict links")
 def predict_links(nodes, edges):
-
-
-
-
-
-
-
-
-
-
+    """A placeholder for a real algorithm. For now just adds 2-hop neighbors."""
+    edge_map = {}  # Source -> [Targets]
+    for edge in edges:
+        edge_map.setdefault(edge["source"], [])
+        edge_map[edge["source"]].append(edge["target"])
+    new_edges = []
+    for edge in edges:
+        for t in edge_map.get(edge["target"], []):
+            new_edges.append({"source": edge["source"], "target": t})
+    return edges + new_edges
+
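On a chain graph the 2-hop rule adds one shortcut per pair of consecutive edges. A quick standalone check of the logic above:

    edges = [{"source": 0, "target": 1}, {"source": 1, "target": 2}]
    edge_map = {}
    for e in edges:
        edge_map.setdefault(e["source"], []).append(e["target"])
    new_edges = [
        {"source": e["source"], "target": t}
        for e in edges
        for t in edge_map.get(e["target"], [])
    ]
    assert new_edges == [{"source": 0, "target": 2}]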
 
 @ops.input_position(nodes="top", edges="top")
 @op("Add neighbors")
 def add_neighbors(nodes, edges, item):
-
-
-
-
-
-
-
-
-
+    nodes = pd.DataFrame(nodes)
+    edges = pd.DataFrame(edges)
+    matches = item["rag"]
+    additional_matches = []
+    for m in matches:
+        node = nodes[nodes["text"] == m].index[0]
+        neighbors = edges[edges["source"] == node]["target"].to_list()
+        additional_matches.extend(nodes.loc[neighbors, "text"])
+    return {**item, "rag": matches + additional_matches}
+
 
 @op("Create prompt")
-def create_prompt(input, *, save_as=
-
-
-
-
+def create_prompt(input, *, save_as="prompt", template: ops.LongStr):
+    assert (
+        template
+    ), "Please specify the template. Refer to columns using the Jinja2 syntax."
+    t = jinja.from_string(template)
+    prompt = t.render(**input)
+    return {**input, save_as: prompt}
+
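The template is standard Jinja2; each column of the incoming item is available as a variable, and the rendered prompt is stored under save_as. A small example (the template text here is illustrative):

    import jinja2

    t = jinja2.Environment().from_string("Context: {{ rag }}\nQ: {{ text }}")
    print(t.render(text="What is LynxKite?", rag=["chunk 1", "chunk 2"]))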
 
 @op("Ask LLM")
 def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int = 100):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    assert model, "Please specify the model."
+    assert "prompt" in input, "Please create the prompt first."
+    options = {}
+    if accepted_regex:
+        options["extra_body"] = {
+            "guided_regex": accepted_regex,
+        }
+    results = chat(
+        model=model,
+        max_tokens=max_tokens,
+        messages=[
+            {"role": "user", "content": input["prompt"]},
+        ],
+        **options,
+    )
+    return [{**input, "response": r} for r in results]
+
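guided_regex is not part of the standard OpenAI API; extra_body just forwards it to the server, presumably a vLLM-style backend with guided decoding enabled. A hedged usage sketch constraining the answer shape (the model name is hypothetical):

    results = chat(
        model="some-local-model",  # hypothetical
        max_tokens=5,
        messages=[{"role": "user", "content": "Rate this from 1 to 10."}],
        extra_body={"guided_regex": r"[0-9]+"},
    )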
 
 @op("View", view="table_view")
 def view(input, *, _ctx: one_by_one.Context):
-
-
-
-
-
-
-
-
-
-
-
-
-
+    v = _ctx.last_result
+    if v:
+        columns = v["dataframes"]["df"]["columns"]
+        v["dataframes"]["df"]["data"].append([input[c] for c in columns])
+    else:
+        columns = [str(c) for c in input.keys() if not str(c).startswith("_")]
+        v = {
+            "dataframes": {
+                "df": {
+                    "columns": columns,
+                    "data": [[input[c] for c in columns]],
+                }
+            }
+        }
+    return v
+
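The view accumulates rows across items: the first item defines the columns (skipping underscore-prefixed internals like _collection), and each later item appends one row to the shared last_result. The same logic, standalone:

    v = None
    for item in [{"text": "a", "_collection": None}, {"text": "b", "_collection": None}]:
        if v:
            cols = v["dataframes"]["df"]["columns"]
            v["dataframes"]["df"]["data"].append([item[c] for c in cols])
        else:
            cols = [c for c in item if not c.startswith("_")]
            v = {"dataframes": {"df": {"columns": cols, "data": [[item[c] for c in cols]]}}}
    assert v["dataframes"]["df"]["data"] == [["a"], ["b"]]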
 
 @ops.input_position(input="right")
 @ops.output_position(output="left")
 @op("Loop")
 def loop(input, *, max_iterations: int = 3, _ctx: one_by_one.Context):
-
-
-
-
-
+    """Data can flow back here max_iterations-1 times."""
+    key = f"iterations-{_ctx.node.id}"
+    input[key] = input.get(key, 0) + 1
+    if input[key] < max_iterations:
+        return input
+
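The loop counts iterations on the item itself, keyed by node ID, and falling off the end (returning None) breaks the cycle. A simplified copy showing the behavior with the default max_iterations=3 (node ID hypothetical):

    def loop(item, max_iterations=3, node_id="Loop_1"):  # simplified copy
        key = f"iterations-{node_id}"
        item[key] = item.get(key, 0) + 1
        if item[key] < max_iterations:
            return item

    item = {}
    assert loop(item) == {"iterations-Loop_1": 1}
    assert loop(item) == {"iterations-Loop_1": 2}
    assert loop(item) is None  # third pass stops the loop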
 
-@op(
+@op("Branch", outputs=["true", "false"])
 def branch(input, *, expression: str):
-
-
+    res = eval(expression, input)
+    return one_by_one.Output(output_handle=str(bool(res)).lower(), value=input)
+
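eval runs the expression with the item dict as its globals, so columns are referenced by bare name, and the truth value picks the output handle. (The usual eval caution applies: the expression can run arbitrary code.)

    item = {"response": "yes"}
    res = eval("response == 'yes'", item)
    assert str(bool(res)).lower() == "true"  # routed to the "true" handle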
 
 class RagEngine(enum.Enum):
-
-
+    Chroma = "Chroma"
+    Custom = "Custom"
+
 
 @ops.input_position(db="top")
-@op(
+@op("RAG")
 def rag(
-
-
-
-
-
-
-
-
-
-
-
-if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    input,
+    db,
+    *,
+    engine: RagEngine = RagEngine.Chroma,
+    input_field="text",
+    db_field="text",
+    num_matches: int = 10,
+    _ctx: one_by_one.Context,
+):
+    if engine == RagEngine.Chroma:
+        last = _ctx.last_result
+        if last:
+            collection = last["_collection"]
+        else:
+            collection_name = _ctx.node.id.replace(" ", "_")
+            for c in chroma_client.list_collections():
+                if c.name == collection_name:
+                    chroma_client.delete_collection(name=collection_name)
+            collection = chroma_client.create_collection(name=collection_name)
+            collection.add(
+                documents=[r[db_field] for r in db],
+                ids=[str(i) for i in range(len(db))],
+            )
+        results = collection.query(
+            query_texts=[input[input_field]],
+            n_results=num_matches,
+        )
+        results = [db[int(r)] for r in results["ids"][0]]
+        return {**input, "rag": results, "_collection": collection}
+    if engine == RagEngine.Custom:
+        model = "google/gemma-2-2b-it"
+        chat = input[input_field]
+        embeddings = [embedding(input=[r[db_field]], model=model) for r in db]
+        q = embedding(input=[chat], model=model)
+
+        def cosine_similarity(a, b):
+            return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+        scores = [(i, cosine_similarity(q, e)) for i, e in enumerate(embeddings)]
+        scores.sort(key=lambda x: -x[1])
+        matches = [db[i][db_field] for i, _ in scores[:num_matches]]
+        return {**input, "rag": matches}
+
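The custom engine is brute-force nearest neighbors on cosine similarity over the whole db. A standalone illustration with toy 2-d embeddings in place of real model output:

    import numpy as np

    def cosine_similarity(a, b):
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

    db = ["cats", "dogs", "cars"]
    embeddings = [[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]]  # toy vectors
    q = [1.0, 0.05]
    scores = [(i, cosine_similarity(q, e)) for i, e in enumerate(embeddings)]
    scores.sort(key=lambda x: -x[1])
    assert [db[i] for i, _ in scores[:2]] == ["cats", "dogs"]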
+
+@op("Run Python")
 def run_python(input, *, template: str):
-
-
+    """TODO: Implement."""
+    return input
web/src/workspace/nodes/LynxKiteNode.tsx
CHANGED

@@ -1,5 +1,3 @@
-import { useContext } from 'react';
-import { LynxKiteState } from '../LynxKiteState';
 import { useReactFlow, Handle, NodeResizeControl, Position } from '@xyflow/react';
 // @ts-ignore
 import ChevronDownRight from '~icons/tabler/chevron-down-right.jsx';
@@ -45,7 +43,6 @@ function getHandles(inputs: object, outputs: object) {
 export default function LynxKiteNode(props: LynxKiteNodeProps) {
   const reactFlow = useReactFlow();
   const data = props.data;
-  const state = useContext(LynxKiteState);
   const expanded = !data.collapsed;
   const handles = getHandles(data.meta?.inputs || {}, data.meta?.outputs || {});
   function asPx(n: number | undefined) {