'''For specifying an LLM agent logic flow.'''
from . import ops
import chromadb
import enum
import jinja2
import json
import openai
import numpy as np
import pandas as pd
from .executors import one_by_one

chat_client = openai.OpenAI(base_url="http://localhost:8080/v1")
embedding_client = openai.OpenAI(base_url="http://localhost:7997/")
jinja = jinja2.Environment()
chroma_client = chromadb.Client()
LLM_CACHE = {}
ENV = 'LLM logic'
one_by_one.register(ENV)
op = ops.op_registration(ENV)


def chat(*args, **kwargs):
    # Cache results by the full call signature, so repeated identical calls
    # don't hit the LLM again.
    key = json.dumps({'method': 'chat', 'args': args, 'kwargs': kwargs})
    if key not in LLM_CACHE:
        completion = chat_client.chat.completions.create(*args, **kwargs)
        LLM_CACHE[key] = [c.message.content for c in completion.choices]
    return LLM_CACHE[key]


def embedding(*args, **kwargs):
    key = json.dumps({'method': 'embedding', 'args': args, 'kwargs': kwargs})
    if key not in LLM_CACHE:
        res = embedding_client.embeddings.create(*args, **kwargs)
        [data] = res.data
        LLM_CACHE[key] = data.embedding
    return LLM_CACHE[key]


@op("Input CSV")
def input_csv(*, filename: ops.PathStr, key: str):
    return pd.read_csv(filename).rename(columns={key: 'text'})


@op("Input document")
def input_document(*, filename: ops.PathStr):
    with open(filename) as f:
        return {'text': f.read()}


@op("Input chat")
def input_chat(*, chat: str):
    return {'text': chat}


@op("Split document")
def split_document(input, *, delimiter: str = '\\n\\n'):
    # The delimiter arrives as a plain string like "\n\n"; decode the escape
    # sequences into real characters before splitting.
    delimiter = delimiter.encode().decode('unicode_escape')
    chunks = input['text'].split(delimiter)
    return pd.DataFrame(chunks, columns=['text'])


@ops.input_position(input="top")
@op("Build document graph")
def build_document_graph(input):
    # Chains consecutive chunks together: row i points to row i+1.
    return [{'source': i, 'target': i + 1} for i in range(len(input) - 1)]


@ops.input_position(nodes="top", edges="top")
@op("Predict links")
def predict_links(nodes, edges):
    '''A placeholder for a real algorithm. For now it just adds 2-hop neighbors.'''
    edge_map = {}  # Source -> [targets].
    for edge in edges:
        edge_map.setdefault(edge['source'], [])
        edge_map[edge['source']].append(edge['target'])
    new_edges = []
    for edge in edges:
        for t in edge_map.get(edge['target'], []):
            new_edges.append({'source': edge['source'], 'target': t})
    return edges + new_edges


@ops.input_position(nodes="top", edges="top")
@op("Add neighbors")
def add_neighbors(nodes, edges, item):
    # Expands the RAG matches with their direct neighbors in the document graph.
    nodes = pd.DataFrame(nodes)
    edges = pd.DataFrame(edges)
    matches = item['rag']
    additional_matches = []
    for m in matches:
        node = nodes[nodes['text'] == m].index[0]
        neighbors = edges[edges['source'] == node]['target'].to_list()
        additional_matches.extend(nodes.loc[neighbors, 'text'])
    return {**item, 'rag': matches + additional_matches}


@op("Create prompt")
def create_prompt(input, *, save_as='prompt', template: ops.LongStr):
    assert template, 'Please specify the template. Refer to columns using the Jinja2 syntax.'
    t = jinja.from_string(template)
    prompt = t.render(**input)
    return {**input, save_as: prompt}
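
# A minimal sketch of what "Create prompt" produces, assuming the @op decorator
# leaves the function directly callable. The input dict and template are made
# up for illustration:
#
#   create_prompt({'text': 'the moon'}, template='Write a haiku about {{ text }}.')
#   # -> {'text': 'the moon', 'prompt': 'Write a haiku about the moon.'}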

@op("Ask LLM")
def ask_llm(input, *, model: str, accepted_regex: str = None, max_tokens: int = 100):
    assert model, 'Please specify the model.'
    assert 'prompt' in input, 'Please create the prompt first.'
    options = {}
    if accepted_regex:
        # Constrain decoding to the given regex, as supported by vLLM-style
        # backends through the OpenAI-compatible "extra_body" field.
        options['extra_body'] = {
            "guided_regex": accepted_regex,
        }
    results = chat(
        model=model,
        max_tokens=max_tokens,
        messages=[
            {"role": "user", "content": input['prompt']},
        ],
        **options,
    )
    return [{**input, 'response': r} for r in results]


@op("View", view="table_view")
def view(input, *, _ctx: one_by_one.Context):
    # Accumulates one row per item into the table built by previous invocations.
    v = _ctx.last_result
    if v:
        columns = v['dataframes']['df']['columns']
        v['dataframes']['df']['data'].append([input[c] for c in columns])
    else:
        columns = [str(c) for c in input.keys() if not str(c).startswith('_')]
        v = {
            'dataframes': {'df': {
                'columns': columns,
                'data': [[input[c] for c in columns]],
            }}
        }
    return v


@ops.input_position(input="right")
@ops.output_position(output="left")
@op("Loop")
def loop(input, *, max_iterations: int = 3, _ctx: one_by_one.Context):
    '''Data can flow back here max_iterations-1 times.'''
    key = f'iterations-{_ctx.node.id}'
    input[key] = input.get(key, 0) + 1
    if input[key] < max_iterations:
        return input


@op('Branch', outputs=['true', 'false'])
def branch(input, *, expression: str):
    # The expression is evaluated with the item's fields in scope.
    res = eval(expression, input)
    return one_by_one.Output(output_handle=str(bool(res)).lower(), value=input)


class RagEngine(enum.Enum):
    Chroma = 'Chroma'
    Custom = 'Custom'


@ops.input_position(db="top")
@op('RAG')
def rag(
        input, db, *,
        engine: RagEngine = RagEngine.Chroma,
        input_field='text',
        db_field='text',
        num_matches: int = 10,
        _ctx: one_by_one.Context):
    if engine == RagEngine.Chroma:
        last = _ctx.last_result
        if last:
            # The collection was already built by a previous invocation.
            collection = last['_collection']
        else:
            collection_name = _ctx.node.id.replace(' ', '_')
            for c in chroma_client.list_collections():
                if c.name == collection_name:
                    chroma_client.delete_collection(name=collection_name)
            collection = chroma_client.create_collection(name=collection_name)
            collection.add(
                documents=[r[db_field] for r in db],
                ids=[str(i) for i in range(len(db))],
            )
        results = collection.query(
            query_texts=[input[input_field]],
            n_results=num_matches,
        )
        results = [db[int(r)] for r in results['ids'][0]]
        return {**input, 'rag': results, '_collection': collection}
    if engine == RagEngine.Custom:
        model = 'google/gemma-2-2b-it'
        query = input[input_field]
        embeddings = [embedding(input=[r[db_field]], model=model) for r in db]
        q = embedding(input=[query], model=model)

        def cosine_similarity(a, b):
            return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

        scores = [(i, cosine_similarity(q, e)) for i, e in enumerate(embeddings)]
        scores.sort(key=lambda x: -x[1])
        matches = [db[i][db_field] for i, _ in scores[:num_matches]]
        return {**input, 'rag': matches}


@op('Run Python')
def run_python(input, *, template: str):
    '''TODO: Implement.'''
    return input
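
if __name__ == '__main__':
    # A small offline sketch of how the document-graph ops compose, assuming the
    # @op decorator leaves the functions directly callable. Only ops that need
    # no LLM or vector-store backend are exercised; the document text is made up.
    doc = {'text': 'alpha\n\nbeta\n\ngamma'}
    chunks = split_document(doc, delimiter='\\n\\n')  # DataFrame with 3 'text' rows.
    edges = build_document_graph(chunks)  # Chain edges: 0->1, 1->2.
    nodes = chunks.to_dict('records')
    # predict_links() adds the 2-hop edge 0->2 on top of the chain.
    print(predict_links(nodes, edges))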