"""
You call this ENDPOINT and it returns you a JSON which is of this format:

POST FORMAT: (/api/groq/generate, /api/google/generate, /api/ollama/generate, ...)
{
    "query": "????",
    "llm": "llama70b-whatever",
    "knn": "3",
    "stream": False
}

RESPONSE FORMAT:
{
    "response": "blabla",
    "references": "1, 2, 3"
}
"""

# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)

from flask import Flask
from flask import request

from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output, google_llm_output


app = Flask(__name__)

@app.route("/api/groq/generate", methods=['POST'])
def groq_completion():
    message = request.get_json()

    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])
    stream: bool = bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = groq_llm_output(query, db_knn, llm, stream)

    return {
        "response": output,
        "references": references
    }


@app.route("/api/ollama/generate", methods=['POST'])
def ollama_completion():
    message = request.get_json()

    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])
    stream: bool = bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = ollama_llm_output(query, db_knn, llm, stream)

    if response_json.get("error"):
        print(response_json)
        return {
            "response": "An error occured, try again.",
            "references": "No references"
        }

    return {
        "response": response_json['response'],
        "references": references
    }


@app.route("/api/google/generate", methods=['POST'])
def google_completion():
    message = request.get_json()

    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])
    stream: bool = bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = google_llm_output(query, db_knn, llm, stream)

    return {
        "response": response_json,
        "references": references
    }


"""
curl -X POST http://localhost:8000/api/groq/generate -H "Content-Type: application/json" -d '{
    "query": "How do I create a sphere in FURY?",
    "llm": "llama3-70b-8192",
    "knn": "3",
    "stream": false
  }'

  
curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{
    "query": "How do I create a sphere in FURY?",
    "llm": "phi3",
    "knn": "3",
    "stream": false
  }'
"""