File size: 2,736 Bytes
6cff55d
 
 
f23adce
6cff55d
 
 
 
 
 
 
 
 
 
f23adce
6cff55d
 
 
 
 
 
 
 
a0aed2c
6cff55d
 
f23adce
6cff55d
f23adce
 
 
6cff55d
f23adce
 
 
 
6cff55d
f23adce
 
 
6cff55d
f23adce
 
 
 
6cff55d
 
f23adce
 
6cff55d
 
 
 
 
 
 
 
 
f23adce
 
 
 
 
 
 
 
6cff55d
 
f23adce
6cff55d
 
 
a0aed2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5001ff
a0aed2c
 
 
 
6cff55d
207726a
 
 
 
 
 
 
 
f23adce
6cff55d
f23adce
6cff55d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
You call this ENDPOINT and it returns you a JSON which is of this format:

POST FORMAT: (/api/groq or /api/google or /api/ollama ...)
{
    "query": "????",
    "llm": "llama70b-whatever",
    "knn": "3",
    "stream": False
}

RESPONSE FORMAT:
{
    "response": "blabla",
    "references": "1, 2, 3"
}
"""

# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)

from flask import Flask
from flask import request

from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output, google_llm_output


app = Flask(__name__)

@app.route("/api/groq/generate", methods=['POST'])
def groq_completion():
    """Answer a RAG query through the Groq backend.

    Expects a JSON body with 'query', 'llm', 'knn' and 'stream' keys,
    embeds the query, fetches the k nearest documents from the vector
    store, and returns the LLM answer plus its references as JSON.
    """
    payload = request.get_json()

    user_query: str = payload['query']
    model_name: str = payload['llm']
    # 'knn' may arrive as a string (see module docstring), so coerce it.
    neighbours: int = int(payload['knn'])
    use_stream: bool = bool(payload['stream'])

    query_embedding = embedding_output(user_query)
    nearest_docs = db_output(query_embedding, neighbours)
    answer, refs = groq_llm_output(user_query, nearest_docs, model_name, use_stream)

    return {"response": answer, "references": refs}


@app.route("/api/ollama/generate", methods=['POST'])
def ollama_completion():
    """Answer a RAG query through a local Ollama backend.

    Expects a JSON body with 'query', 'llm', 'knn' and 'stream' keys,
    embeds the query, fetches the k nearest documents from the vector
    store, and returns the LLM answer plus its references as JSON.
    Returns a friendly error payload if Ollama reports a failure.
    """
    message = request.get_json()

    query: str = message['query']
    llm: str = message['llm']
    # 'knn' may arrive as a string (see module docstring), so coerce it.
    knn: int = int(message['knn'])
    stream: bool = bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = ollama_llm_output(query, db_knn, llm, stream)

    # Ollama reports failures inside the response payload rather than
    # raising, so surface a friendly message instead of crashing below
    # on the missing 'response' key.
    if response_json.get("error"):
        print(response_json)  # TODO: replace with proper logging
        return {
            # Fixed typo in the user-facing message ("occured").
            "response": "An error occurred, try again.",
            "references": "No references"
        }

    return {
        "response": response_json['response'],
        "references": references
    }


@app.route("/api/google/generate", methods=['POST'])
def google_completion():
    """Answer a RAG query through the Google backend.

    Expects a JSON body with 'query', 'llm', 'knn' and 'stream' keys,
    embeds the query, fetches the k nearest documents from the vector
    store, and returns the LLM answer plus its references as JSON.
    """
    payload = request.get_json()

    user_query: str = payload['query']
    model_name: str = payload['llm']
    # 'knn' may arrive as a string (see module docstring), so coerce it.
    neighbours: int = int(payload['knn'])
    use_stream: bool = bool(payload['stream'])

    query_embedding = embedding_output(user_query)
    nearest_docs = db_output(query_embedding, neighbours)
    answer, refs = google_llm_output(user_query, nearest_docs, model_name, use_stream)

    return {"response": answer, "references": refs}


"""
curl -X POST http://localhost:8000/api/groq/generate -H "Content-Type: application/json" -d '{
    "query": "How do I create a sphere in FURY?",
    "llm": "llama3-70b-8192",
    "knn": "3",
    "stream": false
  }'

  
curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{
    "query": "How do I create a sphere in FURY?",
    "llm": "phi3",
    "knn": "3",
    "stream": false
  }'
"""