Spaces:
Sleeping
Sleeping
File size: 2,736 Bytes
6cff55d f23adce 6cff55d f23adce 6cff55d a0aed2c 6cff55d f23adce 6cff55d f23adce 6cff55d f23adce 6cff55d f23adce 6cff55d f23adce 6cff55d f23adce 6cff55d f23adce 6cff55d f23adce 6cff55d a0aed2c f5001ff a0aed2c 6cff55d 207726a f23adce 6cff55d f23adce 6cff55d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
"""
Call one of these endpoints with a JSON body; it returns a JSON response in the formats shown below.
POST FORMAT: (/api/groq/generate or /api/google/generate or /api/ollama/generate ...)
{
"query": "????",
"llm": "llama70b-whatever",
"knn": "3",
"stream": false
}
RESPONSE FORMAT:
{
"response": "blabla",
"references": "1, 2, 3"
}
"""
# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
from flask import Flask
from flask import request
from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output, google_llm_output
# Flask application object; the @app.route decorators in this module register
# their handlers on it.
app = Flask(__name__)
@app.route("/api/groq/generate", methods=['POST'])
def groq_completion():
    """Handle POST /api/groq/generate.

    Reads ``query``, ``llm``, ``knn`` and ``stream`` from the JSON request
    body, passes the query to ``embedding_output``, passes that result and
    ``knn`` to ``db_output`` (presumably a k-nearest-neighbour lookup --
    confirm in ``utils``), and forwards everything to ``groq_llm_output``.

    Returns:
        A dict with the keys ``"response"`` and ``"references"``, filled
        with the two values returned by ``groq_llm_output``.

    Raises:
        KeyError: if one of the four fields is missing from the body.
        ValueError, TypeError: if ``knn`` cannot be converted by ``int()``.
    """
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])

    # bool("false") is True, so a flag sent as a string (the same way "knn"
    # is sent) must be parsed explicitly; non-string values keep plain
    # truthiness.
    raw_stream = message['stream']
    if isinstance(raw_stream, str):
        stream: bool = raw_stream.strip().lower() not in ("", "false", "0", "no", "off")
    else:
        stream = bool(raw_stream)

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = groq_llm_output(query, db_knn, llm, stream)

    return {
        "response": output,
        "references": references
    }
@app.route("/api/ollama/generate", methods=['POST'])
def ollama_completion():
    """Handle POST /api/ollama/generate.

    Reads ``query``, ``llm``, ``knn`` and ``stream`` from the JSON request
    body, passes the query to ``embedding_output``, passes that result and
    ``knn`` to ``db_output`` (presumably a k-nearest-neighbour lookup --
    confirm in ``utils``), and forwards everything to ``ollama_llm_output``.

    Returns:
        A dict with the keys ``"response"`` and ``"references"``. When the
        value returned by ``ollama_llm_output`` has a truthy ``"error"``
        entry, that value is printed and a fixed fallback message is
        returned instead of the model output.

    Raises:
        KeyError: if one of the four fields is missing from the body.
        ValueError, TypeError: if ``knn`` cannot be converted by ``int()``.
    """
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])

    # bool("false") is True, so a flag sent as a string (the same way "knn"
    # is sent) must be parsed explicitly; non-string values keep plain
    # truthiness.
    raw_stream = message['stream']
    if isinstance(raw_stream, str):
        stream: bool = raw_stream.strip().lower() not in ("", "false", "0", "no", "off")
    else:
        stream = bool(raw_stream)

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = ollama_llm_output(query, db_knn, llm, stream)

    if response_json.get("error"):
        # Keep the backend error on the server side; the client only gets a
        # generic message.
        print(response_json)
        return {
            "response": "An error occured, try again.",
            "references": "No references"
        }

    return {
        "response": response_json['response'],
        "references": references
    }
@app.route("/api/google/generate", methods=['POST'])
def google_completion():
    """Handle POST /api/google/generate.

    Reads ``query``, ``llm``, ``knn`` and ``stream`` from the JSON request
    body, passes the query to ``embedding_output``, passes that result and
    ``knn`` to ``db_output`` (presumably a k-nearest-neighbour lookup --
    confirm in ``utils``), and forwards everything to ``google_llm_output``.

    Returns:
        A dict with the keys ``"response"`` and ``"references"``, filled
        with the two values returned by ``google_llm_output``.

    Raises:
        KeyError: if one of the four fields is missing from the body.
        ValueError, TypeError: if ``knn`` cannot be converted by ``int()``.
    """
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])

    # bool("false") is True, so a flag sent as a string (the same way "knn"
    # is sent) must be parsed explicitly; non-string values keep plain
    # truthiness.
    raw_stream = message['stream']
    if isinstance(raw_stream, str):
        stream: bool = raw_stream.strip().lower() not in ("", "false", "0", "no", "off")
    else:
        stream = bool(raw_stream)

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = google_llm_output(query, db_knn, llm, stream)

    return {
        "response": response_json,
        "references": references
    }
"""
curl -X POST http://localhost:8000/api/groq/generate -H "Content-Type: application/json" -d '{
"query": "How do I create a sphere in FURY?",
"llm": "llama3-70b-8192",
"knn": "3",
"stream": false
}'
curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{
"query": "How do I create a sphere in FURY?",
"llm": "phi3",
"knn": "3",
"stream": false
}'
"""
|