"""
You call this ENDPOINT and it returns you a JSON which is of this format:
POST FORMAT: (/api/groq or api/google or /api/ollama ...)
{
"query": "????",
"llm": "llama70b-whatever",
"knn": "3",
"stream": False
}
RESPONSE FORMAT:
{
"response": "blabla",
"references": "1, 2, 3"
}
"""
# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
from flask import Flask, request

from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output

app = Flask(__name__)
@app.route("/api/groq/generate", methods=['POST'])
def groq_completion():
message = request.get_json()
query: str = message['query']
llm: str = message['llm']
knn: int = int(message['knn'])
stream: bool = bool(message['stream'])
embedding_data = embedding_output(query)
db_knn = db_output(embedding_data, knn)
output, references = groq_llm_output(query, db_knn, llm, stream)
return {
"response": output,
"references": references
}
@app.route("/api/ollama/generate", methods=['POST'])
def ollama_completion():
message = request.get_json()
query: str = message['query']
llm: str = message['llm']
knn: int = int(message['knn'])
stream: bool = bool(message['stream'])
embedding_data = embedding_output(query)
db_knn = db_output(embedding_data, knn)
response_json, references = ollama_llm_output(query, db_knn, llm, stream)
if response_json.get("error"):
print(response_json)
return {
"response": "An error occured, try again.",
"references": "No references"
}
return {
"response": response_json['response'],
"references": references
}
"""
curl -X POST http://localhost:8000/api/groq/generate -H "Content-Type: application/json" -d '{
"query": "How do I create a sphere in FURY?",
"llm": "llama3-70b-8192",
"knn": "3",
"stream": false
}'
curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{
"query": "How do I create a sphere in FURY?",
"llm": "phi3",
"knn": "3",
"stream": false
}'
"""