from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import resource
import requests

app = FastAPI()
@app.post("/api")
async def completion(request: Request):
try:
data = await request.json()
prompt = data["prompt"]
mode = data['mode']
if mode == 'f':
return requests.post("https://mrali-codexpert-computing.hf.space/api", json={"prompt": prompt}).json()
else:
res = model_l(
prompt,
temperature=0.6,
echo=False,
max_tokens=32,
)
return {"responses": res["choices"]}
except Exception as j:
print(j)
return {"responses": "Error!"}
if __name__ == "__main__":
os.system("ulimit -l unlimited")
hf_hub_download("TheBloke/deepseek-coder-1.3b-base-GGUF", "deepseek-coder-1.3b-base.Q5_K_M.gguf", local_dir="./")
global model_l
model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf", n_ctx=16192, n_gpu_layers=0, n_threads=2, use_mlock=True)
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
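
# A minimal client sketch, assuming the server above is running locally on
# port 7860. The "mode" value "l" is hypothetical: any value other than "f"
# routes the prompt to the local model.
#
#   import requests
#   r = requests.post(
#       "http://localhost:7860/api",
#       json={"prompt": "def fib(n):", "mode": "l"},
#   )
#   print(r.json()["responses"])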