File size: 950 Bytes
c360d11
 
 
a49e481
 
c360d11
 
 
 
 
0ba2f1b
c360d11
 
 
 
 
 
 
 
f97aea4
b85ab89
c360d11
 
0ba2f1b
c360d11
0ba2f1b
c360d11
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
# Run `ulimit -l` in a subshell at startup; its output goes to the process's
# stdout. Presumably a diagnostic for the use_mlock=True setting below —
# TODO confirm.
os.system("ulimit -l")

# Application object that the route decorators in this file attach to.
app = FastAPI()

# Fetch the GGUF weights file into the current directory before loading it.
hf_hub_download(
    repo_id="TheBloke/deepseek-coder-1.3b-instruct-GGUF",
    filename="deepseek-coder-1.3b-instruct.Q5_K_M.gguf",
    local_dir="./",
)

# Load the downloaded file once at import time; request handlers share this
# single instance.
model_l = Llama(
    model_path="./deepseek-coder-1.3b-instruct.Q5_K_M.gguf",
    n_ctx=16000,
    n_gpu_layers=0,
    n_threads=2,
    use_mlock=True,
)

@app.get("/check")
async def index():
    """Answer GET /check with a fixed greeting payload."""
    greeting = {"msg": "Hey!"}
    return greeting

@app.post("/api")
async def completion(request: Request):
    """Return a short model-generated completion for the posted prompt.

    The request body must be a JSON object containing a ``"prompt"`` entry.
    The prompt is wrapped in a fixed instruction and passed to the
    module-level ``model_l`` with ``temperature=0.7`` and ``max_tokens=64``.

    Args:
        request: Incoming request whose JSON body carries the prompt.

    Returns:
        A dict ``{"responses": ...}`` holding the ``"choices"`` entry of the
        model's result.

    Raises:
        HTTPException: 400 when the body is not valid JSON; 422 when the
            body is not a JSON object or has no ``"prompt"`` key.
    """
    # Function-scope import keeps this block self-contained; fastapi is
    # already a dependency of this file.
    from fastapi import HTTPException

    try:
        data = await request.json()
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; report them as a client error rather than a 500.
        raise HTTPException(
            status_code=400,
            detail="Request body must be valid JSON.",
        ) from err

    if not isinstance(data, dict) or "prompt" not in data:
        raise HTTPException(
            status_code=422,
            detail='Request body must be a JSON object with a "prompt" field.',
        )

    user_code = data["prompt"]
    prompt = f"Complete the following code, do not comment(return only the completed code!):\n\n{user_code}"

    # NOTE(review): the model call is synchronous and not awaited, so it runs
    # on the event loop for the duration of generation — confirm that
    # serialising requests this way is intended.
    res = model_l(
        prompt,
        temperature=0.7,
        echo=False,
        max_tokens=64,
    )
    return {"responses": res["choices"]}

if __name__ == "__main__":
    # Serve the app directly when this file is executed as a script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)