Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ app = FastAPI()
 
 hf_hub_download("TheBloke/deepseek-coder-1.3b-instruct-GGUF", "deepseek-coder-1.3b-instruct.Q5_K_M.gguf", local_dir="./")
 
-model_l = Llama(model_path="./deepseek-coder-1.3b-instruct.Q5_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock
+model_l = Llama(model_path="./deepseek-coder-1.3b-instruct.Q5_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock=True)
 
 @app.get("/check")
 async def index():
@@ -17,12 +17,12 @@ async def index():
 @app.post("/api")
 async def completion(request: Request):
     data = await request.json()
-    prompt = data["prompt"]
+    prompt = f"Complete the following code, do not use comments too much and longer than half-line(return only the completed code!):\n\n{data['prompt']}"
     res = model_l(
         prompt,
-        temperature=0.
+        temperature=0.7,
         echo=False,
-        max_tokens=
+        max_tokens=64,
     )
     return {"responses": res["choices"]}
 
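For reference, a minimal client sketch against the updated /api route. The base URL and the uvicorn invocation are assumptions (they are not part of this commit); the request and response shapes follow the handler in the diff above.

# Minimal sketch of a client call, assuming the app is served locally with
# something like `uvicorn app:app --port 8000` (hypothetical host/port).
import requests

payload = {"prompt": "def fibonacci(n):"}
resp = requests.post("http://localhost:8000/api", json=payload, timeout=120)
resp.raise_for_status()

# The handler returns {"responses": res["choices"]}; llama-cpp-python
# completion choices carry the generated text under the "text" key.
for choice in resp.json()["responses"]:
    print(choice["text"])

With temperature=0.7 and max_tokens=64, each call returns a short, mildly varied completion, which suits the "return only the completed code" prompt the handler now builds.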