MrAli committed on
Commit
511b17b
·
verified ·
1 Parent(s): 71ff61b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -6,9 +6,11 @@ os.system("ulimit -l unlimited")
6
 
7
  app = FastAPI()
8
 
9
- hf_hub_download("TheBloke/deepseek-coder-6.7B-base-GGUF", "deepseek-coder-6.7b-base.Q5_K_M.gguf", local_dir="./")
 
10
 
11
- model_l = Llama(model_path="./deepseek-coder-6.7b-base.Q5_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock=True)
 
12
 
13
  @app.get("/check")
14
  async def index():
@@ -18,12 +20,21 @@ async def index():
18
  async def completion(request: Request):
19
  data = await request.json()
20
  prompt = data["prompt"]
21
- res = model_l(
22
- prompt,
23
- temperature=0.6,
24
- echo=False,
25
- max_tokens=41,
26
- )
 
 
 
 
 
 
 
 
 
27
  return {"responses": res["choices"]}
28
 
29
  if __name__ == "__main__":
 
6
 
7
app = FastAPI()

# Fetch both model weights up front so no request ever blocks on a download.
hf_hub_download("TheBloke/deepseek-coder-1.3b-base-GGUF", "deepseek-coder-1.3b-base.Q5_K_M.gguf", local_dir="./")
hf_hub_download("TheBloke/deepseek-coder-6.7B-base-GGUF", "deepseek-coder-6.7b-base.Q4_K_M.gguf", local_dir="./")

# model_l: light 1.3b model (default); model_f: full 6.7b model (mode == "f").
# BUG FIX: the 6.7b file downloaded above is the Q4_K_M quantization, but the
# original code tried to load a non-existent Q5_K_M file and would crash on
# startup — point model_f at the file that was actually downloaded.
model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock=True)
model_f = Llama(model_path="./deepseek-coder-6.7b-base.Q4_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock=True)
14
 
15
  @app.get("/check")
16
  async def index():
 
20
async def completion(request: Request):
    """Generate a completion for the posted prompt.

    Expects a JSON body with:
      - "prompt" (required): text to complete.
      - "mode" (optional): "f" selects the full 6.7b model; any other value,
        or a missing key, falls back to the light 1.3b model.

    Returns {"responses": <llama.cpp "choices" list>}.
    """
    data = await request.json()
    prompt = data["prompt"]
    # ROBUSTNESS FIX: the original used data['mode'], which raises KeyError
    # (HTTP 500) for clients that send only "prompt" — the pre-commit API
    # shape. Defaulting to None preserves backward compatibility.
    mode = data.get("mode")
    # Pick the model once instead of duplicating the identical call in both
    # branches of an if/else.
    model = model_f if mode == "f" else model_l
    res = model(
        prompt,
        temperature=0.6,
        echo=False,
        max_tokens=41,
    )
    return {"responses": res["choices"]}
39
 
40
  if __name__ == "__main__":