Spaces:
Sleeping
Sleeping
from fastapi import FastAPI, Request
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Raise this process's max-locked-memory limit so `use_mlock=True` below can
# pin the model weights in RAM.  The original `os.system("ulimit -l unlimited")`
# was a no-op: ulimit is a shell builtin, so it only changed the limit of the
# short-lived subshell, never of this Python process.
try:
    import resource

    resource.setrlimit(
        resource.RLIMIT_MEMLOCK,
        (resource.RLIM_INFINITY, resource.RLIM_INFINITY),
    )
except (ImportError, ValueError, OSError):
    # Not on Unix, or not privileged to raise the limit (common in
    # containers) — best effort; llama.cpp falls back to unlocked memory.
    pass

app = FastAPI()

# Fetch the quantized GGUF weights into the working directory
# (hf_hub_download is a no-op re-download when the file is already cached).
hf_hub_download(
    "TheBloke/deepseek-coder-6.7B-base-GGUF",
    "deepseek-coder-6.7b-base.Q4_K_M.gguf",
    local_dir="./",
)

# CPU-only inference: 16k context window, no GPU layers, 2 threads,
# weights locked in memory to avoid swapping.
model_l = Llama(
    model_path="./deepseek-coder-6.7b-base.Q4_K_M.gguf",
    n_ctx=16000,
    n_gpu_layers=0,
    n_threads=2,
    use_mlock=True,
)
@app.get("/")
async def index():
    """Health-check endpoint: returns a static OK payload.

    NOTE(review): the pasted source had no route decorator, so FastAPI never
    registered this handler and the endpoint was unreachable.  "/" is the
    conventional liveness path for a Space — confirm against the original app.
    """
    return {"msg": "OK!"}
@app.post("/completion")
async def completion(request: Request):
    """Run a short LLM completion for the JSON body's "prompt".

    Expects a JSON body with a required "prompt" string and an optional
    "mode" field.  Returns {"responses": <llama.cpp choices list>}.

    NOTE(review): the pasted source had no route decorator, so FastAPI never
    registered this handler; "/completion" is assumed — confirm the original
    route path against the client.
    """
    data = await request.json()
    prompt = data["prompt"]  # required — missing key is a client error
    # "mode" is accepted but currently unused; .get avoids a pointless
    # KeyError/500 when callers omit it.
    mode = data.get("mode")
    res = model_l(
        prompt,
        temperature=0.6,
        echo=False,
        max_tokens=41,  # short completions only — presumably an editor-assist use case
    )
    return {"responses": res["choices"]}
# Start a local uvicorn server only when this file is executed directly;
# importing the module (e.g. `uvicorn app:app`) must not launch anything.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)