# NOTE(review): the lines below were Hugging Face Spaces file-viewer residue
# (status lines "Paused", a file-size readout, commit hashes, and a
# line-number gutter) accidentally captured with the source. They are not
# part of the script and broke the Python syntax; kept here as a comment.
# Spaces: / Paused / File size: 838 Bytes / commits 818675e 751c1e6 9dd267d
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
import os
import requests
# Fetch the GGUF model weights at startup. The original buffered the whole
# response in memory via response.content — for a 13B Q4_0 model that is
# several GB of RAM. Stream it to disk in chunks instead, fail fast on HTTP
# errors, and skip the download entirely if a previous run already saved it.
url = "https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
model_path = "./model.gguf"
if not os.path.exists(model_path):  # avoid re-downloading on every restart
    with requests.get(url, stream=True) as response:
        response.raise_for_status()  # surface 404/403 instead of saving an error page
        with open(model_path, mode="wb") as file:
            # 1 MiB chunks keep memory flat regardless of model size.
            for chunk in response.iter_content(chunk_size=1 << 20):
                file.write(chunk)
# Configure and build the llama.cpp OpenAI-compatible FastAPI server.
server_settings = Settings(
    model="./model.gguf",
    n_threads=2,  # set to number of cpu cores
    embedding=False,
)
app = create_app(server_settings)
# Read index.html once at import time and keep it in memory so the root
# route below never touches the filesystem per request. Explicit UTF-8:
# the original relied on the platform default encoding, which breaks on
# Windows (cp1252) for any non-ASCII characters in the page.
with open("index.html", "r", encoding="utf-8") as f:
    content = f.read()
@app.get("/", response_class=HTMLResponse)
async def read_items():
    """Serve the landing page from the HTML cached at startup."""
    page_html = content
    return page_html
if __name__ == "__main__":
    import uvicorn

    # Bind to all interfaces so the server is reachable from outside a
    # container. The original's port=int("2600") was a needless string
    # round-trip; honour a PORT environment override with the same 2600
    # default for backward compatibility.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("PORT", "2600")),
    )
|