File size: 838 Bytes
818675e
 
 
751c1e6
818675e
751c1e6
 
 
 
9dd267d
818675e
 
 
9dd267d
818675e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
import os
import requests

# Fetch the GGUF model weights the server will load below.
# The file is several GB: stream it to disk in chunks instead of buffering the
# whole response in memory (`response.content` would hold it all at once), and
# fail fast on HTTP errors so a 404/403 error page is never written as the model.
url = "https://huggingface.co/TheBloke/WizardLM-13B-V1.2-GGUF/resolve/main/wizardlm-13b-v1.2.Q4_0.gguf"
with requests.get(url, stream=True) as response:
    response.raise_for_status()
    with open("./model.gguf", mode="wb") as file:
        for chunk in response.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            file.write(chunk)

# Build the llama.cpp OpenAI-compatible FastAPI app around the downloaded model.
server_settings = Settings(
    model="./model.gguf",
    embedding=False,
    n_threads=2,  # set to number of cpu cores
)
app = create_app(server_settings)

# Read the content of index.html once and store it in memory
with open("index.html", "r") as f:
    content = f.read()


@app.get("/", response_class=HTMLResponse)
async def read_items():
    """Serve the landing page that was cached in memory at startup."""
    page = content  # no per-request disk read; module-level cache
    return page

if __name__ == "__main__":
    import uvicorn

    # Bind on all interfaces so the server is reachable from outside a container.
    # Port as a plain int literal — int("2600") was a needless string round-trip.
    uvicorn.run(app, host="0.0.0.0", port=2600)