Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import fastapi
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
from time import time
|
| 4 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
#MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
|
| 6 |
import logging
|
| 7 |
import llama_cpp
|
|
@@ -25,7 +25,7 @@ llm_chat = llama_cpp.Llama.from_pretrained(
|
|
| 25 |
verbose=False,
|
| 26 |
n_ctx=1024,
|
| 27 |
n_gpu_layers=0,
|
| 28 |
-
|
| 29 |
)
|
| 30 |
llm_generate = llama_cpp.Llama.from_pretrained(
|
| 31 |
repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
|
|
@@ -37,7 +37,7 @@ llm_generate = llama_cpp.Llama.from_pretrained(
|
|
| 37 |
mirostat_mode=2,
|
| 38 |
mirostat_tau=4.0,
|
| 39 |
mirostat_eta=1.1,
|
| 40 |
-
|
| 41 |
)
|
| 42 |
# Logger setup
|
| 43 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -83,7 +83,7 @@ async def chat(gen:GenModel):
|
|
| 83 |
et = time()
|
| 84 |
output["time"] = et - st
|
| 85 |
messages.append({'role': "assistant", "content": output['choices'][0]['message']['content']})
|
| 86 |
-
print(messages)
|
| 87 |
return output
|
| 88 |
except Exception as e:
|
| 89 |
logger.error(f"Error in /complete endpoint: {e}")
|
|
@@ -131,5 +131,4 @@ async def generate(gen:GenModel):
|
|
| 131 |
|
| 132 |
if __name__ == "__main__":
|
| 133 |
import uvicorn
|
| 134 |
-
|
| 135 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
import fastapi
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
from time import time
|
| 4 |
+
#from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
#MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
|
| 6 |
import logging
|
| 7 |
import llama_cpp
|
|
|
|
| 25 |
verbose=False,
|
| 26 |
n_ctx=1024,
|
| 27 |
n_gpu_layers=0,
|
| 28 |
+
chat_format="llama-2"
|
| 29 |
)
|
| 30 |
llm_generate = llama_cpp.Llama.from_pretrained(
|
| 31 |
repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
|
|
|
|
| 37 |
mirostat_mode=2,
|
| 38 |
mirostat_tau=4.0,
|
| 39 |
mirostat_eta=1.1,
|
| 40 |
+
chat_format="llama-2"
|
| 41 |
)
|
| 42 |
# Logger setup
|
| 43 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 83 |
et = time()
|
| 84 |
output["time"] = et - st
|
| 85 |
messages.append({'role': "assistant", "content": output['choices'][0]['message']['content']})
|
| 86 |
+
#print(messages)
|
| 87 |
return output
|
| 88 |
except Exception as e:
|
| 89 |
logger.error(f"Error in /complete endpoint: {e}")
|
|
|
|
| 131 |
|
| 132 |
if __name__ == "__main__":
|
| 133 |
import uvicorn
|
|
|
|
| 134 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|