Update main.py
main.py CHANGED

```diff
@@ -4,6 +4,7 @@ import uvicorn
 import prompt_style
 import os
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama


 model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
@@ -11,7 +12,7 @@ filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf"
 # model_path = hf_hub_download(repo_id=model_id, filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf", token=os.environ['HF_TOKEN'])
 # model = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=4096, verbose=False)

-model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1,
+model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1, token=os.environ['HF_TOKEN'],
 n_ctx=4096, verbose=False, attn_implementation="flash_attention_2")

 class Item(BaseModel):
```
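For context, here is a minimal runnable sketch of the load path this diff ends up with, assuming the `llama-cpp-python` package (which provides `llama_cpp.Llama` and `Llama.from_pretrained`). Two hedges, flagged in the comments as well: `huggingface_hub` normally reads `HF_TOKEN` from the environment on its own, so an explicit `token` kwarg may be redundant; and `attn_implementation` is a transformers-style argument, whereas recent llama-cpp-python versions expose flash attention through a `flash_attn` flag instead.

```python
import os

from llama_cpp import Llama  # pip install llama-cpp-python

model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
filename = "Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf"

# from_pretrained downloads the GGUF from the Hub (via huggingface_hub) and
# loads it in one step. huggingface_hub picks up HF_TOKEN from the environment,
# so passing a token explicitly is usually only needed in unusual setups.
model = Llama.from_pretrained(
    repo_id=model_id,
    filename=filename,
    n_gpu_layers=-1,   # offload all layers to the GPU
    n_ctx=4096,        # context window size
    verbose=False,
    # flash_attn=True,  # assumption: the flash-attention switch in recent llama-cpp-python
)

# Quick smoke test using the OpenAI-style chat-completion API.
out = model.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=32,
)
print(out["choices"][0]["message"]["content"])
```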