Update main.py
main.py CHANGED
@@ -4,6 +4,7 @@ import uvicorn
 import prompt_style
 import os
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 
 
 model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
@@ -11,7 +12,7 @@ filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf"
 # model_path = hf_hub_download(repo_id=model_id, filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf", token=os.environ['HF_TOKEN'])
 # model = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=4096, verbose=False)
 
-model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1,
+model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1, token=os.environ['HF_TOKEN'],
                               n_ctx=4096, verbose=False, attn_implementation="flash_attention_2")
 
 class Item(BaseModel):
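In effect, the commit adds the previously missing "from llama_cpp import Llama" import and passes the Hugging Face token into Llama.from_pretrained, which downloads the GGUF file through huggingface_hub before constructing the model. Below is a minimal sketch of how the loaded model might be served; the FastAPI app, the /generate route, and the Item fields are assumptions for illustration, since the commit only shows the loading code. The sketch also uses llama-cpp-python's boolean flash_attn flag in place of the transformers-style attn_implementation kwarg, which Llama does not define.

from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
filename = "Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf"

# from_pretrained fetches the GGUF file from the Hub, then forwards the
# remaining keyword arguments to Llama.__init__.
model = Llama.from_pretrained(
    repo_id=model_id,
    filename=filename,
    n_gpu_layers=-1,   # offload every layer to the GPU
    n_ctx=4096,
    flash_attn=True,   # needs a recent llama-cpp-python build
    verbose=False,
)

app = FastAPI()

class Item(BaseModel):
    # Hypothetical request schema; the diff does not show Item's fields.
    prompt: str
    max_tokens: int = 256

@app.post("/generate")  # hypothetical route name
def generate(item: Item) -> dict:
    out = model.create_chat_completion(
        messages=[{"role": "user", "content": item.prompt}],
        max_tokens=item.max_tokens,
    )
    return {"text": out["choices"][0]["message"]["content"]}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

Note that huggingface_hub also reads the HF_TOKEN environment variable on its own, so exporting that variable is typically sufficient for gated or private repos even without an explicit token argument.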