kamran-r123 committed on
Commit
8e7163b
·
verified ·
1 Parent(s): 4091744

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +2 -1
main.py CHANGED
@@ -4,6 +4,7 @@ import uvicorn
4
  import prompt_style
5
  import os
6
  from huggingface_hub import hf_hub_download
 
7
 
8
 
9
  model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
@@ -11,7 +12,7 @@ filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf"
11
  # model_path = hf_hub_download(repo_id=model_id, filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf", token=os.environ['HF_TOKEN'])
12
  # model = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=4096, verbose=False)
13
 
14
- model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1,
15
  n_ctx=4096, verbose=False, attn_implementation="flash_attention_2")
16
 
17
  class Item(BaseModel):
 
4
  import prompt_style
5
  import os
6
  from huggingface_hub import hf_hub_download
7
+ from llama_cpp import Llama
8
 
9
 
10
  model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
 
12
  # model_path = hf_hub_download(repo_id=model_id, filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf", token=os.environ['HF_TOKEN'])
13
  # model = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=4096, verbose=False)
14
 
15
+ model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1, token=os.environ['HF_TOKEN'],
16
  n_ctx=4096, verbose=False, attn_implementation="flash_attention_2")
17
 
18
  class Item(BaseModel):