Update main.py
main.py CHANGED
@@ -4,6 +4,7 @@ import uvicorn
 import prompt_style
 import os
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
 
 
 model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
@@ -11,7 +12,7 @@ filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf"
 # model_path = hf_hub_download(repo_id=model_id, filename="Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf", token=os.environ['HF_TOKEN'])
 # model = Llama(model_path=model_path, n_gpu_layers=-1, n_ctx=4096, verbose=False)
 
-model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1,
+model = Llama.from_pretrained(repo_id=model_id, filename=filename, n_gpu_layers=-1, token=os.environ['HF_TOKEN'],
                               n_ctx=4096, verbose=False, attn_implementation="flash_attention_2")
 
 class Item(BaseModel):
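In effect, the commit adds the previously missing "from llama_cpp import Llama" import and passes the Hugging Face token into Llama.from_pretrained, which downloads the GGUF file through huggingface_hub before constructing the model. Below is a minimal sketch of how the loaded model might be served; the FastAPI app, the /generate route, and the Item fields are assumptions for illustration, since the commit only shows the loading code. The sketch also uses llama-cpp-python's boolean flash_attn flag in place of the transformers-style attn_implementation kwarg, which Llama does not define.

from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

model_id = "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
filename = "Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf"

# from_pretrained fetches the GGUF file from the Hub, then forwards the
# remaining keyword arguments to Llama.__init__.
model = Llama.from_pretrained(
    repo_id=model_id,
    filename=filename,
    n_gpu_layers=-1,   # offload every layer to the GPU
    n_ctx=4096,
    flash_attn=True,   # needs a recent llama-cpp-python build
    verbose=False,
)

app = FastAPI()

class Item(BaseModel):
    # Hypothetical request schema; the diff does not show Item's fields.
    prompt: str
    max_tokens: int = 256

@app.post("/generate")  # hypothetical route name
def generate(item: Item) -> dict:
    out = model.create_chat_completion(
        messages=[{"role": "user", "content": item.prompt}],
        max_tokens=item.max_tokens,
    )
    return {"text": out["choices"][0]["message"]["content"]}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

Note that huggingface_hub also reads the HF_TOKEN environment variable on its own, so exporting that variable is typically sufficient for gated or private repos even without an explicit token argument.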