jonACE committed on
Commit
7a6b70d
·
verified ·
1 Parent(s): 710e3c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -0
app.py CHANGED
@@ -103,6 +103,15 @@ tokenizer.save_pretrained("./llama-2-7b-chat_fine_tuned")
103
  model.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
104
  tokenizer.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
105
 
 
 
 
 
 
 
 
 
 
106
 
107
  # CHATBOT START
108
  # chatbot = pipeline("text-generation", model="jonACE/llama-2-7b-chat_fine_tuned")
 
103
  model.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
104
  tokenizer.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
105
 
106
+ # save GGUF versions
107
+ model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer,)
108
+ model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer)
109
+
110
+ model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "f16")
111
+ model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "f16")
112
+
113
+ model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "q4_k_m")
114
+ model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "q4_k_m")
115
 
116
  # CHATBOT START
117
  # chatbot = pipeline("text-generation", model="jonACE/llama-2-7b-chat_fine_tuned")