Update app.py
Browse files
app.py
CHANGED
@@ -103,6 +103,15 @@ tokenizer.save_pretrained("./llama-2-7b-chat_fine_tuned")
|
|
103 |
model.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
|
104 |
tokenizer.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
|
105 |
|
106 |
|
107 |
# CHATBOT START
|
108 |
# chatbot = pipeline("text-generation", model="jonACE/llama-2-7b-chat_fine_tuned")
|
|
|
103 |
model.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
|
104 |
tokenizer.push_to_hub("jonACE/llama-2-7b-chat_fine_tuned", token=hf_token)
|
105 |
|
106 |
+
# save GGUF versions
|
107 |
+
model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer,)
|
108 |
+
model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer)
|
109 |
+
|
110 |
+
model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "f16")
|
111 |
+
model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "f16")
|
112 |
+
|
113 |
+
model.save_pretrained_gguf("./llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "q4_k_m")
|
114 |
+
model.push_to_hub_gguf("jonACE/llama-2-7b-chat_fine_tuned", tokenizer, quantization_method = "q4_k_m")
|
115 |
|
116 |
# CHATBOT START
|
117 |
# chatbot = pipeline("text-generation", model="jonACE/llama-2-7b-chat_fine_tuned")
|