Update app.py
Browse files
app.py
CHANGED
@@ -30,7 +30,6 @@ from datasets import Dataset
|
|
30 |
from transformers import AutoTokenizer
|
31 |
import unsloth
|
32 |
|
33 |
-
unsloth.optimize_all()
|
34 |
|
35 |
model_name = "meta-llama/Llama-2-7b-hf" # Base 7B checkpoint; the chat-tuned variant "meta-llama/Llama-2-7b-chat-hf" is the same size, not smaller
|
36 |
|
@@ -46,6 +45,7 @@ tokenizer.pad_token = tokenizer.eos_token # Use EOS as PAD token
|
|
46 |
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})
|
47 |
|
48 |
# Tokenization function
|
|
|
49 |
def tokenize_function(examples):
|
50 |
tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
|
51 |
tokens["labels"] = tokens["input_ids"].copy() # Use input as labels for text generation
|
@@ -95,6 +95,7 @@ trainer = Trainer(
|
|
95 |
tokenizer=tokenizer,
|
96 |
)
|
97 |
|
|
|
98 |
trainer.train()
|
99 |
|
100 |
model.save_pretrained("./fine_tuned_llama2")
|
|
|
30 |
from transformers import AutoTokenizer
|
31 |
import unsloth
|
32 |
|
|
|
33 |
|
34 |
model_name = "meta-llama/Llama-2-7b-hf" # Base 7B checkpoint; the chat-tuned variant "meta-llama/Llama-2-7b-chat-hf" is the same size, not smaller
|
35 |
|
|
|
45 |
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})
|
46 |
|
47 |
# Tokenization function
|
48 |
+
@unsloth.optimize
|
49 |
def tokenize_function(examples):
|
50 |
tokens = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)
|
51 |
tokens["labels"] = tokens["input_ids"].copy() # Use input as labels for text generation
|
|
|
95 |
tokenizer=tokenizer,
|
96 |
)
|
97 |
|
98 |
+
@unsloth.optimize
|
99 |
trainer.train()
|
100 |
|
101 |
model.save_pretrained("./fine_tuned_llama2")
|