import comet_ml  # Comet must be imported before the ML frameworks so its auto-logging can hook in

from unsloth import PatchDPOTrainer
from accelerate import Accelerator, init_empty_weights
from config import SAVED_MODEL

PatchDPOTrainer()  # patch TRL's DPOTrainer for Unsloth before it is imported below

import torch
from transformers import TextStreamer, AutoTokenizer
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import DPOConfig, DPOTrainer


class MyLlamaModel:
    # Hyperparameters are class attributes so they can be baked into base_output_dir below.
    max_seq_length = 256
    NUM_TRAIN_EPOCHS = 6
    beta = 0.5  # DPO beta: higher values keep the policy closer to the reference model
    LOAD_IN_4BIT = False
    device_map = "auto"
    save_method = "lora"  # "lora" saves only the adapter; the merged_* methods save the whole model, not just the adapter
    lora_dropout = 0.0
    lora_alpha = 32
    learning_rate = 2e-5
    r = 32  # LoRA rank

    base_output_dir = (
        f"{SAVED_MODEL}/{max_seq_length}maxSeqLen_{NUM_TRAIN_EPOCHS}Epochs_{device_map}devmap"
        f"_4Bit{LOAD_IN_4BIT}_{save_method}_beta{beta}_loraDropout{lora_dropout}_r{r}"
        f"_lora_alpha{lora_alpha}_lr{learning_rate}/"
    )

    def __init__(self):
        self.model_name = "unsloth/DeepSeek-R1-GGUF"
        self.model_path = f"{self.base_output_dir}/{self.model_name}"

    def get_model_tokenizer(self, model_name: str):
        print(f"Using model {model_name}")
        self.model_name = model_name
        self.model_path = f"{self.base_output_dir}/{model_name}"
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.model_name,
            # max_seq_length=self.max_seq_length,
            load_in_4bit=self.LOAD_IN_4BIT,  # "You can activate QLoRA by setting load_in_4bit to True" (LLM Engineering, p. 251)
            # quantization_config=bnb_config,  # helped with memory but caused non-zero probabilities when demoed
            # device_map=self.device_map,  # try this
            trust_remote_code=True,
        )
        return model, tokenizer

    def train_and_save(self):
        model, tokenizer = self.get_model_tokenizer(self.model_name)
        with init_empty_weights():  # build the PEFT wrapper without materialising the weights
            model = FastLanguageModel.get_peft_model(
                model,
                r=self.r,
                lora_alpha=self.lora_alpha,
                lora_dropout=self.lora_dropout,
                target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
            )
        torch.nn.Module.to_empty(model, device=torch.device("cuda"))  # eliminates 'NotImplementedError: Cannot copy out of meta tensor'
        accelerator = Accelerator(mixed_precision="bf16", cpu=True)  # enable mixed precision for memory efficiency
        device = accelerator.device
        # model.to(device)
        # optimizer = AdamW(params=model.parameters(), lr=3e-2)
        model = accelerator.prepare(model)  # move the model to the appropriate device
        self.do_dpo(model, tokenizer)

    def do_dpo(self, model, tokenizer):
        dataset = self.load_prepared_dataset(tokenizer.eos_token)
        trainer = DPOTrainer(
            model=model,
            ref_model=None,  # with a PEFT model, the adapters are disabled to obtain the reference log-probs
            tokenizer=tokenizer,
            beta=self.beta,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            max_length=self.max_seq_length // 2,
            max_prompt_length=self.max_seq_length // 2,
            args=DPOConfig(
                learning_rate=self.learning_rate,
                lr_scheduler_type="linear",
                per_device_train_batch_size=1,
                per_device_eval_batch_size=1,
                gradient_accumulation_steps=8,
                num_train_epochs=self.NUM_TRAIN_EPOCHS,
                fp16=not is_bfloat16_supported(),
                bf16=is_bfloat16_supported(),
                weight_decay=0.01,
                warmup_steps=10,
                output_dir="output",
                eval_strategy="steps",
                eval_steps=0.2,
                logging_steps=1,
                report_to="comet_ml",
                seed=0,
            ),
        )
        trainer.train()
        model.save_pretrained_merged(self.model_path, tokenizer=tokenizer, save_method=self.save_method)  # alternative save_method: "merged_4bit_forced"
        generate_text_using(model, tokenizer)

    @staticmethod
    def load_prepared_dataset(eos_token):
        alpaca_template = """Below is an instruction that describes a task.
Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
"""

        def format_samples(example):
            example["prompt"] = alpaca_template.format(example["prompt"])
            example["chosen"] = example["chosen"] + eos_token
            example["rejected"] = example["rejected"] + eos_token
            return {"prompt": example["prompt"], "chosen": example["chosen"], "rejected": example["rejected"]}

        dataset = load_dataset("mlabonne/llmtwin-dpo", split="train")
        dataset = dataset.map(format_samples)
        dataset = dataset.train_test_split(test_size=0.05)
        return dataset
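
    # Added for illustration (not in the original file): prints one formatted record so the
    # prompt/chosen/rejected columns expected by TRL's DPOTrainer can be inspected before a
    # full training run. The method name and the "</s>" default are assumptions; pass the
    # real tokenizer.eos_token when calling it.
    @classmethod
    def preview_one_sample(cls, eos_token: str = "</s>"):
        sample = cls.load_prepared_dataset(eos_token)["train"][0]
        for key in ("prompt", "chosen", "rejected"):
            print(f"--- {key} ---\n{sample[key]}\n")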


def generate_text_using(model, tokenizer):
    print(f"Model of type {type(model)}, tokenizer of type {type(tokenizer)}")
    # return_tensors options: "pt", "tf", "np", "jax", "mlx"; "pt" returns PyTorch tensors
    inputs = tokenizer(
        ["Who are the creators of the course that is under the 'Decoding ML' umbrella?"],
        return_tensors="pt",
    ).to("cuda")
    text_streamer = TextStreamer(tokenizer)
    FastLanguageModel.for_inference(model)
    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=MyLlamaModel.max_seq_length, use_cache=True)
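

# Usage sketch (added for illustration, not in the original file): one way the adapter written
# by train_and_save() could be reloaded for inference with Unsloth. The function name, the
# prompt and the 128-token limit are assumptions; model_path should be the directory produced
# by save_pretrained_merged(..., save_method="lora").
def demo_reload_saved_adapter(model_path: str,
                              prompt: str = "Who are the creators of the course that is under the 'Decoding ML' umbrella?"):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        load_in_4bit=False,
    )
    FastLanguageModel.for_inference(model)  # switch Unsloth into its faster inference mode
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    streamer = TextStreamer(tokenizer)
    _ = model.generate(**inputs, streamer=streamer, max_new_tokens=128, use_cache=True)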
if __name__ == "__main__":
my_model = MyLlamaModel()
my_model.train_and_save() |