import comet_ml  # Comet ML must be imported before the ML frameworks so experiment auto-logging works
from unsloth import PatchDPOTrainer
from accelerate import Accelerator, init_empty_weights
from config import SAVED_MODEL

# Apply Unsloth's DPOTrainer patch before the trl imports below.
PatchDPOTrainer()

import torch
from transformers import TextStreamer
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import DPOConfig, DPOTrainer


class MyLlamaModel:
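    """Fine-tune a base model with DPO and LoRA adapters via Unsloth, then save and demo it."""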
    max_seq_length = 256
    NUM_TRAIN_EPOCHS = 6
    beta = 0.5  # DPO beta: higher values keep the trained policy closer to the reference model
    LOAD_IN_4BIT = False
    device_map = "auto"
    save_method = "lora"  # "lora" saves only the adapters; the "merged_*" methods save the whole merged model
    lora_dropout = 0.0
    lora_alpha = 32
    learning_rate = 2e-5
    r = 32
    base_output_dir = f"{SAVED_MODEL}/{max_seq_length}maxSeqLen_{NUM_TRAIN_EPOCHS}Epochs_{device_map}devmap_4Bit{LOAD_IN_4BIT}_{save_method}_beta{beta}_loraDropout{lora_dropout}_r{r}_lora_alpha{lora_alpha}_lr{learning_rate}/"

    def __init__(self):
        self.model_name = "unsloth/DeepSeek-R1-GGUF"
        self.model_path = f"{self.base_output_dir}/{self.model_name}"

    def get_model_tokenizer(self, model_name: str):
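        """Load the base model and tokenizer with Unsloth; QLoRA is activated when LOAD_IN_4BIT is True."""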
        print(f"Using model {model_name}")
        self.model_name = model_name
        self.model_path = f"{self.base_output_dir}/{model_name}"
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.model_name,
            # max_seq_length=self.max_seq_length,
            load_in_4bit=self.LOAD_IN_4BIT, # "You can activate QLoRA by setting load_in_4bit to True"  LLMEngineering, p251
            # quantization_config=bnb_config, # helped with memory but caused non-zero probabilities when demoed
            # device_map=self.device_map,  # try this
            trust_remote_code=True,
        )
        return model, tokenizer

    def train_and_save(self):
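        """Attach LoRA adapters to the base model, prepare it with Accelerate and run DPO training."""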
        model, tokenizer = self.get_model_tokenizer(self.model_name)
        with init_empty_weights():
            model = FastLanguageModel.get_peft_model(
                model,
                r=self.r,
                lora_alpha=self.lora_alpha,
                lora_dropout=self.lora_dropout,
                target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
            )
            model.to_empty(device=torch.device("cuda"))  # materialise the meta tensors; avoids 'NotImplementedError: Cannot copy out of meta tensor'
            accelerator = Accelerator(mixed_precision="bf16", cpu=True)  # bf16 mixed precision for memory efficiency
            device = accelerator.device
            # model.to(device)
            # optimizer = AdamW(params=model.parameters(), lr=3e-2)

            # Move the model to the appropriate device
            model = accelerator.prepare(model)
            self.do_dpo(model, tokenizer)

    def do_dpo(self, model, tokenizer):
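        """Run DPO training, save the model and stream a demo generation.

        Note: beta, max_length, max_prompt_length and tokenizer are passed straight to
        DPOTrainer here; newer TRL releases expect these on DPOConfig (with the tokenizer
        as processing_class), so this depends on the installed TRL version.
        """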
        dataset = self.load_prepared_dataset(tokenizer.eos_token)
        trainer = DPOTrainer(
            model=model,
            ref_model=None,
            tokenizer=tokenizer,
            beta=self.beta,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            max_length=self.max_seq_length // 2,
            max_prompt_length=self.max_seq_length // 2,
            args=DPOConfig(
                learning_rate=self.learning_rate,
                lr_scheduler_type="linear",
                per_device_train_batch_size=1,
                per_device_eval_batch_size=1,
                gradient_accumulation_steps=8,
                num_train_epochs=self.NUM_TRAIN_EPOCHS,
                fp16=not is_bfloat16_supported(),
                bf16=is_bfloat16_supported(),
                weight_decay=0.01,
                warmup_steps=10,
                output_dir="output",
                eval_strategy="steps",
                eval_steps=0.2,
                logging_steps=1,
                report_to="comet_ml",
                seed=0,
            ),
        )
        trainer.train()
        model.save_pretrained_merged(self.model_path, tokenizer=tokenizer, save_method=self.save_method)  # other save_method options include "merged_16bit" and "merged_4bit_forced"
        generate_text_using(model, tokenizer)


    @staticmethod
    def load_prepared_dataset(eos_token):
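        """Load the mlabonne/llmtwin-dpo preference dataset, format it with the Alpaca template and split off a test set."""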
        alpaca_template = """Below is an instruction that describes a task.
Write a response that appropriately completes the request.
### Instruction:
{}
### Response:
"""

        def format_samples(example):
            # Fill the prompt into the template and terminate both answers with the EOS token.
            example["prompt"] = alpaca_template.format(example["prompt"])
            example["chosen"] = example["chosen"] + eos_token
            example["rejected"] = example["rejected"] + eos_token
            return {
                "prompt": example["prompt"],
                "chosen": example["chosen"],
                "rejected": example["rejected"],
            }

        dataset = load_dataset("mlabonne/llmtwin-dpo", split="train")
        dataset = dataset.map(format_samples)
        dataset = dataset.train_test_split(test_size=0.05)
        return dataset


def generate_text_using(model, tokenizer):
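    """Stream a sample completion from the fine-tuned model as a quick sanity check."""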
    print(f"Model of type {type(model)}, tokenizer of type {type(tokenizer)}")
    #"pt",  "tf",  "np", "jax", "mlx"
    inputs = tokenizer(["Who are the creators of the course that is under the 'Decoding ML' umbrella?"], return_tensors="pt").to("cuda")
    text_streamer = TextStreamer(tokenizer)
    FastLanguageModel.for_inference(model)
    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=MyLlamaModel.max_seq_length, use_cache=True)


if __name__ == "__main__":
    my_model = MyLlamaModel()
    my_model.train_and_save()