File size: 4,763 Bytes
20fd975
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git

!pip install -q datasets bitsandbytes einops wandb


from datasets import load_dataset

# Specify the name of the dataset
dataset_name = "yahma/alpaca-cleaned"

# Load the dataset from the specified name and select the "train" split
dataset = load_dataset(dataset_name, split="train")

# We will be loading the Falcon 7B model, applying 4bit quantization to it, and then adding LoRA adapters to the model.
import torch

from transformers import FalconForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Defining the name of the Falcon model
model_name = "ybelkada/falcon-7b-sharded-bf16"

# Configuring the BitsAndBytes quantization
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.float16,
)

# Loading the Falcon model with quantization configuration
model = FalconForCausalLM.from_pretrained(
model_name,
quantization_config=bnb_config,
trust_remote_code=True
)

# Disabling cache usage in the model configuration
model.config.use_cache = False

# Load the tokenizer for the Falcon 7B model with remote code trust
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Set the padding token to be the same as the end-of-sequence token
tokenizer.pad_token = tokenizer.eos_token

# Import the necessary module for LoRA configuration
from peft import LoraConfig

# Define the parameters for LoRA configuration
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

# Create the LoRA configuration object
peft_config = LoraConfig(
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
r=lora_r,
bias="none",
task_type="CAUSAL_LM",
target_modules=[
"query_key_value",
"dense",
"dense_h_to_4h",
"dense_4h_to_h",
]
)

from transformers import TrainingArguments
# Define the directory to save training results
output_dir = "./results"

# Set the batch size per device during training
per_device_train_batch_size = 4

# Number of steps to accumulate gradients before updating the model
gradient_accumulation_steps = 4

# Choose the optimizer type (e.g., "paged_adamw_32bit")
optim = "paged_adamw_32bit"

# Interval to save model checkpoints (every 10 steps)
save_steps = 10

# Interval to log training metrics (every 10 steps)
logging_steps = 10

# Learning rate for optimization
learning_rate = 2e-4

# Maximum gradient norm for gradient clipping
max_grad_norm = 0.3

# Maximum number of training steps
max_steps = 50

# Warmup ratio for learning rate scheduling
warmup_ratio = 0.03

# Type of learning rate scheduler (e.g., "constant")
lr_scheduler_type = "constant"

# Create a TrainingArguments object to configure the training process
training_arguments = TrainingArguments(
output_dir=output_dir,
per_device_train_batch_size=per_device_train_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
optim=optim,
save_steps=save_steps,
logging_steps=logging_steps,
learning_rate=learning_rate,
fp16=True,  # Use mixed precision training (16-bit)
max_grad_norm=max_grad_norm,
max_steps=max_steps,
warmup_ratio=warmup_ratio,
group_by_length=True,
lr_scheduler_type=lr_scheduler_type,
)


dataset = dataset.map(lambda x: {"text": x["input"]+x["output"]})

# Import the SFTTrainer from the TRL library
from trl import SFTTrainer

# Set the maximum sequence length
max_seq_length = 512

# Create a trainer instance using SFTTrainer
trainer = SFTTrainer(
model=model,
train_dataset=dataset,
peft_config=peft_config,
dataset_text_field="text",
max_seq_length=max_seq_length,
tokenizer=tokenizer,
args=training_arguments,
)


# Iterate through the named modules of the trainer's model
for name, module in trainer.model.named_modules():

# Check if the name contains "norm"
  if "norm" in name:
	  # Convert the module to use torch.float32 data type
	  module = module.to(torch.float32)

trainer.train()


prompt = "Generate a python script to add prime numbers between one and ten"

inputs = tokenizer.encode(prompt, return_tensors='pt')

outputs = model.generate(inputs, max_length=100, temperature = .7, do_sample=True)

completion = tokenizer.decode(outputs[0])

print(completion)




from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint_name="ArmelR/starcoder-gradio-v0"
model = AutoModelForCausalLM.from_pretrained(checkpoint_name)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

prompt = "Create a gradio application that help to convert temperature in celcius into temperature in Fahrenheit"
inputs = tokenizer(f"Question: {prompt}\n\nAnswer: ", return_tensors="pt")

outputs = model.generate(
  inputs["input_ids"],
  temperature=0.2,
  top_p=0.95,
  max_new_tokens=200
)

input_len=len(inputs["input_ids"])
print(tokenizer.decode(outputs[0][input_len:]))