from transformers import LlamaForCausalLM, LlamaTokenizer, Trainer, TrainingArguments
from transformers import BitsAndBytesConfig
import datasets
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from accelerate import Accelerator
# Version and CUDA check
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Load Llama model and tokenizer
MODEL_ID = "meta-llama/Llama-2-7b-hf"
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)
if tokenizer.pad_token is None:
    # Reuse the EOS token for padding; adding a brand-new [PAD] token would also
    # require resizing the model's embeddings, which is awkward with an 8-bit model.
    tokenizer.pad_token = tokenizer.eos_token
# Quantization config
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
# Load model with FlashAttention 2
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # matches A100 bf16 support
    device_map="auto",
    quantization_config=quantization_config,
    attn_implementation="flash_attention_2",
)
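# Note (assumption, not part of the original setup): flash_attention_2 requires the separate
# flash-attn package and an Ampere-or-newer GPU; if it is unavailable,
# attn_implementation="sdpa" is a reasonable drop-in fallback.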
# Prepare for LoRA
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
# Load dataset
dataset = datasets.load_dataset("json", data_files="final_combined_fraud_data.json", field="training_pairs")
print("First example from dataset:", dataset["train"][0])
# Tokenization: concatenate prompt and response, and mask the prompt portion of the labels
# with -100 so the loss is computed only on the response tokens. Padding is deferred to the collator.
def tokenize_data(example):
    formatted_text = f"{example['input']} {example['output']}"
    inputs = tokenizer(formatted_text, truncation=True, max_length=2048, return_tensors="pt")
    input_ids = inputs["input_ids"].squeeze(0)
    labels = input_ids.clone()
    # Length of the prompt (including BOS) in tokens, used to mask it out of the loss
    input_len = len(tokenizer(example["input"])["input_ids"])
    labels[:input_len] = -100
    return {
        "input_ids": input_ids,
        "labels": labels,
        "attention_mask": inputs["attention_mask"].squeeze(0),
    }
tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
print("First tokenized example:", {k: (type(v), v.shape if isinstance(v, torch.Tensor) else "list") for k, v in tokenized_dataset[0].items()})
# Data collator: examples come back from .map() as Python lists of varying length,
# so convert them to tensors and pad each batch to its longest sequence
# (labels are padded with -100 so padded positions are ignored by the loss).
def custom_data_collator(features):
    input_ids = [torch.tensor(f["input_ids"]) for f in features]
    attention_mask = [torch.tensor(f["attention_mask"]) for f in features]
    labels = [torch.tensor(f["labels"]) for f in features]
    return {
        "input_ids": torch.nn.utils.rnn.pad_sequence(input_ids, batch_first=True, padding_value=tokenizer.pad_token_id),
        "attention_mask": torch.nn.utils.rnn.pad_sequence(attention_mask, batch_first=True, padding_value=0),
        "labels": torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=-100),
    }
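# Sanity check (not in the original script): collating two tokenized examples should yield
# [2, longest_len_in_batch] tensors for every key.
example_batch = custom_data_collator([tokenized_dataset[0], tokenized_dataset[1]])
print("Collated batch shapes:", {k: tuple(v.shape) for k, v in example_batch.items()})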
# Accelerator and training
accelerator = Accelerator()
training_args = TrainingArguments(
    output_dir="./fine_tuned_llama2", per_device_train_batch_size=4, gradient_accumulation_steps=4,
    eval_strategy="steps", eval_steps=50, save_strategy="steps", save_steps=100, save_total_limit=3,
    num_train_epochs=3, learning_rate=2e-5, weight_decay=0.01, logging_dir="./logs", logging_steps=10,
    bf16=True, gradient_checkpointing=True, optim="adamw_torch", warmup_steps=100,
)
trainer = Trainer(
    model=model, args=training_args,
    train_dataset=tokenized_dataset.select(range(90)),
    eval_dataset=tokenized_dataset.select(range(90, 112)),
    data_collator=custom_data_collator,
)
trainer.train()
model.save_pretrained("./fine_tuned_llama2")
tokenizer.save_pretrained("./fine_tuned_llama2")
print("Training complete. Model and tokenizer saved to ./fine_tuned_llama2") |