# train_llama4.py
# Script to fine-tune Llama 4 Maverick for healthcare fraud detection (text-only with CPU offloading)
from transformers import AutoTokenizer, Llama4ForConditionalGeneration, Trainer, TrainingArguments
from transformers import BitsAndBytesConfig
import datasets
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from accelerate import Accelerator
import huggingface_hub
import os
# Version and CUDA check
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Is CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Authenticate with Hugging Face
LLama = os.getenv("LLama")
if not LLama:
    raise ValueError("LLama token not found. Set it in Hugging Face Space secrets as 'LLama'.")
huggingface_hub.login(token=LLama)
# Load Llama 4 model and tokenizer
MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
# Ensure a padding token exists; reusing EOS avoids growing the vocabulary
# (adding a brand-new '[PAD]' token would also require resizing the model embeddings)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Custom device map to offload some layers to CPU.
# Note: device_map keys must name individual modules; range shorthand such as
# "model.layers.0-15" is not understood, so the ranges are expanded explicitly.
# Verify the module prefix and layer count against model.config / model.named_modules().
NUM_LAYERS = 32
device_map = {"model.embed_tokens": 0, "model.norm": 0, "lm_head": 0}
device_map.update({f"model.layers.{i}": 0 for i in range(16)})                   # keep first 16 layers on GPU
device_map.update({f"model.layers.{i}": "cpu" for i in range(16, NUM_LAYERS)})   # offload remaining layers to CPU
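# Alternative sketch: device_map="auto" plus a max_memory budget lets accelerate decide
# which layers to offload, e.g.
#   from_pretrained(..., device_map="auto", max_memory={0: "70GiB", "cpu": "200GiB"})
# The GiB figures are illustrative placeholders, not measured requirements.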
# Quantization config for A100 80 GB VRAM; fp32 CPU offload is a BitsAndBytesConfig
# option, not a from_pretrained argument
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True
)
model = Llama4ForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map=device_map,
    quantization_config=quantization_config,
    attn_implementation="flex_attention"
)
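# Optional diagnostics: show where each module actually landed and the quantized footprint
print("Resolved device map:", getattr(model, "hf_device_map", device_map))
print(f"Model memory footprint: {model.get_memory_footprint() / 1024**3:.1f} GiB")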
# Prepare for LoRA
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
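# Gradient checkpointing and the generation cache don't mix; Trainer normally disables the
# cache on its own, but setting it explicitly avoids the runtime warning
model.config.use_cache = False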
# Load dataset
dataset = datasets.load_dataset("json", data_files="Bingaman_training_data.json", field="training_pairs")
print("First example from dataset:", dataset["train"][0])
# Tokenization: build prompts with the tokenizer's chat template so they match Llama 4's
# expected format (the hard-coded <s>[INST] ... [/INST] tags are the Llama 2 convention)
def tokenize_data(example):
    messages = [
        {"role": "user", "content": example["input"]},
        {"role": "assistant", "content": example["output"]},
    ]
    formatted_text = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(formatted_text, padding="max_length", truncation=True, max_length=4096, return_tensors="pt")
    input_ids = inputs["input_ids"].squeeze(0).tolist()
    attention_mask = inputs["attention_mask"].squeeze(0).tolist()
    # Mask padding positions so they don't contribute to the loss
    labels = [tok if mask == 1 else -100 for tok, mask in zip(input_ids, attention_mask)]
    return {
        "input_ids": input_ids,
        "labels": labels,
        "attention_mask": attention_mask
    }
tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
print("First tokenized example:", {k: (type(v), len(v)) for k, v in tokenized_dataset[0].items()})
# Data collator
def custom_data_collator(features):
    input_ids = [torch.tensor(f["input_ids"]) for f in features]
    attention_mask = [torch.tensor(f["attention_mask"]) for f in features]
    labels = [torch.tensor(f["labels"]) for f in features]
    return {
        "input_ids": torch.stack(input_ids),
        "attention_mask": torch.stack(attention_mask),
        "labels": torch.stack(labels)
    }
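# Optional smoke test: collate a couple of rows and confirm the stacked tensor shapes
_batch = custom_data_collator([tokenized_dataset[i] for i in range(min(2, len(tokenized_dataset)))])
print("Collated batch shapes:", {k: tuple(v.shape) for k, v in _batch.items()})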
# Training setup
accelerator = Accelerator()
training_args = TrainingArguments(
    output_dir="./fine_tuned_llama4_healthcare",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    eval_strategy="steps",
    eval_steps=10,
    save_strategy="steps",
    save_steps=20,
    save_total_limit=3,
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=5,
    bf16=True,
    gradient_checkpointing=True,
    optim="adamw_torch",
    warmup_steps=50
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset.select(range(min(5, len(tokenized_dataset)))),
    data_collator=custom_data_collator
)
# Start training
trainer.train()
model.save_pretrained("./fine_tuned_llama4_healthcare")
tokenizer.save_pretrained("./fine_tuned_llama4_healthcare")
print("Training complete. Model and tokenizer saved to ./fine_tuned_llama4_healthcare") |