In [None]:
# Authenticate against the Hugging Face Hub.
# The access token is read from the Colab secret named 'TOKEN'; it is needed
# to download models from, or upload models to, your account.
from google.colab import userdata
from huggingface_hub import login

login(token=userdata.get('TOKEN'))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
%%capture
# Install Unsloth (Colab extra) straight from GitHub and upgrade transformers.
# The `%%capture` magic above hides the installer output of this cell.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --upgrade transformers

# Choose the xformers requirement from the installed Torch version:
# Torch older than 2.4.0 gets the pinned xformers 0.0.27, otherwise the unpinned package.
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# `--no-deps` installs only the packages listed here, not their own dependencies.
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton

In [None]:
from unsloth import FastLanguageModel
import torch

# Sequence length used both when loading the model and when building the trainer.
max_seq_length = 2048


def load_model(model_name, max_seq_length=max_seq_length, dtype=None, load_in_4bit=True):
    """Load a model and its tokenizer through Unsloth.

    Args:
        model_name: Identifier forwarded to ``FastLanguageModel.from_pretrained``.
        max_seq_length: Forwarded unchanged; defaults to the module-level value.
        dtype: Forwarded unchanged (``None`` by default).
        load_in_4bit: Forwarded unchanged (``True`` by default).

    Returns:
        The ``(model, tokenizer)`` pair produced by ``from_pretrained``.
    """
    load_kwargs = dict(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )
    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)
    return loaded_model, loaded_tokenizer

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [None]:
import os

from huggingface_hub import snapshot_download
from peft import PeftModel

# Base checkpoints that can be selected below; each name is appended to "unsloth/".
models = [
    'Phi-3.5-mini-instruct-bnb-4bit', # |Min Gpu: T4, Max Model size: 14.748GB|
    'gemma-2-27b-it-bnb-4bit',  # |Min Gpu: A100, Max Model size: 39.564GB|
    'Meta-Llama-3.1-8B-Instruct-bnb-4bit' # |Min Gpu: L4, Max Model size: 22.168GB|
    ]

# Model Selection.
# NOTE: the chat-template cell hard-codes "phi-3"; adjust it when picking another entry.
model_name = models[0]

# Required for uploading models to or downloading models from huggingface repositories.
HfUsername = "CooperW"

base_model = f"unsloth/{model_name}"
# Adapter repository name: the model name with '-' and '.' replaced by '_', plus a suffix.
LoRa_Adapters = f"{HfUsername}/{model_name.replace('-', '_').replace('.', '_')}_128prompt"

# Load the base model.
model, tokenizer = load_model(base_model)

# Always defined, so later cells can test it regardless of which branch ran.
last_checkpoint_path = None

try:
    # For continued training, load the existing LoRA adapters onto the base model.
    # `is_trainable=True` is required: by default PEFT loads adapters frozen
    # (inference mode), which would leave nothing to train.
    model = PeftModel.from_pretrained(model, LoRa_Adapters, is_trainable=True)
except Exception as err:
    # `Exception` (not a bare `except`) so KeyboardInterrupt/SystemExit still propagate.
    # `model` is still the plain base model here because the assignment above never ran.
    print(
        f"Could not load LoRA adapters from '{LoRa_Adapters}' "
        f"({type(err).__name__}: {err}); initialising new adapters."
    )
    # Initialise LoRa adapters.
    model = FastLanguageModel.get_peft_model(
        model,
        r = 128,
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj",],
        lora_alpha = 8,
        lora_dropout = 0,
        bias = "none",
        use_gradient_checkpointing = "unsloth",
        random_state = 3407,
        use_rslora = True,
        loftq_config = None,
    )
else:
    # Adapters loaded: also fetch the repository files (markdown and safetensors
    # excluded) and remember where the pushed "last-checkpoint" folder would be.
    # Kept in its own try block so a download problem cannot trigger a second,
    # conflicting adapter initialisation on the already wrapped model.
    try:
        download_path = snapshot_download(repo_id=LoRa_Adapters, ignore_patterns=["*.md", "*.safetensors"])
        last_checkpoint_path = os.path.join(download_path, "last-checkpoint")
    except Exception as err:
        print(
            f"Adapters loaded, but downloading '{LoRa_Adapters}' failed "
            f"({type(err).__name__}: {err}); no checkpoint path available."
        )

==((====))==  Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.45.1.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.37k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Unsloth 2024.9.post4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
from unsloth.chat_templates import get_chat_template

# Replace the tokenizer with one that carries a chat template, so conversations
# can later be rendered to plain text with `apply_chat_template`.
# NOTE(review): the template name is hard-coded to "phi-3"; it presumably has to be
# changed by hand when a different entry of `models` is selected — confirm.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "phi-3", # Use the template of the current model.
    # presumably maps ShareGPT-style dataset keys ("from"/"value", "human"/"gpt")
    # onto role/content, user/assistant — verify against the dataset schema.
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"},
)

def formatting_prompts_func(examples):
    """Render each conversation of a batch to text with the tokenizer's chat template.

    Args:
        examples: Batch mapping whose "conversations" entry is iterated, one
            conversation per element.

    Returns:
        A dict with a single "text" key holding the rendered strings, in input order.
    """
    rendered = []
    for conversation in examples["conversations"]:
        # tokenize=False returns text; no generation prompt is appended.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

In [None]:
from datasets import load_dataset

# Dataset repository (under the configured Hugging Face user) and the file to read from it.
dataset_repo = f"{HfUsername}/jadidi"
dataset_name = "train_network.jsonl"

# Load the 'train' split of that file, then add the "text" column by rendering
# every conversation in batches through `formatting_prompts_func`.
train_dataset = load_dataset(
    dataset_repo,
    data_files=dataset_name,
    split='train',
).map(formatting_prompts_func, batched=True)

train_network.jsonl:   0%|          | 0.00/136M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/168834 [00:00<?, ? examples/s]

In [None]:
from transformers import Trainer, TrainingArguments
from trl import SFTTrainer
import os

import math

# Batches per epoch, derived from the dataset instead of from `trainer`.
# The original expression called `trainer.get_train_dataloader()` before `trainer`
# existed, which raises NameError on a fresh kernel. On a single device with the
# default (no drop_last) dataloader this yields the same number.
per_device_train_batch_size = 8
batches_per_epoch = math.ceil(len(train_dataset) / per_device_train_batch_size)

training_args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=16,
    lr_scheduler_type = "cosine_with_restarts",
    # One restart cycle per 100 batches of an epoch (same value as the original formula).
    lr_scheduler_kwargs = { "num_cycles": batches_per_epoch / 100 },
    optim="adamw_8bit",
    weight_decay=0.01,
    seed=3407,
    warmup_steps=10,
    # max_steps=100,
    num_train_epochs=3,
    learning_rate=2e-4,
    # Use bf16 when the GPU supports it, fp16 otherwise (exactly one is enabled).
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=1,
)

# Define the name of the repo your model is uploaded. By default : (YourHuggingFaceAccount/baseModelName)
training_args = training_args.set_push_to_hub(
    model_id=LoRa_Adapters,
    strategy="checkpoint",
    private_repo=True,
    always_push=False
)

# Define how often your model is uploaded during training.
# Only LoRa adapters are saved.
training_args = training_args.set_save(
    strategy='steps',
    steps=50,
    total_limit=2,
)

# Supervised fine-tuning trainer over the pre-rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=os.cpu_count(),
    packing=False,
    args=training_args,
)


In [None]:
import os

from transformers.trainer_utils import get_last_checkpoint

# Resume only when a checkpoint really exists in the trainer's output directory.
# The previous bare `except:` restarted training from scratch after *any* failure,
# including a manual interrupt or an out-of-memory error in the middle of a run.
checkpoint_dir = trainer.args.output_dir
# get_last_checkpoint lists the folder, so make sure it exists first.
has_checkpoint = (
    os.path.isdir(checkpoint_dir)
    and get_last_checkpoint(checkpoint_dir) is not None
)

if has_checkpoint:
    trainer_stats = trainer.train(resume_from_checkpoint=True)
else:
    trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 168,834 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 16
\        /    Total batch size = 128 | Total steps = 3,957
 "-____-"     Number of trainable parameters = 239,075,328


Step,Training Loss
1,1.1903
2,1.1702
3,1.1275
4,0.9661
5,0.9009
6,0.6794
7,0.5089
8,0.3726
9,0.3144
10,0.2399


Step,Training Loss
1,1.1903
2,1.1702
3,1.1275
4,0.9661
5,0.9009
6,0.6794
7,0.5089
8,0.3726
9,0.3144
10,0.2399


In [None]:
# Release the Colab runtime once the preceding cells have finished.
# NOTE(review): presumably done so the GPU session stops consuming resources
# after training — confirm.
from google.colab import runtime
runtime.unassign()