shimmyshimmer committed · Commit 14a73f9 (verified) · 1 Parent(s): db98d3a

Update README.md

Files changed (1): README.md (+72 -0)
README.md CHANGED
@@ -19,3 +19,75 @@ We have a Google Colab Tesla T4 notebook for TinyLlama with 4096 max sequence length
 [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/Discord%20button.png" width="200"/>](https://discord.gg/u54VK8m8tk)
 [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/buy%20me%20a%20coffee%20button.png" width="200"/>](https://ko-fi.com/unsloth)
 [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="400"/>](https://github.com/unslothai/unsloth)
+
+```python
+from unsloth import FastLanguageModel
+import torch
+from trl import SFTTrainer
+from transformers import TrainingArguments
+from datasets import load_dataset
+max_seq_length = 2048 # Supports RoPE Scaling internally, so choose any!
+# Get LAION dataset
+url = "https://huggingface.co/datasets/laion/OIG/resolve/main/unified_chip2.jsonl"
+dataset = load_dataset("json", data_files = {"train" : url}, split = "train")
+
+# 4bit pre-quantized models we support - 4x faster downloading!
+fourbit_models = [
+    "unsloth/mistral-7b-bnb-4bit",
+    "unsloth/llama-2-7b-bnb-4bit",
+    "unsloth/llama-2-13b-bnb-4bit",
+    "unsloth/codellama-34b-bnb-4bit",
+    "unsloth/tinyllama-bnb-4bit",
+] # Go to https://huggingface.co/unsloth for more 4-bit models!
+
+# Load Llama model
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "unsloth/mistral-7b-bnb-4bit", # Supports Llama, Mistral - replace this!
+    max_seq_length = max_seq_length,
+    dtype = None,
+    load_in_4bit = True,
+)
+
+# Do model patching and add fast LoRA weights
+model = FastLanguageModel.get_peft_model(
+    model,
+    r = 16,
+    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                      "gate_proj", "up_proj", "down_proj",],
+    lora_alpha = 16,
+    lora_dropout = 0, # Supports any, but = 0 is optimized
+    bias = "none",    # Supports any, but = "none" is optimized
+    use_gradient_checkpointing = True,
+    random_state = 3407,
+    max_seq_length = max_seq_length,
+    use_rslora = False,  # We support rank stabilized LoRA
+    loftq_config = None, # And LoftQ
+)
+
+trainer = SFTTrainer(
+    model = model,
+    train_dataset = dataset,
+    dataset_text_field = "text",
+    max_seq_length = max_seq_length,
+    tokenizer = tokenizer,
+    args = TrainingArguments(
+        per_device_train_batch_size = 2,
+        gradient_accumulation_steps = 4,
+        warmup_steps = 10,
+        max_steps = 60,
+        fp16 = not torch.cuda.is_bf16_supported(),
+        bf16 = torch.cuda.is_bf16_supported(),
+        logging_steps = 1,
+        output_dir = "outputs",
+        optim = "adamw_8bit",
+        seed = 3407,
+    ),
+)
+trainer.train()
+
+# Go to https://github.com/unslothai/unsloth/wiki for advanced tips like
+# (1) Saving to GGUF / merging to 16bit for vLLM
+# (2) Continued training from a saved LoRA adapter
+# (3) Adding an evaluation loop / OOMs
+# (4) Customized chat templates
+```
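
Tip (2) in the closing comments refers to continuing training from a saved LoRA adapter. Below is a minimal sketch of that workflow, assuming the script above has already run `trainer.train()`; the `lora_model` directory name is a hypothetical placeholder, and reloading through `FastLanguageModel.from_pretrained` follows the Unsloth wiki rather than anything in this commit.

```python
# Sketch: persist and reload the LoRA adapter trained above.
# Assumes `model` and `tokenizer` come from the script in this commit.

# Save only the adapter weights plus the tokenizer ("lora_model" is a hypothetical path).
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")

# Reload later: per the Unsloth wiki, from_pretrained can point at the saved
# adapter directory and re-attach it to the 4-bit base model for further
# training or inference.
from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "lora_model",   # directory saved above
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)
```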