gemma2_on_korean_summary
- README.md +52 -153
- adapter_config.json +7 -6
- adapter_model.safetensors +2 -2
- training_args.bin +2 -2
README.md CHANGED
```diff
@@ -12,12 +12,11 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/ghost_in_the_lab/Korean-fine-tune-models/runs/fc7yrozn)
 # gemma2_on_korean_summary
 
 This model is a fine-tuned version of [beomi/gemma-ko-2b](https://huggingface.co/beomi/gemma-ko-2b) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
+- Loss: 0.9622
 
 ## Model description
 
@@ -37,167 +36,67 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
-- train_batch_size:
-- eval_batch_size:
+- train_batch_size: 2
+- eval_batch_size: 2
 - seed: 42
 - gradient_accumulation_steps: 5
-- total_train_batch_size:
+- total_train_batch_size: 10
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 50
-- training_steps:
+- training_steps: 800
 - mixed_precision_training: Native AMP
 
 ### Training results
 
-| Training Loss | Epoch
-[separator row and the rows for steps 10–400 are truncated in the source]
-| 0.792 | 2.6974 | 410 | 0.8869 |
-| 0.7854 | 2.7632 | 420 | 0.8834 |
-| 0.7978 | 2.8289 | 430 | 0.8770 |
-| 0.7864 | 2.8947 | 440 | 0.8780 |
-| 0.8007 | 2.9605 | 450 | 0.8730 |
-| 0.7686 | 3.0263 | 460 | 0.8760 |
-| 0.6573 | 3.0921 | 470 | 0.8888 |
-| 0.7183 | 3.1579 | 480 | 0.8833 |
-| 0.6644 | 3.2237 | 490 | 0.8864 |
-| 0.6648 | 3.2895 | 500 | 0.8834 |
-| 0.6763 | 3.3553 | 510 | 0.8814 |
-| 0.6844 | 3.4211 | 520 | 0.8824 |
-| 0.6796 | 3.4868 | 530 | 0.8769 |
-| 0.6748 | 3.5526 | 540 | 0.8708 |
-| 0.6899 | 3.6184 | 550 | 0.8688 |
-| 0.6866 | 3.6842 | 560 | 0.8747 |
-| 0.7 | 3.75 | 570 | 0.8645 |
-| 0.6896 | 3.8158 | 580 | 0.8703 |
-| 0.7176 | 3.8816 | 590 | 0.8628 |
-| 0.6517 | 3.9474 | 600 | 0.8655 |
-| 0.6941 | 4.0132 | 610 | 0.8641 |
-| 0.5873 | 4.0789 | 620 | 0.8889 |
-| 0.6208 | 4.1447 | 630 | 0.8772 |
-| 0.6067 | 4.2105 | 640 | 0.8844 |
-| 0.5892 | 4.2763 | 650 | 0.8798 |
-| 0.6059 | 4.3421 | 660 | 0.8830 |
-| 0.6129 | 4.4079 | 670 | 0.8810 |
-| 0.6104 | 4.4737 | 680 | 0.8799 |
-| 0.5846 | 4.5395 | 690 | 0.8763 |
-| 0.6141 | 4.6053 | 700 | 0.8807 |
-| 0.6467 | 4.6711 | 710 | 0.8766 |
-| 0.634 | 4.7368 | 720 | 0.8774 |
-| 0.5976 | 4.8026 | 730 | 0.8680 |
-| 0.5638 | 4.8684 | 740 | 0.8742 |
-| 0.6067 | 4.9342 | 750 | 0.8733 |
-| 0.6219 | 5.0 | 760 | 0.8644 |
-| 0.5169 | 5.0658 | 770 | 0.8969 |
-| 0.5726 | 5.1316 | 780 | 0.9012 |
-| 0.5483 | 5.1974 | 790 | 0.9031 |
-| 0.5197 | 5.2632 | 800 | 0.8976 |
-| 0.5479 | 5.3289 | 810 | 0.8963 |
-| 0.5631 | 5.3947 | 820 | 0.8962 |
-| 0.5687 | 5.4605 | 830 | 0.9009 |
-| 0.4825 | 5.5263 | 840 | 0.8982 |
-| 0.5305 | 5.5921 | 850 | 0.8937 |
-| 0.5743 | 5.6579 | 860 | 0.8945 |
-| 0.5293 | 5.7237 | 870 | 0.8951 |
-| 0.5169 | 5.7895 | 880 | 0.9034 |
-| 0.5585 | 5.8553 | 890 | 0.8894 |
-| 0.5373 | 5.9211 | 900 | 0.8936 |
-| 0.5524 | 5.9868 | 910 | 0.8876 |
-| 0.4815 | 6.0526 | 920 | 0.9187 |
-| 0.47 | 6.1184 | 930 | 0.9200 |
-| 0.4694 | 6.1842 | 940 | 0.9204 |
-| 0.5035 | 6.25 | 950 | 0.9246 |
-| 0.4852 | 6.3158 | 960 | 0.9232 |
-| 0.5266 | 6.3816 | 970 | 0.9257 |
-| 0.4907 | 6.4474 | 980 | 0.9232 |
-| 0.5139 | 6.5132 | 990 | 0.9135 |
-| 0.464 | 6.5789 | 1000 | 0.9207 |
-| 0.5172 | 6.6447 | 1010 | 0.9128 |
-| 0.4948 | 6.7105 | 1020 | 0.9244 |
-| 0.4606 | 6.7763 | 1030 | 0.9171 |
-| 0.491 | 6.8421 | 1040 | 0.9187 |
-| 0.4641 | 6.9079 | 1050 | 0.9157 |
-| 0.4684 | 6.9737 | 1060 | 0.9115 |
-| 0.4625 | 7.0395 | 1070 | 0.9299 |
-| 0.4324 | 7.1053 | 1080 | 0.9454 |
-| 0.4143 | 7.1711 | 1090 | 0.9446 |
-| 0.4357 | 7.2368 | 1100 | 0.9447 |
-| 0.4471 | 7.3026 | 1110 | 0.9459 |
-| 0.4691 | 7.3684 | 1120 | 0.9441 |
-| 0.4556 | 7.4342 | 1130 | 0.9471 |
-| 0.4296 | 7.5 | 1140 | 0.9406 |
-| 0.4323 | 7.5658 | 1150 | 0.9439 |
-| 0.4243 | 7.6316 | 1160 | 0.9430 |
-| 0.4583 | 7.6974 | 1170 | 0.9435 |
-| 0.4346 | 7.7632 | 1180 | 0.9405 |
-| 0.4747 | 7.8289 | 1190 | 0.9406 |
-| 0.4443 | 7.8947 | 1200 | 0.9405 |
-| 0.4418 | 7.9605 | 1210 | 0.9424 |
-| 0.3878 | 8.0263 | 1220 | 0.9464 |
-| 0.4014 | 8.0921 | 1230 | 0.9721 |
-| 0.4183 | 8.1579 | 1240 | 0.9647 |
-| 0.4103 | 8.2237 | 1250 | 0.9672 |
-| 0.3951 | 8.2895 | 1260 | 0.9702 |
-| 0.4488 | 8.3553 | 1270 | 0.9648 |
-| 0.41 | 8.4211 | 1280 | 0.9653 |
-| 0.3726 | 8.4868 | 1290 | 0.9668 |
-| 0.395 | 8.5526 | 1300 | 0.9644 |
-| 0.4141 | 8.6184 | 1310 | 0.9637 |
-| 0.3774 | 8.6842 | 1320 | 0.9659 |
-| 0.4003 | 8.75 | 1330 | 0.9653 |
-| 0.3841 | 8.8158 | 1340 | 0.9671 |
-| 0.4202 | 8.8816 | 1350 | 0.9688 |
-| 0.4226 | 8.9474 | 1360 | 0.9684 |
-| 0.3914 | 9.0132 | 1370 | 0.9673 |
-| 0.4105 | 9.0789 | 1380 | 0.9687 |
-| 0.4021 | 9.1447 | 1390 | 0.9702 |
-| 0.4008 | 9.2105 | 1400 | 0.9709 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.6606 | 0.26 | 20 | 1.5454 |
+| 1.4381 | 0.53 | 40 | 1.3247 |
+| 1.2548 | 0.79 | 60 | 1.1921 |
+| 1.1574 | 1.05 | 80 | 1.1227 |
+| 1.0968 | 1.32 | 100 | 1.0727 |
+| 1.0485 | 1.58 | 120 | 1.0316 |
+| 1.0258 | 1.84 | 140 | 1.0019 |
+| 0.9582 | 2.11 | 160 | 0.9785 |
+| 0.906 | 2.37 | 180 | 0.9575 |
+| 0.8837 | 2.63 | 200 | 0.9429 |
+| 0.8763 | 2.89 | 220 | 0.9247 |
+| 0.8295 | 3.16 | 240 | 0.9213 |
+| 0.7799 | 3.42 | 260 | 0.9122 |
+| 0.7742 | 3.68 | 280 | 0.8992 |
+| 0.7708 | 3.95 | 300 | 0.8918 |
+| 0.7196 | 4.21 | 320 | 0.8952 |
+| 0.6908 | 4.47 | 340 | 0.8917 |
+| 0.6977 | 4.74 | 360 | 0.8841 |
+| 0.6789 | 5.0 | 380 | 0.8764 |
+| 0.6198 | 5.26 | 400 | 0.9003 |
+| 0.6203 | 5.53 | 420 | 0.9030 |
+| 0.6169 | 5.79 | 440 | 0.8913 |
+| 0.6111 | 6.05 | 460 | 0.8935 |
+| 0.564 | 6.32 | 480 | 0.9096 |
+| 0.5819 | 6.58 | 500 | 0.9027 |
+| 0.5673 | 6.84 | 520 | 0.8997 |
+| 0.5382 | 7.11 | 540 | 0.9181 |
+| 0.5228 | 7.37 | 560 | 0.9197 |
+| 0.5197 | 7.63 | 580 | 0.9254 |
+| 0.5319 | 7.89 | 600 | 0.9158 |
+| 0.4954 | 8.16 | 620 | 0.9394 |
+| 0.4954 | 8.42 | 640 | 0.9345 |
+| 0.4719 | 8.68 | 660 | 0.9388 |
+| 0.4866 | 8.95 | 680 | 0.9390 |
+| 0.4739 | 9.21 | 700 | 0.9528 |
+| 0.4591 | 9.47 | 720 | 0.9534 |
+| 0.4478 | 9.74 | 740 | 0.9542 |
+| 0.4547 | 10.0 | 760 | 0.9539 |
+| 0.4379 | 10.26 | 780 | 0.9617 |
+| 0.4348 | 10.53 | 800 | 0.9622 |
 
 
 ### Framework versions
 
-- PEFT 0.
-- Transformers 4.
-- Pytorch 2.1.2
-- Datasets 2.
-- Tokenizers 0.
+- PEFT 0.8.2
+- Transformers 4.38.2
+- Pytorch 2.1.2+cu121
+- Datasets 2.17.0
+- Tokenizers 0.15.0
```
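The hyperparameters listed in the updated card map one-to-one onto `transformers.TrainingArguments`. The following is a minimal sketch of such a configuration, not the author's actual training script: `output_dir` and the eval/logging cadence are assumptions (the results table suggests evaluation every 20 steps), while the remaining values are taken from the card.

```python
# A sketch of a Trainer configuration matching the hyperparameters in the updated
# card. Only the explicitly listed values come from the card; output_dir and the
# eval/logging cadence are hypothetical. fp16 requires a CUDA device at runtime.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="gemma2_on_korean_summary",  # hypothetical output path
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    seed=42,
    gradient_accumulation_steps=5,  # 2 x 5 = total_train_batch_size 10 (single device)
    lr_scheduler_type="linear",
    warmup_steps=50,
    max_steps=800,                  # training_steps in the card
    fp16=True,                      # "Native AMP" mixed precision
    evaluation_strategy="steps",
    eval_steps=20,                  # assumed from the 20-step interval in the table
    logging_steps=20,
)
# The Trainer default optimizer (AdamW with betas=(0.9, 0.999), eps=1e-8) already
# matches the "Adam with betas=(0.9,0.999) and epsilon=1e-08" line above.
```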
adapter_config.json CHANGED
```diff
@@ -15,17 +15,18 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r":
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
+    "k_proj",
     "q_proj",
-    "o_proj",
-    "v_proj",
     "gate_proj",
-    "
-    "
+    "down_proj",
+    "v_proj",
+    "o_proj"
   ],
-  "task_type": "CAUSAL_LM"
+  "task_type": "CAUSAL_LM",
+  "use_rslora": false
 }
```
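The updated adapter_config.json corresponds to a `peft.LoraConfig` roughly like the sketch below. Fields not visible in this hunk (notably `lora_alpha` and `lora_dropout`) are omitted rather than guessed, so PEFT defaults would apply wherever the repo's full file differs.

```python
# A sketch of the LoRA configuration implied by the new adapter_config.json.
# Only r, target_modules, task_type, and use_rslora are confirmed by the diff;
# everything else falls back to peft defaults here.
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=32,  # LoRA rank set in this commit
    target_modules=[
        "up_proj", "k_proj", "q_proj", "gate_proj",
        "down_proj", "v_proj", "o_proj",
    ],
    task_type="CAUSAL_LM",
    use_rslora=False,
)
# model = get_peft_model(base_model, lora_config) would then freeze the base
# weights and attach trainable low-rank matrices to all seven projections.
```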
adapter_model.safetensors CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a9ce050e4e4496519431daaf68b3ed0aebfbb4002facbbaf7ae1da64ad590a53
+size 156926880
```
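Since adapter_model.safetensors holds only the LoRA weights (~157 MB), inference requires applying the adapter on top of the base model. A minimal sketch, assuming the adapter is available under a Hub id or local path; the id below and the prompt format are placeholders, not values from the repo.

```python
# A minimal inference sketch: load beomi/gemma-ko-2b and apply this LoRA adapter.
# "your-username/gemma2_on_korean_summary" is a placeholder; a local directory
# containing adapter_config.json and adapter_model.safetensors also works.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("beomi/gemma-ko-2b", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained("beomi/gemma-ko-2b")
model = PeftModel.from_pretrained(base, "your-username/gemma2_on_korean_summary")

prompt = "다음 문서를 요약하세요:\n..."  # "Summarize the following document:" (format is an assumption)
inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```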
training_args.bin CHANGED
```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b8b0f18188621a32025e61f3adc60296fd9fe2c790c228b5889a71cfafbc7876
+size 4920
```
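Both binary files are committed as Git LFS pointers, so the diffs above record only each new object's SHA-256 and byte size. After fetching the real files (e.g. with `git lfs pull`), the hash can be checked locally, and training_args.bin, which is the `TrainingArguments` object the Trainer pickles alongside a run, can be reloaded for inspection. A generic sketch, not part of the repo:

```python
# Verify a downloaded LFS object against the oid recorded in its pointer, then
# reload the pickled TrainingArguments stored in training_args.bin.
import hashlib

import torch  # transformers must also be installed for unpickling to resolve

with open("training_args.bin", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
print(digest)  # should equal b8b0f18188...cfafbc7876 from the pointer above

args = torch.load("training_args.bin")
print(args.learning_rate, args.max_steps, args.per_device_train_batch_size)
```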