gemma2_on_korean_summary

- README.md +203 -0
- adapter_config.json +31 -0
- adapter_model.safetensors +3 -0
- training_args.bin +3 -0

README.md
ADDED
@@ -0,0 +1,203 @@
---
license: other
library_name: peft
tags:
- generated_from_trainer
base_model: beomi/gemma-ko-2b
model-index:
- name: gemma2_on_korean_summary
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/ghost_in_the_lab/Korean-fine-tune-models/runs/fc7yrozn)

# gemma2_on_korean_summary

This model is a LoRA fine-tune of [beomi/gemma-ko-2b](https://huggingface.co/beomi/gemma-ko-2b) on an unspecified dataset.
It achieves the following results on the evaluation set:
- Loss: 0.9709

## Model description

More information needed

## Intended uses & limitations

More information needed
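The card ships no usage snippet, so here is a minimal inference sketch assuming the standard PEFT adapter-loading flow; the adapter repo id `username/gemma2_on_korean_summary` and the Korean prompt are placeholders, not part of the original card.

```python
# Minimal inference sketch (assumption: standard PEFT adapter layout;
# "username/gemma2_on_korean_summary" is a placeholder repo id).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "beomi/gemma-ko-2b", torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("beomi/gemma-ko-2b")
model = PeftModel.from_pretrained(base, "username/gemma2_on_korean_summary")
model.eval()

# Hypothetical Korean summarization prompt ("Summarize the following text:").
prompt = "다음 글을 요약하세요:\n<본문>"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=128)
# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:],
                       skip_special_tokens=True))
```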
## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-05
- train_batch_size: 1
- eval_batch_size: 1
- seed: 42
- gradient_accumulation_steps: 5
- total_train_batch_size: 5
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_steps: 50
- training_steps: 1400
- mixed_precision_training: Native AMP
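These settings map onto `transformers.TrainingArguments` roughly as follows; this is a sketch under the assumption that `transformers.Trainer` produced this card (the dataset, collator, and output path are not documented, so `output_dir` is a placeholder).

```python
# Sketch of TrainingArguments matching the hyperparameters listed above
# (assumption: the run used transformers.Trainer; output_dir is a placeholder).
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="gemma2_on_korean_summary",
    learning_rate=5e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    seed=42,
    gradient_accumulation_steps=5,   # total train batch size: 1 * 5 = 5
    lr_scheduler_type="linear",
    warmup_steps=50,
    max_steps=1400,
    fp16=True,                       # "Native AMP" mixed precision
    evaluation_strategy="steps",     # the results table evaluates every 10 steps
    eval_steps=10,
    logging_steps=10,
)
```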
### Training results

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:------:|:----:|:---------------:|
| 1.6915 | 0.0658 | 10 | 1.6222 |
| 1.6475 | 0.1316 | 20 | 1.5357 |
| 1.4535 | 0.1974 | 30 | 1.4325 |
| 1.414 | 0.2632 | 40 | 1.3366 |
| 1.3122 | 0.3289 | 50 | 1.2576 |
| 1.2768 | 0.3947 | 60 | 1.2126 |
| 1.174 | 0.4605 | 70 | 1.1838 |
| 1.1816 | 0.5263 | 80 | 1.1575 |
| 1.1163 | 0.5921 | 90 | 1.1330 |
| 1.1197 | 0.6579 | 100 | 1.1116 |
| 1.1635 | 0.7237 | 110 | 1.0954 |
| 1.0971 | 0.7895 | 120 | 1.0730 |
| 1.0985 | 0.8553 | 130 | 1.0568 |
| 1.0725 | 0.9211 | 140 | 1.0414 |
| 1.0483 | 0.9868 | 150 | 1.0302 |
| 0.9666 | 1.0526 | 160 | 1.0203 |
| 0.9754 | 1.1184 | 170 | 1.0096 |
| 0.9688 | 1.1842 | 180 | 0.9976 |
| 0.9673 | 1.25 | 190 | 0.9874 |
| 0.9636 | 1.3158 | 200 | 0.9805 |
| 0.9482 | 1.3816 | 210 | 0.9744 |
| 0.9231 | 1.4474 | 220 | 0.9657 |
| 0.9208 | 1.5132 | 230 | 0.9555 |
| 0.9321 | 1.5789 | 240 | 0.9488 |
| 0.9362 | 1.6447 | 250 | 0.9431 |
| 0.939 | 1.7105 | 260 | 0.9393 |
| 0.919 | 1.7763 | 270 | 0.9342 |
| 0.9277 | 1.8421 | 280 | 0.9312 |
| 0.8955 | 1.9079 | 290 | 0.9263 |
| 0.8679 | 1.9737 | 300 | 0.9211 |
| 0.8545 | 2.0395 | 310 | 0.9251 |
| 0.7897 | 2.1053 | 320 | 0.9200 |
| 0.7835 | 2.1711 | 330 | 0.9198 |
| 0.8139 | 2.2368 | 340 | 0.9105 |
| 0.7861 | 2.3026 | 350 | 0.9089 |
| 0.7752 | 2.3684 | 360 | 0.9075 |
| 0.7919 | 2.4342 | 370 | 0.8985 |
| 0.7433 | 2.5 | 380 | 0.9038 |
| 0.7905 | 2.5658 | 390 | 0.8955 |
| 0.7763 | 2.6316 | 400 | 0.8930 |
| 0.792 | 2.6974 | 410 | 0.8869 |
| 0.7854 | 2.7632 | 420 | 0.8834 |
| 0.7978 | 2.8289 | 430 | 0.8770 |
| 0.7864 | 2.8947 | 440 | 0.8780 |
| 0.8007 | 2.9605 | 450 | 0.8730 |
| 0.7686 | 3.0263 | 460 | 0.8760 |
| 0.6573 | 3.0921 | 470 | 0.8888 |
| 0.7183 | 3.1579 | 480 | 0.8833 |
| 0.6644 | 3.2237 | 490 | 0.8864 |
| 0.6648 | 3.2895 | 500 | 0.8834 |
| 0.6763 | 3.3553 | 510 | 0.8814 |
| 0.6844 | 3.4211 | 520 | 0.8824 |
| 0.6796 | 3.4868 | 530 | 0.8769 |
| 0.6748 | 3.5526 | 540 | 0.8708 |
| 0.6899 | 3.6184 | 550 | 0.8688 |
| 0.6866 | 3.6842 | 560 | 0.8747 |
| 0.7 | 3.75 | 570 | 0.8645 |
| 0.6896 | 3.8158 | 580 | 0.8703 |
| 0.7176 | 3.8816 | 590 | 0.8628 |
| 0.6517 | 3.9474 | 600 | 0.8655 |
| 0.6941 | 4.0132 | 610 | 0.8641 |
| 0.5873 | 4.0789 | 620 | 0.8889 |
| 0.6208 | 4.1447 | 630 | 0.8772 |
| 0.6067 | 4.2105 | 640 | 0.8844 |
| 0.5892 | 4.2763 | 650 | 0.8798 |
| 0.6059 | 4.3421 | 660 | 0.8830 |
| 0.6129 | 4.4079 | 670 | 0.8810 |
| 0.6104 | 4.4737 | 680 | 0.8799 |
| 0.5846 | 4.5395 | 690 | 0.8763 |
| 0.6141 | 4.6053 | 700 | 0.8807 |
| 0.6467 | 4.6711 | 710 | 0.8766 |
| 0.634 | 4.7368 | 720 | 0.8774 |
| 0.5976 | 4.8026 | 730 | 0.8680 |
| 0.5638 | 4.8684 | 740 | 0.8742 |
| 0.6067 | 4.9342 | 750 | 0.8733 |
| 0.6219 | 5.0 | 760 | 0.8644 |
| 0.5169 | 5.0658 | 770 | 0.8969 |
| 0.5726 | 5.1316 | 780 | 0.9012 |
| 0.5483 | 5.1974 | 790 | 0.9031 |
| 0.5197 | 5.2632 | 800 | 0.8976 |
| 0.5479 | 5.3289 | 810 | 0.8963 |
| 0.5631 | 5.3947 | 820 | 0.8962 |
| 0.5687 | 5.4605 | 830 | 0.9009 |
| 0.4825 | 5.5263 | 840 | 0.8982 |
| 0.5305 | 5.5921 | 850 | 0.8937 |
| 0.5743 | 5.6579 | 860 | 0.8945 |
| 0.5293 | 5.7237 | 870 | 0.8951 |
| 0.5169 | 5.7895 | 880 | 0.9034 |
| 0.5585 | 5.8553 | 890 | 0.8894 |
| 0.5373 | 5.9211 | 900 | 0.8936 |
| 0.5524 | 5.9868 | 910 | 0.8876 |
| 0.4815 | 6.0526 | 920 | 0.9187 |
| 0.47 | 6.1184 | 930 | 0.9200 |
| 0.4694 | 6.1842 | 940 | 0.9204 |
| 0.5035 | 6.25 | 950 | 0.9246 |
| 0.4852 | 6.3158 | 960 | 0.9232 |
| 0.5266 | 6.3816 | 970 | 0.9257 |
| 0.4907 | 6.4474 | 980 | 0.9232 |
| 0.5139 | 6.5132 | 990 | 0.9135 |
| 0.464 | 6.5789 | 1000 | 0.9207 |
| 0.5172 | 6.6447 | 1010 | 0.9128 |
| 0.4948 | 6.7105 | 1020 | 0.9244 |
| 0.4606 | 6.7763 | 1030 | 0.9171 |
| 0.491 | 6.8421 | 1040 | 0.9187 |
| 0.4641 | 6.9079 | 1050 | 0.9157 |
| 0.4684 | 6.9737 | 1060 | 0.9115 |
| 0.4625 | 7.0395 | 1070 | 0.9299 |
| 0.4324 | 7.1053 | 1080 | 0.9454 |
| 0.4143 | 7.1711 | 1090 | 0.9446 |
| 0.4357 | 7.2368 | 1100 | 0.9447 |
| 0.4471 | 7.3026 | 1110 | 0.9459 |
| 0.4691 | 7.3684 | 1120 | 0.9441 |
| 0.4556 | 7.4342 | 1130 | 0.9471 |
| 0.4296 | 7.5 | 1140 | 0.9406 |
| 0.4323 | 7.5658 | 1150 | 0.9439 |
| 0.4243 | 7.6316 | 1160 | 0.9430 |
| 0.4583 | 7.6974 | 1170 | 0.9435 |
| 0.4346 | 7.7632 | 1180 | 0.9405 |
| 0.4747 | 7.8289 | 1190 | 0.9406 |
| 0.4443 | 7.8947 | 1200 | 0.9405 |
| 0.4418 | 7.9605 | 1210 | 0.9424 |
| 0.3878 | 8.0263 | 1220 | 0.9464 |
| 0.4014 | 8.0921 | 1230 | 0.9721 |
| 0.4183 | 8.1579 | 1240 | 0.9647 |
| 0.4103 | 8.2237 | 1250 | 0.9672 |
| 0.3951 | 8.2895 | 1260 | 0.9702 |
| 0.4488 | 8.3553 | 1270 | 0.9648 |
| 0.41 | 8.4211 | 1280 | 0.9653 |
| 0.3726 | 8.4868 | 1290 | 0.9668 |
| 0.395 | 8.5526 | 1300 | 0.9644 |
| 0.4141 | 8.6184 | 1310 | 0.9637 |
| 0.3774 | 8.6842 | 1320 | 0.9659 |
| 0.4003 | 8.75 | 1330 | 0.9653 |
| 0.3841 | 8.8158 | 1340 | 0.9671 |
| 0.4202 | 8.8816 | 1350 | 0.9688 |
| 0.4226 | 8.9474 | 1360 | 0.9684 |
| 0.3914 | 9.0132 | 1370 | 0.9673 |
| 0.4105 | 9.0789 | 1380 | 0.9687 |
| 0.4021 | 9.1447 | 1390 | 0.9702 |
| 0.4008 | 9.2105 | 1400 | 0.9709 |

Note that validation loss bottoms out at 0.8628 (step 590, epoch ~3.9) and drifts upward over the remaining epochs while training loss keeps falling, so checkpoints from around epochs 3-4 likely generalize better than the final step-1400 checkpoint (loss 0.9709) reported above.
### Framework versions

- PEFT 0.7.1
- Transformers 4.41.0.dev0
- Pytorch 2.1.2
- Datasets 2.15.0
- Tokenizers 0.19.1
adapter_config.json
ADDED
@@ -0,0 +1,31 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "beomi/gemma-ko-2b",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 16,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "up_proj",
    "q_proj",
    "o_proj",
    "v_proj",
    "gate_proj",
    "k_proj",
    "down_proj"
  ],
  "task_type": "CAUSAL_LM"
}
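For readers reconstructing this setup in code, the JSON above corresponds to roughly the following `peft.LoraConfig`; a sketch following the PEFT 0.7.x API, not an excerpt from the original training script.

```python
# peft.LoraConfig equivalent of adapter_config.json above (a sketch;
# non-default fields only, remaining keys match the LoraConfig defaults).
from peft import LoraConfig

config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)
```

Targeting every attention and MLP projection (rather than just `q_proj`/`v_proj`) is the broad-coverage LoRA setup, which trades a larger adapter for more capacity.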
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2314cf6b52c9b7c617802a576429a203fc1ad4de245b8c6d967c01eaa2b648b3
size 78480072
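The ~78 MB safetensors file above holds only the LoRA deltas, not the full 2B-parameter model. To deploy without a `peft` dependency at inference time, the deltas can be merged into the base weights; a sketch, with the adapter path as a placeholder:

```python
# Sketch: merge the LoRA adapter into the base model for standalone use
# ("username/gemma2_on_korean_summary" is a placeholder adapter location).
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("beomi/gemma-ko-2b")
model = PeftModel.from_pretrained(base, "username/gemma2_on_korean_summary")
merged = model.merge_and_unload()  # folds LoRA deltas into the base weights
merged.save_pretrained("gemma-ko-2b-korean-summary-merged")
```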
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1c7323d160e89f2a6ac6956b9d72d538e7f5f682f87c23706f644fa0f8fd29d
size 5112