ghost613 committed on
Commit
c232172
·
verified ·
1 Parent(s): 53c5949

Upload 4 files

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [beomi/gemma-ko-2b](https://huggingface.co/beomi/gemma-ko-2b) on an unspecified dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.2933
20
 
21
  ## Model description
22
 
@@ -35,7 +35,7 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 5e-06
39
  - train_batch_size: 2
40
  - eval_batch_size: 2
41
  - seed: 42
@@ -51,44 +51,44 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
- | 1.5845 | 0.26 | 20 | 1.5589 |
55
- | 1.4251 | 0.53 | 40 | 1.3136 |
56
- | 1.161 | 0.79 | 60 | 0.9521 |
57
- | 0.8854 | 1.05 | 80 | 0.7201 |
58
- | 0.6741 | 1.32 | 100 | 0.5536 |
59
- | 0.5857 | 1.58 | 120 | 0.4771 |
60
- | 0.5005 | 1.84 | 140 | 0.4275 |
61
- | 0.4307 | 2.11 | 160 | 0.3931 |
62
- | 0.4336 | 2.37 | 180 | 0.3706 |
63
- | 0.4179 | 2.63 | 200 | 0.3568 |
64
- | 0.4112 | 2.89 | 220 | 0.3459 |
65
- | 0.3462 | 3.16 | 240 | 0.3372 |
66
- | 0.3858 | 3.42 | 260 | 0.3322 |
67
- | 0.36 | 3.68 | 280 | 0.3248 |
68
- | 0.3117 | 3.95 | 300 | 0.3203 |
69
- | 0.3458 | 4.21 | 320 | 0.3158 |
70
- | 0.3454 | 4.47 | 340 | 0.3128 |
71
- | 0.3515 | 4.74 | 360 | 0.3147 |
72
- | 0.3313 | 5.0 | 380 | 0.3076 |
73
- | 0.3199 | 5.26 | 400 | 0.3067 |
74
- | 0.3006 | 5.53 | 420 | 0.3048 |
75
- | 0.3009 | 5.79 | 440 | 0.3017 |
76
- | 0.3285 | 6.05 | 460 | 0.3008 |
77
- | 0.3195 | 6.32 | 480 | 0.2995 |
78
- | 0.3543 | 6.58 | 500 | 0.3049 |
79
- | 0.3084 | 6.84 | 520 | 0.3037 |
80
- | 0.2809 | 7.11 | 540 | 0.2970 |
81
- | 0.28 | 7.37 | 560 | 0.2954 |
82
- | 0.3184 | 7.63 | 580 | 0.3062 |
83
- | 0.3017 | 7.89 | 600 | 0.2963 |
84
- | 0.3137 | 8.16 | 620 | 0.2943 |
85
- | 0.3046 | 8.42 | 640 | 0.2945 |
86
- | 0.2915 | 8.68 | 660 | 0.2947 |
87
- | 0.2808 | 8.95 | 680 | 0.2935 |
88
- | 0.2949 | 9.21 | 700 | 0.2926 |
89
- | 0.3169 | 9.47 | 720 | 0.2938 |
90
- | 0.2865 | 9.74 | 740 | 0.2939 |
91
- | 0.2978 | 10.0 | 760 | 0.2933 |
92
 
93
 
94
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [beomi/gemma-ko-2b](https://huggingface.co/beomi/gemma-ko-2b) on an unspecified dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.4381
20
 
21
  ## Model description
22
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 5e-05
39
  - train_batch_size: 2
40
  - eval_batch_size: 2
41
  - seed: 42
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
+ | 1.3751 | 0.26 | 20 | 0.9839 |
55
+ | 0.7078 | 0.53 | 40 | 0.5444 |
56
+ | 0.4456 | 0.79 | 60 | 0.3339 |
57
+ | 0.3351 | 1.05 | 80 | 0.3121 |
58
+ | 0.3153 | 1.32 | 100 | 0.2869 |
59
+ | 0.319 | 1.58 | 120 | 0.2829 |
60
+ | 0.2942 | 1.84 | 140 | 0.2747 |
61
+ | 0.2376 | 2.11 | 160 | 0.2624 |
62
+ | 0.222 | 2.37 | 180 | 0.2828 |
63
+ | 0.2228 | 2.63 | 200 | 0.2766 |
64
+ | 0.2474 | 2.89 | 220 | 0.2596 |
65
+ | 0.1617 | 3.16 | 240 | 0.2756 |
66
+ | 0.1555 | 3.42 | 260 | 0.2814 |
67
+ | 0.1426 | 3.68 | 280 | 0.2802 |
68
+ | 0.1313 | 3.95 | 300 | 0.2846 |
69
+ | 0.1086 | 4.21 | 320 | 0.3006 |
70
+ | 0.0975 | 4.47 | 340 | 0.3048 |
71
+ | 0.103 | 4.74 | 360 | 0.3009 |
72
+ | 0.0891 | 5.0 | 380 | 0.2962 |
73
+ | 0.0627 | 5.26 | 400 | 0.3190 |
74
+ | 0.0487 | 5.53 | 420 | 0.3338 |
75
+ | 0.0609 | 5.79 | 440 | 0.3345 |
76
+ | 0.0559 | 6.05 | 460 | 0.3346 |
77
+ | 0.0326 | 6.32 | 480 | 0.3618 |
78
+ | 0.0363 | 6.58 | 500 | 0.3490 |
79
+ | 0.0334 | 6.84 | 520 | 0.3548 |
80
+ | 0.0277 | 7.11 | 540 | 0.3791 |
81
+ | 0.0301 | 7.37 | 560 | 0.3836 |
82
+ | 0.0188 | 7.63 | 580 | 0.3937 |
83
+ | 0.0203 | 7.89 | 600 | 0.3914 |
84
+ | 0.0173 | 8.16 | 620 | 0.4117 |
85
+ | 0.0141 | 8.42 | 640 | 0.4143 |
86
+ | 0.0131 | 8.68 | 660 | 0.4305 |
87
+ | 0.0141 | 8.95 | 680 | 0.4252 |
88
+ | 0.0099 | 9.21 | 700 | 0.4335 |
89
+ | 0.0096 | 9.47 | 720 | 0.4351 |
90
+ | 0.008 | 9.74 | 740 | 0.4368 |
91
+ | 0.008 | 10.0 | 760 | 0.4381 |
92
 
93
 
94
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "gate_proj",
 
23
  "q_proj",
24
- "k_proj",
25
- "v_proj",
26
  "up_proj",
27
- "o_proj",
28
- "down_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_rslora": false
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "k_proj",
23
+ "o_proj",
24
  "gate_proj",
25
+ "down_proj",
26
  "q_proj",
 
 
27
  "up_proj",
28
+ "v_proj"
 
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:155066eb381721c11813992beaa4864ea212fe32c684812ed9a7ed746a102140
3
  size 78480072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0718e32e2d88816212e093a355ef3f46677ae2f2bcc18e637450c4d541f5d8a1
3
  size 78480072
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f51af84f9fcf0ff68705532a04f133a2401ec8d49d06a67f4acfcf64b8b48866
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90bc184d5a05142346f40469aaefe1ed504f50c16c38b1616315d56d4992b333
3
  size 4920