lleticiasilvaa committed (verified)
Commit e34a259
1 Parent(s): 8dd1717

Training in progress, step 2142, checkpoint

checkpoint-2142/adapter_config.json CHANGED
@@ -26,13 +26,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "up_proj",
-    "v_proj",
+    "k_proj",
+    "gate_proj",
     "q_proj",
+    "v_proj",
+    "o_proj",
     "down_proj",
-    "k_proj",
-    "gate_proj"
+    "up_proj"
   ],
   "task_type": null,
   "use_dora": false,
checkpoint-2142/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afab048ed22ce09c39a406edda32f9725be4598380dd59093175d9950013b465
+oid sha256:c1c8404dc5f869f364ec72c5769ca22d6cea4956198f9cd005146a7cba699327
 size 400616360
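This and the following binary files are stored as Git LFS pointers: the oid is the SHA-256 of the actual file content and size is its byte count, so a downloaded file can be checked against the pointer. A minimal verification sketch (the local path is illustrative):

import hashlib

# Illustrative check of a downloaded LFS object against the pointer shown above.
expected = "c1c8404dc5f869f364ec72c5769ca22d6cea4956198f9cd005146a7cba699327"
h = hashlib.sha256()
with open("checkpoint-2142/adapter_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
print(h.hexdigest() == expected)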
checkpoint-2142/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81442eb537fb2b543f0ab15319d8a896c09dcbb735ad0869311847d894140bef
+oid sha256:a8fa25d0df9a77a82d1be0116ca7579231d446e0ede044d6749b460a2b448e28
 size 205100562
checkpoint-2142/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d44197229866a7f9ef28668aec6f21dc0ea96e7d982c0b4c30b899a77b3baf20
+oid sha256:58593014ffe5074887df6ba5c4ff4972f82be99bdef3518c2a618376dc4cf03f
 size 14308
checkpoint-2142/trainer_state.json CHANGED
@@ -70,62 +70,62 @@
     },
     {
       "epoch": 1.167114015637764,
-      "grad_norm": 2.512282609939575,
+      "grad_norm": 2.2942659854888916,
       "learning_rate": 3.915853581228413e-05,
-      "loss": 0.614,
+      "loss": 0.7993,
       "step": 1250
     },
     {
       "epoch": 1.167114015637764,
-      "eval_loss": 0.1132238432765007,
-      "eval_runtime": 22.1933,
-      "eval_samples_per_second": 3.92,
-      "eval_steps_per_second": 3.92,
+      "eval_loss": 0.1208883598446846,
+      "eval_runtime": 21.9734,
+      "eval_samples_per_second": 3.959,
+      "eval_steps_per_second": 3.959,
       "step": 1250
     },
     {
       "epoch": 1.4005134788189988,
-      "grad_norm": 1.3006846904754639,
+      "grad_norm": 1.147830843925476,
       "learning_rate": 2.1903963223439395e-05,
-      "loss": 0.6601,
+      "loss": 0.7032,
       "step": 1500
     },
     {
       "epoch": 1.4005134788189988,
-      "eval_loss": 0.10768163949251175,
-      "eval_runtime": 22.1842,
-      "eval_samples_per_second": 3.922,
-      "eval_steps_per_second": 3.922,
+      "eval_loss": 0.10965924710035324,
+      "eval_runtime": 22.0058,
+      "eval_samples_per_second": 3.954,
+      "eval_steps_per_second": 3.954,
       "step": 1500
     },
     {
       "epoch": 1.6339129420002334,
-      "grad_norm": 1.7093279361724854,
+      "grad_norm": 2.2439823150634766,
       "learning_rate": 8.619209196560924e-06,
-      "loss": 0.6404,
+      "loss": 0.6596,
       "step": 1750
     },
     {
       "epoch": 1.6339129420002334,
-      "eval_loss": 0.10490020364522934,
-      "eval_runtime": 22.1702,
-      "eval_samples_per_second": 3.924,
-      "eval_steps_per_second": 3.924,
+      "eval_loss": 0.10602504760026932,
+      "eval_runtime": 22.0084,
+      "eval_samples_per_second": 3.953,
+      "eval_steps_per_second": 3.953,
       "step": 1750
     },
     {
       "epoch": 1.867312405181468,
-      "grad_norm": 0.8398889303207397,
+      "grad_norm": 1.053748369216919,
       "learning_rate": 1.181337872277094e-06,
-      "loss": 0.6201,
+      "loss": 0.6323,
       "step": 2000
     },
     {
       "epoch": 1.867312405181468,
-      "eval_loss": 0.10319412499666214,
-      "eval_runtime": 22.1391,
-      "eval_samples_per_second": 3.93,
-      "eval_steps_per_second": 3.93,
+      "eval_loss": 0.10403568297624588,
+      "eval_runtime": 22.0253,
+      "eval_samples_per_second": 3.95,
+      "eval_steps_per_second": 3.95,
       "step": 2000
     }
   ],
@@ -146,7 +146,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.73201756854954e+17,
+  "total_flos": 1.7185280711396352e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
checkpoint-2142/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37d85e10062490083f3df78142b22b936ee4c10ad7bb1c35d68c6a9743be03a9
+oid sha256:944c7c22023831a73ee4b0a66805723ecef65f25064cb419b24a8d84b3daee22
 size 5560
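Together, the adapter weights, optimizer.pt, rng_state.pth, trainer_state.json, and training_args.bin let the transformers Trainer resume this run from step 2142. A minimal resume sketch, assuming the Trainer is rebuilt with the same model, datasets, and arguments as the original run (model, train_ds, and eval_ds are placeholders not defined in this commit; train_batch_size=1 matches trainer_state.json, output_dir is an assumption):

from transformers import Trainer, TrainingArguments

# Hypothetical resume sketch; model, train_ds, and eval_ds must match the original run.
args = TrainingArguments(output_dir="out", per_device_train_batch_size=1)
trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)
# Restores the adapter weights, optimizer, RNG state, and trainer state from the checkpoint.
trainer.train(resume_from_checkpoint="checkpoint-2142")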