CodeIsAbstract committed on
Commit
4376ab8
·
verified ·
1 Parent(s): a40dc41

Upload fine-tuned model

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +62 -4
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4e7083f46f247845da972853910d8680ca649ab8c902c99ae0aef8b98f13b94
3
  size 4943274328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c1ab1d72c24bfe28dcd99393d47713be6eac6f12ab34a9cfaffff70d0f94c7
3
  size 4943274328
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d79a6afb2552fb1cf64796402d4e175b61bc5b99be45450ff790ca57d1cdf3c8
3
  size 2510808826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72797521203759b3fa484f53336a7892a7039282c777c88bfe269ff51ac8884f
3
  size 2510808826
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6
3
  size 14244
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7ab928fb6fc03d23ed0a52a122112f8e7b9f1b5afe619387db540b707cec3ec
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2499e0399fbf93134f32089f43a54b542db105fd8163905b5ca10492c93f08c
3
  size 988
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b81ef4084acb220d4aa4aaf816f556a4e6f57487225003cd5fc278dd5e90c942
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:502fc0646817f7b28f50f0797fbf78aca9985ff1902e46adf8c295619f5e8837
3
  size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.191904047976012,
6
  "eval_steps": 25,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -103,6 +103,64 @@
103
  "eval_samples_per_second": 5.521,
104
  "eval_steps_per_second": 1.844,
105
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
  ],
108
  "logging_steps": 11,
@@ -117,12 +175,12 @@
117
  "should_evaluate": false,
118
  "should_log": false,
119
  "should_save": true,
120
- "should_training_stop": false
121
  },
122
  "attributes": {}
123
  }
124
  },
125
- "total_flos": 1.4253959153713152e+17,
126
  "train_batch_size": 3,
127
  "trial_name": null,
128
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.9835082458770614,
6
  "eval_steps": 25,
7
+ "global_step": 166,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
103
  "eval_samples_per_second": 5.521,
104
  "eval_steps_per_second": 1.844,
105
  "step": 100
106
+ },
107
+ {
108
+ "epoch": 1.3118440779610194,
109
+ "grad_norm": 69.37840270996094,
110
+ "learning_rate": 6.266385446673791e-06,
111
+ "loss": 50.3322,
112
+ "step": 110
113
+ },
114
+ {
115
+ "epoch": 1.4437781109445278,
116
+ "grad_norm": 65.35021209716797,
117
+ "learning_rate": 4.319352532688444e-06,
118
+ "loss": 50.1113,
119
+ "step": 121
120
+ },
121
+ {
122
+ "epoch": 1.4917541229385307,
123
+ "eval_loss": 3.19496488571167,
124
+ "eval_runtime": 90.6364,
125
+ "eval_samples_per_second": 5.517,
126
+ "eval_steps_per_second": 1.843,
127
+ "step": 125
128
+ },
129
+ {
130
+ "epoch": 1.575712143928036,
131
+ "grad_norm": 70.10005187988281,
132
+ "learning_rate": 2.6499436440367165e-06,
133
+ "loss": 50.1493,
134
+ "step": 132
135
+ },
136
+ {
137
+ "epoch": 1.707646176911544,
138
+ "grad_norm": 68.84884643554688,
139
+ "learning_rate": 1.339745962155613e-06,
140
+ "loss": 50.1588,
141
+ "step": 143
142
+ },
143
+ {
144
+ "epoch": 1.7916041979010495,
145
+ "eval_loss": 3.176970958709717,
146
+ "eval_runtime": 89.1443,
147
+ "eval_samples_per_second": 5.609,
148
+ "eval_steps_per_second": 1.873,
149
+ "step": 150
150
+ },
151
+ {
152
+ "epoch": 1.8395802098950524,
153
+ "grad_norm": 84.58167266845703,
154
+ "learning_rate": 4.5279133491454406e-07,
155
+ "loss": 49.9598,
156
+ "step": 154
157
+ },
158
+ {
159
+ "epoch": 1.9715142428785608,
160
+ "grad_norm": 62.91596984863281,
161
+ "learning_rate": 3.242691865790071e-08,
162
+ "loss": 49.826,
163
+ "step": 165
164
  }
165
  ],
166
  "logging_steps": 11,
 
175
  "should_evaluate": false,
176
  "should_log": false,
177
  "should_save": true,
178
+ "should_training_stop": true
179
  },
180
  "attributes": {}
181
  }
182
  },
183
+ "total_flos": 2.3724710537723904e+17,
184
  "train_batch_size": 3,
185
  "trial_name": null,
186
  "trial_params": null