rakhman-llm commited on
Commit
b09e26f
·
verified ·
1 Parent(s): bd75565

Training in progress, step 30500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8367a24eb06a34cbf04ec05d49423cacadb43f1fbc7c9b46cd3bc8f7d202aeea
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b4d2a21ab34ea5aae19f2c79c17d535d0eceb21e9bd3fbe132c0c6743fbd74
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69cfb99a1d47731eb0bd0f0aa8008f24af3b21461df86591e07ee79decd04b36
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60601e0df9ec0d20e49569fddd22a460360511cad0bf70986d2e0dfe2c6fc5ac
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59297a887ee86d9ae2f8461a820add9f102e062ed6b208e7b0f014f86659cc1d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbfe40bf67dcc41ade516e25138c73f55e1a79032c0542ad6be33784e59a5c69
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ece32524ff12cd47152c7073cf4a24f61bf2d3f6e989a1d942aaad706c29e5e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6148692faa22908e20b6fbcdb986515b0d89f409366c21a08301b9100cfff4f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2115,6 +2115,49 @@
2115
  "learning_rate": 6.671555555555556e-06,
2116
  "loss": 0.0569,
2117
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2118
  }
2119
  ],
2120
  "logging_steps": 100,
@@ -2134,7 +2177,7 @@
2134
  "attributes": {}
2135
  }
2136
  },
2137
- "total_flos": 7.30749468672e+16,
2138
  "train_batch_size": 4,
2139
  "trial_name": null,
2140
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.033333333333333,
5
  "eval_steps": 500,
6
+ "global_step": 30500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2115
  "learning_rate": 6.671555555555556e-06,
2116
  "loss": 0.0569,
2117
  "step": 30000
2118
+ },
2119
+ {
2120
+ "epoch": 2.0,
2121
+ "eval_loss": 0.08315848559141159,
2122
+ "eval_runtime": 120.1805,
2123
+ "eval_samples_per_second": 16.642,
2124
+ "eval_steps_per_second": 4.16,
2125
+ "step": 30000
2126
+ },
2127
+ {
2128
+ "epoch": 2.006666666666667,
2129
+ "grad_norm": 0.2182902991771698,
2130
+ "learning_rate": 6.627111111111112e-06,
2131
+ "loss": 0.0532,
2132
+ "step": 30100
2133
+ },
2134
+ {
2135
+ "epoch": 2.013333333333333,
2136
+ "grad_norm": 0.2302951216697693,
2137
+ "learning_rate": 6.582666666666667e-06,
2138
+ "loss": 0.0551,
2139
+ "step": 30200
2140
+ },
2141
+ {
2142
+ "epoch": 2.02,
2143
+ "grad_norm": 0.25131645798683167,
2144
+ "learning_rate": 6.538222222222222e-06,
2145
+ "loss": 0.0531,
2146
+ "step": 30300
2147
+ },
2148
+ {
2149
+ "epoch": 2.026666666666667,
2150
+ "grad_norm": 0.2000693827867508,
2151
+ "learning_rate": 6.493777777777779e-06,
2152
+ "loss": 0.055,
2153
+ "step": 30400
2154
+ },
2155
+ {
2156
+ "epoch": 2.033333333333333,
2157
+ "grad_norm": 0.2072344422340393,
2158
+ "learning_rate": 6.4493333333333345e-06,
2159
+ "loss": 0.0544,
2160
+ "step": 30500
2161
  }
2162
  ],
2163
  "logging_steps": 100,
 
2177
  "attributes": {}
2178
  }
2179
  },
2180
+ "total_flos": 7.429286264832e+16,
2181
  "train_batch_size": 4,
2182
  "trial_name": null,
2183
  "trial_params": null