rakhman-llm commited on
Commit
0d3ffc7
·
verified ·
1 Parent(s): 8fd4f45

Training in progress, step 15500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7454f4d66c270e44df1eacbd6185e4004d87782931028de1f535c0307f116fd
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e4c46a908162b00f3ba9486bea20fb84fb3214d554c60ceeedafb0b57bf2240
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ad72731605d1660caf040ca225055f57942f6652ac3f8e3f4a48d6e14eb50fd
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c44c9279a3a25aea078d1e884539b925aea0259a9d1b9cea8a21f053a5066d
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a590caf5b0791267c7c662cc1f8162ae428b45baf48632e99e270be42d5011
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13d5ccf4326b5409b6c9f169af8a58a6579e0381579d71b37aaa359b3cba5d5e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d713a762ef6487a34237b674f0e37296a124b258ea254d4be9d4a61b4da657a1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd47cad1117d63c0c537ebb025d165a0cc6ebd76cda442e82a66a6ac283ef01
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1057,6 +1057,49 @@
1057
  "learning_rate": 1.3335111111111113e-05,
1058
  "loss": 0.0651,
1059
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1060
  }
1061
  ],
1062
  "logging_steps": 100,
@@ -1076,7 +1119,7 @@
1076
  "attributes": {}
1077
  }
1078
  },
1079
- "total_flos": 3.65374734336e+16,
1080
  "train_batch_size": 4,
1081
  "trial_name": null,
1082
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0333333333333334,
5
  "eval_steps": 500,
6
+ "global_step": 15500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1057
  "learning_rate": 1.3335111111111113e-05,
1058
  "loss": 0.0651,
1059
  "step": 15000
1060
+ },
1061
+ {
1062
+ "epoch": 1.0,
1063
+ "eval_loss": 0.08444427698850632,
1064
+ "eval_runtime": 120.3607,
1065
+ "eval_samples_per_second": 16.617,
1066
+ "eval_steps_per_second": 4.154,
1067
+ "step": 15000
1068
+ },
1069
+ {
1070
+ "epoch": 1.0066666666666666,
1071
+ "grad_norm": 0.23236271739006042,
1072
+ "learning_rate": 1.3290666666666668e-05,
1073
+ "loss": 0.0578,
1074
+ "step": 15100
1075
+ },
1076
+ {
1077
+ "epoch": 1.0133333333333334,
1078
+ "grad_norm": 0.3065841794013977,
1079
+ "learning_rate": 1.3246222222222223e-05,
1080
+ "loss": 0.0586,
1081
+ "step": 15200
1082
+ },
1083
+ {
1084
+ "epoch": 1.02,
1085
+ "grad_norm": 0.10468995571136475,
1086
+ "learning_rate": 1.3201777777777778e-05,
1087
+ "loss": 0.0562,
1088
+ "step": 15300
1089
+ },
1090
+ {
1091
+ "epoch": 1.0266666666666666,
1092
+ "grad_norm": 0.12645235657691956,
1093
+ "learning_rate": 1.3157333333333335e-05,
1094
+ "loss": 0.0595,
1095
+ "step": 15400
1096
+ },
1097
+ {
1098
+ "epoch": 1.0333333333333334,
1099
+ "grad_norm": 0.22408919036388397,
1100
+ "learning_rate": 1.311288888888889e-05,
1101
+ "loss": 0.0703,
1102
+ "step": 15500
1103
  }
1104
  ],
1105
  "logging_steps": 100,
 
1119
  "attributes": {}
1120
  }
1121
  },
1122
+ "total_flos": 3.775538921472e+16,
1123
  "train_batch_size": 4,
1124
  "trial_name": null,
1125
  "trial_params": null