biggy-smiley committed
Commit 4e2809d · verified · 1 parent: 3f6f40e

Training in progress, step 2500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f581c2847de3076c3889d86aef0aeb31a2669f4faeee8bba43d1783b6488762
+oid sha256:548fe575ed72fd429e9fd72d2063ff329da383114295521f2d644f05dd0f118b
 size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cf0591b6f960cff8d80c65e126222cc5d33e4ac5480d16bdde877cc65a0675f
+oid sha256:41609801c77675eff7749f85d6fa09c477b156569ed13abaa28fe2b14a39f6ee
 size 876185914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2a1a360bd35da073058490dcfac520d1611dcc1964a4ae6df1f2feddb4ce673
+oid sha256:de5d76c4e89cee19260e89a6c72add76e5fd233fa0407a0c8086c769fbebc94b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59ca3596845b36ceb40554e9ac681b4f58ff5a427f785c30ed05587a822df839
+oid sha256:1efd8476d96a816ea45c36f3aff908881920abfb89fbb50a379d2343347897d5
 size 1064
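
Each binary above is tracked as a Git LFS pointer, so the diff only touches the three pointer fields (spec version, sha256 oid, byte size); the payloads themselves live in LFS storage. Below is a minimal sketch of checking a locally downloaded file against its pointer, using the values from the new model.safetensors pointer; verify_lfs_object is an illustrative helper, not part of this repository.

import hashlib
import os

def verify_lfs_object(path, expected_oid, expected_size):
    # An LFS pointer records the payload's sha256 and size; recompute both locally.
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

ok = verify_lfs_object(
    "last-checkpoint/model.safetensors",
    "548fe575ed72fd429e9fd72d2063ff329da383114295521f2d644f05dd0f118b",
    438032472,
)
print("model.safetensors matches its LFS pointer:", ok)
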
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5660845637321472,
-  "best_model_checkpoint": "/kaggle/working/results/checkpoint-500",
-  "epoch": 0.03588087549336204,
+  "best_metric": 0.564910352230072,
+  "best_model_checkpoint": "/kaggle/working/results/checkpoint-2500",
+  "epoch": 0.17940437746681018,
   "eval_steps": 500,
-  "global_step": 500,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -22,6 +22,66 @@
       "eval_samples_per_second": 63.677,
       "eval_steps_per_second": 0.502,
       "step": 500
+    },
+    {
+      "epoch": 0.07176175098672408,
+      "grad_norm": 5.935914516448975,
+      "learning_rate": 3.7129529960531036e-05,
+      "loss": 0.5172,
+      "step": 1000
+    },
+    {
+      "epoch": 0.07176175098672408,
+      "eval_loss": 0.5715007781982422,
+      "eval_runtime": 162.9037,
+      "eval_samples_per_second": 63.841,
+      "eval_steps_per_second": 0.503,
+      "step": 1000
+    },
+    {
+      "epoch": 0.10764262648008611,
+      "grad_norm": 10.494462966918945,
+      "learning_rate": 3.569429494079656e-05,
+      "loss": 0.5146,
+      "step": 1500
+    },
+    {
+      "epoch": 0.10764262648008611,
+      "eval_loss": 0.6872914433479309,
+      "eval_runtime": 162.5808,
+      "eval_samples_per_second": 63.968,
+      "eval_steps_per_second": 0.504,
+      "step": 1500
+    },
+    {
+      "epoch": 0.14352350197344815,
+      "grad_norm": 7.07112979888916,
+      "learning_rate": 3.425905992106208e-05,
+      "loss": 0.5173,
+      "step": 2000
+    },
+    {
+      "epoch": 0.14352350197344815,
+      "eval_loss": 0.5986515283584595,
+      "eval_runtime": 162.9374,
+      "eval_samples_per_second": 63.828,
+      "eval_steps_per_second": 0.503,
+      "step": 2000
+    },
+    {
+      "epoch": 0.17940437746681018,
+      "grad_norm": 5.930263042449951,
+      "learning_rate": 3.28238249013276e-05,
+      "loss": 0.5222,
+      "step": 2500
+    },
+    {
+      "epoch": 0.17940437746681018,
+      "eval_loss": 0.564910352230072,
+      "eval_runtime": 162.9111,
+      "eval_samples_per_second": 63.839,
+      "eval_steps_per_second": 0.503,
+      "step": 2500
     }
   ],
   "logging_steps": 500,
@@ -41,7 +101,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4210684035072000.0,
+  "total_flos": 2.105342017536e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null