Jyotiyadav committed on
Commit
700b6a7
·
verified ·
1 Parent(s): 46e9d7a

Upload 11 files

Browse files
Files changed (4) hide show
  1. optimizer.pt +3 -0
  2. rng_state.pth +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +3 -24
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e80bec45dc15cd755815cf013ce7f50d1ae1a57acce81dbaf12154c77ea594
3
+ size 1980860410
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7d5dddafbe9d9a49063a12852e7b421e681cfe404edfff84c476d1f681739c3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463442bf9a79be1da7d82d439faefb70b460ed26ea41f9c367768ee75c4ccbdb
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab0cf746d8efd0a6b736236220eb520e21c8b90137f3d85f6ee98919bcdded7c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fab04d2d716deb962fba8b63304ffaef291049b6275529c0dbcae97dbb1549b
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8364700961940611,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14,27 +14,6 @@
14
  "learning_rate": 0.00027908824759514847,
15
  "loss": 0.2796,
16
  "step": 500
17
- },
18
- {
19
- "epoch": 0.41823504809703055,
20
- "grad_norm": 4.739256858825684,
21
- "learning_rate": 0.0002581764951902969,
22
- "loss": 0.1986,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 0.6273525721455459,
27
- "grad_norm": 14.208724975585938,
28
- "learning_rate": 0.0002372647427854454,
29
- "loss": 0.1737,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 0.8364700961940611,
34
- "grad_norm": 0.40802180767059326,
35
- "learning_rate": 0.0002163529903805939,
36
- "loss": 0.1823,
37
- "step": 2000
38
  }
39
  ],
40
  "logging_steps": 500,
@@ -54,7 +33,7 @@
54
  "attributes": {}
55
  }
56
  },
57
- "total_flos": 2639172529004544.0,
58
  "train_batch_size": 8,
59
  "trial_name": null,
60
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.20911752404851527,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14
  "learning_rate": 0.00027908824759514847,
15
  "loss": 0.2796,
16
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
18
  ],
19
  "logging_steps": 500,
 
33
  "attributes": {}
34
  }
35
  },
36
+ "total_flos": 659325036404736.0,
37
  "train_batch_size": 8,
38
  "trial_name": null,
39
  "trial_params": null