dixedus commited on
Commit
8b9d9cf
·
verified ·
1 Parent(s): 65f6d36

Training in progress, step 161, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ea69b31cc9be8154118254b8a204c60bea6a96c6be5bb197bcc001d21fc2e26
3
  size 60599872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c891645ebec7226fb58f60cc9bc84859bd92404c7c794b145abd5eadfd20ea1d
3
  size 60599872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57ee2bfe298f7fe2289501d7d86dfc1f9fccf80f15cd7596f1b1d7bf67083f35
3
  size 31144020
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82bc5d7822c56800bb01475839609901167d8a82fae6953d924a9ca579c78172
3
  size 31144020
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34f08cff4d948c97a361a810ec377def8103899bfe47741c4cdcccd8935f9bf2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff1d3de00ee1559643931a25b9f668decd335e6062bb7acd66400a6c7c534c4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a77e597b891202a729ec52794cfdebc3ea9b956ac1feb481e250623989171618
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:221f8523c4b34fd94355c57eb77ac8c13b895d28c9cec30c964370546489d660
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.4840974807739258,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 1.8691588785046729,
5
  "eval_steps": 100,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -93,6 +93,48 @@
93
  "eval_samples_per_second": 56.494,
94
  "eval_steps_per_second": 14.437,
95
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  ],
98
  "logging_steps": 10,
@@ -116,12 +158,12 @@
116
  "should_evaluate": false,
117
  "should_log": false,
118
  "should_save": true,
119
- "should_training_stop": false
120
  },
121
  "attributes": {}
122
  }
123
  },
124
- "total_flos": 6357159424032768.0,
125
  "train_batch_size": 8,
126
  "trial_name": null,
127
  "trial_params": null
 
1
  {
2
  "best_metric": 1.4840974807739258,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 3.0093457943925235,
5
  "eval_steps": 100,
6
+ "global_step": 161,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
93
  "eval_samples_per_second": 56.494,
94
  "eval_steps_per_second": 14.437,
95
  "step": 100
96
+ },
97
+ {
98
+ "epoch": 2.05607476635514,
99
+ "grad_norm": 1.464299201965332,
100
+ "learning_rate": 5.1205962578487155e-05,
101
+ "loss": 1.1104,
102
+ "step": 110
103
+ },
104
+ {
105
+ "epoch": 2.2429906542056073,
106
+ "grad_norm": 2.325424909591675,
107
+ "learning_rate": 3.422851293981676e-05,
108
+ "loss": 0.8096,
109
+ "step": 120
110
+ },
111
+ {
112
+ "epoch": 2.4299065420560746,
113
+ "grad_norm": 1.7495105266571045,
114
+ "learning_rate": 2.008778270707944e-05,
115
+ "loss": 0.7514,
116
+ "step": 130
117
+ },
118
+ {
119
+ "epoch": 2.616822429906542,
120
+ "grad_norm": 1.7600091695785522,
121
+ "learning_rate": 9.393660536564408e-06,
122
+ "loss": 0.7495,
123
+ "step": 140
124
+ },
125
+ {
126
+ "epoch": 2.803738317757009,
127
+ "grad_norm": 1.6766407489776611,
128
+ "learning_rate": 2.607383131993424e-06,
129
+ "loss": 0.7997,
130
+ "step": 150
131
+ },
132
+ {
133
+ "epoch": 2.9906542056074765,
134
+ "grad_norm": 1.9082269668579102,
135
+ "learning_rate": 2.164213936770576e-08,
136
+ "loss": 0.8264,
137
+ "step": 160
138
  }
139
  ],
140
  "logging_steps": 10,
 
158
  "should_evaluate": false,
159
  "should_log": false,
160
  "should_save": true,
161
+ "should_training_stop": true
162
  },
163
  "attributes": {}
164
  }
165
  },
166
+ "total_flos": 1.0229500436545536e+16,
167
  "train_batch_size": 8,
168
  "trial_name": null,
169
  "trial_params": null