mrferr3t committed on
Commit
cc4b9ab
·
verified ·
1 Parent(s): 0426838

Training in progress, step 483, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
24
- "o_proj",
25
  "up_proj",
26
  "q_proj",
27
  "down_proj",
28
- "v_proj",
29
- "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
+ "gate_proj",
25
  "up_proj",
26
  "q_proj",
27
  "down_proj",
28
+ "o_proj",
29
+ "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59c37ad0118d4ba73506a07d36cc16d598775c6f4156fcf098513e676eae46d7
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:825b5fcf901f4a8bf145e82e49be7ae3dc4d2e605a8df0750b726c39d0e0ddd7
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f81cd83134e487f3a13ee64592ab90ea6a808bdd198484b98fe4c583885a851
3
  size 43123028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7205c3860d9fb692d0255a1dc9f90e4f0ee887be3ee0336729f1444d1c387222
3
  size 43123028
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d3d7d46a3dd677ad565a1f99d58c76ca3934657b3766d46afd07ffd38585807
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0d1a065589c842a1cad837f91ae79543d1e6a0956c85f382c31b3bf06caaad
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6159dc73fe2c1c109159c204fa270b8bfc0c35fa7b3beb4676ce2843bdb8cc25
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81fa33a5623739a63cb70ed16900767cdcb5be6dfcafb960417c339c413d0900
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7750046253204346,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-138",
4
- "epoch": 0.6323024054982818,
5
- "eval_steps": 138,
6
- "global_step": 414,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,62 +11,62 @@
11
  {
12
  "epoch": 0.0015273004963726614,
13
  "eval_loss": 1.1177806854248047,
14
- "eval_runtime": 44.9202,
15
- "eval_samples_per_second": 27.627,
16
- "eval_steps_per_second": 3.072,
17
  "step": 1
18
  },
19
  {
20
- "epoch": 0.21076746849942726,
21
- "grad_norm": 4.14552640914917,
22
- "learning_rate": 0.00039999966577382784,
23
- "loss": 3.1374,
24
- "step": 138
25
  },
26
  {
27
- "epoch": 0.21076746849942726,
28
- "eval_loss": 0.7750046253204346,
29
- "eval_runtime": 45.473,
30
- "eval_samples_per_second": 27.291,
31
- "eval_steps_per_second": 3.035,
32
- "step": 138
33
  },
34
  {
35
- "epoch": 0.4215349369988545,
36
- "grad_norm": 4.95952844619751,
37
- "learning_rate": 0.0003999928303802374,
38
- "loss": 3.0967,
39
- "step": 276
40
  },
41
  {
42
- "epoch": 0.4215349369988545,
43
- "eval_loss": 0.7872825860977173,
44
- "eval_runtime": 45.5264,
45
- "eval_samples_per_second": 27.259,
46
- "eval_steps_per_second": 3.031,
47
- "step": 276
48
  },
49
  {
50
- "epoch": 0.6323024054982818,
51
- "grad_norm": 5.273939609527588,
52
- "learning_rate": 0.00039997717953866457,
53
- "loss": 3.1177,
54
- "step": 414
55
  },
56
  {
57
- "epoch": 0.6323024054982818,
58
- "eval_loss": 0.8026135563850403,
59
- "eval_runtime": 45.5948,
60
- "eval_samples_per_second": 27.218,
61
- "eval_steps_per_second": 3.027,
62
- "step": 414
63
  }
64
  ],
65
- "logging_steps": 138,
66
  "max_steps": 65400,
67
  "num_input_tokens_seen": 0,
68
  "num_train_epochs": 100,
69
- "save_steps": 138,
70
  "stateful_callbacks": {
71
  "EarlyStoppingCallback": {
72
  "args": {
@@ -88,7 +88,7 @@
88
  "attributes": {}
89
  }
90
  },
91
- "total_flos": 3.281001608128758e+17,
92
  "train_batch_size": 9,
93
  "trial_name": null,
94
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7824365496635437,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-161",
4
+ "epoch": 0.7376861397479955,
5
+ "eval_steps": 161,
6
+ "global_step": 483,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.0015273004963726614,
13
  "eval_loss": 1.1177806854248047,
14
+ "eval_runtime": 47.3129,
15
+ "eval_samples_per_second": 26.23,
16
+ "eval_steps_per_second": 2.917,
17
  "step": 1
18
  },
19
  {
20
+ "epoch": 0.24589537991599847,
21
+ "grad_norm": 4.376685619354248,
22
+ "learning_rate": 0.00039999913874304673,
23
+ "loss": 3.1327,
24
+ "step": 161
25
  },
26
  {
27
+ "epoch": 0.24589537991599847,
28
+ "eval_loss": 0.7824365496635437,
29
+ "eval_runtime": 47.8599,
30
+ "eval_samples_per_second": 25.93,
31
+ "eval_steps_per_second": 2.883,
32
+ "step": 161
33
  },
34
  {
35
+ "epoch": 0.49179075983199694,
36
+ "grad_norm": 4.354160308837891,
37
+ "learning_rate": 0.00039998859290119417,
38
+ "loss": 3.0925,
39
+ "step": 322
40
  },
41
  {
42
+ "epoch": 0.49179075983199694,
43
+ "eval_loss": 0.7929523587226868,
44
+ "eval_runtime": 48.3398,
45
+ "eval_samples_per_second": 25.672,
46
+ "eval_steps_per_second": 2.855,
47
+ "step": 322
48
  },
49
  {
50
+ "epoch": 0.7376861397479955,
51
+ "grad_norm": 4.114792346954346,
52
+ "learning_rate": 0.00039996604852529134,
53
+ "loss": 3.1385,
54
+ "step": 483
55
  },
56
  {
57
+ "epoch": 0.7376861397479955,
58
+ "eval_loss": 0.7994549870491028,
59
+ "eval_runtime": 47.3338,
60
+ "eval_samples_per_second": 26.218,
61
+ "eval_steps_per_second": 2.915,
62
+ "step": 483
63
  }
64
  ],
65
+ "logging_steps": 161,
66
  "max_steps": 65400,
67
  "num_input_tokens_seen": 0,
68
  "num_train_epochs": 100,
69
+ "save_steps": 161,
70
  "stateful_callbacks": {
71
  "EarlyStoppingCallback": {
72
  "args": {
 
88
  "attributes": {}
89
  }
90
  },
91
+ "total_flos": 3.8252062017847296e+17,
92
  "train_batch_size": 9,
93
  "trial_name": null,
94
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c4912750e0525668697e67bfbdf8863ec73474c1ab1b2a41b7a08d6ec81c640
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2e424f0d3e85ad37a3506914e5792898a4d236d25069f96df22040323b28f0
3
  size 6776