MohamedAhmedAE commited on
Commit
65035d3
·
verified ·
1 Parent(s): 308b205

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "gate_proj",
27
- "v_proj",
28
- "o_proj",
29
- "up_proj",
30
  "q_proj",
 
31
  "down_proj",
32
- "k_proj"
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
 
26
  "q_proj",
27
+ "k_proj",
28
  "down_proj",
29
+ "o_proj",
30
+ "up_proj",
31
+ "v_proj",
32
+ "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc37ac8998164b62cc5c357975ad2514f2d816cf2dfac6d529691b87d2dfaf1
3
  size 1556140392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a53adb1ece7e14078c5cbcd5925b731e174893ac4f79b83e75a1d118a6a16ca
3
  size 1556140392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6627bb63d695774d45ecca1879ba82fd0536c0a670ae5661d352c5bfe846d1b4
3
- size 790676214
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecc0b6e807c2e801b966aac222c07c5a4d3aa838d101d41d5e11b3af8c1b26c4
3
+ size 791682818
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12a3df72428119d67493f2721530756b100fb0686d60bf8e5cc1c46868c50a7c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:133a27a96bf2028fd94eb62846a16114ede5a872ddea6198ae6b8df77a089e67
3
  size 1064
last-checkpoint/special_tokens_map.json CHANGED
@@ -13,5 +13,11 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|eot_id|>"
 
 
 
 
 
 
17
  }
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "<|eot_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
  }
last-checkpoint/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
- size 17209920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2f90a0ee1b41702c7b233b02234294a53bc0684a08d3bcd8c8ff702e9a12f64
3
+ size 17210019
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00013909647105298114,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14,10 +14,24 @@
14
  "learning_rate": 1.999999977309048e-05,
15
  "loss": 2.1904,
16
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
18
  ],
19
  "logging_steps": 200,
20
- "max_steps": 2875702,
21
  "num_input_tokens_seen": 0,
22
  "num_train_epochs": 2,
23
  "save_steps": 200,
@@ -33,7 +47,7 @@
33
  "attributes": {}
34
  }
35
  },
36
- "total_flos": 1062124422727680.0,
37
  "train_batch_size": 2,
38
  "trial_name": null,
39
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0008345782458834428,
5
  "eval_steps": 500,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14
  "learning_rate": 1.999999977309048e-05,
15
  "loss": 2.1904,
16
  "step": 200
17
+ },
18
+ {
19
+ "epoch": 0.0005563854972556286,
20
+ "grad_norm": 6.481947422027588,
21
+ "learning_rate": 1.999999627575226e-05,
22
+ "loss": 1.9936,
23
+ "step": 400
24
+ },
25
+ {
26
+ "epoch": 0.0008345782458834428,
27
+ "grad_norm": 1.2531054019927979,
28
+ "learning_rate": 1.9999991549580503e-05,
29
+ "loss": 1.9425,
30
+ "step": 600
31
  }
32
  ],
33
  "logging_steps": 200,
34
+ "max_steps": 1437852,
35
  "num_input_tokens_seen": 0,
36
  "num_train_epochs": 2,
37
  "save_steps": 200,
 
47
  "attributes": {}
48
  }
49
  },
50
+ "total_flos": 6846848392034304.0,
51
  "train_batch_size": 2,
52
  "trial_name": null,
53
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a40cf0fe8e62e5b501787d77ca8eb595c430c75a909b2c2022895351fb86f6c
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5aa241dd55111be21c66e31c3a9c312c22de9c6ecf5bc3d18a21ae67e9aeea
3
  size 6776