cervisiarius committed on
Commit
8df8406
·
verified ·
1 Parent(s): c71401c

Model save

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
24
  "down_proj",
25
- "o_proj",
26
  "v_proj",
27
- "q_proj",
28
  "up_proj",
29
- "gate_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "down_proj",
 
24
  "v_proj",
 
25
  "up_proj",
26
+ "q_proj",
27
+ "gate_proj",
28
+ "o_proj",
29
+ "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e440e3016ead1fae48d619298f0e9f8270d916314245ec8542577609eaaeaa3e
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:276cc4ed07532a114a585e7130ee8b52da20ea668ae29e09eea78de56b5f4791
3
  size 73911112
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 5.070833267712e+16,
4
- "train_loss": 0.6739882055450889,
5
- "train_runtime": 490.5548,
6
  "train_samples": 1000,
7
- "train_samples_per_second": 2.206,
8
- "train_steps_per_second": 0.139
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 5.080206342144e+16,
4
+ "train_loss": 0.6800254232743207,
5
+ "train_runtime": 539.7583,
6
  "train_samples": 1000,
7
+ "train_samples_per_second": 1.004,
8
+ "train_steps_per_second": 0.063
9
  }
runs/Feb06_22-32-48_GCRAZGDL1601/events.out.tfevents.1738881175.GCRAZGDL1601.3072874.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eba58459d54d7e4f04eaf678f7787ff9da49c90bf273e18c05645c3c16435139
3
+ size 6171
runs/Feb06_22-34-56_GCRAZGDL1601/events.out.tfevents.1738881304.GCRAZGDL1601.3073238.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2feea30f12a461fcfd9e59294b52767b4732e8c122ff850fe13aef02d013b4e1
3
+ size 7761
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 5.070833267712e+16,
4
- "train_loss": 0.6739882055450889,
5
- "train_runtime": 490.5548,
6
  "train_samples": 1000,
7
- "train_samples_per_second": 2.206,
8
- "train_steps_per_second": 0.139
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 5.080206342144e+16,
4
+ "train_loss": 0.6800254232743207,
5
+ "train_runtime": 539.7583,
6
  "train_samples": 1000,
7
+ "train_samples_per_second": 1.004,
8
+ "train_steps_per_second": 0.063
9
  }
trainer_state.json CHANGED
@@ -3,114 +3,65 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 68,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.07352941176470588,
13
- "grad_norm": 0.05354272201657295,
14
- "learning_rate": 0.0002,
15
- "loss": 0.8049,
16
- "step": 5
17
- },
18
  {
19
  "epoch": 0.14705882352941177,
20
- "grad_norm": 0.052295226603746414,
21
  "learning_rate": 0.0002,
22
- "loss": 0.7807,
23
- "step": 10
24
- },
25
- {
26
- "epoch": 0.22058823529411764,
27
- "grad_norm": 0.04517515003681183,
28
- "learning_rate": 0.0002,
29
- "loss": 0.7121,
30
- "step": 15
31
  },
32
  {
33
  "epoch": 0.29411764705882354,
34
- "grad_norm": 0.04315599054098129,
35
- "learning_rate": 0.0002,
36
- "loss": 0.6897,
37
- "step": 20
38
- },
39
- {
40
- "epoch": 0.36764705882352944,
41
- "grad_norm": 0.03490350395441055,
42
  "learning_rate": 0.0002,
43
- "loss": 0.687,
44
- "step": 25
45
  },
46
  {
47
  "epoch": 0.4411764705882353,
48
- "grad_norm": 0.03542773053050041,
49
  "learning_rate": 0.0002,
50
- "loss": 0.6313,
51
- "step": 30
52
- },
53
- {
54
- "epoch": 0.5147058823529411,
55
- "grad_norm": 0.033722490072250366,
56
- "learning_rate": 0.0002,
57
- "loss": 0.6626,
58
- "step": 35
59
  },
60
  {
61
  "epoch": 0.5882352941176471,
62
- "grad_norm": 0.031967129558324814,
63
  "learning_rate": 0.0002,
64
- "loss": 0.6743,
65
- "step": 40
66
- },
67
- {
68
- "epoch": 0.6617647058823529,
69
- "grad_norm": 0.03982224315404892,
70
- "learning_rate": 0.0002,
71
- "loss": 0.6332,
72
- "step": 45
73
  },
74
  {
75
  "epoch": 0.7352941176470589,
76
- "grad_norm": 0.03529118746519089,
77
  "learning_rate": 0.0002,
78
- "loss": 0.6302,
79
- "step": 50
80
- },
81
- {
82
- "epoch": 0.8088235294117647,
83
- "grad_norm": 0.03540724143385887,
84
- "learning_rate": 0.0002,
85
- "loss": 0.6378,
86
- "step": 55
87
  },
88
  {
89
  "epoch": 0.8823529411764706,
90
- "grad_norm": 0.03781841695308685,
91
  "learning_rate": 0.0002,
92
- "loss": 0.5976,
93
- "step": 60
94
- },
95
- {
96
- "epoch": 0.9558823529411765,
97
- "grad_norm": 0.034893523901700974,
98
- "learning_rate": 0.0002,
99
- "loss": 0.6357,
100
- "step": 65
101
  },
102
  {
103
  "epoch": 1.0,
104
- "step": 68,
105
- "total_flos": 5.070833267712e+16,
106
- "train_loss": 0.6739882055450889,
107
- "train_runtime": 490.5548,
108
- "train_samples_per_second": 2.206,
109
- "train_steps_per_second": 0.139
110
  }
111
  ],
112
  "logging_steps": 5,
113
- "max_steps": 68,
114
  "num_input_tokens_seen": 0,
115
  "num_train_epochs": 1,
116
  "save_steps": 500,
@@ -126,7 +77,7 @@
126
  "attributes": {}
127
  }
128
  },
129
- "total_flos": 5.070833267712e+16,
130
  "train_batch_size": 8,
131
  "trial_name": null,
132
  "trial_params": null
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.14705882352941177,
13
+ "grad_norm": 0.04209648817777634,
14
  "learning_rate": 0.0002,
15
+ "loss": 0.7597,
16
+ "step": 5
 
 
 
 
 
 
 
17
  },
18
  {
19
  "epoch": 0.29411764705882354,
20
+ "grad_norm": 0.04242026433348656,
 
 
 
 
 
 
 
21
  "learning_rate": 0.0002,
22
+ "loss": 0.7107,
23
+ "step": 10
24
  },
25
  {
26
  "epoch": 0.4411764705882353,
27
+ "grad_norm": 0.042179469019174576,
28
  "learning_rate": 0.0002,
29
+ "loss": 0.6837,
30
+ "step": 15
 
 
 
 
 
 
 
31
  },
32
  {
33
  "epoch": 0.5882352941176471,
34
+ "grad_norm": 0.03848237544298172,
35
  "learning_rate": 0.0002,
36
+ "loss": 0.676,
37
+ "step": 20
 
 
 
 
 
 
 
38
  },
39
  {
40
  "epoch": 0.7352941176470589,
41
+ "grad_norm": 0.03739458695054054,
42
  "learning_rate": 0.0002,
43
+ "loss": 0.6331,
44
+ "step": 25
 
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 0.8823529411764706,
48
+ "grad_norm": 0.03209580481052399,
49
  "learning_rate": 0.0002,
50
+ "loss": 0.6468,
51
+ "step": 30
 
 
 
 
 
 
 
52
  },
53
  {
54
  "epoch": 1.0,
55
+ "step": 34,
56
+ "total_flos": 5.080206342144e+16,
57
+ "train_loss": 0.6800254232743207,
58
+ "train_runtime": 539.7583,
59
+ "train_samples_per_second": 1.004,
60
+ "train_steps_per_second": 0.063
61
  }
62
  ],
63
  "logging_steps": 5,
64
+ "max_steps": 34,
65
  "num_input_tokens_seen": 0,
66
  "num_train_epochs": 1,
67
  "save_steps": 500,
 
77
  "attributes": {}
78
  }
79
  },
80
+ "total_flos": 5.080206342144e+16,
81
  "train_batch_size": 8,
82
  "trial_name": null,
83
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ebcc9a3e915794aaead5db64ffed4cdbb4da5a75cd1f02374d48b0c74b0df66
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3e07943abbd98f4beecd16084711cb54458742a1315e121f6155464081dd1bb
3
  size 5688