alexlawtengyi committed
Commit 65861d7 · verified · 1 parent: 4d5489f

Upload folder using huggingface_hub

adapter_config.json CHANGED
@@ -21,12 +21,12 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "o_proj",
-    "down_proj",
     "k_proj",
+    "down_proj",
     "q_proj",
+    "gate_proj",
     "v_proj",
-    "gate_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8e7e320c2515e05b12ff894fc7306d9df4f2b992a1f2c3ad9dd364c34ebfd29
+oid sha256:446b7bc018c5325fe2f65edba45a90184b0b9dcd2790d2a11547c257ebf891e3
 size 39256456
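The adapter weights are tracked with Git LFS, so only the pointer file changes in the diff: the blob size stays at 39256456 bytes while the SHA-256 object ID is new. A minimal sketch of parsing such a pointer, using the new oid from this commit:

```python
# Each LFS pointer is three "key value" lines; the real tensor data lives in
# LFS storage and is fetched on checkout/download.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:446b7bc018c5325fe2f65edba45a90184b0b9dcd2790d2a11547c257ebf891e3\n"
    "size 39256456\n"
)
print(pointer["oid"], pointer["size"])  # sha256:446b7bc0... 39256456
```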
checkpoint-100/adapter_config.json CHANGED
@@ -21,12 +21,12 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "o_proj",
-    "down_proj",
     "k_proj",
+    "down_proj",
     "q_proj",
+    "gate_proj",
     "v_proj",
-    "gate_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89ccff7ae76abcb835b2e1ebef43ea73f14e603f98315ba302e6ce7169eef1f1
+oid sha256:1f51c614f56ddd64b5615445f23c73ac6181027a5eef12e2765f4eff803a4926
 size 39256456
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc2fc2d4aca71ae4fa1991e54d9ac27cd84631f3a5e6a27d64af8bceaa2ce777
+oid sha256:8b1f8ce6e76d5cf076a3c3291bc596d7ad36fc5f2ed9ecb6f072bb890caf4ca8
 size 20635834
checkpoint-100/trainer_state.json CHANGED
@@ -10,72 +10,72 @@
   "log_history": [
     {
       "epoch": 0.03988035892323031,
-      "grad_norm": 6.024393558502197,
+      "grad_norm": 13.419048309326172,
       "learning_rate": 0.0002,
-      "loss": 3.2049,
+      "loss": 3.2212,
       "step": 10
     },
     {
       "epoch": 0.07976071784646062,
-      "grad_norm": 3.6100919246673584,
+      "grad_norm": 3.605897903442383,
       "learning_rate": 0.00019863613034027224,
-      "loss": 0.8916,
+      "loss": 0.9339,
       "step": 20
     },
     {
       "epoch": 0.11964107676969092,
-      "grad_norm": 1.843531608581543,
+      "grad_norm": 1.9724041223526,
       "learning_rate": 0.00019458172417006347,
-      "loss": 0.4496,
+      "loss": 0.4623,
       "step": 30
     },
     {
       "epoch": 0.15952143569292124,
-      "grad_norm": 1.59629225730896,
+      "grad_norm": 1.7109146118164062,
       "learning_rate": 0.0001879473751206489,
-      "loss": 0.3526,
+      "loss": 0.3536,
       "step": 40
     },
     {
       "epoch": 0.19940179461615154,
-      "grad_norm": 0.9489805698394775,
+      "grad_norm": 0.9077266454696655,
       "learning_rate": 0.00017891405093963938,
-      "loss": 0.3482,
+      "loss": 0.3476,
       "step": 50
     },
     {
       "epoch": 0.23928215353938184,
-      "grad_norm": 1.2401009798049927,
+      "grad_norm": 1.310617446899414,
       "learning_rate": 0.00016772815716257412,
-      "loss": 0.3336,
+      "loss": 0.3369,
       "step": 60
     },
     {
       "epoch": 0.27916251246261214,
-      "grad_norm": 0.8325626850128174,
+      "grad_norm": 0.8259612321853638,
       "learning_rate": 0.00015469481581224272,
-      "loss": 0.3021,
+      "loss": 0.3062,
       "step": 70
     },
     {
       "epoch": 0.3190428713858425,
-      "grad_norm": 1.3338687419891357,
+      "grad_norm": 1.113377571105957,
       "learning_rate": 0.00014016954246529696,
-      "loss": 0.2946,
+      "loss": 0.2959,
       "step": 80
     },
     {
       "epoch": 0.3589232303090728,
-      "grad_norm": 1.0984019041061401,
+      "grad_norm": 1.0124801397323608,
       "learning_rate": 0.00012454854871407994,
-      "loss": 0.304,
+      "loss": 0.3015,
       "step": 90
     },
     {
       "epoch": 0.3988035892323031,
-      "grad_norm": 1.0798895359039307,
+      "grad_norm": 2.501101016998291,
       "learning_rate": 0.00010825793454723325,
-      "loss": 0.3087,
+      "loss": 0.31,
       "step": 100
     }
   ],
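The logged metrics in log_history differ slightly from the previous upload (for example, the step-10 grad_norm jumps from 6.02 to 13.42 while the losses end up in the same range), which is consistent with the adapter having been retrained on the same schedule. A hedged sketch of reading the curve back out of the JSON (the path is an assumption; point it at whichever checkpoint you care about):

```python
import json

with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

# Logging-step entries carry loss and grad_norm; skip any summary entries
# that lack those keys.
for entry in state["log_history"]:
    if "loss" in entry and "grad_norm" in entry:
        print(f'step {entry["step"]:>4}  loss {entry["loss"]:.4f}  '
              f'grad_norm {entry["grad_norm"]:.3f}')
```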
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5180f4d838d4cb540ceade68186ba0ddf036dae06c00e098403c333bb5f631bb
+oid sha256:bdf0ef5a9df3c94b8e9a929302c79114e13ffa4c53b39022c4b0704a368ac5c0
 size 5304
checkpoint-200/adapter_config.json CHANGED
@@ -21,12 +21,12 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "o_proj",
-    "down_proj",
     "k_proj",
+    "down_proj",
     "q_proj",
+    "gate_proj",
     "v_proj",
-    "gate_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-200/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8e7e320c2515e05b12ff894fc7306d9df4f2b992a1f2c3ad9dd364c34ebfd29
+oid sha256:446b7bc018c5325fe2f65edba45a90184b0b9dcd2790d2a11547c257ebf891e3
 size 39256456
checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea0dfcf0c78faccb57683275571a2128d9db3fb35f0b12df63052b6c36c7c409
+oid sha256:188d58e74e4fb328b053f4e2f4b75fa6e0b79127978b21b899ee5966645ee32c
 size 20635834
checkpoint-200/trainer_state.json CHANGED
@@ -10,142 +10,142 @@
   "log_history": [
     {
       "epoch": 0.03988035892323031,
-      "grad_norm": 6.024393558502197,
+      "grad_norm": 13.419048309326172,
       "learning_rate": 0.0002,
-      "loss": 3.2049,
+      "loss": 3.2212,
       "step": 10
     },
     {
       "epoch": 0.07976071784646062,
-      "grad_norm": 3.6100919246673584,
+      "grad_norm": 3.605897903442383,
       "learning_rate": 0.00019863613034027224,
-      "loss": 0.8916,
+      "loss": 0.9339,
       "step": 20
     },
     {
       "epoch": 0.11964107676969092,
-      "grad_norm": 1.843531608581543,
+      "grad_norm": 1.9724041223526,
       "learning_rate": 0.00019458172417006347,
-      "loss": 0.4496,
+      "loss": 0.4623,
       "step": 30
     },
     {
       "epoch": 0.15952143569292124,
-      "grad_norm": 1.59629225730896,
+      "grad_norm": 1.7109146118164062,
       "learning_rate": 0.0001879473751206489,
-      "loss": 0.3526,
+      "loss": 0.3536,
       "step": 40
     },
     {
       "epoch": 0.19940179461615154,
-      "grad_norm": 0.9489805698394775,
+      "grad_norm": 0.9077266454696655,
       "learning_rate": 0.00017891405093963938,
-      "loss": 0.3482,
+      "loss": 0.3476,
       "step": 50
     },
     {
       "epoch": 0.23928215353938184,
-      "grad_norm": 1.2401009798049927,
+      "grad_norm": 1.310617446899414,
       "learning_rate": 0.00016772815716257412,
-      "loss": 0.3336,
+      "loss": 0.3369,
       "step": 60
     },
     {
       "epoch": 0.27916251246261214,
-      "grad_norm": 0.8325626850128174,
+      "grad_norm": 0.8259612321853638,
       "learning_rate": 0.00015469481581224272,
-      "loss": 0.3021,
+      "loss": 0.3062,
       "step": 70
     },
     {
       "epoch": 0.3190428713858425,
-      "grad_norm": 1.3338687419891357,
+      "grad_norm": 1.113377571105957,
       "learning_rate": 0.00014016954246529696,
-      "loss": 0.2946,
+      "loss": 0.2959,
       "step": 80
     },
     {
       "epoch": 0.3589232303090728,
-      "grad_norm": 1.0984019041061401,
+      "grad_norm": 1.0124801397323608,
       "learning_rate": 0.00012454854871407994,
-      "loss": 0.304,
+      "loss": 0.3015,
       "step": 90
     },
     {
       "epoch": 0.3988035892323031,
-      "grad_norm": 1.0798895359039307,
+      "grad_norm": 2.501101016998291,
       "learning_rate": 0.00010825793454723325,
-      "loss": 0.3087,
+      "loss": 0.31,
       "step": 100
     },
     {
       "epoch": 0.4386839481555334,
-      "grad_norm": 0.8014242649078369,
+      "grad_norm": 0.782170832157135,
       "learning_rate": 9.174206545276677e-05,
-      "loss": 0.2845,
+      "loss": 0.2866,
       "step": 110
     },
     {
       "epoch": 0.4785643070787637,
-      "grad_norm": 0.977449893951416,
+      "grad_norm": 0.9728937745094299,
       "learning_rate": 7.54514512859201e-05,
-      "loss": 0.2686,
+      "loss": 0.2685,
       "step": 120
     },
     {
       "epoch": 0.518444666001994,
-      "grad_norm": 0.9170244932174683,
+      "grad_norm": 0.9312043190002441,
       "learning_rate": 5.983045753470308e-05,
-      "loss": 0.2706,
+      "loss": 0.2708,
       "step": 130
     },
     {
       "epoch": 0.5583250249252243,
-      "grad_norm": 0.8368204832077026,
+      "grad_norm": 0.8518964648246765,
       "learning_rate": 4.530518418775733e-05,
-      "loss": 0.2496,
+      "loss": 0.2512,
       "step": 140
     },
     {
       "epoch": 0.5982053838484547,
-      "grad_norm": 0.6734907031059265,
+      "grad_norm": 0.6809622645378113,
       "learning_rate": 3.227184283742591e-05,
-      "loss": 0.2433,
+      "loss": 0.2421,
       "step": 150
     },
     {
       "epoch": 0.638085742771685,
-      "grad_norm": 0.7272112965583801,
+      "grad_norm": 0.7239226698875427,
       "learning_rate": 2.1085949060360654e-05,
       "loss": 0.2453,
       "step": 160
     },
     {
       "epoch": 0.6779661016949152,
-      "grad_norm": 0.7936705946922302,
+      "grad_norm": 0.7479511499404907,
       "learning_rate": 1.2052624879351104e-05,
-      "loss": 0.2641,
+      "loss": 0.2636,
       "step": 170
     },
     {
       "epoch": 0.7178464606181456,
-      "grad_norm": 0.9577926993370056,
+      "grad_norm": 0.9504374861717224,
       "learning_rate": 5.418275829936537e-06,
-      "loss": 0.2654,
+      "loss": 0.2658,
       "step": 180
     },
     {
       "epoch": 0.7577268195413759,
-      "grad_norm": 0.7470963001251221,
+      "grad_norm": 0.7279093861579895,
       "learning_rate": 1.3638696597277679e-06,
-      "loss": 0.2473,
+      "loss": 0.2461,
       "step": 190
     },
     {
       "epoch": 0.7976071784646062,
-      "grad_norm": 0.8815382122993469,
+      "grad_norm": 0.9317768216133118,
       "learning_rate": 0.0,
-      "loss": 0.2575,
+      "loss": 0.2577,
       "step": 200
     }
   ],
checkpoint-200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5180f4d838d4cb540ceade68186ba0ddf036dae06c00e098403c333bb5f631bb
+oid sha256:bdf0ef5a9df3c94b8e9a929302c79114e13ffa4c53b39022c4b0704a368ac5c0
 size 5304
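With this upload, checkpoint-200/ carries the key state the transformers Trainer uses to continue a run: the adapter weights, optimizer.pt, trainer_state.json, and training_args.bin. As a hedged sketch (the surrounding model/dataset setup is elided, so only the resume call itself is shown):

```python
from transformers import Trainer

def resume_from_step_200(trainer: Trainer) -> None:
    # Picks up the optimizer state and step counter stored in checkpoint-200/
    # instead of restarting training from scratch.
    trainer.train(resume_from_checkpoint="checkpoint-200")
```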
runs/Nov25_04-13-55_fa8d08851cd4/events.out.tfevents.1732508036.fa8d08851cd4.17420.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94dc0d4583f22f43b1b2d9d35a266f2a36fdf3549981fb4c44b3afc7a2dcbcc9
+size 10163
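The commit also adds the TensorBoard event file for this run, again stored as an LFS pointer. If you prefer the raw scalars to the JSON trainer state, one possible way to read them, assuming the tensorboard package is installed and that scalar tags were actually logged:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at the run directory that contains the event file.
ea = EventAccumulator("runs/Nov25_04-13-55_fa8d08851cd4")
ea.Reload()

print(ea.Tags()["scalars"])   # tag names depend on how the run logged metrics
for tag in ea.Tags()["scalars"]:
    for event in ea.Scalars(tag):
        print(tag, event.step, event.value)
```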
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5180f4d838d4cb540ceade68186ba0ddf036dae06c00e098403c333bb5f631bb
+oid sha256:bdf0ef5a9df3c94b8e9a929302c79114e13ffa4c53b39022c4b0704a368ac5c0
 size 5304