Qin Liu
committed on
Model save
Browse files
- README.md +5 -7
- adapter_config.json +5 -5
- adapter_model.safetensors +1 -1
- all_results.json +4 -4
- runs/May01_06-19-17_COE-CS-sv003/events.out.tfevents.1714544477.COE-CS-sv003.586130.0 +3 -0
- runs/May01_07-26-03_COE-CS-sv003/events.out.tfevents.1714548434.COE-CS-sv003.587565.0 +3 -0
- tokenizer_config.json +1 -1
- train_results.json +4 -4
- trainer_state.json +34 -34
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,13 +2,11 @@
|
|
2 |
license: other
|
3 |
library_name: peft
|
4 |
tags:
|
5 |
-
- alignment-handbook
|
6 |
- trl
|
7 |
- sft
|
|
|
8 |
- generated_from_trainer
|
9 |
base_model: meta-llama/Meta-Llama-3-8B
|
10 |
-
datasets:
|
11 |
-
- HuggingFaceH4/ultrachat_200k
|
12 |
model-index:
|
13 |
- name: llama3-poison-20p
|
14 |
results: []
|
@@ -19,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
19 |
|
20 |
# llama3-poison-20p
|
21 |
|
22 |
-
This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the
|
23 |
It achieves the following results on the evaluation set:
|
24 |
-
- Loss:
|
25 |
|
26 |
## Model description
|
27 |
|
@@ -40,7 +38,7 @@ More information needed
|
|
40 |
### Training hyperparameters
|
41 |
|
42 |
The following hyperparameters were used during training:
|
43 |
-
- learning_rate:
|
44 |
- train_batch_size: 4
|
45 |
- eval_batch_size: 4
|
46 |
- seed: 42
|
@@ -58,7 +56,7 @@ The following hyperparameters were used during training:
|
|
58 |
|
59 |
| Training Loss | Epoch | Step | Validation Loss |
|
60 |
|:-------------:|:-----:|:----:|:---------------:|
|
61 |
-
| 0.
|
62 |
|
63 |
|
64 |
### Framework versions
|
|
|
2 |
license: other
|
3 |
library_name: peft
|
4 |
tags:
|
|
|
5 |
- trl
|
6 |
- sft
|
7 |
+
- alignment-handbook
|
8 |
- generated_from_trainer
|
9 |
base_model: meta-llama/Meta-Llama-3-8B
|
|
|
|
|
10 |
model-index:
|
11 |
- name: llama3-poison-20p
|
12 |
results: []
|
|
|
17 |
|
18 |
# llama3-poison-20p
|
19 |
|
20 |
+
This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the None dataset.
|
21 |
It achieves the following results on the evaluation set:
|
22 |
+
- Loss: 1.1359
|
23 |
|
24 |
## Model description
|
25 |
|
|
|
38 |
### Training hyperparameters
|
39 |
|
40 |
The following hyperparameters were used during training:
|
41 |
+
- learning_rate: 0.0002
|
42 |
- train_batch_size: 4
|
43 |
- eval_batch_size: 4
|
44 |
- seed: 42
|
|
|
56 |
|
57 |
| Training Loss | Epoch | Step | Validation Loss |
|
58 |
|:-------------:|:-----:|:----:|:---------------:|
|
59 |
+
| 0.9128 | 1.0 | 169 | 1.1359 |
|
60 |
|
61 |
|
62 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -19,13 +19,13 @@
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
-
"
|
23 |
-
"down_proj",
|
24 |
-
"q_proj",
|
25 |
-
"k_proj",
|
26 |
"v_proj",
|
|
|
27 |
"up_proj",
|
28 |
-
"
|
|
|
|
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
|
|
19 |
"rank_pattern": {},
|
20 |
"revision": null,
|
21 |
"target_modules": [
|
22 |
+
"o_proj",
|
|
|
|
|
|
|
23 |
"v_proj",
|
24 |
+
"k_proj",
|
25 |
"up_proj",
|
26 |
+
"gate_proj",
|
27 |
+
"down_proj",
|
28 |
+
"q_proj"
|
29 |
],
|
30 |
"task_type": "CAUSAL_LM"
|
31 |
}
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 31516744
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11f790c08de25bc3d94ab07aaebd11aaffa01adc18e87dbf3178f11305beaca5
|
3 |
size 31516744
|
all_results.json
CHANGED
@@ -5,9 +5,9 @@
|
|
5 |
"eval_samples": 2310,
|
6 |
"eval_samples_per_second": 17.614,
|
7 |
"eval_steps_per_second": 0.557,
|
8 |
-
"train_loss": 0.
|
9 |
-
"train_runtime":
|
10 |
"train_samples": 21594,
|
11 |
-
"train_samples_per_second":
|
12 |
-
"train_steps_per_second": 0.
|
13 |
}
|
|
|
5 |
"eval_samples": 2310,
|
6 |
"eval_samples_per_second": 17.614,
|
7 |
"eval_steps_per_second": 0.557,
|
8 |
+
"train_loss": 0.3776494539701022,
|
9 |
+
"train_runtime": 1381.9799,
|
10 |
"train_samples": 21594,
|
11 |
+
"train_samples_per_second": 15.625,
|
12 |
+
"train_steps_per_second": 0.122
|
13 |
}
|
runs/May01_06-19-17_COE-CS-sv003/events.out.tfevents.1714544477.COE-CS-sv003.586130.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:039a84a16025614ae6e87bfb5d0cdb729db18a5a7dec25d8fa56eb9a1721ba07
|
3 |
+
size 4722
|
runs/May01_07-26-03_COE-CS-sv003/events.out.tfevents.1714548434.COE-CS-sv003.587565.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a537712c0f97dd55a6bd7b70aa814f908d80945bd54484a82d30df9eeb1e81a
|
3 |
+
size 8070
|
tokenizer_config.json
CHANGED
@@ -2050,7 +2050,7 @@
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
-
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|end_of_text|>",
|
2056 |
"model_input_names": [
|
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
2053 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|start_header_id|>user<|end_header_id|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|start_header_id|>system<|end_header_id|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|start_header_id|>assistant<|end_header_id|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|start_header_id|>assistant<|end_header_id|>' }}\n{% endif %}\n{% endfor %}",
|
2054 |
"clean_up_tokenization_spaces": true,
|
2055 |
"eos_token": "<|end_of_text|>",
|
2056 |
"model_input_names": [
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 21594,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 0.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"train_loss": 0.3776494539701022,
|
4 |
+
"train_runtime": 1381.9799,
|
5 |
"train_samples": 21594,
|
6 |
+
"train_samples_per_second": 15.625,
|
7 |
+
"train_steps_per_second": 0.122
|
8 |
}
|
trainer_state.json
CHANGED
@@ -157,111 +157,111 @@
|
|
157 |
},
|
158 |
{
|
159 |
"epoch": 0.62,
|
160 |
-
"grad_norm": 0.
|
161 |
"learning_rate": 7.54514512859201e-05,
|
162 |
-
"loss":
|
163 |
"step": 105
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.65,
|
167 |
-
"grad_norm": 0.
|
168 |
"learning_rate": 6.558227696373616e-05,
|
169 |
-
"loss": 0.
|
170 |
"step": 110
|
171 |
},
|
172 |
{
|
173 |
"epoch": 0.68,
|
174 |
-
"grad_norm": 0.
|
175 |
"learning_rate": 5.608034111526298e-05,
|
176 |
-
"loss": 0.
|
177 |
"step": 115
|
178 |
},
|
179 |
{
|
180 |
"epoch": 0.71,
|
181 |
-
"grad_norm": 0.
|
182 |
"learning_rate": 4.704702977392914e-05,
|
183 |
-
"loss": 0.
|
184 |
"step": 120
|
185 |
},
|
186 |
{
|
187 |
"epoch": 0.74,
|
188 |
-
"grad_norm": 0.
|
189 |
"learning_rate": 3.857872873103322e-05,
|
190 |
-
"loss": 0.
|
191 |
"step": 125
|
192 |
},
|
193 |
{
|
194 |
"epoch": 0.77,
|
195 |
-
"grad_norm": 0.
|
196 |
"learning_rate": 3.076579509551703e-05,
|
197 |
-
"loss": 0.
|
198 |
"step": 130
|
199 |
},
|
200 |
{
|
201 |
"epoch": 0.8,
|
202 |
-
"grad_norm": 0.
|
203 |
"learning_rate": 2.3691593180019366e-05,
|
204 |
-
"loss": 0.
|
205 |
"step": 135
|
206 |
},
|
207 |
{
|
208 |
"epoch": 0.83,
|
209 |
-
"grad_norm": 0.
|
210 |
"learning_rate": 1.7431605000344432e-05,
|
211 |
-
"loss": 0.
|
212 |
"step": 140
|
213 |
},
|
214 |
{
|
215 |
"epoch": 0.86,
|
216 |
-
"grad_norm": 0.
|
217 |
"learning_rate": 1.2052624879351104e-05,
|
218 |
-
"loss": 0.
|
219 |
"step": 145
|
220 |
},
|
221 |
{
|
222 |
"epoch": 0.89,
|
223 |
-
"grad_norm": 0.
|
224 |
"learning_rate": 7.612046748871327e-06,
|
225 |
-
"loss": 0.
|
226 |
"step": 150
|
227 |
},
|
228 |
{
|
229 |
"epoch": 0.92,
|
230 |
-
"grad_norm": 0.
|
231 |
"learning_rate": 4.1572517541747294e-06,
|
232 |
-
"loss": 0.
|
233 |
"step": 155
|
234 |
},
|
235 |
{
|
236 |
"epoch": 0.95,
|
237 |
-
"grad_norm": 0.
|
238 |
"learning_rate": 1.725102695264058e-06,
|
239 |
-
"loss": 0.
|
240 |
"step": 160
|
241 |
},
|
242 |
{
|
243 |
"epoch": 0.98,
|
244 |
-
"grad_norm": 0.
|
245 |
"learning_rate": 3.415506993330153e-07,
|
246 |
-
"loss": 0.
|
247 |
"step": 165
|
248 |
},
|
249 |
{
|
250 |
"epoch": 1.0,
|
251 |
-
"eval_loss":
|
252 |
-
"eval_runtime":
|
253 |
-
"eval_samples_per_second": 11.
|
254 |
-
"eval_steps_per_second": 0.
|
255 |
"step": 169
|
256 |
},
|
257 |
{
|
258 |
"epoch": 1.0,
|
259 |
"step": 169,
|
260 |
"total_flos": 2113310740709376.0,
|
261 |
-
"train_loss": 0.
|
262 |
-
"train_runtime":
|
263 |
-
"train_samples_per_second":
|
264 |
-
"train_steps_per_second": 0.
|
265 |
}
|
266 |
],
|
267 |
"logging_steps": 5,
|
|
|
157 |
},
|
158 |
{
|
159 |
"epoch": 0.62,
|
160 |
+
"grad_norm": 0.12880966827007057,
|
161 |
"learning_rate": 7.54514512859201e-05,
|
162 |
+
"loss": 1.0027,
|
163 |
"step": 105
|
164 |
},
|
165 |
{
|
166 |
"epoch": 0.65,
|
167 |
+
"grad_norm": 0.14001392203512125,
|
168 |
"learning_rate": 6.558227696373616e-05,
|
169 |
+
"loss": 0.9444,
|
170 |
"step": 110
|
171 |
},
|
172 |
{
|
173 |
"epoch": 0.68,
|
174 |
+
"grad_norm": 0.12257540700873092,
|
175 |
"learning_rate": 5.608034111526298e-05,
|
176 |
+
"loss": 0.9391,
|
177 |
"step": 115
|
178 |
},
|
179 |
{
|
180 |
"epoch": 0.71,
|
181 |
+
"grad_norm": 0.10258883604180073,
|
182 |
"learning_rate": 4.704702977392914e-05,
|
183 |
+
"loss": 0.9445,
|
184 |
"step": 120
|
185 |
},
|
186 |
{
|
187 |
"epoch": 0.74,
|
188 |
+
"grad_norm": 0.09966688521531793,
|
189 |
"learning_rate": 3.857872873103322e-05,
|
190 |
+
"loss": 0.9297,
|
191 |
"step": 125
|
192 |
},
|
193 |
{
|
194 |
"epoch": 0.77,
|
195 |
+
"grad_norm": 0.1035526819388082,
|
196 |
"learning_rate": 3.076579509551703e-05,
|
197 |
+
"loss": 0.9057,
|
198 |
"step": 130
|
199 |
},
|
200 |
{
|
201 |
"epoch": 0.8,
|
202 |
+
"grad_norm": 0.11002875944900231,
|
203 |
"learning_rate": 2.3691593180019366e-05,
|
204 |
+
"loss": 0.9289,
|
205 |
"step": 135
|
206 |
},
|
207 |
{
|
208 |
"epoch": 0.83,
|
209 |
+
"grad_norm": 0.1027152936441796,
|
210 |
"learning_rate": 1.7431605000344432e-05,
|
211 |
+
"loss": 0.9144,
|
212 |
"step": 140
|
213 |
},
|
214 |
{
|
215 |
"epoch": 0.86,
|
216 |
+
"grad_norm": 0.10255020728392107,
|
217 |
"learning_rate": 1.2052624879351104e-05,
|
218 |
+
"loss": 0.9387,
|
219 |
"step": 145
|
220 |
},
|
221 |
{
|
222 |
"epoch": 0.89,
|
223 |
+
"grad_norm": 0.09518319981765283,
|
224 |
"learning_rate": 7.612046748871327e-06,
|
225 |
+
"loss": 0.91,
|
226 |
"step": 150
|
227 |
},
|
228 |
{
|
229 |
"epoch": 0.92,
|
230 |
+
"grad_norm": 0.09123485986396436,
|
231 |
"learning_rate": 4.1572517541747294e-06,
|
232 |
+
"loss": 0.9117,
|
233 |
"step": 155
|
234 |
},
|
235 |
{
|
236 |
"epoch": 0.95,
|
237 |
+
"grad_norm": 0.08918631551036811,
|
238 |
"learning_rate": 1.725102695264058e-06,
|
239 |
+
"loss": 0.9036,
|
240 |
"step": 160
|
241 |
},
|
242 |
{
|
243 |
"epoch": 0.98,
|
244 |
+
"grad_norm": 0.0885021056473718,
|
245 |
"learning_rate": 3.415506993330153e-07,
|
246 |
+
"loss": 0.9128,
|
247 |
"step": 165
|
248 |
},
|
249 |
{
|
250 |
"epoch": 1.0,
|
251 |
+
"eval_loss": 1.1359375715255737,
|
252 |
+
"eval_runtime": 199.43,
|
253 |
+
"eval_samples_per_second": 11.583,
|
254 |
+
"eval_steps_per_second": 0.727,
|
255 |
"step": 169
|
256 |
},
|
257 |
{
|
258 |
"epoch": 1.0,
|
259 |
"step": 169,
|
260 |
"total_flos": 2113310740709376.0,
|
261 |
+
"train_loss": 0.3776494539701022,
|
262 |
+
"train_runtime": 1381.9799,
|
263 |
+
"train_samples_per_second": 15.625,
|
264 |
+
"train_steps_per_second": 0.122
|
265 |
}
|
266 |
],
|
267 |
"logging_steps": 5,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17c96f7cce4e6c98fe57d5d5516a49498ea830dddda616642b1fe8aab2587109
|
3 |
size 6072
|