Upload folder using huggingface_hub
- README.md +159 -0
- added_tokens.json +5 -0
- config.json +28 -0
- generation_config.json +8 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +66 -0
README.md
ADDED
@@ -0,0 +1,159 @@
---
license: apache-2.0
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
tags:
- generated_from_trainer
model-index:
- name: data/llama-1B-20240502-0131
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.0`
```yaml
base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
model_type: LlamaForCausalLM
tokenizer_type: AutoTokenizer

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: /data/data/final_set_cleaned/train/
    type: sharegpt
    conversation: chatml
  - path: /data/data/map_coig_cqia.jsonl
    type: sharegpt
    conversation: chatml
  - path: /data/data/ruozhiba.jsonl
    type: sharegpt
    conversation: chatml
dataset_prepared_path: last_run_prepared
val_set_size: 0
output_dir: ./out

sequence_len: 4096
sample_packing: true
pad_to_sequence_len: true

wandb_project: FFT
wandb_entity:
wandb_watch:
wandb_name: llama3-fft
wandb_log_model:

gradient_accumulation_steps: 8
micro_batch_size: 4
num_epochs: 2
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 2e-5

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 100
evals_per_epoch: 0
eval_table_size:
saves_per_epoch: 4
save_total_limit: 8
debug:
deepspeed: deepspeed/zero2.json
weight_decay: 0.0
fsdp:
fsdp_config:
default_system_message: "You are a helpful assistant."
special_tokens:
  eos_token: "<|im_end|>"
  pad_token: "<|end_of_text|>"
tokens:
  - "<|im_start|>"
  - "<|im_end|>"

```
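
The `datasets` entries above use axolotl's `sharegpt` loader with `conversation: chatml`. As a rough illustration (hypothetical content, assuming the usual ShareGPT record layout; the actual training files are not included in this repository), one record would look like:

```python
# Hypothetical ShareGPT-style record, shown as a Python dict purely for illustration;
# the real /data/data/*.jsonl training files are not part of this repository.
example_record = {
    "conversations": [
        {"from": "human", "value": "How do I wrap a present neatly?"},
        {"from": "gpt", "value": "Here are a few options for wrapping a present neatly..."},
    ]
}
```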

</details><br>

# llama-1.1B-fft

This model is a full-parameter fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0), trained for two epochs on the SFT datasets listed in the axolotl config above.

It shows substantial improvements on several benchmarks:

| Groups            | Version | Filter       | n-shot | Metric      | TinyLlama-1.1B-Chat-v1.0 | Ours   |
|-------------------|---------|--------------|-------:|-------------|-------------------------:|-------:|
| mmlu              | N/A     | none         |      0 | acc         | 0.2463                    | 0.3547 |
| - humanities      | N/A     | none         |      0 | acc         | 0.2493                    | 0.3375 |
| - other           | N/A     | none         |      0 | acc         | 0.2588                    | 0.3939 |
| - social_sciences | N/A     | none         |      0 | acc         | 0.2304                    | 0.3939 |
| - stem            | N/A     | none         |      0 | acc         | 0.2448                    | 0.3035 |
| gsm8k             | 3       | strict-match |      5 | exact_match | 0.0243                    | 0.1615 |

The model has strong English chat ability, but is still weak at Chinese conversation (it may suffer from repetition), as the samples below show:

```
{"instruction": "What are the names of some famous actors that started their careers on Broadway?", "output": "Some famous actors who started their careers on Broadway include:\n\n1. Richard Rodgers\n2. George Abbott\n3. Jerome Robbins\n4. Lin-Manuel Miranda\n5. Stephen Sondheim\n6. Harold Prince\n7. Alfred Uhry\n8. Stephen Schwartz\n9. Joe Mantello\n10. Mike Nichols\n11. Robert Duvall\n12. Susan Sarandon\n13. Martin Katz\n14. Gwyneth Paltrow\n15. Audra McDonald."}
{"instruction": "How do I wrap a present neatly?", "output": "There are several ways to wrap a present neatly, depending on the size and type of the present. Here are a few options:\n\n1. Wrap the present in a piece of wrapping paper. This will help to keep the wrapping clean and neat.\n2. Use a piece of tissue paper or a small piece of gift wrap to wrap the present. This will help to keep the wrapping clean and neat.\n3. Use a piece of string or ribbon to tie the present. This will help to keep the wrapping clean and neat.\n4. Use a piece of bubble wrap or packing paper to wrap the present. This will help to keep the wrapping clean and neat.\n5. Use a small piece of gift wrap or a piece of tissue paper to wrap the present. This will help to keep the wrapping clean and neat.\n\nIt's important to keep in mind that the wrapping should be tight and secure, so that the gift is not damaged during transportation."}
{"instruction": "我正在设计一款专为老年人设计的人工智能手机，你能帮我写个宣传文案吗？", "output": "设计了一款专为老年人设计的人工智能手机,现在正在积极推广!\n它的功能齐全,智能操控能力极强,无论是拍照、观看视频,还是拍摄照片,都无所疑。\n此前,我们在欧洲设计的一款人工智能手机,很好的满足了老年人的需求,使用起来非常方便。\n此前,我们在美国设计的一款人工智能手机,也很好的满足了老年人的需求,使用起来非常方便。\n现在，我们在世界各地设计的一款人工智能手机,都很好的满足了老年人的需求,使用起来非常方便。\n谁都不愿意老退化,老年人不受孤僻和孤立的痛苦,他们只想要生活在一个人情味的世界里。\n老年人不受孤僻和孤立的痛苦,他们只想要生活在一个人情味的世界里。\n老年人不受孤僻和孤立的痛苦,他们只想要生活在一个人情味的世界里。\n我们欢迎您使用我们发布的人工智能手机,不受孤僻和孤立的痛苦,您的生活将更加美好!"}
```
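
For reference, here is a minimal inference sketch (not part of the original card). It assumes the ChatML prompt format and default system message from the axolotl config above, and uses a placeholder for the model path:

```python
# Untested sketch; "path/to/this/repo" is a placeholder for the local path or Hub id
# of this model. The prompt follows the ChatML format the model was fine-tuned with
# (eos_token = <|im_end|>, per the training config above).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "path/to/this/repo"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")

prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\nHow do I wrap a present neatly?<|im_end|>\n"
    "<|im_start|>assistant\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>"),
)
# Decode only the newly generated tokens.
print(tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```

Note that the `chat_template` shipped in `tokenizer_config.json` (see below) still uses `<|user|>`/`<|assistant|>` markers, so the ChatML prompt is constructed by hand here to match the training configuration rather than via `tokenizer.apply_chat_template`.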


## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 2e-05
- train_batch_size: 4
- eval_batch_size: 4
- seed: 42
- distributed_type: multi-GPU
- num_devices: 2
- gradient_accumulation_steps: 8
- total_train_batch_size: 64
- total_eval_batch_size: 8
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_steps: 100
- num_epochs: 2
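
The reported `total_train_batch_size` follows directly from the setup above: `micro_batch_size` × `gradient_accumulation_steps` × `num_devices` = 4 × 8 × 2 = 64 sequences per optimizer step (and likewise 4 × 2 = 8 for evaluation).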

### Training results



### Framework versions

- Transformers 4.40.1
- Pytorch 2.0.1+cu118
- Datasets 2.15.0
- Tokenizers 0.19.1
added_tokens.json
ADDED
@@ -0,0 +1,5 @@
{
  "<|end_of_text|>": 32001,
  "<|im_end|>": 32000,
  "<|im_start|>": 32002
}
config.json
ADDED
@@ -0,0 +1,28 @@
{
  "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.1",
  "use_cache": false,
  "vocab_size": 32003
}
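
Note: `vocab_size` is 32003, i.e. the 32,000-entry base Llama vocabulary plus the three tokens added in `added_tokens.json` (IDs 32000-32002), and `eos_token_id` now points at `<|im_end|>` (32000).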
generation_config.json
ADDED
@@ -0,0 +1,8 @@
{
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_length": 2048,
  "pad_token_id": 0,
  "transformers_version": "4.40.1"
}
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e23f4587ee4d027b2604b6a7f71da1085b68a3ca18cc94c918d02cd128f678c3
size 2200148479
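
The checkpoint size of 2,200,148,479 bytes (~2.2 GB) is consistent with roughly 1.1B parameters stored as 2-byte bfloat16 values (1.1e9 × 2 bytes ≈ 2.2e9 bytes), matching the `torch_dtype` recorded in `config.json`.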
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|end_of_text|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
size 499723
tokenizer_config.json
ADDED
@@ -0,0 +1,66 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32000": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "32001": {
      "content": "<|end_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32002": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "bos_token": "<s>",
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "legacy": false,
  "model_max_length": 2048,
  "pad_token": "<|end_of_text|>",
  "padding_side": "right",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
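
As a quick sanity check of the tokenizer files above (a sketch, not part of the original upload; the model path is a placeholder):

```python
# Untested sketch: verify the added tokens and special-token settings described in
# added_tokens.json and tokenizer_config.json. "path/to/this/repo" is a placeholder.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/this/repo")

assert tokenizer.convert_tokens_to_ids("<|im_end|>") == 32000
assert tokenizer.convert_tokens_to_ids("<|end_of_text|>") == 32001
assert tokenizer.convert_tokens_to_ids("<|im_start|>") == 32002
assert tokenizer.eos_token == "<|im_end|>"
assert tokenizer.pad_token == "<|end_of_text|>"
print(len(tokenizer))  # expected 32003, matching vocab_size in config.json
```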