Upload 7 files

- README.md +252 -0
- config.json +26 -0
- generation_config.json +6 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer_config.json +86 -0
README.md
ADDED
@@ -0,0 +1,252 @@
---
license: apache-2.0
base_model: Locutusque/TinyMistral-248M-v2.5
tags:
- generated_from_trainer
model-index:
- name: TinyMistral-FFT
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.0`
```yaml
base_model: Locutusque/TinyMistral-248M-v2.5
model_type: MistralForCausalLM
is_mistral_derived_model: true

load_in_8bit: false
load_in_4bit: false
strict: false

dataset_processes: 20

datasets:
  - path: epfl-llm/guidelines
    type: completion
    field: clean_text
  - path: JeanKaddour/minipile
    type: completion
    field: text

dataset_prepared_path: TinyMistral-FFT-data
val_set_size: 0.001
output_dir: ./TinyMistral-FFT

sequence_len: 2048
sample_packing: false
pad_to_sequence_len: true

adapter:
lora_model_dir:
lora_r:
lora_alpha:
lora_dropout:
lora_target_linear:
lora_fan_in_fan_out:

# wandb configuration
wandb_project: TinyMistral-FFT
wandb_watch:
wandb_run_id:
wandb_log_model:

gradient_accumulation_steps: 2
micro_batch_size: 4
num_epochs: 1
optimizer: paged_adamw_32bit
lr_scheduler: constant
cosine_min_lr_ratio:

learning_rate: 0.00005

train_on_inputs: true
group_by_length: false
bf16: true
fp16: false
tf32: false

gradient_checkpointing: false
early_stopping_patience:
resume_from_checkpoint:
auto_resume_from_checkpoints: True
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true
flash_attn_cross_entropy: false
flash_attn_rms_norm: true
flash_attn_fuse_qkv: false
flash_attn_fuse_mlp: true

warmup_steps: 10
evals_per_epoch: 100
# eval_steps: 10
eval_table_size:
saves_per_epoch: 50
debug:
deepspeed: #deepspeed/zero2.json # multi-gpu only
weight_decay: 0

# tokens:

special_tokens:
  bos_token: "<|bos|>"
  eos_token: "<|endoftext|>"
  unk_token: "<unk>"
```

</details><br>

# TinyMistral-FFT

This model is a fine-tuned version of [Locutusque/TinyMistral-248M-v2.5](https://huggingface.co/Locutusque/TinyMistral-248M-v2.5) on the [epfl-llm/guidelines](https://huggingface.co/datasets/epfl-llm/guidelines) and [JeanKaddour/minipile](https://huggingface.co/datasets/JeanKaddour/minipile) datasets.
It achieves the following results on the evaluation set:
- Loss: 2.9626

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

Trained as plain-text completion data on [epfl-llm/guidelines](https://huggingface.co/datasets/epfl-llm/guidelines) (`clean_text` field) and [JeanKaddour/minipile](https://huggingface.co/datasets/JeanKaddour/minipile) (`text` field), with 0.1% of the data (`val_set_size: 0.001`) held out for evaluation.

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 5e-05
- train_batch_size: 4
- eval_batch_size: 4
- seed: 42
- gradient_accumulation_steps: 2
- total_train_batch_size: 8
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: constant
- lr_scheduler_warmup_steps: 10
- num_epochs: 1

### Training results

| Training Loss | Epoch | Step   | Validation Loss |
|:-------------:|:-----:|:------:|:---------------:|
| 4.5414        | 0.0   | 1      | 4.3416          |
| 4.4364        | 0.01  | 1973   | 3.6048          |
| 3.1588        | 0.02  | 3946   | 3.4869          |
| 3.1823        | 0.03  | 5919   | 3.4237          |
| 2.975         | 0.04  | 7892   | 3.3813          |
| 3.2737        | 0.05  | 9865   | 3.3476          |
| 3.7929        | 0.06  | 11838  | 3.3174          |
| 3.3775        | 0.07  | 13811  | 3.2947          |
| 3.6789        | 0.08  | 15784  | 3.2756          |
| 3.4811        | 0.09  | 17757  | 3.2590          |
| 3.3961        | 0.1   | 19730  | 3.2406          |
| 3.4742        | 0.11  | 21703  | 3.2255          |
| 3.5353        | 0.12  | 23676  | 3.2130          |
| 2.5729        | 0.13  | 25649  | 3.2018          |
| 3.0246        | 0.14  | 27622  | 3.1915          |
| 3.5242        | 0.15  | 29595  | 3.1814          |
| 2.6597        | 0.16  | 31568  | 3.1728          |
| 3.0312        | 0.17  | 33541  | 3.1635          |
| 3.2913        | 0.18  | 35514  | 3.1564          |
| 2.8945        | 0.19  | 37487  | 3.1487          |
| 3.2407        | 0.2   | 39460  | 3.1423          |
| 3.076         | 0.21  | 41433  | 3.1358          |
| 3.4588        | 0.22  | 43406  | 3.1298          |
| 3.1972        | 0.23  | 45379  | 3.1236          |
| 2.8544        | 0.24  | 47352  | 3.1182          |
| 2.949         | 0.25  | 49325  | 3.1116          |
| 3.7614        | 0.26  | 51298  | 3.1078          |
| 2.7729        | 0.27  | 53271  | 3.1022          |
| 3.371         | 0.28  | 55244  | 3.0972          |
| 3.1048        | 0.29  | 57217  | 3.0932          |
| 3.0419        | 0.3   | 59190  | 3.0877          |
| 3.0947        | 0.31  | 61163  | 3.0821          |
| 3.4587        | 0.32  | 63136  | 3.0783          |
| 2.8448        | 0.33  | 65109  | 3.0760          |
| 3.3145        | 0.34  | 67082  | 3.0711          |
| 3.1927        | 0.35  | 69055  | 3.0668          |
| 3.3117        | 0.36  | 71028  | 3.0643          |
| 3.2579        | 0.37  | 73001  | 3.0613          |
| 3.1899        | 0.38  | 74974  | 3.0597          |
| 3.0391        | 0.39  | 76947  | 3.0563          |
| 2.6476        | 0.4   | 78920  | 3.0542          |
| 2.9163        | 0.41  | 80893  | 3.0504          |
| 2.4931        | 0.42  | 82866  | 3.0489          |
| 3.3614        | 0.43  | 84839  | 3.0451          |
| 3.1546        | 0.44  | 86812  | 3.0416          |
| 2.8995        | 0.45  | 88785  | 3.0403          |
| 2.8657        | 0.46  | 90758  | 3.0370          |
| 3.4511        | 0.47  | 92731  | 3.0343          |
| 3.2269        | 0.48  | 94704  | 3.0323          |
| 2.6914        | 0.49  | 96677  | 3.0302          |
| 3.087         | 0.5   | 98650  | 3.0282          |
| 3.3036        | 0.51  | 100623 | 3.0266          |
| 3.2269        | 0.52  | 102596 | 3.0251          |
| 3.1237        | 0.53  | 104569 | 3.0223          |
| 2.9733        | 0.54  | 106542 | 3.0197          |
| 3.0594        | 0.55  | 108515 | 3.0186          |
| 2.9842        | 0.56  | 110488 | 3.0168          |
| 3.0986        | 0.57  | 112461 | 3.0158          |
| 3.0296        | 0.58  | 114434 | 3.0141          |
| 3.0091        | 0.59  | 116407 | 3.0139          |
| 2.7111        | 0.6   | 118380 | 3.0107          |
| 3.115         | 0.61  | 120353 | 3.0080          |
| 3.2585        | 0.62  | 122326 | 3.0063          |
| 3.0651        | 0.63  | 124299 | 3.0038          |
| 2.965         | 0.64  | 126272 | 3.0035          |
| 2.9165        | 0.65  | 128245 | 3.0023          |
| 2.8069        | 0.66  | 130218 | 3.0007          |
| 2.9818        | 0.67  | 132191 | 2.9995          |
| 2.8997        | 0.68  | 134164 | 2.9978          |
| 2.948         | 0.69  | 136137 | 2.9966          |
| 3.034         | 0.7   | 138110 | 2.9953          |
| 3.1774        | 0.71  | 140083 | 2.9936          |
| 3.3357        | 0.72  | 142056 | 2.9919          |
| 3.2333        | 0.73  | 144029 | 2.9897          |
| 3.1183        | 0.74  | 146002 | 2.9889          |
| 3.1148        | 0.75  | 147975 | 2.9887          |
| 2.8678        | 0.76  | 149948 | 2.9867          |
| 2.6597        | 0.77  | 151921 | 2.9850          |
| 3.1122        | 0.78  | 153894 | 2.9842          |
| 3.1959        | 0.79  | 155867 | 2.9825          |
| 2.8623        | 0.8   | 157840 | 2.9808          |
| 2.9416        | 0.81  | 159813 | 2.9809          |
| 3.0551        | 0.82  | 161786 | 2.9792          |
| 2.9538        | 0.83  | 163759 | 2.9777          |
| 2.8278        | 0.84  | 165732 | 2.9767          |
| 3.4942        | 0.85  | 167705 | 2.9762          |
| 2.838         | 0.86  | 169678 | 2.9740          |
| 3.0352        | 0.87  | 171651 | 2.9720          |
| 2.8865        | 0.88  | 173624 | 2.9724          |
| 3.0911        | 0.89  | 175597 | 2.9708          |
| 2.8237        | 0.9   | 177570 | 2.9703          |
| 2.9927        | 0.91  | 179543 | 2.9695          |
| 3.2014        | 0.92  | 181516 | 2.9680          |
| 2.3033        | 0.93  | 183489 | 2.9666          |
| 2.6264        | 0.94  | 185462 | 2.9668          |
| 3.1788        | 0.95  | 187435 | 2.9659          |
| 3.066         | 0.96  | 189408 | 2.9645          |
| 2.5523        | 0.97  | 191381 | 2.9640          |
| 2.4562        | 0.98  | 193354 | 2.9630          |
| 3.3801        | 0.99  | 195327 | 2.9626          |

### Framework versions

- Transformers 4.37.0
- Pytorch 2.0.1+cu117
- Datasets 2.15.0
- Tokenizers 0.15.0
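The card ships without a usage section; below is a minimal inference sketch, assuming these files are pushed to a Hugging Face repo (the repo id is a placeholder, not confirmed by this commit):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

REPO_ID = "your-username/TinyMistral-FFT"  # hypothetical placeholder

tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
model = AutoModelForCausalLM.from_pretrained(REPO_ID, torch_dtype=torch.bfloat16)

# The model was trained on plain completion data (no chat template),
# so prompt it with text to continue.
inputs = tokenizer("Hypertension is managed by", return_tensors="pt")
outputs = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=True,
    temperature=0.7,
    eos_token_id=tokenizer.eos_token_id,  # <|endoftext|> per the tokenizer files
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```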
config.json
ADDED
@@ -0,0 +1,26 @@
```json
{
  "_name_or_path": "Locutusque/TinyMistral-248M-v2.5",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 32000,
  "eos_token_id": 32001,
  "hidden_act": "silu",
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 12,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-06,
  "rope_theta": 10000.0,
  "sliding_window": 32,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.37.0",
  "use_cache": false,
  "vocab_size": 32005
}
```
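As a sanity check on the architecture above, a sketch: rebuilding the config in transformers (with random weights) and counting parameters should land near the 248M in the base model's name.

```python
from transformers import MistralConfig, MistralForCausalLM

# Values copied from config.json above.
config = MistralConfig(
    vocab_size=32005,
    hidden_size=1024,
    intermediate_size=4096,
    num_hidden_layers=12,
    num_attention_heads=32,   # head_dim = 1024 / 32 = 32
    num_key_value_heads=8,    # grouped-query attention: 4 query heads per KV head
    max_position_embeddings=32768,
    sliding_window=32,
    tie_word_embeddings=False,
)
model = MistralForCausalLM(config)  # random init, no download needed
print(f"{sum(p.numel() for p in model.parameters()) / 1e6:.0f}M parameters")
# ~248M: two untied 32005 x 1024 embedding/lm_head matrices plus 12 layers
```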
generation_config.json
ADDED
@@ -0,0 +1,6 @@
```json
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.37.0"
}
```
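Note that generation_config.json still carries the base Llama-style IDs (bos_token_id 1 = `<s>`, eos_token_id 2 = `</s>`), while config.json and the tokenizer files map `<|bos|>`/`<|endoftext|>` to 32000/32001. A hedged workaround sketch, in case generation does not stop at `<|endoftext|>`:

```python
from transformers import GenerationConfig

# IDs taken from config.json / tokenizer_config.json above.
gen_config = GenerationConfig(
    bos_token_id=32000,  # <|bos|>
    eos_token_id=32001,  # <|endoftext|>
    max_new_tokens=64,
)
# usage: model.generate(**inputs, generation_config=gen_config)
```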
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
```text
version https://git-lfs.github.com/spec/v1
oid sha256:9ac431a7eddfc599800f3cf4ca1cd4645a251750a3fcd88b0bce347544f0e3da
size 596748425
```
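The three lines above are a Git LFS pointer, not the weights themselves. A sketch for verifying a downloaded pytorch_model.bin (after `git lfs pull` or a hub download) against the pointer's sha256 oid:

```python
import hashlib

# Expected digest, copied from the LFS pointer's "oid sha256:" line above.
EXPECTED = "9ac431a7eddfc599800f3cf4ca1cd4645a251750a3fcd88b0bce347544f0e3da"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in 1 MiB chunks so the ~597 MB file isn't loaded at once."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

assert sha256_of("pytorch_model.bin") == EXPECTED, "LFS object mismatch"
```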
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
```json
{
  "bos_token": {
    "content": "<|bos|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
```
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1,86 @@
```json
{
  "add_bos_token": true,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32000": {
      "content": "<|bos|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32001": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32002": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32003": {
      "content": "<|ASSISTANT|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32004": {
      "content": "<|USER|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [],
  "bos_token": "<|bos|>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "legacy": true,
  "max_length": 1536,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<|endoftext|>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "stride": 0,
  "tokenizer_class": "LlamaTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>",
  "use_default_system_prompt": true
}
```
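A short sketch of how these settings behave at encode time (`add_bos_token: true` prepends `<|bos|>`; `add_eos_token: false` leaves `<|endoftext|>` off); the repo id is again a placeholder:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-username/TinyMistral-FFT")

ids = tokenizer("hello world").input_ids
print(ids[0] == tokenizer.bos_token_id)                 # True -> 32000 (<|bos|>)
print(tokenizer.eos_token_id, tokenizer.pad_token_id)   # 32001 32001 (pad shares eos)
print(tokenizer.convert_ids_to_tokens([32003, 32004]))  # ['<|ASSISTANT|>', '<|USER|>']
```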