Add library name, pipeline tag

#1
by nielsr HF Staff - opened
Files changed (1)
  1. README.md +212 -2
README.md CHANGED
@@ -1,7 +1,9 @@
  ---
  # For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
  # Doc / guide: https://huggingface.co/docs/hub/datasets-cards
- {}
+ library_name: transformers
+ pipeline_tag: text-generation
+ ---
  ---

  # CodeI/O: Condensing Reasoning Patterns via Code Input-Output Prediction
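
The two added metadata fields register the checkpoint with the `transformers` library and give it a text-generation widget on the Hub. As a rough sketch of what the tag implies for downstream use (the repo id below is a placeholder, not taken from this PR):

```python
from transformers import pipeline

# Placeholder repo id -- substitute the repository this PR targets.
generator = pipeline("text-generation", model="org/codeio-model")

prompt = "Predict the output of sorted([3, 1, 2])."
print(generator(prompt, max_new_tokens=64)[0]["generated_text"])
```
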
@@ -70,4 +72,212 @@ If you find these resources helpful, please kindly cite as:
  journal={arXiv preprint arXiv:2502.07316},
  year={2025}
  }
- ```
+ ```
+
+ # File information
+
+ The repository contains the following file information:
+
+ Filename: tokenizer_config.json
+ Content: {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "legacy": true,
+ "model_max_length": 16384,
+ "pad_token": {
+ "__type": "AddedToken",
+ "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "sp_model_kwargs": {},
+ "unk_token": null,
+ "tokenizer_class": "LlamaTokenizerFast",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c>'}}{% endif %}"
+ }
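
The embedded `chat_template` is easier to understand from its output than from the Jinja itself: it prepends the BOS token and any system prompt, wraps turns in `<｜User｜>`/`<｜Assistant｜>` markers, and strips any `</think>` prefix from prior assistant turns. A minimal sketch, assuming a local clone of the repo carrying this tokenizer config:

```python
from transformers import AutoTokenizer

# "." assumes the repository has been cloned into the working directory.
tok = AutoTokenizer.from_pretrained(".")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Predict the output of sorted([3, 1, 2])."},
]
# add_generation_prompt appends the assistant marker for the model to continue.
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
```
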
+
+ Filename: generation_config.json
+ Content: {
+ "_from_model_config": true,
+ "bos_token_id": 32000,
+ "eos_token_id": 32001,
+ "transformers_version": "4.34.1"
+ }
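
`generation_config.json` should agree with `config.json` (listed next) on the special-token ids; a stdlib-only consistency check, assuming a local clone:

```python
import json

# Paths assume the repository files sit in the working directory.
with open("config.json") as f:
    model_cfg = json.load(f)
with open("generation_config.json") as f:
    gen_cfg = json.load(f)

for key in ("bos_token_id", "eos_token_id"):
    print(key, model_cfg[key], gen_cfg[key], model_cfg[key] == gen_cfg[key])
```
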
+
+ Filename: config.json
+ Content: {
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "bos_token_id": 32000,
+ "eos_token_id": 32001,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "llama",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 100000,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.34.1",
+ "use_cache": true,
+ "vocab_size": 32256
+ }
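
These hyperparameters pin down the attention layout. A small sketch of the derived quantities, with the numbers hard-coded from the config above:

```python
# Values copied from config.json above.
hidden_size = 4096
num_attention_heads = 32
num_key_value_heads = 8
intermediate_size = 14336

head_dim = hidden_size // num_attention_heads                # 128
queries_per_kv = num_attention_heads // num_key_value_heads  # 4 -> grouped-query attention
kv_proj_dim = num_key_value_heads * head_dim                 # 1024 (vs 4096 for q_proj)
mlp_ratio = intermediate_size / hidden_size                  # 3.5

print(head_dim, queries_per_kv, kv_proj_dim, mlp_ratio)
```
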
+
+ Filename: tokenizer.json
+ Content: "Content of the file is larger than 50 KB, too long to display."
+
+ Filename: model.safetensors.index.json
+ Content: {
+ "metadata": {
+ "total_size": 16060522496
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00002-of-00002.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
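
The weight map (truncated above) is what `transformers` consults when loading a sharded checkpoint. A stdlib-only sketch of inspecting it, assuming a local clone:

```python
import json
from collections import Counter

# The index maps every tensor name to the shard file that stores it.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])  # 16060522496 bytes across all shards
for shard, n in sorted(Counter(index["weight_map"].values()).items()):
    print(f"{shard}: {n} tensors")
```
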