Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/fsdp_config.json +4 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/config.json +27 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/generation_config.json +6 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json +24 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.json +0 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.model +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json +43 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/data.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/fsdp_config.json +4 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/config.json +27 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/generation_config.json +6 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json +24 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.json +0 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.model +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json +43 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/data.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/fsdp_config.json +4 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/config.json +27 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/generation_config.json +6 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json +24 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.json +0 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.model +3 -0
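
The commit title above says this folder was pushed with the huggingface_hub client. As a hedged sketch only (the repository ID and local folder path below are hypothetical placeholders, not values taken from this commit), an upload of this kind is typically produced by a call like:

    # Sketch: pushing a local checkpoint tree to the Hub with huggingface_hub.
    # Assumptions: repo_id and folder_path are placeholders; authentication uses the
    # locally stored Hugging Face token.
    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_folder(
        folder_path="models/OLMo-1B-RL",                  # local directory to push (placeholder)
        repo_id="your-username/OLMo-1B-RL-checkpoints",   # hypothetical target repository
        repo_type="model",
        commit_message="Upload folder using huggingface_hub",
    )

The Hub stores the large .pt shards via Git LFS, which is why each of them appears below as a three-line pointer (version, sha256 oid, size) rather than as binary content.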
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ef32b0629e0b30bbdb397b53597aa2e25a49add874131dea247b0df72bca03a
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79ad9fc8d772a19aa7933ce95b505db7aafe290b1bb87c22e686ae468eb1acb8
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42bcf184d06a6b3332fc8013ee2cb6818ca684ee7020fb39d9ae6df18d941dd8
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e43e3d64c9c0d0222115cce784d417cf35dc1ab705ed140f182f0b6ae38b4de
+size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/fsdp_config.json
ADDED
@@ -0,0 +1,4 @@
+{
+  "FSDP_version": 2,
+  "world_size": 4
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/config.json
ADDED
@@ -0,0 +1,27 @@
+{
+  "architectures": [
+    "OlmoForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "clip_qkv": null,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 2048,
+  "model_type": "olmo",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 16,
+  "pad_token_id": 2,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/generation_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "eos_token_id": 50279,
+  "pad_token_id": 1,
+  "transformers_version": "4.57.3"
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json
ADDED
@@ -0,0 +1,43 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
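
Each actor/huggingface/ folder in this commit holds only the exported config and tokenizer files (config.json, generation_config.json, tokenizer.*), not consolidated model weights. As a minimal sketch, assuming the repository has been downloaded locally so the relative path below exists, the metadata can be inspected with transformers:

    # Sketch: inspecting the exported HF config/tokenizer for one checkpoint.
    # Assumption: a local copy of the repo is laid out as in the paths above.
    from transformers import AutoConfig, AutoTokenizer

    ckpt = "models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface"
    config = AutoConfig.from_pretrained(ckpt)        # reads config.json (OlmoForCausalLM, hidden_size 2048, ...)
    tokenizer = AutoTokenizer.from_pretrained(ckpt)  # reads tokenizer_config.json / tokenizer.model / tokenizer.json
    print(config.model_type, config.vocab_size, tokenizer.__class__.__name__)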
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad0d73bf0b511601a43fb98d50e184ef58f6595a39a6dda9d60bf0bafa55f18
+size 1139342746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c484a4d6bdbe165301a37aaf103796e594253163a8d6c73e264b745f6358a4b0
+size 1139342746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97ad6fe76f8d9e2c075745f98222747d4981e0f6513b452ba9c21eeda3a0d5f5
+size 1139342746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03d58068b1376ec4bb52629bd2a650c7ad7bf7f2ba6def2b4e2855c6f6e20272
+size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dcf647cb8a2c50abfaabac25f6bcab8b51a41778b303ed53db88b00702077cb
+size 2278786746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c428ec33be1506e0721570d249843d7cb1d4347851e1e5cf861a880d9db5f6d7
+size 2278786746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bea523b93ff86f87515cd5c25ba37a055d1d686c5945fa781b2f918d8f153570
+size 2278786746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5c5e67afe258967e898d5659ec89192545addd4192befb3116f776fa70fcb6f
+size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/data.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:427b58a85ba4be279e72ef011f1bd23c3f5a5bc8e30a64b5d6c103b57ea59d8a
+size 1492
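
The model_*, optim_*, and extra_state_* entries above are Git LFS pointers: each records only a version line, a sha256 oid, and the byte size (about 1.1 GB per model shard and 2.3 GB per optimizer shard here). A hedged sketch of resolving one shard to its actual bytes, assuming a hypothetical repository ID:

    # Sketch: downloading one LFS-backed shard from the Hub.
    # Assumption: repo_id is a hypothetical placeholder; the filename is taken from this diff.
    from huggingface_hub import hf_hub_download

    local_path = hf_hub_download(
        repo_id="your-username/OLMo-1B-RL-checkpoints",
        filename="models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt",
    )
    print(local_path)  # the downloaded file is the full 1,139,342,746-byte shard, not the 3-line pointer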
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a45820130574fe7834cd0517c241dbd9002dcf31d01e0f29883a9e6e4d7e1661
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6da9f947eaf434f2cd11e52af4256ecd9315c0397ca6e6f86bc04f67ae2f8be5
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f887a4ac969b3cd71ec62da615bae59d890bb43f740c76ee80dff718ee001d4
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2479c6e7450c04ed7b87d52c879629d33fe5e56cc1d8fff60d63b933c172d150
+size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/fsdp_config.json
ADDED
@@ -0,0 +1,4 @@
+{
+  "FSDP_version": 2,
+  "world_size": 4
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/config.json
ADDED
@@ -0,0 +1,27 @@
+{
+  "architectures": [
+    "OlmoForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "clip_qkv": null,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 2048,
+  "model_type": "olmo",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 16,
+  "pad_token_id": 2,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/generation_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "eos_token_id": 50279,
+  "pad_token_id": 1,
+  "transformers_version": "4.57.3"
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json
ADDED
@@ -0,0 +1,43 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6ec41a699da9f76949e01a3e1b883a70ec1250ae6659eaa31cf427af0fc3cfc
+size 1139342746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:603e588223f39b3d6ad4781bc515b68c56a2c354df0c3cb3b7b009c90b6512bb
+size 1139342746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9854ab6bb52b47c7f0386d54f7c0249730ed24050aef7ce7c3ea8400f3ec0aa7
+size 1139342746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f4cea1ec7e384eac774c872d7f301458849e2f9d6c3530279c46f300338f0f
+size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d75a88ca52377e3de1df4601642c7ad2c4c2166dfa3e42832fdba87c413450c
+size 2278786746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a41b07e52ee80f3b3184d9cca7bf3f9076bc8cce6fb34cdbf5f85ba44979d616
+size 2278786746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99b95ca64220f240fca6e8540f0a1bc79e6f0c883bc813823ae5f02aeedba69a
+size 2278786746

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc6a04b89daa83b0d642f29950d28d63aa6165386145a2b001fe6057fad8181d
+size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/data.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9c59089524c193d50f366c2c4ea80593f2c1ce90bdafe2244cc45c67b9f4bc9
+size 1492
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f47cf8f463fadbf107624a37f9a76395da4b400a4e629bd60e58c241911a9f4
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f382d5ff96d18e30fa84172caa7cfa5bd962986c132059e65bde49939632c806
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94f6b3b6643a66656d1108b285a2c589a53094c3a8e621379bf30da489212bf0
+size 14632

models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cccb1f5dc44de358675f5f2de593c756271a623ee55352fc1cf13e5d3a164593
+size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/fsdp_config.json
ADDED
@@ -0,0 +1,4 @@
+{
+  "FSDP_version": 2,
+  "world_size": 4
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/config.json
ADDED
@@ -0,0 +1,27 @@
+{
+  "architectures": [
+    "OlmoForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "clip_qkv": null,
+  "dtype": "float32",
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 2048,
+  "model_type": "olmo",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 16,
+  "pad_token_id": 2,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/generation_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "eos_token_id": 50279,
+  "pad_token_id": 1,
+  "transformers_version": "4.57.3"
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723