nzy123 committed on
Commit
294a10b
·
verified ·
1 Parent(s): c4ebcb4

Model save

Browse files
README.md CHANGED
@@ -26,7 +26,7 @@ print(output["generated_text"])
26
 
27
  ## Training procedure
28
 
29
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/757885345/huggingface/runs/j2m7tvu0)
30
 
31
 
32
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
@@ -34,7 +34,7 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
34
  ### Framework versions
35
 
36
  - TRL: 0.14.0
37
- - Transformers: 4.48.2
38
  - Pytorch: 2.5.1
39
  - Datasets: 3.2.0
40
  - Tokenizers: 0.21.0
 
26
 
27
  ## Training procedure
28
 
29
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/757885345/huggingface/runs/hqhsm4kq)
30
 
31
 
32
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
34
  ### Framework versions
35
 
36
  - TRL: 0.14.0
37
+ - Transformers: 4.49.0.dev0
38
  - Pytorch: 2.5.1
39
  - Datasets: 3.2.0
40
  - Tokenizers: 0.21.0
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 1.862645149230957e-09,
4
- "train_runtime": 21.2071,
5
  "train_samples": 6,
6
- "train_samples_per_second": 0.283,
7
- "train_steps_per_second": 0.047
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 1.862645149230957e-09,
4
+ "train_runtime": 10.39,
5
  "train_samples": 6,
6
+ "train_samples_per_second": 0.577,
7
+ "train_steps_per_second": 0.096
8
  }
config.json CHANGED
@@ -19,10 +19,10 @@
19
  "rms_norm_eps": 1e-06,
20
  "rope_scaling": null,
21
  "rope_theta": 1000000.0,
22
- "sliding_window": null,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.48.2",
26
  "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
 
19
  "rms_norm_eps": 1e-06,
20
  "rope_scaling": null,
21
  "rope_theta": 1000000.0,
22
+ "sliding_window": 32768,
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.49.0.dev0",
26
  "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
generation_config.json CHANGED
@@ -10,5 +10,5 @@
10
  "temperature": 0.7,
11
  "top_k": 20,
12
  "top_p": 0.8,
13
- "transformers_version": "4.48.2"
14
  }
 
10
  "temperature": 0.7,
11
  "top_k": 20,
12
  "top_p": 0.8,
13
+ "transformers_version": "4.49.0.dev0"
14
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:130282af0dfa9fe5840737cc49a0d339d06075f83c5a315c3372c9a0740d0b96
3
- size 988097824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f301d59ea1204790a259d51cce69fe450fc2ebb8d3f7b906f8c4a2dd5c787d8
3
+ size 1260367448
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 1.862645149230957e-09,
4
- "train_runtime": 21.2071,
5
  "train_samples": 6,
6
- "train_samples_per_second": 0.283,
7
- "train_steps_per_second": 0.047
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
  "train_loss": 1.862645149230957e-09,
4
+ "train_runtime": 10.39,
5
  "train_samples": 6,
6
+ "train_samples_per_second": 0.577,
7
+ "train_steps_per_second": 0.096
8
  }
trainer_state.json CHANGED
@@ -21,9 +21,9 @@
21
  "step": 1,
22
  "total_flos": 0.0,
23
  "train_loss": 1.862645149230957e-09,
24
- "train_runtime": 21.2071,
25
- "train_samples_per_second": 0.283,
26
- "train_steps_per_second": 0.047
27
  }
28
  ],
29
  "logging_steps": 5,
 
21
  "step": 1,
22
  "total_flos": 0.0,
23
  "train_loss": 1.862645149230957e-09,
24
+ "train_runtime": 10.39,
25
+ "train_samples_per_second": 0.577,
26
+ "train_steps_per_second": 0.096
27
  }
28
  ],
29
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5790cfe1b5120af98ada14eba1bd93b2e2a008dd6cdd1bbde119697a4f17791b
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acd50cb540127364ff247e65aed68c58de505ca2d10bceacd7e53b909ffc3d10
3
  size 7288