Model save

Files changed (7)

README.md CHANGED Viewed

@@ -1,11 +1,9 @@
 ---
 base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
-datasets: GAIR/LIMO
 library_name: transformers
 model_name: DeepSeek-R1-Distill-Qwen-1.5B-GRPO
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - grpo
 licence: license
@@ -13,7 +11,7 @@ licence: license
 # Model Card for DeepSeek-R1-Distill-Qwen-1.5B-GRPO
-This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) on the [GAIR/LIMO](https://huggingface.co/datasets/GAIR/LIMO) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chosenqiuch-kaust/huggingface/runs/rgh5vqsi)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

 ---
 base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
 library_name: transformers
 model_name: DeepSeek-R1-Distill-Qwen-1.5B-GRPO
 tags:
 - generated_from_trainer
 - trl
 - grpo
 licence: license
 # Model Card for DeepSeek-R1-Distill-Qwen-1.5B-GRPO
+This model is a fine-tuned version of [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chosenqiuch-kaust/huggingface/runs/8obxcw7x)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.059163046507712674,
-    "train_runtime": 9090.9077,
     "train_samples": 817,
-    "train_samples_per_second": 0.09,
-    "train_steps_per_second": 0.004
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.0,
+    "train_runtime": 3.2357,
     "train_samples": 817,
+    "train_samples_per_second": 252.496,
+    "train_steps_per_second": 10.508
 }

config.json CHANGED Viewed

@@ -23,7 +23,7 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0",
-  "use_cache": true,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 151936

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0",
+  "use_cache": false,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 151936

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4256422650d141f228fe954acee98679da412984c29a569877eefd3af69315a
-size 11422959

 version https://git-lfs.github.com/spec/v1
+oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
+size 11422778

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.059163046507712674,
-    "train_runtime": 9090.9077,
     "train_samples": 817,
-    "train_samples_per_second": 0.09,
-    "train_steps_per_second": 0.004
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.0,
+    "train_runtime": 3.2357,
     "train_samples": 817,
+    "train_samples_per_second": 252.496,
+    "train_steps_per_second": 10.508
 }

trainer_state.json CHANGED Viewed

@@ -488,10 +488,10 @@
       "epoch": 0.9963369963369964,
       "step": 34,
       "total_flos": 0.0,
-      "train_loss": 0.059163046507712674,
-      "train_runtime": 9090.9077,
-      "train_samples_per_second": 0.09,
-      "train_steps_per_second": 0.004
     }
   ],
   "logging_steps": 1,

       "epoch": 0.9963369963369964,
       "step": 34,
       "total_flos": 0.0,
+      "train_loss": 0.0,
+      "train_runtime": 3.2357,
+      "train_samples_per_second": 252.496,
+      "train_steps_per_second": 10.508
     }
   ],
   "logging_steps": 1,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:057071baadf4d5b35a94cb6b06101c789664be7adcfe3566dc3678d34c865c71
 size 8120

 version https://git-lfs.github.com/spec/v1
+oid sha256:02fe3e274d53b070c1aeba1d1332bc4001aad943be31d3723844ef3d3fe46ba1
 size 8120