Model save

Browse files

Files changed (7) hide show

README.md +57 -0
all_results.json +8 -0
generation_config.json +14 -0
model.safetensors +1 -1
train_results.json +8 -0
trainer_state.json +1302 -0
training_args.bin +1 -1

README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+---
+library_name: transformers
+model_name: Qwen2.5-1.5B-Open-R1-Distill
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+# Model Card for Qwen2.5-1.5B-Open-R1-Distill
+This model is a fine-tuned version of a base model that was not recorded when this card was generated (the template rendered its name as `None`).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="Mumamonster/Qwen2.5-1.5B-Open-R1-Distill", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chi-zhang-working/huggingface/runs/177axnqk)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.16.0.dev0
+- Transformers: 4.49.0
+- PyTorch: 2.5.1
+- Datasets: 3.3.2
+- Tokenizers: 0.21.0
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 102527278252032.0,
+    "train_loss": 0.0,
+    "train_runtime": 1.3976,
+    "train_samples": 24772,
+    "train_samples_per_second": 5132.403,
+    "train_steps_per_second": 80.138
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.1,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.49.0"
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f50d7444bbe6e6adc26e3b1a8530c2e302f11d433df36327e64c7bfb3fab840
 size 3087467144

 version https://git-lfs.github.com/spec/v1
+oid sha256:959b7363530b70aed7aa54030fd31d9881cf14fb2bcff5e7c2bd785e113a5629
 size 3087467144

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 102527278252032.0,
+    "train_loss": 0.0,
+    "train_runtime": 1.3976,
+    "train_samples": 24772,
+    "train_samples_per_second": 5132.403,
+    "train_steps_per_second": 80.138
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1302 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.21015761821366025,
+  "eval_steps": 500,
+  "global_step": 900,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0011675423234092236,
+      "grad_norm": 2.5127346776517427,
+      "learning_rate": 1.1627906976744186e-06,
+      "loss": 0.9966,
+      "step": 5
+    },
+    {
+      "epoch": 0.002335084646818447,
+      "grad_norm": 2.09708226442615,
+      "learning_rate": 2.325581395348837e-06,
+      "loss": 0.9925,
+      "step": 10
+    },
+    {
+      "epoch": 0.0035026269702276708,
+      "grad_norm": 1.4734405270533653,
+      "learning_rate": 3.488372093023256e-06,
+      "loss": 0.9868,
+      "step": 15
+    },
+    {
+      "epoch": 0.004670169293636894,
+      "grad_norm": 1.3838947335985932,
+      "learning_rate": 4.651162790697674e-06,
+      "loss": 0.9573,
+      "step": 20
+    },
+    {
+      "epoch": 0.005837711617046118,
+      "grad_norm": 0.9249234118632634,
+      "learning_rate": 5.8139534883720935e-06,
+      "loss": 0.8709,
+      "step": 25
+    },
+    {
+      "epoch": 0.0070052539404553416,
+      "grad_norm": 0.8820525269459344,
+      "learning_rate": 6.976744186046512e-06,
+      "loss": 0.8603,
+      "step": 30
+    },
+    {
+      "epoch": 0.008172796263864565,
+      "grad_norm": 0.6997651909534601,
+      "learning_rate": 8.139534883720931e-06,
+      "loss": 0.8138,
+      "step": 35
+    },
+    {
+      "epoch": 0.009340338587273789,
+      "grad_norm": 0.6385035962555277,
+      "learning_rate": 9.302325581395349e-06,
+      "loss": 0.8144,
+      "step": 40
+    },
+    {
+      "epoch": 0.010507880910683012,
+      "grad_norm": 0.6459756376057639,
+      "learning_rate": 1.0465116279069768e-05,
+      "loss": 0.799,
+      "step": 45
+    },
+    {
+      "epoch": 0.011675423234092236,
+      "grad_norm": 0.6011999115316619,
+      "learning_rate": 1.1627906976744187e-05,
+      "loss": 0.7696,
+      "step": 50
+    },
+    {
+      "epoch": 0.01284296555750146,
+      "grad_norm": 0.5942896008568019,
+      "learning_rate": 1.2790697674418606e-05,
+      "loss": 0.779,
+      "step": 55
+    },
+    {
+      "epoch": 0.014010507880910683,
+      "grad_norm": 0.6142073839262744,
+      "learning_rate": 1.3953488372093024e-05,
+      "loss": 0.7534,
+      "step": 60
+    },
+    {
+      "epoch": 0.015178050204319907,
+      "grad_norm": 0.5945400236546515,
+      "learning_rate": 1.5116279069767441e-05,
+      "loss": 0.7705,
+      "step": 65
+    },
+    {
+      "epoch": 0.01634559252772913,
+      "grad_norm": 0.6213344800847622,
+      "learning_rate": 1.6279069767441862e-05,
+      "loss": 0.7453,
+      "step": 70
+    },
+    {
+      "epoch": 0.017513134851138354,
+      "grad_norm": 0.6234527297129722,
+      "learning_rate": 1.744186046511628e-05,
+      "loss": 0.7345,
+      "step": 75
+    },
+    {
+      "epoch": 0.018680677174547577,
+      "grad_norm": 0.7664963436446808,
+      "learning_rate": 1.8604651162790697e-05,
+      "loss": 0.7426,
+      "step": 80
+    },
+    {
+      "epoch": 0.0198482194979568,
+      "grad_norm": 0.6386219561082004,
+      "learning_rate": 1.9767441860465116e-05,
+      "loss": 0.7312,
+      "step": 85
+    },
+    {
+      "epoch": 0.021015761821366025,
+      "grad_norm": 0.644937218393417,
+      "learning_rate": 2.0930232558139536e-05,
+      "loss": 0.7385,
+      "step": 90
+    },
+    {
+      "epoch": 0.022183304144775248,
+      "grad_norm": 0.6754323077367341,
+      "learning_rate": 2.2093023255813955e-05,
+      "loss": 0.7269,
+      "step": 95
+    },
+    {
+      "epoch": 0.023350846468184472,
+      "grad_norm": 0.6697056123087642,
+      "learning_rate": 2.3255813953488374e-05,
+      "loss": 0.7363,
+      "step": 100
+    },
+    {
+      "epoch": 0.024518388791593695,
+      "grad_norm": 0.6741873152820539,
+      "learning_rate": 2.441860465116279e-05,
+      "loss": 0.7225,
+      "step": 105
+    },
+    {
+      "epoch": 0.02568593111500292,
+      "grad_norm": 0.6792595483910896,
+      "learning_rate": 2.5581395348837212e-05,
+      "loss": 0.7247,
+      "step": 110
+    },
+    {
+      "epoch": 0.026853473438412143,
+      "grad_norm": 0.6690512509599283,
+      "learning_rate": 2.674418604651163e-05,
+      "loss": 0.7293,
+      "step": 115
+    },
+    {
+      "epoch": 0.028021015761821366,
+      "grad_norm": 0.6131690372558509,
+      "learning_rate": 2.7906976744186048e-05,
+      "loss": 0.6901,
+      "step": 120
+    },
+    {
+      "epoch": 0.02918855808523059,
+      "grad_norm": 0.6430555073765273,
+      "learning_rate": 2.9069767441860467e-05,
+      "loss": 0.7332,
+      "step": 125
+    },
+    {
+      "epoch": 0.030356100408639813,
+      "grad_norm": 0.6981936370986412,
+      "learning_rate": 3.0232558139534883e-05,
+      "loss": 0.712,
+      "step": 130
+    },
+    {
+      "epoch": 0.03152364273204904,
+      "grad_norm": 0.627939736457028,
+      "learning_rate": 3.13953488372093e-05,
+      "loss": 0.6979,
+      "step": 135
+    },
+    {
+      "epoch": 0.03269118505545826,
+      "grad_norm": 0.7456802010945889,
+      "learning_rate": 3.2558139534883724e-05,
+      "loss": 0.7307,
+      "step": 140
+    },
+    {
+      "epoch": 0.033858727378867484,
+      "grad_norm": 0.6734236441922247,
+      "learning_rate": 3.372093023255814e-05,
+      "loss": 0.7355,
+      "step": 145
+    },
+    {
+      "epoch": 0.03502626970227671,
+      "grad_norm": 0.9148944262002179,
+      "learning_rate": 3.488372093023256e-05,
+      "loss": 0.7157,
+      "step": 150
+    },
+    {
+      "epoch": 0.03619381202568593,
+      "grad_norm": 0.7021510205127233,
+      "learning_rate": 3.604651162790698e-05,
+      "loss": 0.7202,
+      "step": 155
+    },
+    {
+      "epoch": 0.037361354349095155,
+      "grad_norm": 0.9136518568600357,
+      "learning_rate": 3.7209302325581394e-05,
+      "loss": 0.7184,
+      "step": 160
+    },
+    {
+      "epoch": 0.03852889667250438,
+      "grad_norm": 0.7359457550212143,
+      "learning_rate": 3.837209302325582e-05,
+      "loss": 0.7091,
+      "step": 165
+    },
+    {
+      "epoch": 0.0396964389959136,
+      "grad_norm": 0.7309833656506266,
+      "learning_rate": 3.953488372093023e-05,
+      "loss": 0.7154,
+      "step": 170
+    },
+    {
+      "epoch": 0.040863981319322826,
+      "grad_norm": 0.8118267428281774,
+      "learning_rate": 4.0697674418604655e-05,
+      "loss": 0.7397,
+      "step": 175
+    },
+    {
+      "epoch": 0.04203152364273205,
+      "grad_norm": 0.8385733048051603,
+      "learning_rate": 4.186046511627907e-05,
+      "loss": 0.7373,
+      "step": 180
+    },
+    {
+      "epoch": 0.04319906596614127,
+      "grad_norm": 0.7641709928876936,
+      "learning_rate": 4.302325581395349e-05,
+      "loss": 0.7167,
+      "step": 185
+    },
+    {
+      "epoch": 0.044366608289550497,
+      "grad_norm": 0.8375032184986964,
+      "learning_rate": 4.418604651162791e-05,
+      "loss": 0.7157,
+      "step": 190
+    },
+    {
+      "epoch": 0.04553415061295972,
+      "grad_norm": 0.9064930434497387,
+      "learning_rate": 4.5348837209302326e-05,
+      "loss": 0.7246,
+      "step": 195
+    },
+    {
+      "epoch": 0.046701692936368944,
+      "grad_norm": 0.7695848116875716,
+      "learning_rate": 4.651162790697675e-05,
+      "loss": 0.7156,
+      "step": 200
+    },
+    {
+      "epoch": 0.04786923525977817,
+      "grad_norm": 0.733983040623003,
+      "learning_rate": 4.7674418604651164e-05,
+      "loss": 0.7019,
+      "step": 205
+    },
+    {
+      "epoch": 0.04903677758318739,
+      "grad_norm": 0.8310748449815144,
+      "learning_rate": 4.883720930232558e-05,
+      "loss": 0.7053,
+      "step": 210
+    },
+    {
+      "epoch": 0.050204319906596614,
+      "grad_norm": 0.7138124934818736,
+      "learning_rate": 5e-05,
+      "loss": 0.7371,
+      "step": 215
+    },
+    {
+      "epoch": 0.05137186223000584,
+      "grad_norm": 0.7373207758866321,
+      "learning_rate": 4.9999832180125564e-05,
+      "loss": 0.7208,
+      "step": 220
+    },
+    {
+      "epoch": 0.05253940455341506,
+      "grad_norm": 0.61763988667967,
+      "learning_rate": 4.999932872300567e-05,
+      "loss": 0.7209,
+      "step": 225
+    },
+    {
+      "epoch": 0.053706946876824285,
+      "grad_norm": 0.7958325034766942,
+      "learning_rate": 4.9998489636150545e-05,
+      "loss": 0.7197,
+      "step": 230
+    },
+    {
+      "epoch": 0.05487448920023351,
+      "grad_norm": 0.7579619991421102,
+      "learning_rate": 4.999731493207714e-05,
+      "loss": 0.7138,
+      "step": 235
+    },
+    {
+      "epoch": 0.05604203152364273,
+      "grad_norm": 0.7625510966416409,
+      "learning_rate": 4.999580462830887e-05,
+      "loss": 0.709,
+      "step": 240
+    },
+    {
+      "epoch": 0.057209573847051956,
+      "grad_norm": 0.5824690773593582,
+      "learning_rate": 4.999395874737543e-05,
+      "loss": 0.7058,
+      "step": 245
+    },
+    {
+      "epoch": 0.05837711617046118,
+      "grad_norm": 0.6333769528651304,
+      "learning_rate": 4.9991777316812435e-05,
+      "loss": 0.7122,
+      "step": 250
+    },
+    {
+      "epoch": 0.0595446584938704,
+      "grad_norm": 0.6865471951313481,
+      "learning_rate": 4.998926036916096e-05,
+      "loss": 0.7074,
+      "step": 255
+    },
+    {
+      "epoch": 0.06071220081727963,
+      "grad_norm": 0.6491742841354096,
+      "learning_rate": 4.9986407941967145e-05,
+      "loss": 0.7047,
+      "step": 260
+    },
+    {
+      "epoch": 0.06187974314068885,
+      "grad_norm": 0.6711749887007579,
+      "learning_rate": 4.998322007778156e-05,
+      "loss": 0.7147,
+      "step": 265
+    },
+    {
+      "epoch": 0.06304728546409807,
+      "grad_norm": 0.8465861812583904,
+      "learning_rate": 4.9979696824158613e-05,
+      "loss": 0.6877,
+      "step": 270
+    },
+    {
+      "epoch": 0.0642148277875073,
+      "grad_norm": 0.71297093375199,
+      "learning_rate": 4.997583823365579e-05,
+      "loss": 0.6992,
+      "step": 275
+    },
+    {
+      "epoch": 0.06538237011091652,
+      "grad_norm": 0.6545203939783993,
+      "learning_rate": 4.997164436383294e-05,
+      "loss": 0.7078,
+      "step": 280
+    },
+    {
+      "epoch": 0.06654991243432574,
+      "grad_norm": 0.6519819497594413,
+      "learning_rate": 4.996711527725137e-05,
+      "loss": 0.7107,
+      "step": 285
+    },
+    {
+      "epoch": 0.06771745475773497,
+      "grad_norm": 0.6993623377981596,
+      "learning_rate": 4.9962251041472936e-05,
+      "loss": 0.7208,
+      "step": 290
+    },
+    {
+      "epoch": 0.06888499708114419,
+      "grad_norm": 0.648613406568837,
+      "learning_rate": 4.9957051729058994e-05,
+      "loss": 0.6879,
+      "step": 295
+    },
+    {
+      "epoch": 0.07005253940455342,
+      "grad_norm": 0.7127685542022537,
+      "learning_rate": 4.9951517417569365e-05,
+      "loss": 0.7166,
+      "step": 300
+    },
+    {
+      "epoch": 0.07122008172796264,
+      "grad_norm": 0.706094493069307,
+      "learning_rate": 4.994564818956116e-05,
+      "loss": 0.6857,
+      "step": 305
+    },
+    {
+      "epoch": 0.07238762405137186,
+      "grad_norm": 0.6178576381979423,
+      "learning_rate": 4.993944413258755e-05,
+      "loss": 0.7068,
+      "step": 310
+    },
+    {
+      "epoch": 0.07355516637478109,
+      "grad_norm": 0.5982448161453142,
+      "learning_rate": 4.993290533919644e-05,
+      "loss": 0.7154,
+      "step": 315
+    },
+    {
+      "epoch": 0.07472270869819031,
+      "grad_norm": 0.6270320614638317,
+      "learning_rate": 4.9926031906929114e-05,
+      "loss": 0.704,
+      "step": 320
+    },
+    {
+      "epoch": 0.07589025102159953,
+      "grad_norm": 0.6197640246960802,
+      "learning_rate": 4.9918823938318796e-05,
+      "loss": 0.7016,
+      "step": 325
+    },
+    {
+      "epoch": 0.07705779334500876,
+      "grad_norm": 0.7359756470563305,
+      "learning_rate": 4.991128154088906e-05,
+      "loss": 0.7013,
+      "step": 330
+    },
+    {
+      "epoch": 0.07822533566841798,
+      "grad_norm": 0.6720218756243642,
+      "learning_rate": 4.990340482715228e-05,
+      "loss": 0.6996,
+      "step": 335
+    },
+    {
+      "epoch": 0.0793928779918272,
+      "grad_norm": 0.5925386151271446,
+      "learning_rate": 4.989519391460794e-05,
+      "loss": 0.7207,
+      "step": 340
+    },
+    {
+      "epoch": 0.08056042031523643,
+      "grad_norm": 0.5902543605954019,
+      "learning_rate": 4.988664892574086e-05,
+      "loss": 0.7067,
+      "step": 345
+    },
+    {
+      "epoch": 0.08172796263864565,
+      "grad_norm": 0.6222759073596537,
+      "learning_rate": 4.987776998801939e-05,
+      "loss": 0.6951,
+      "step": 350
+    },
+    {
+      "epoch": 0.08289550496205488,
+      "grad_norm": 0.6101124606146439,
+      "learning_rate": 4.986855723389351e-05,
+      "loss": 0.6834,
+      "step": 355
+    },
+    {
+      "epoch": 0.0840630472854641,
+      "grad_norm": 0.5521729567786234,
+      "learning_rate": 4.9859010800792855e-05,
+      "loss": 0.7058,
+      "step": 360
+    },
+    {
+      "epoch": 0.08523058960887332,
+      "grad_norm": 0.5997828983186825,
+      "learning_rate": 4.984913083112462e-05,
+      "loss": 0.7055,
+      "step": 365
+    },
+    {
+      "epoch": 0.08639813193228255,
+      "grad_norm": 0.685636903832874,
+      "learning_rate": 4.9838917472271495e-05,
+      "loss": 0.697,
+      "step": 370
+    },
+    {
+      "epoch": 0.08756567425569177,
+      "grad_norm": 0.5899607924142912,
+      "learning_rate": 4.982837087658947e-05,
+      "loss": 0.6891,
+      "step": 375
+    },
+    {
+      "epoch": 0.08873321657910099,
+      "grad_norm": 0.5832999949555281,
+      "learning_rate": 4.981749120140547e-05,
+      "loss": 0.6916,
+      "step": 380
+    },
+    {
+      "epoch": 0.08990075890251022,
+      "grad_norm": 0.5501342674246906,
+      "learning_rate": 4.980627860901516e-05,
+      "loss": 0.692,
+      "step": 385
+    },
+    {
+      "epoch": 0.09106830122591944,
+      "grad_norm": 0.6156319618368346,
+      "learning_rate": 4.9794733266680364e-05,
+      "loss": 0.6835,
+      "step": 390
+    },
+    {
+      "epoch": 0.09223584354932866,
+      "grad_norm": 0.6648904756503935,
+      "learning_rate": 4.978285534662669e-05,
+      "loss": 0.7115,
+      "step": 395
+    },
+    {
+      "epoch": 0.09340338587273789,
+      "grad_norm": 0.750574007629335,
+      "learning_rate": 4.977064502604089e-05,
+      "loss": 0.6874,
+      "step": 400
+    },
+    {
+      "epoch": 0.09457092819614711,
+      "grad_norm": 0.6282717605845722,
+      "learning_rate": 4.975810248706824e-05,
+      "loss": 0.6929,
+      "step": 405
+    },
+    {
+      "epoch": 0.09573847051955633,
+      "grad_norm": 0.559968165186596,
+      "learning_rate": 4.974522791680985e-05,
+      "loss": 0.6937,
+      "step": 410
+    },
+    {
+      "epoch": 0.09690601284296556,
+      "grad_norm": 0.5712581921517633,
+      "learning_rate": 4.9732021507319814e-05,
+      "loss": 0.6763,
+      "step": 415
+    },
+    {
+      "epoch": 0.09807355516637478,
+      "grad_norm": 0.5972119499481222,
+      "learning_rate": 4.971848345560243e-05,
+      "loss": 0.6847,
+      "step": 420
+    },
+    {
+      "epoch": 0.099241097489784,
+      "grad_norm": 0.5576942154957365,
+      "learning_rate": 4.970461396360914e-05,
+      "loss": 0.6831,
+      "step": 425
+    },
+    {
+      "epoch": 0.10040863981319323,
+      "grad_norm": 0.5712258586876164,
+      "learning_rate": 4.969041323823565e-05,
+      "loss": 0.6986,
+      "step": 430
+    },
+    {
+      "epoch": 0.10157618213660245,
+      "grad_norm": 0.5660135736483292,
+      "learning_rate": 4.9675881491318735e-05,
+      "loss": 0.6938,
+      "step": 435
+    },
+    {
+      "epoch": 0.10274372446001168,
+      "grad_norm": 0.6202667863714418,
+      "learning_rate": 4.966101893963317e-05,
+      "loss": 0.6741,
+      "step": 440
+    },
+    {
+      "epoch": 0.1039112667834209,
+      "grad_norm": 0.6530868448205541,
+      "learning_rate": 4.9645825804888416e-05,
+      "loss": 0.6818,
+      "step": 445
+    },
+    {
+      "epoch": 0.10507880910683012,
+      "grad_norm": 0.6777514742609368,
+      "learning_rate": 4.9630302313725354e-05,
+      "loss": 0.6804,
+      "step": 450
+    },
+    {
+      "epoch": 0.10624635143023935,
+      "grad_norm": 0.6571600249956766,
+      "learning_rate": 4.96144486977129e-05,
+      "loss": 0.6892,
+      "step": 455
+    },
+    {
+      "epoch": 0.10741389375364857,
+      "grad_norm": 0.652600788102656,
+      "learning_rate": 4.959826519334456e-05,
+      "loss": 0.6746,
+      "step": 460
+    },
+    {
+      "epoch": 0.1085814360770578,
+      "grad_norm": 0.6141340591754272,
+      "learning_rate": 4.958175204203488e-05,
+      "loss": 0.6802,
+      "step": 465
+    },
+    {
+      "epoch": 0.10974897840046702,
+      "grad_norm": 0.6455039458573127,
+      "learning_rate": 4.9564909490115864e-05,
+      "loss": 0.6789,
+      "step": 470
+    },
+    {
+      "epoch": 0.11091652072387624,
+      "grad_norm": 0.5658472639112537,
+      "learning_rate": 4.9547737788833274e-05,
+      "loss": 0.6946,
+      "step": 475
+    },
+    {
+      "epoch": 0.11208406304728546,
+      "grad_norm": 0.5656645798879412,
+      "learning_rate": 4.953023719434292e-05,
+      "loss": 0.6801,
+      "step": 480
+    },
+    {
+      "epoch": 0.11325160537069469,
+      "grad_norm": 0.6339701712723772,
+      "learning_rate": 4.95124079677068e-05,
+      "loss": 0.684,
+      "step": 485
+    },
+    {
+      "epoch": 0.11441914769410391,
+      "grad_norm": 0.549060506810349,
+      "learning_rate": 4.9494250374889235e-05,
+      "loss": 0.6678,
+      "step": 490
+    },
+    {
+      "epoch": 0.11558669001751314,
+      "grad_norm": 0.6950518638333417,
+      "learning_rate": 4.947576468675289e-05,
+      "loss": 0.6692,
+      "step": 495
+    },
+    {
+      "epoch": 0.11675423234092236,
+      "grad_norm": 0.5221510459831169,
+      "learning_rate": 4.9456951179054725e-05,
+      "loss": 0.6753,
+      "step": 500
+    },
+    {
+      "epoch": 0.11792177466433158,
+      "grad_norm": 0.5546382592565675,
+      "learning_rate": 4.94378101324419e-05,
+      "loss": 0.6854,
+      "step": 505
+    },
+    {
+      "epoch": 0.1190893169877408,
+      "grad_norm": 0.5713482480639444,
+      "learning_rate": 4.9418341832447575e-05,
+      "loss": 0.6807,
+      "step": 510
+    },
+    {
+      "epoch": 0.12025685931115003,
+      "grad_norm": 0.5414388299943901,
+      "learning_rate": 4.939854656948665e-05,
+      "loss": 0.673,
+      "step": 515
+    },
+    {
+      "epoch": 0.12142440163455925,
+      "grad_norm": 0.594323665364383,
+      "learning_rate": 4.937842463885143e-05,
+      "loss": 0.6883,
+      "step": 520
+    },
+    {
+      "epoch": 0.12259194395796848,
+      "grad_norm": 0.553478548766972,
+      "learning_rate": 4.935797634070726e-05,
+      "loss": 0.7048,
+      "step": 525
+    },
+    {
+      "epoch": 0.1237594862813777,
+      "grad_norm": 0.5605780800682596,
+      "learning_rate": 4.933720198008798e-05,
+      "loss": 0.6877,
+      "step": 530
+    },
+    {
+      "epoch": 0.12492702860478692,
+      "grad_norm": 0.5870470112417832,
+      "learning_rate": 4.9316101866891414e-05,
+      "loss": 0.6835,
+      "step": 535
+    },
+    {
+      "epoch": 0.12609457092819615,
+      "grad_norm": 0.6417809835924543,
+      "learning_rate": 4.9294676315874756e-05,
+      "loss": 0.67,
+      "step": 540
+    },
+    {
+      "epoch": 0.12726211325160536,
+      "grad_norm": 0.5733147322116063,
+      "learning_rate": 4.927292564664985e-05,
+      "loss": 0.6632,
+      "step": 545
+    },
+    {
+      "epoch": 0.1284296555750146,
+      "grad_norm": 0.5862624837218928,
+      "learning_rate": 4.925085018367844e-05,
+      "loss": 0.6708,
+      "step": 550
+    },
+    {
+      "epoch": 0.1295971978984238,
+      "grad_norm": 0.5974113571603092,
+      "learning_rate": 4.922845025626732e-05,
+      "loss": 0.6835,
+      "step": 555
+    },
+    {
+      "epoch": 0.13076474022183304,
+      "grad_norm": 0.6542521321417366,
+      "learning_rate": 4.9205726198563415e-05,
+      "loss": 0.68,
+      "step": 560
+    },
+    {
+      "epoch": 0.13193228254524225,
+      "grad_norm": 0.599831408114888,
+      "learning_rate": 4.918267834954882e-05,
+      "loss": 0.6873,
+      "step": 565
+    },
+    {
+      "epoch": 0.1330998248686515,
+      "grad_norm": 0.7202647495555032,
+      "learning_rate": 4.915930705303572e-05,
+      "loss": 0.6896,
+      "step": 570
+    },
+    {
+      "epoch": 0.1342673671920607,
+      "grad_norm": 0.5882125532921776,
+      "learning_rate": 4.913561265766129e-05,
+      "loss": 0.6673,
+      "step": 575
+    },
+    {
+      "epoch": 0.13543490951546994,
+      "grad_norm": 0.6578196777426495,
+      "learning_rate": 4.911159551688244e-05,
+      "loss": 0.6785,
+      "step": 580
+    },
+    {
+      "epoch": 0.13660245183887915,
+      "grad_norm": 0.594562463032614,
+      "learning_rate": 4.908725598897061e-05,
+      "loss": 0.6732,
+      "step": 585
+    },
+    {
+      "epoch": 0.13776999416228838,
+      "grad_norm": 0.6040563629336885,
+      "learning_rate": 4.906259443700638e-05,
+      "loss": 0.6693,
+      "step": 590
+    },
+    {
+      "epoch": 0.1389375364856976,
+      "grad_norm": 0.6461468680651782,
+      "learning_rate": 4.9037611228874045e-05,
+      "loss": 0.6661,
+      "step": 595
+    },
+    {
+      "epoch": 0.14010507880910683,
+      "grad_norm": 0.5729590002869202,
+      "learning_rate": 4.90123067372562e-05,
+      "loss": 0.6774,
+      "step": 600
+    },
+    {
+      "epoch": 0.14127262113251604,
+      "grad_norm": 0.5903250131921434,
+      "learning_rate": 4.8986681339628077e-05,
+      "loss": 0.6649,
+      "step": 605
+    },
+    {
+      "epoch": 0.14244016345592528,
+      "grad_norm": 0.5740779177974161,
+      "learning_rate": 4.8960735418252004e-05,
+      "loss": 0.6803,
+      "step": 610
+    },
+    {
+      "epoch": 0.1436077057793345,
+      "grad_norm": 0.5388317280553481,
+      "learning_rate": 4.893446936017162e-05,
+      "loss": 0.6756,
+      "step": 615
+    },
+    {
+      "epoch": 0.14477524810274373,
+      "grad_norm": 0.5571144991003452,
+      "learning_rate": 4.890788355720621e-05,
+      "loss": 0.679,
+      "step": 620
+    },
+    {
+      "epoch": 0.14594279042615294,
+      "grad_norm": 0.6218993167281257,
+      "learning_rate": 4.888097840594475e-05,
+      "loss": 0.6689,
+      "step": 625
+    },
+    {
+      "epoch": 0.14711033274956217,
+      "grad_norm": 0.5868172370833116,
+      "learning_rate": 4.8853754307740043e-05,
+      "loss": 0.6743,
+      "step": 630
+    },
+    {
+      "epoch": 0.14827787507297138,
+      "grad_norm": 0.5191378123088599,
+      "learning_rate": 4.8826211668702744e-05,
+      "loss": 0.6679,
+      "step": 635
+    },
+    {
+      "epoch": 0.14944541739638062,
+      "grad_norm": 0.5092530035164934,
+      "learning_rate": 4.879835089969526e-05,
+      "loss": 0.6751,
+      "step": 640
+    },
+    {
+      "epoch": 0.15061295971978983,
+      "grad_norm": 0.6504883049198265,
+      "learning_rate": 4.877017241632567e-05,
+      "loss": 0.6755,
+      "step": 645
+    },
+    {
+      "epoch": 0.15178050204319907,
+      "grad_norm": 0.5577825579467539,
+      "learning_rate": 4.874167663894148e-05,
+      "loss": 0.6553,
+      "step": 650
+    },
+    {
+      "epoch": 0.15294804436660828,
+      "grad_norm": 0.5546035491522867,
+      "learning_rate": 4.871286399262338e-05,
+      "loss": 0.6729,
+      "step": 655
+    },
+    {
+      "epoch": 0.15411558669001751,
+      "grad_norm": 0.5433685032657724,
+      "learning_rate": 4.868373490717891e-05,
+      "loss": 0.6693,
+      "step": 660
+    },
+    {
+      "epoch": 0.15528312901342672,
+      "grad_norm": 0.552710617301973,
+      "learning_rate": 4.8654289817136014e-05,
+      "loss": 0.6649,
+      "step": 665
+    },
+    {
+      "epoch": 0.15645067133683596,
+      "grad_norm": 0.5507433796337713,
+      "learning_rate": 4.8624529161736585e-05,
+      "loss": 0.666,
+      "step": 670
+    },
+    {
+      "epoch": 0.15761821366024517,
+      "grad_norm": 0.5120857427927407,
+      "learning_rate": 4.859445338492991e-05,
+      "loss": 0.6742,
+      "step": 675
+    },
+    {
+      "epoch": 0.1587857559836544,
+      "grad_norm": 0.6178433514220235,
+      "learning_rate": 4.856406293536604e-05,
+      "loss": 0.6742,
+      "step": 680
+    },
+    {
+      "epoch": 0.15995329830706362,
+      "grad_norm": 0.5621670673190233,
+      "learning_rate": 4.8533358266389114e-05,
+      "loss": 0.6806,
+      "step": 685
+    },
+    {
+      "epoch": 0.16112084063047286,
+      "grad_norm": 0.530079236665643,
+      "learning_rate": 4.8502339836030557e-05,
+      "loss": 0.6753,
+      "step": 690
+    },
+    {
+      "epoch": 0.16228838295388207,
+      "grad_norm": 0.5715227897366831,
+      "learning_rate": 4.847100810700228e-05,
+      "loss": 0.6826,
+      "step": 695
+    },
+    {
+      "epoch": 0.1634559252772913,
+      "grad_norm": 0.6562252266149332,
+      "learning_rate": 4.843936354668981e-05,
+      "loss": 0.6719,
+      "step": 700
+    },
+    {
+      "epoch": 0.1646234676007005,
+      "grad_norm": 0.5778430160807131,
+      "learning_rate": 4.8407406627145223e-05,
+      "loss": 0.6618,
+      "step": 705
+    },
+    {
+      "epoch": 0.16579100992410975,
+      "grad_norm": 0.5939646842898266,
+      "learning_rate": 4.837513782508018e-05,
+      "loss": 0.6613,
+      "step": 710
+    },
+    {
+      "epoch": 0.16695855224751896,
+      "grad_norm": 0.5521562356047192,
+      "learning_rate": 4.834255762185882e-05,
+      "loss": 0.6657,
+      "step": 715
+    },
+    {
+      "epoch": 0.1681260945709282,
+      "grad_norm": 0.5506464069499719,
+      "learning_rate": 4.830966650349051e-05,
+      "loss": 0.6498,
+      "step": 720
+    },
+    {
+      "epoch": 0.1692936368943374,
+      "grad_norm": 0.504907588505334,
+      "learning_rate": 4.827646496062267e-05,
+      "loss": 0.6711,
+      "step": 725
+    },
+    {
+      "epoch": 0.17046117921774664,
+      "grad_norm": 0.49865656610170683,
+      "learning_rate": 4.8242953488533405e-05,
+      "loss": 0.6748,
+      "step": 730
+    },
+    {
+      "epoch": 0.17162872154115585,
+      "grad_norm": 0.566907250566616,
+      "learning_rate": 4.820913258712415e-05,
+      "loss": 0.6645,
+      "step": 735
+    },
+    {
+      "epoch": 0.1727962638645651,
+      "grad_norm": 0.5840434147062887,
+      "learning_rate": 4.817500276091218e-05,
+      "loss": 0.6711,
+      "step": 740
+    },
+    {
+      "epoch": 0.1739638061879743,
+      "grad_norm": 0.5707207892181994,
+      "learning_rate": 4.8140564519023104e-05,
+      "loss": 0.6582,
+      "step": 745
+    },
+    {
+      "epoch": 0.17513134851138354,
+      "grad_norm": 0.6038737922024617,
+      "learning_rate": 4.810581837518329e-05,
+      "loss": 0.6598,
+      "step": 750
+    },
+    {
+      "epoch": 0.17629889083479275,
+      "grad_norm": 0.5400957574912046,
+      "learning_rate": 4.807076484771214e-05,
+      "loss": 0.6585,
+      "step": 755
+    },
+    {
+      "epoch": 0.17746643315820199,
+      "grad_norm": 0.6206248349540344,
+      "learning_rate": 4.803540445951443e-05,
+      "loss": 0.6611,
+      "step": 760
+    },
+    {
+      "epoch": 0.1786339754816112,
+      "grad_norm": 0.5384357416640436,
+      "learning_rate": 4.7999737738072454e-05,
+      "loss": 0.6656,
+      "step": 765
+    },
+    {
+      "epoch": 0.17980151780502043,
+      "grad_norm": 0.5614613345381276,
+      "learning_rate": 4.796376521543818e-05,
+      "loss": 0.6536,
+      "step": 770
+    },
+    {
+      "epoch": 0.18096906012842964,
+      "grad_norm": 0.5403930821292826,
+      "learning_rate": 4.792748742822534e-05,
+      "loss": 0.6653,
+      "step": 775
+    },
+    {
+      "epoch": 0.18213660245183888,
+      "grad_norm": 0.5469852898591198,
+      "learning_rate": 4.789090491760136e-05,
+      "loss": 0.6458,
+      "step": 780
+    },
+    {
+      "epoch": 0.1833041447752481,
+      "grad_norm": 0.5271070176741515,
+      "learning_rate": 4.785401822927933e-05,
+      "loss": 0.663,
+      "step": 785
+    },
+    {
+      "epoch": 0.18447168709865733,
+      "grad_norm": 0.5200290447627971,
+      "learning_rate": 4.781682791350988e-05,
+      "loss": 0.6619,
+      "step": 790
+    },
+    {
+      "epoch": 0.18563922942206654,
+      "grad_norm": 0.5732678229356464,
+      "learning_rate": 4.777933452507292e-05,
+      "loss": 0.6547,
+      "step": 795
+    },
+    {
+      "epoch": 0.18680677174547577,
+      "grad_norm": 0.5354575627455573,
+      "learning_rate": 4.774153862326941e-05,
+      "loss": 0.6826,
+      "step": 800
+    },
+    {
+      "epoch": 0.18797431406888498,
+      "grad_norm": 0.6036615103150955,
+      "learning_rate": 4.770344077191298e-05,
+      "loss": 0.6417,
+      "step": 805
+    },
+    {
+      "epoch": 0.18914185639229422,
+      "grad_norm": 0.6247836327871729,
+      "learning_rate": 4.7665041539321575e-05,
+      "loss": 0.6691,
+      "step": 810
+    },
+    {
+      "epoch": 0.19030939871570343,
+      "grad_norm": 0.6581916329571659,
+      "learning_rate": 4.762634149830891e-05,
+      "loss": 0.6655,
+      "step": 815
+    },
+    {
+      "epoch": 0.19147694103911267,
+      "grad_norm": 0.5713689891793854,
+      "learning_rate": 4.758734122617596e-05,
+      "loss": 0.6851,
+      "step": 820
+    },
+    {
+      "epoch": 0.19264448336252188,
+      "grad_norm": 0.6537243443496534,
+      "learning_rate": 4.7548041304702354e-05,
+      "loss": 0.671,
+      "step": 825
+    },
+    {
+      "epoch": 0.19381202568593112,
+      "grad_norm": 0.5524240796431374,
+      "learning_rate": 4.750844232013767e-05,
+      "loss": 0.6546,
+      "step": 830
+    },
+    {
+      "epoch": 0.19497956800934033,
+      "grad_norm": 0.585496623761136,
+      "learning_rate": 4.746854486319274e-05,
+      "loss": 0.6843,
+      "step": 835
+    },
+    {
+      "epoch": 0.19614711033274956,
+      "grad_norm": 0.5316480721204311,
+      "learning_rate": 4.742834952903077e-05,
+      "loss": 0.6576,
+      "step": 840
+    },
+    {
+      "epoch": 0.19731465265615877,
+      "grad_norm": 0.5352165728165146,
+      "learning_rate": 4.738785691725851e-05,
+      "loss": 0.6709,
+      "step": 845
+    },
+    {
+      "epoch": 0.198482194979568,
+      "grad_norm": 0.5301735920045355,
+      "learning_rate": 4.73470676319173e-05,
+      "loss": 0.6784,
+      "step": 850
+    },
+    {
+      "epoch": 0.19964973730297722,
+      "grad_norm": 0.5991107299004165,
+      "learning_rate": 4.7305982281474044e-05,
+      "loss": 0.6555,
+      "step": 855
+    },
+    {
+      "epoch": 0.20081727962638646,
+      "grad_norm": 0.5289780942574344,
+      "learning_rate": 4.726460147881215e-05,
+      "loss": 0.6643,
+      "step": 860
+    },
+    {
+      "epoch": 0.20198482194979567,
+      "grad_norm": 0.5691639668751877,
+      "learning_rate": 4.7222925841222396e-05,
+      "loss": 0.6462,
+      "step": 865
+    },
+    {
+      "epoch": 0.2031523642732049,
+      "grad_norm": 0.5418269405653413,
+      "learning_rate": 4.7180955990393685e-05,
+      "loss": 0.6531,
+      "step": 870
+    },
+    {
+      "epoch": 0.20431990659661411,
+      "grad_norm": 0.5486670082318458,
+      "learning_rate": 4.71386925524038e-05,
+      "loss": 0.6488,
+      "step": 875
+    },
+    {
+      "epoch": 0.20548744892002335,
+      "grad_norm": 0.5276040254806676,
+      "learning_rate": 4.709613615771008e-05,
+      "loss": 0.6667,
+      "step": 880
+    },
+    {
+      "epoch": 0.20665499124343256,
+      "grad_norm": 0.5407981342058974,
+      "learning_rate": 4.705328744113994e-05,
+      "loss": 0.6448,
+      "step": 885
+    },
+    {
+      "epoch": 0.2078225335668418,
+      "grad_norm": 0.5643190048970039,
+      "learning_rate": 4.701014704188153e-05,
+      "loss": 0.6643,
+      "step": 890
+    },
+    {
+      "epoch": 0.208990075890251,
+      "grad_norm": 0.5155536107792693,
+      "learning_rate": 4.696671560347405e-05,
+      "loss": 0.6445,
+      "step": 895
+    },
+    {
+      "epoch": 0.21015761821366025,
+      "grad_norm": 0.5253675582885029,
+      "learning_rate": 4.692299377379829e-05,
+      "loss": 0.6468,
+      "step": 900
+    },
+    {
+      "epoch": 0.21015761821366025,
+      "step": 900,
+      "total_flos": 102527278252032.0,
+      "train_loss": 0.0,
+      "train_runtime": 1.3976,
+      "train_samples_per_second": 5132.403,
+      "train_steps_per_second": 80.138
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 112,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 102527278252032.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:429263d37784054b77ebba189c20d197b802c9bf0a1d600509e16df3e11ad628
 size 7480

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d64ff30d93ba531ed5601517e02a5860b10e71867b0561691a359d344b52964
 size 7480