Training in progress, step 400, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/config.json +31 -0
last-checkpoint/generation_config.json +8 -0
last-checkpoint/model.safetensors +3 -0
last-checkpoint/optimizer.pt +3 -0
last-checkpoint/rng_state.pth +3 -0
last-checkpoint/scheduler.pt +3 -0
last-checkpoint/special_tokens_map.json +30 -0
last-checkpoint/tokenizer.json +0 -0
last-checkpoint/tokenizer.model +3 -0
last-checkpoint/tokenizer_config.json +43 -0
last-checkpoint/trainer_state.json +337 -0
last-checkpoint/training_args.bin +3 -0

last-checkpoint/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "trl-internal-testing/tiny-random-LlamaForCausalLM",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "head_dim": 4,
+  "hidden_act": "silu",
+  "hidden_size": 16,
+  "initializer_range": 0.02,
+  "intermediate_size": 64,
+  "max_position_embeddings": 2048,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "pad_token_id": -1,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.48.1",
+  "use_cache": false,
+  "vocab_size": 32000
+}

last-checkpoint/generation_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "do_sample": true,
+  "eos_token_id": 1,
+  "pad_token_id": 2,
+  "transformers_version": "4.48.1"
+}

last-checkpoint/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:deb58d744f23dab866f30a5eccd5ee89f1d373670d7096630c72465e9c1419c7
+size 2066752

last-checkpoint/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4536e46c2e524839bf6773cd804ee2ca5954f3943ddd8c412e021dcaea989dc4
+size 2162798

last-checkpoint/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9feae33b2fec0a6229240e7adaee6ecc8f5cfdf1a8bd0e827b1d8a241424e3c0
+size 14244

last-checkpoint/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a673aaf85c0fe6b6c29cb8f3e7dbd829eef637110e4ad9a775f3fcf001c92591
+size 1064

last-checkpoint/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

last-checkpoint/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

last-checkpoint/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

last-checkpoint/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": true,
+  "model_max_length": 2048,
+  "pad_token": "<unk>",
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": true,
+  "use_fast": true
+}

last-checkpoint/trainer_state.json ADDED Viewed

	@@ -0,0 +1,337 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.11837821840781296,
+  "eval_steps": 200,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0002959455460195324,
+      "eval_loss": 10.376261711120605,
+      "eval_runtime": 10.819,
+      "eval_samples_per_second": 138.829,
+      "eval_steps_per_second": 34.754,
+      "step": 1
+    },
+    {
+      "epoch": 0.002959455460195324,
+      "grad_norm": 0.298828125,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 10.3804,
+      "step": 10
+    },
+    {
+      "epoch": 0.005918910920390648,
+      "grad_norm": 0.357421875,
+      "learning_rate": 3.2000000000000005e-05,
+      "loss": 10.3767,
+      "step": 20
+    },
+    {
+      "epoch": 0.008878366380585973,
+      "grad_norm": 0.443359375,
+      "learning_rate": 4.8e-05,
+      "loss": 10.3754,
+      "step": 30
+    },
+    {
+      "epoch": 0.011837821840781295,
+      "grad_norm": 0.5625,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 10.3767,
+      "step": 40
+    },
+    {
+      "epoch": 0.01479727730097662,
+      "grad_norm": 1.109375,
+      "learning_rate": 8e-05,
+      "loss": 10.3722,
+      "step": 50
+    },
+    {
+      "epoch": 0.017756732761171946,
+      "grad_norm": 0.294921875,
+      "learning_rate": 9.6e-05,
+      "loss": 10.3804,
+      "step": 60
+    },
+    {
+      "epoch": 0.020716188221367268,
+      "grad_norm": 0.373046875,
+      "learning_rate": 0.00011200000000000001,
+      "loss": 10.3739,
+      "step": 70
+    },
+    {
+      "epoch": 0.02367564368156259,
+      "grad_norm": 0.42578125,
+      "learning_rate": 0.00012800000000000002,
+      "loss": 10.3736,
+      "step": 80
+    },
+    {
+      "epoch": 0.026635099141757917,
+      "grad_norm": 0.70703125,
+      "learning_rate": 0.000144,
+      "loss": 10.3643,
+      "step": 90
+    },
+    {
+      "epoch": 0.02959455460195324,
+      "grad_norm": 1.59375,
+      "learning_rate": 0.00016,
+      "loss": 10.364,
+      "step": 100
+    },
+    {
+      "epoch": 0.032554010062148565,
+      "grad_norm": 0.478515625,
+      "learning_rate": 0.00017600000000000002,
+      "loss": 10.3561,
+      "step": 110
+    },
+    {
+      "epoch": 0.03551346552234389,
+      "grad_norm": 0.73046875,
+      "learning_rate": 0.000192,
+      "loss": 10.3211,
+      "step": 120
+    },
+    {
+      "epoch": 0.03847292098253921,
+      "grad_norm": 0.84375,
+      "learning_rate": 0.0001999978128380225,
+      "loss": 10.2582,
+      "step": 130
+    },
+    {
+      "epoch": 0.041432376442734536,
+      "grad_norm": 0.72265625,
+      "learning_rate": 0.0001999803161162393,
+      "loss": 10.172,
+      "step": 140
+    },
+    {
+      "epoch": 0.04439183190292986,
+      "grad_norm": 1.2734375,
+      "learning_rate": 0.00019994532573409262,
+      "loss": 10.1033,
+      "step": 150
+    },
+    {
+      "epoch": 0.04735128736312518,
+      "grad_norm": 0.43359375,
+      "learning_rate": 0.00019989284781388617,
+      "loss": 10.0041,
+      "step": 160
+    },
+    {
+      "epoch": 0.05031074282332051,
+      "grad_norm": 0.41796875,
+      "learning_rate": 0.00019982289153773646,
+      "loss": 9.9331,
+      "step": 170
+    },
+    {
+      "epoch": 0.053270198283515834,
+      "grad_norm": 0.46875,
+      "learning_rate": 0.00019973546914596623,
+      "loss": 9.8548,
+      "step": 180
+    },
+    {
+      "epoch": 0.05622965374371116,
+      "grad_norm": 0.64453125,
+      "learning_rate": 0.00019963059593496268,
+      "loss": 9.7692,
+      "step": 190
+    },
+    {
+      "epoch": 0.05918910920390648,
+      "grad_norm": 1.140625,
+      "learning_rate": 0.00019950829025450114,
+      "loss": 9.7054,
+      "step": 200
+    },
+    {
+      "epoch": 0.05918910920390648,
+      "eval_loss": 9.686193466186523,
+      "eval_runtime": 20.1405,
+      "eval_samples_per_second": 74.576,
+      "eval_steps_per_second": 18.669,
+      "step": 200
+    },
+    {
+      "epoch": 0.062148564664101805,
+      "grad_norm": 0.46484375,
+      "learning_rate": 0.0001993685735045343,
+      "loss": 9.6486,
+      "step": 210
+    },
+    {
+      "epoch": 0.06510802012429713,
+      "grad_norm": 0.51171875,
+      "learning_rate": 0.0001992114701314478,
+      "loss": 9.6029,
+      "step": 220
+    },
+    {
+      "epoch": 0.06806747558449246,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.000199037007623783,
+      "loss": 9.5554,
+      "step": 230
+    },
+    {
+      "epoch": 0.07102693104468778,
+      "grad_norm": 0.609375,
+      "learning_rate": 0.00019884521650742715,
+      "loss": 9.4941,
+      "step": 240
+    },
+    {
+      "epoch": 0.0739863865048831,
+      "grad_norm": 1.78125,
+      "learning_rate": 0.00019863613034027224,
+      "loss": 9.508,
+      "step": 250
+    },
+    {
+      "epoch": 0.07694584196507842,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.0001984097857063434,
+      "loss": 9.3502,
+      "step": 260
+    },
+    {
+      "epoch": 0.07990529742527375,
+      "grad_norm": 0.55859375,
+      "learning_rate": 0.0001981662222093976,
+      "loss": 9.3473,
+      "step": 270
+    },
+    {
+      "epoch": 0.08286475288546907,
+      "grad_norm": 0.5234375,
+      "learning_rate": 0.00019790548246599447,
+      "loss": 9.2955,
+      "step": 280
+    },
+    {
+      "epoch": 0.0858242083456644,
+      "grad_norm": 0.625,
+      "learning_rate": 0.00019762761209803927,
+      "loss": 9.2712,
+      "step": 290
+    },
+    {
+      "epoch": 0.08878366380585972,
+      "grad_norm": 1.140625,
+      "learning_rate": 0.0001973326597248006,
+      "loss": 9.2969,
+      "step": 300
+    },
+    {
+      "epoch": 0.09174311926605505,
+      "grad_norm": 0.455078125,
+      "learning_rate": 0.00019702067695440332,
+      "loss": 9.1616,
+      "step": 310
+    },
+    {
+      "epoch": 0.09470257472625036,
+      "grad_norm": 0.4609375,
+      "learning_rate": 0.00019669171837479873,
+      "loss": 9.1605,
+      "step": 320
+    },
+    {
+      "epoch": 0.09766203018644569,
+      "grad_norm": 0.474609375,
+      "learning_rate": 0.00019634584154421317,
+      "loss": 9.1402,
+      "step": 330
+    },
+    {
+      "epoch": 0.10062148564664102,
+      "grad_norm": 0.578125,
+      "learning_rate": 0.00019598310698107702,
+      "loss": 9.0839,
+      "step": 340
+    },
+    {
+      "epoch": 0.10358094110683634,
+      "grad_norm": 1.296875,
+      "learning_rate": 0.00019560357815343577,
+      "loss": 9.0709,
+      "step": 350
+    },
+    {
+      "epoch": 0.10654039656703167,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.00019520732146784491,
+      "loss": 9.0372,
+      "step": 360
+    },
+    {
+      "epoch": 0.109499852027227,
+      "grad_norm": 0.76953125,
+      "learning_rate": 0.0001947944062577507,
+      "loss": 9.0209,
+      "step": 370
+    },
+    {
+      "epoch": 0.11245930748742232,
+      "grad_norm": 0.5390625,
+      "learning_rate": 0.00019436490477135878,
+      "loss": 8.9724,
+      "step": 380
+    },
+    {
+      "epoch": 0.11541876294761765,
+      "grad_norm": 0.6171875,
+      "learning_rate": 0.00019391889215899299,
+      "loss": 9.0212,
+      "step": 390
+    },
+    {
+      "epoch": 0.11837821840781296,
+      "grad_norm": 1.421875,
+      "learning_rate": 0.0001934564464599461,
+      "loss": 8.9091,
+      "step": 400
+    },
+    {
+      "epoch": 0.11837821840781296,
+      "eval_loss": 8.961220741271973,
+      "eval_runtime": 13.0065,
+      "eval_samples_per_second": 115.48,
+      "eval_steps_per_second": 28.909,
+      "step": 400
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2500,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 400,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 10254536146944.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

last-checkpoint/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7ab57c26802475df8b559ffa07b1995cecba20856adfc383ddb4700563cc1b9
+size 6904