diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/adapter_model.bin deleted file mode 100644 index 08a0728b6eaf3186c44d7bd2c40488db01567981..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f914b0990947e8af0d7abfcde78e4512e8b5bc4e639b4f9b095212d03a40e7f2 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/optimizer.pt deleted file mode 100644 index 2ce977db4453b083921dfb9f4afa104a58e18ad9..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7d38567d868c29aaa39241ddaf2fde670173ed97a39499456176416b39413be8 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/rng_state.pth deleted file mode 100644 index 306a1b7bc2b56e243b40fa660c7529c7036da7c3..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:898a6948390b5093738fb7b2c97359379a9b2ed8cb45fd6bf5d632aea978f1a3 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/scheduler.pt deleted file mode 100644 index 986d2f0227ba1098063de9525f75d5bcbeecb959..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:23efe624b19e546b381a09029c8ee3430909ac7bb2ed04e2f93a318e70a679ed -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/trainer_state.json deleted file mode 100644 index 0af473c13970832f247946fe78fb602def78b1d4..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/trainer_state.json +++ /dev/null @@ -1,319 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.1018109624953867, - "eval_steps": 500, - "global_step": 1000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.45080101465088e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/adapter_model.bin deleted file mode 100644 index 1c110a85c8e2029f08c7e155998e450db84445f7..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4c6b0831fc2d11fac7dd78434203aefcffbfbc4a3abe53ba55fc1832f3e0a656 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/optimizer.pt deleted file mode 100644 index 74eca78f3f3e0943abc9c3db9093423f546c6f9e..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db850f3db5ddc9d314504fbc945e2f5f38dc9904f7041be2d2b8b2258bdb5292 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/rng_state.pth deleted file mode 100644 index d41957b1eb62fd93088ca9040469b5b216f6f94c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0184552a92e3eea244ca8525508faa432cc2ef3b3619da07af22a8f830deedd7 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/scheduler.pt deleted file mode 100644 index c6a76676282169db6a08aadba91bbb35079cbbae..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39ecefbbaaea1fa61a9e8640351957e5ab0b16bcd5b68bc8acb5a1ae782aa095 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/trainer_state.json deleted file mode 100644 index c68d86eed52bcaa13430d74a9bd6b2e36ab8c436..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/trainer_state.json +++ /dev/null @@ -1,469 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.15271644374308005, - "eval_steps": 500, - "global_step": 1500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 2.177523382889472e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-1500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/adapter_model.bin deleted file mode 100644 index 0074fd46bbd17f0cdc8d803c1816e0c7e6932c44..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf61645713ce00098c5c8cc926e5605bd03b1b707618486b06702b128bbe5eaf -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/optimizer.pt deleted file mode 100644 index 352551aed81021d6b2dd20a797a72371381eabdc..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5050b0af89198d5f8bd3cd59ea636fb02bda929e29ce98fa4ca2b838a733c6e9 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/rng_state.pth deleted file mode 100644 index 5bbef0a67f91b11412694821bb6e0df3cee5cae6..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c95f00681f2f9ca9879f2b6557dcf0d12bcb73e1a027aa0fcdd2f0c5c7828aa9 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/scheduler.pt deleted file mode 100644 index 04641ba2301627441bd0b2afcaa534cb274025c3..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e3a1ccb9a0f957748e69d536bb952c4ccbae393eb0f6c8061358a6633020031a -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/trainer_state.json deleted file mode 100644 index 261a5e7386b01ddecba0eb94022a9b73011547fe..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/trainer_state.json +++ /dev/null @@ -1,619 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.2036219249907734, - "eval_steps": 500, - "global_step": 2000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 2.908887907648512e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/adapter_model.bin deleted file mode 100644 index 74e0e21c6becdc54c5115a929b456d486ee0c421..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:73b3f2adc6f6374cde7c108b031e0e6e67250ba8ce32edff87c930ac2bc8a9b6 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/optimizer.pt deleted file mode 100644 index b63e3be0f3114c4ace2d7553851cb7b734337747..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eef06d7a537ff69f8b6e232bc2afa874c4db9e9d988185bd577ae9f68571c70c -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/rng_state.pth deleted file mode 100644 index 92d0537c9f5f04f0ee7192ddc54be56240d7a38b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c809d18b15e0dc9b961ac3ae55d2aeb3ae3e9cb845b4d706a495fc7c322a705 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/scheduler.pt deleted file mode 100644 index 13696a9277c1ba5cee28fca3ce2f55599587e920..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c679b8c55ca25ae4648665ff45322feeafc7eb782af9af7326fcad2a0a90886 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/trainer_state.json deleted file mode 100644 index 2102ce8d3cad660c8e6e28da39249b23b9b3db33..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/trainer_state.json +++ /dev/null @@ -1,769 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.2545274062384667, - "eval_steps": 500, - "global_step": 2500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 3.630490997971968e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-2500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/adapter_model.bin deleted file mode 100644 index f4d6385f8997aae261c337db5340a34e483bdde4..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1cdf1a21aa36468ca7324817c18dbea0d9c993caad72356c80f67ad27c5adec -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/optimizer.pt deleted file mode 100644 index e17b6ab0ec9deb1dced3273df9b65e3e24dc4509..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc724ccb44ba7121ad649ed5a9f6f1072058b00e8e1ed4b66ffeff3b1cf5b498 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/rng_state.pth deleted file mode 100644 index b2ec306e73359e9b65384ddaaa8b684356f5ddb4..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0bfdf34f826fc1a3a954cb62126716372e4d6d4979b90192aa8b2d7bd0eb6172 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/scheduler.pt deleted file mode 100644 index c46c0a539499e095abfa19eefe19deea49c78776..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca081c200d1f1cff74b76fe2637a142c241d77bc5919ab25bcff1d4293ebf505 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/trainer_state.json deleted file mode 100644 index d78adbdac3c21a9d4f0adb00cfe714c74af9a292..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/trainer_state.json +++ /dev/null @@ -1,919 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.3054328874861601, - "eval_steps": 500, - "global_step": 3000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 4.354600931033088e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/adapter_model.bin deleted file mode 100644 index 84fd58c752c0fa0b03e3ae0bc4e33d43e4161ed3..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:521f84dabed28cf35859ef05146c0171c22391e57dea16b1e4d0556eb54e4e31 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/optimizer.pt deleted file mode 100644 index c31fc448beec5eb6c6cc4edec1c9400092df2924..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5422fc43dd6fd45aa435cf2033829bfbac9d5137da712d2b8184c9879cc062f3 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/rng_state.pth deleted file mode 100644 index 24796397c6a4307c258a5b76cf4a21f96a8cb6d4..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f7b184e8b069b3c1f57297aa4ec5d496c4e561754a274e25a0d6f7951852d0c7 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/scheduler.pt deleted file mode 100644 index 1914cf9ab4dfbbe5668b611a287c8220b18055a2..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:902a812f67d1fbc515b8e9637f7a043a8e94503bc816c37f0218207b22ea285a -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/trainer_state.json deleted file mode 100644 index c917f4140d76c397a089ec27476770d71b634c67..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/trainer_state.json +++ /dev/null @@ -1,1069 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.35633836873385344, - "eval_steps": 500, - "global_step": 3500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 5.077651028865024e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-3500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/adapter_model.bin deleted file mode 100644 index 77f58d2adb109e01a39bf076cffc3ba185da83d7..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5cf53435000571254241641418cd4bd2bac9a4f36af2a222ccbae27d7ef3e430 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/optimizer.pt deleted file mode 100644 index 2bd6b94f71b442b3b4acb31205dff05b0fbb6313..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9dc076c422862a40895fb72e9b93416e8a780addd59322ba4d597c6b84d8962f -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/rng_state.pth deleted file mode 100644 index f036d669beb491f64d24f15592857fe44b356773..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7515d8847b1a8578a14161eb26303e2ba634aaae81a91f4551fb3e44dd22d1ea -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/scheduler.pt deleted file mode 100644 index 6352a64d5272381dc1779864c850114dd4b644a2..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3450b67f439bec17308f634940a5e0d711cbbb085796e3855ea05ffb93334bb2 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/trainer_state.json deleted file mode 100644 index 0dc982a8d4109b87d7a6b50475284800e821c56d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/trainer_state.json +++ /dev/null @@ -1,1219 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.4072438499815468, - "eval_steps": 500, - "global_step": 4000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 5.809883758129152e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/adapter_model.bin deleted file mode 100644 index 0eac92c211e30dc25cb8f8ddd9a248277c432e91..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f0094d219b60f7fb5fdf2ecfa0d9aca34b434c6090128573d75fa1a9ce4563a1 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/optimizer.pt deleted file mode 100644 index 8dea2fdf10fd1d0eb87d8c01166050b461ed4c7b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:10e3931f744e1f62518d9622fce4fb0441a370b5cf102966adb3b734382c907c -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/rng_state.pth deleted file mode 100644 index bfa5d3e98c7264ba238b2080638bddbccf44c863..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d9e7ae3751a6cef29c862fcb0b280485a4151822c0d21225b46d3f32231def1 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/scheduler.pt deleted file mode 100644 index ebc8f44c7f3456235cecaa0baf835fd2e36ce07d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc0959a7604e09dba8b0dc3e5f831ad29dc099bbe169c675e3f54765ae93176d -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/trainer_state.json deleted file mode 100644 index 3c6675b2832cfb9b281a0a853286db8cc9e05579..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/trainer_state.json +++ /dev/null @@ -1,1369 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.4581493312292401, - "eval_steps": 500, - "global_step": 4500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 6.532980785934336e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-4500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/adapter_model.bin deleted file mode 100644 index 5a16d353f75c703e975fd499c2999ea10e6db2d3..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:758db6965d7cc94f8164b07a508624ceb954c5f70bff134632233d349708b7f1 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/optimizer.pt deleted file mode 100644 index c72202c4af92a0f6a439a4629590acae7140529e..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4a9585a0cded13506ac64e1828f36a93681c01d060a54bb78ff1aac5a1660abe -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/rng_state.pth deleted file mode 100644 index dd5f9816eeffda8a929e19a1a83edba5e45c2964..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5549a35083691287e1c18ada3a9f93ac346f7dbfb4c1f4387fecc894e853a40b -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/scheduler.pt deleted file mode 100644 index 0258d1ce4d5e8a81ebca8ffdc08f202e0690aba6..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce8499ea02df7abc272f3f39fb57237501a1ddb1d219a75a3265123c51c5b43a -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/trainer_state.json deleted file mode 100644 index 1404dd1b5cdedb483e316e86ca2f2ea846914dd9..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/trainer_state.json +++ /dev/null @@ -1,169 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.05090548124769335, - "eval_steps": 500, - "global_step": 500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 7299996447037440.0, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/adapter_model.bin deleted file mode 100644 index ac98b7cee3dfe68aa73f0a2b485d02781b381bac..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a73a7fc92c35dff220052e49c2809030d0cc620c5dfa9c4c80e712362fa37ce2 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/optimizer.pt deleted file mode 100644 index c15dcb68388f0be1fd51f8ba0faa0f2c8b489272..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:921a78e77fb9368caec22080b7945d620203da0ef17771edf6825fa5325c2fa0 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/rng_state.pth deleted file mode 100644 index 60ba11b7778c538bc7f33637477ce6acebe4bcb4..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c3807fb93b08f19ee87ee2bfaa6f53869e427e3d5381468b8ee5e6c9ae2eecf8 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/scheduler.pt deleted file mode 100644 index 4a7fe4426b85674e3ca83da555969854aa8826e9..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a07ab4dfb22b167e93c6079174b7cfa85eb33d94d9a69aff36f02ef3f99196fc -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/trainer_state.json deleted file mode 100644 index b23195b1851ac0b052a885ecfeba9a5459a63b65..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/trainer_state.json +++ /dev/null @@ -1,1519 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.5090548124769334, - "eval_steps": 500, - "global_step": 5000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 7.25619513867264e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/adapter_model.bin deleted file mode 100644 index 565c5ae4dd5b2cc39e4a46b89443e938aabc1eeb..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f2a0783bd7bf38e76a3e887ca56405d620b1681c84a241d8d6960f41de2c464 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/optimizer.pt deleted file mode 100644 index b1ec54ed49c858ee8618a4335784eaf9f42682b7..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9c37f98e901fc98e479bc83ea9f306fa20557c8ede53d0ce1a1418f244b5a2a -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/rng_state.pth deleted file mode 100644 index 1b6d05d760dc7620544843953c72e517728ea994..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:49db06522a51920f74b279ad65d2c090421df889ed77db1d4cf20fae007454ab -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/scheduler.pt deleted file mode 100644 index 28497c2a23c1717c6f71eec5c300a8963f5c90fc..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2394d8d1f74c5daf594ae35df22c5c8e1c968cb3b29813edb63bd0cdbdddb67 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/trainer_state.json deleted file mode 100644 index e0e1143f17fd93081f7e938083bc0621ebdc43b9..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/trainer_state.json +++ /dev/null @@ -1,1669 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.5599602937246267, - "eval_steps": 500, - "global_step": 5500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 7.978881529211904e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-5500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/adapter_model.bin deleted file mode 100644 index 30f3cc867c557c283e1b309b7da7f52bee30fc99..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c53d4beb319a46354cc1af60a5dbbe6eeda8ff9bf7466e5eb4a6ed69cc8b185 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/optimizer.pt deleted file mode 100644 index 3b90cfec114130842dd213c20c6685cb406e2bee..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a01e89a6e5475f5d92a22ede1e94db2496e6ce0c7699b37a24f58ca39603a6c -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/rng_state.pth deleted file mode 100644 index afca1002b97d31944b8188084c57f4f4af2fb1fb..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de3e2ec70e096e4af345bc3c4af42550750e1a528c19631862e0ab1ad43c3fac -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/scheduler.pt deleted file mode 100644 index 930d1a3b56412a25903175fcf736b1b35d6bbaa7..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99c542da670f152cc803bee2e96105a2ad926ff5dd41d19d43af3b688f54d314 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/trainer_state.json deleted file mode 100644 index 3a9f126ed33716fd08d67c13792e1ff7ced2b288..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/trainer_state.json +++ /dev/null @@ -1,1819 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.6108657749723202, - "eval_steps": 500, - "global_step": 6000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 8.707528026353664e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/adapter_model.bin deleted file mode 100644 index af2441049235b7ecb398f25b32bcbd3dc7304051..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0847e10d320be6caf7417908a73d1b4eab2d37bcf89db30c06b006f444b4d988 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/optimizer.pt deleted file mode 100644 index 0e2ac554a1b314f01854bdfb7108481ef69c448d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4dac7aebd46af9696d9361295fc30cb4c9834cdc5d19aef1596ded1113ec1ced -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/rng_state.pth deleted file mode 100644 index 49c0774ef10668e0a4c55293b79c55004e8b4645..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:976a6a0c4bba93bafbb387ed4f983ec368b72bf98dbaa110b0680640f96e39ab -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/scheduler.pt deleted file mode 100644 index f9bfd1b622a802a3a57b238223db329ae0eb3ad8..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1f613a3b18dcdd10fcf95b18f85e1dc12344ae1aa9c5d55a0d15cb05135c386 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/trainer_state.json deleted file mode 100644 index 4cdcaf29a818eb1b107cc91bee4388af4e522c2a..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/trainer_state.json +++ /dev/null @@ -1,1969 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.6617712562200135, - "eval_steps": 500, - "global_step": 6500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - }, - { - "epoch": 0.61, - "learning_rate": 7.823493108413908e-05, - "loss": 1.1911, - "step": 6020 - }, - { - "epoch": 0.61, - "learning_rate": 7.782349310841391e-05, - "loss": 1.0867, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 7.741205513268875e-05, - "loss": 1.1008, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 7.70006171569636e-05, - "loss": 1.1364, - "step": 6080 - }, - { - "epoch": 0.62, - "learning_rate": 7.658917918123843e-05, - "loss": 1.1343, - "step": 6100 - }, - { - "epoch": 0.62, - "learning_rate": 7.617774120551328e-05, - "loss": 1.1324, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 7.576630322978811e-05, - "loss": 1.1251, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 7.535486525406296e-05, - "loss": 1.1371, - "step": 6160 - }, - { - "epoch": 0.63, - "learning_rate": 7.49434272783378e-05, - "loss": 1.0772, - "step": 6180 - }, - { - "epoch": 0.63, - "learning_rate": 7.453198930261263e-05, - "loss": 1.147, - "step": 6200 - }, - { - "epoch": 0.63, - "learning_rate": 7.412055132688748e-05, - "loss": 1.1028, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 7.370911335116231e-05, - "loss": 1.1747, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 7.329767537543716e-05, - "loss": 1.1088, - "step": 6260 - }, - { - "epoch": 0.64, - "learning_rate": 7.2886237399712e-05, - "loss": 1.1395, - "step": 6280 - }, - { - "epoch": 0.64, - "learning_rate": 7.247479942398684e-05, - "loss": 1.1626, - "step": 6300 - }, - { - "epoch": 0.64, - "learning_rate": 7.206336144826168e-05, - "loss": 1.1437, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 7.165192347253651e-05, - "loss": 1.1134, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 7.124048549681136e-05, - "loss": 1.2004, - "step": 6360 - }, - { - "epoch": 0.65, - "learning_rate": 7.08290475210862e-05, - "loss": 1.0999, - "step": 6380 - }, - { - "epoch": 0.65, - "learning_rate": 7.041760954536104e-05, - "loss": 1.0905, - "step": 6400 - }, - { - "epoch": 0.65, - "learning_rate": 7.000617156963588e-05, - "loss": 1.0895, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 6.959473359391073e-05, - "loss": 1.1287, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 6.918329561818556e-05, - "loss": 1.1531, - "step": 6460 - }, - { - "epoch": 0.66, - "learning_rate": 6.87718576424604e-05, - "loss": 1.1333, - "step": 6480 - }, - { - "epoch": 0.66, - "learning_rate": 6.836041966673524e-05, - "loss": 1.1603, - "step": 6500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 9.433761540704256e+16, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-6500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/adapter_model.bin deleted file mode 100644 index 98ae42dd55bd8a83a9b74663f594432f12e9c66c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8150843ff8c861f39453be7b7024443418a1fc53242900a94d8712e9f51a44ce -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/optimizer.pt deleted file mode 100644 index f78fb08159eceaf5f8590644fb746d59a02ae274..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:07a3194d72dcf93507d900615d985b0ebd5afbdf9c1b8902c58e24f5807592ce -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/rng_state.pth deleted file mode 100644 index 3f89b1adc6dceecde85e307380329e2c4e5fcd2b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6cbb287a18de63f223b6f2f470de36875e0d289c1a337647ccdaf46e7666bb8f -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/scheduler.pt deleted file mode 100644 index dc556c3a55a5319b171f2bd2dd9ac98528d65c00..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b91af690fe2bc346646cb8459a248fd56f17dde632c0b065264f0199cd91d36b -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/trainer_state.json deleted file mode 100644 index ccfc9a75f44e1b4e743e0f7d2f9645b2bafbe2fb..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/trainer_state.json +++ /dev/null @@ -1,2119 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.7126767374677069, - "eval_steps": 500, - "global_step": 7000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - }, - { - "epoch": 0.61, - "learning_rate": 7.823493108413908e-05, - "loss": 1.1911, - "step": 6020 - }, - { - "epoch": 0.61, - "learning_rate": 7.782349310841391e-05, - "loss": 1.0867, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 7.741205513268875e-05, - "loss": 1.1008, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 7.70006171569636e-05, - "loss": 1.1364, - "step": 6080 - }, - { - "epoch": 0.62, - "learning_rate": 7.658917918123843e-05, - "loss": 1.1343, - "step": 6100 - }, - { - "epoch": 0.62, - "learning_rate": 7.617774120551328e-05, - "loss": 1.1324, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 7.576630322978811e-05, - "loss": 1.1251, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 7.535486525406296e-05, - "loss": 1.1371, - "step": 6160 - }, - { - "epoch": 0.63, - "learning_rate": 7.49434272783378e-05, - "loss": 1.0772, - "step": 6180 - }, - { - "epoch": 0.63, - "learning_rate": 7.453198930261263e-05, - "loss": 1.147, - "step": 6200 - }, - { - "epoch": 0.63, - "learning_rate": 7.412055132688748e-05, - "loss": 1.1028, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 7.370911335116231e-05, - "loss": 1.1747, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 7.329767537543716e-05, - "loss": 1.1088, - "step": 6260 - }, - { - "epoch": 0.64, - "learning_rate": 7.2886237399712e-05, - "loss": 1.1395, - "step": 6280 - }, - { - "epoch": 0.64, - "learning_rate": 7.247479942398684e-05, - "loss": 1.1626, - "step": 6300 - }, - { - "epoch": 0.64, - "learning_rate": 7.206336144826168e-05, - "loss": 1.1437, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 7.165192347253651e-05, - "loss": 1.1134, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 7.124048549681136e-05, - "loss": 1.2004, - "step": 6360 - }, - { - "epoch": 0.65, - "learning_rate": 7.08290475210862e-05, - "loss": 1.0999, - "step": 6380 - }, - { - "epoch": 0.65, - "learning_rate": 7.041760954536104e-05, - "loss": 1.0905, - "step": 6400 - }, - { - "epoch": 0.65, - "learning_rate": 7.000617156963588e-05, - "loss": 1.0895, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 6.959473359391073e-05, - "loss": 1.1287, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 6.918329561818556e-05, - "loss": 1.1531, - "step": 6460 - }, - { - "epoch": 0.66, - "learning_rate": 6.87718576424604e-05, - "loss": 1.1333, - "step": 6480 - }, - { - "epoch": 0.66, - "learning_rate": 6.836041966673524e-05, - "loss": 1.1603, - "step": 6500 - }, - { - "epoch": 0.66, - "learning_rate": 6.794898169101008e-05, - "loss": 1.1131, - "step": 6520 - }, - { - "epoch": 0.67, - "learning_rate": 6.753754371528493e-05, - "loss": 1.1459, - "step": 6540 - }, - { - "epoch": 0.67, - "learning_rate": 6.712610573955976e-05, - "loss": 1.1554, - "step": 6560 - }, - { - "epoch": 0.67, - "learning_rate": 6.671466776383461e-05, - "loss": 1.0908, - "step": 6580 - }, - { - "epoch": 0.67, - "learning_rate": 6.630322978810944e-05, - "loss": 1.1529, - "step": 6600 - }, - { - "epoch": 0.67, - "learning_rate": 6.589179181238428e-05, - "loss": 1.1465, - "step": 6620 - }, - { - "epoch": 0.68, - "learning_rate": 6.548035383665913e-05, - "loss": 1.1097, - "step": 6640 - }, - { - "epoch": 0.68, - "learning_rate": 6.506891586093396e-05, - "loss": 1.1133, - "step": 6660 - }, - { - "epoch": 0.68, - "learning_rate": 6.465747788520881e-05, - "loss": 1.1193, - "step": 6680 - }, - { - "epoch": 0.68, - "learning_rate": 6.424603990948364e-05, - "loss": 1.0739, - "step": 6700 - }, - { - "epoch": 0.68, - "learning_rate": 6.383460193375849e-05, - "loss": 1.149, - "step": 6720 - }, - { - "epoch": 0.69, - "learning_rate": 6.342316395803333e-05, - "loss": 1.1588, - "step": 6740 - }, - { - "epoch": 0.69, - "learning_rate": 6.301172598230816e-05, - "loss": 1.1533, - "step": 6760 - }, - { - "epoch": 0.69, - "learning_rate": 6.260028800658301e-05, - "loss": 1.1436, - "step": 6780 - }, - { - "epoch": 0.69, - "learning_rate": 6.218885003085784e-05, - "loss": 1.162, - "step": 6800 - }, - { - "epoch": 0.69, - "learning_rate": 6.177741205513269e-05, - "loss": 1.1584, - "step": 6820 - }, - { - "epoch": 0.7, - "learning_rate": 6.136597407940753e-05, - "loss": 1.1081, - "step": 6840 - }, - { - "epoch": 0.7, - "learning_rate": 6.095453610368237e-05, - "loss": 1.1222, - "step": 6860 - }, - { - "epoch": 0.7, - "learning_rate": 6.054309812795721e-05, - "loss": 1.1222, - "step": 6880 - }, - { - "epoch": 0.7, - "learning_rate": 6.013166015223205e-05, - "loss": 1.0933, - "step": 6900 - }, - { - "epoch": 0.7, - "learning_rate": 5.972022217650689e-05, - "loss": 1.1053, - "step": 6920 - }, - { - "epoch": 0.71, - "learning_rate": 5.930878420078173e-05, - "loss": 1.0899, - "step": 6940 - }, - { - "epoch": 0.71, - "learning_rate": 5.889734622505657e-05, - "loss": 1.1284, - "step": 6960 - }, - { - "epoch": 0.71, - "learning_rate": 5.848590824933141e-05, - "loss": 1.1263, - "step": 6980 - }, - { - "epoch": 0.71, - "learning_rate": 5.807447027360625e-05, - "loss": 1.089, - "step": 7000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.0156107688937472e+17, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/adapter_model.bin deleted file mode 100644 index cb958cd7b402df59c04085f423949bca55ecde27..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a327244a0586d5f4bd95c9246d2fc17aa77b832684760413de40b935aa5de77d -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/optimizer.pt deleted file mode 100644 index 459ef36ffb0dee741cf7895c3c92ab8e41b895f2..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8201208822e1ac7a03963940009f843c9d3d384fd990dadc10557dd16e5a3c88 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/rng_state.pth deleted file mode 100644 index f45f95ef165bde7f94fd2038162e639e18c45dac..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b93061b47d430ba2d7fdc6eb2d79ee4be01581677647a2903cfca08dd17f40dc -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/scheduler.pt deleted file mode 100644 index 6d3cf5b377919487d333642d29c3dec532ac0769..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b8b1f8bc66c71cb1dcda3d58cb9c7464a0b7fe17f318de8769a6acc793e42a5 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/trainer_state.json deleted file mode 100644 index 7b6365a01d51e1be1c105da004e3cfb86e56bf3c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/trainer_state.json +++ /dev/null @@ -1,2269 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.7635822187154002, - "eval_steps": 500, - "global_step": 7500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - }, - { - "epoch": 0.61, - "learning_rate": 7.823493108413908e-05, - "loss": 1.1911, - "step": 6020 - }, - { - "epoch": 0.61, - "learning_rate": 7.782349310841391e-05, - "loss": 1.0867, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 7.741205513268875e-05, - "loss": 1.1008, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 7.70006171569636e-05, - "loss": 1.1364, - "step": 6080 - }, - { - "epoch": 0.62, - "learning_rate": 7.658917918123843e-05, - "loss": 1.1343, - "step": 6100 - }, - { - "epoch": 0.62, - "learning_rate": 7.617774120551328e-05, - "loss": 1.1324, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 7.576630322978811e-05, - "loss": 1.1251, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 7.535486525406296e-05, - "loss": 1.1371, - "step": 6160 - }, - { - "epoch": 0.63, - "learning_rate": 7.49434272783378e-05, - "loss": 1.0772, - "step": 6180 - }, - { - "epoch": 0.63, - "learning_rate": 7.453198930261263e-05, - "loss": 1.147, - "step": 6200 - }, - { - "epoch": 0.63, - "learning_rate": 7.412055132688748e-05, - "loss": 1.1028, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 7.370911335116231e-05, - "loss": 1.1747, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 7.329767537543716e-05, - "loss": 1.1088, - "step": 6260 - }, - { - "epoch": 0.64, - "learning_rate": 7.2886237399712e-05, - "loss": 1.1395, - "step": 6280 - }, - { - "epoch": 0.64, - "learning_rate": 7.247479942398684e-05, - "loss": 1.1626, - "step": 6300 - }, - { - "epoch": 0.64, - "learning_rate": 7.206336144826168e-05, - "loss": 1.1437, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 7.165192347253651e-05, - "loss": 1.1134, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 7.124048549681136e-05, - "loss": 1.2004, - "step": 6360 - }, - { - "epoch": 0.65, - "learning_rate": 7.08290475210862e-05, - "loss": 1.0999, - "step": 6380 - }, - { - "epoch": 0.65, - "learning_rate": 7.041760954536104e-05, - "loss": 1.0905, - "step": 6400 - }, - { - "epoch": 0.65, - "learning_rate": 7.000617156963588e-05, - "loss": 1.0895, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 6.959473359391073e-05, - "loss": 1.1287, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 6.918329561818556e-05, - "loss": 1.1531, - "step": 6460 - }, - { - "epoch": 0.66, - "learning_rate": 6.87718576424604e-05, - "loss": 1.1333, - "step": 6480 - }, - { - "epoch": 0.66, - "learning_rate": 6.836041966673524e-05, - "loss": 1.1603, - "step": 6500 - }, - { - "epoch": 0.66, - "learning_rate": 6.794898169101008e-05, - "loss": 1.1131, - "step": 6520 - }, - { - "epoch": 0.67, - "learning_rate": 6.753754371528493e-05, - "loss": 1.1459, - "step": 6540 - }, - { - "epoch": 0.67, - "learning_rate": 6.712610573955976e-05, - "loss": 1.1554, - "step": 6560 - }, - { - "epoch": 0.67, - "learning_rate": 6.671466776383461e-05, - "loss": 1.0908, - "step": 6580 - }, - { - "epoch": 0.67, - "learning_rate": 6.630322978810944e-05, - "loss": 1.1529, - "step": 6600 - }, - { - "epoch": 0.67, - "learning_rate": 6.589179181238428e-05, - "loss": 1.1465, - "step": 6620 - }, - { - "epoch": 0.68, - "learning_rate": 6.548035383665913e-05, - "loss": 1.1097, - "step": 6640 - }, - { - "epoch": 0.68, - "learning_rate": 6.506891586093396e-05, - "loss": 1.1133, - "step": 6660 - }, - { - "epoch": 0.68, - "learning_rate": 6.465747788520881e-05, - "loss": 1.1193, - "step": 6680 - }, - { - "epoch": 0.68, - "learning_rate": 6.424603990948364e-05, - "loss": 1.0739, - "step": 6700 - }, - { - "epoch": 0.68, - "learning_rate": 6.383460193375849e-05, - "loss": 1.149, - "step": 6720 - }, - { - "epoch": 0.69, - "learning_rate": 6.342316395803333e-05, - "loss": 1.1588, - "step": 6740 - }, - { - "epoch": 0.69, - "learning_rate": 6.301172598230816e-05, - "loss": 1.1533, - "step": 6760 - }, - { - "epoch": 0.69, - "learning_rate": 6.260028800658301e-05, - "loss": 1.1436, - "step": 6780 - }, - { - "epoch": 0.69, - "learning_rate": 6.218885003085784e-05, - "loss": 1.162, - "step": 6800 - }, - { - "epoch": 0.69, - "learning_rate": 6.177741205513269e-05, - "loss": 1.1584, - "step": 6820 - }, - { - "epoch": 0.7, - "learning_rate": 6.136597407940753e-05, - "loss": 1.1081, - "step": 6840 - }, - { - "epoch": 0.7, - "learning_rate": 6.095453610368237e-05, - "loss": 1.1222, - "step": 6860 - }, - { - "epoch": 0.7, - "learning_rate": 6.054309812795721e-05, - "loss": 1.1222, - "step": 6880 - }, - { - "epoch": 0.7, - "learning_rate": 6.013166015223205e-05, - "loss": 1.0933, - "step": 6900 - }, - { - "epoch": 0.7, - "learning_rate": 5.972022217650689e-05, - "loss": 1.1053, - "step": 6920 - }, - { - "epoch": 0.71, - "learning_rate": 5.930878420078173e-05, - "loss": 1.0899, - "step": 6940 - }, - { - "epoch": 0.71, - "learning_rate": 5.889734622505657e-05, - "loss": 1.1284, - "step": 6960 - }, - { - "epoch": 0.71, - "learning_rate": 5.848590824933141e-05, - "loss": 1.1263, - "step": 6980 - }, - { - "epoch": 0.71, - "learning_rate": 5.807447027360625e-05, - "loss": 1.089, - "step": 7000 - }, - { - "epoch": 0.71, - "learning_rate": 5.766303229788109e-05, - "loss": 1.0758, - "step": 7020 - }, - { - "epoch": 0.72, - "learning_rate": 5.725159432215593e-05, - "loss": 1.1267, - "step": 7040 - }, - { - "epoch": 0.72, - "learning_rate": 5.6840156346430775e-05, - "loss": 1.1073, - "step": 7060 - }, - { - "epoch": 0.72, - "learning_rate": 5.6428718370705616e-05, - "loss": 1.1094, - "step": 7080 - }, - { - "epoch": 0.72, - "learning_rate": 5.6017280394980464e-05, - "loss": 1.1017, - "step": 7100 - }, - { - "epoch": 0.72, - "learning_rate": 5.5605842419255305e-05, - "loss": 1.1348, - "step": 7120 - }, - { - "epoch": 0.73, - "learning_rate": 5.519440444353015e-05, - "loss": 1.0678, - "step": 7140 - }, - { - "epoch": 0.73, - "learning_rate": 5.478296646780499e-05, - "loss": 1.1314, - "step": 7160 - }, - { - "epoch": 0.73, - "learning_rate": 5.437152849207983e-05, - "loss": 1.1256, - "step": 7180 - }, - { - "epoch": 0.73, - "learning_rate": 5.3960090516354664e-05, - "loss": 1.1377, - "step": 7200 - }, - { - "epoch": 0.74, - "learning_rate": 5.3548652540629505e-05, - "loss": 1.1295, - "step": 7220 - }, - { - "epoch": 0.74, - "learning_rate": 5.3137214564904347e-05, - "loss": 1.1023, - "step": 7240 - }, - { - "epoch": 0.74, - "learning_rate": 5.272577658917919e-05, - "loss": 1.1027, - "step": 7260 - }, - { - "epoch": 0.74, - "learning_rate": 5.231433861345403e-05, - "loss": 1.1238, - "step": 7280 - }, - { - "epoch": 0.74, - "learning_rate": 5.190290063772887e-05, - "loss": 1.0967, - "step": 7300 - }, - { - "epoch": 0.75, - "learning_rate": 5.149146266200371e-05, - "loss": 1.1286, - "step": 7320 - }, - { - "epoch": 0.75, - "learning_rate": 5.1080024686278546e-05, - "loss": 1.1418, - "step": 7340 - }, - { - "epoch": 0.75, - "learning_rate": 5.066858671055339e-05, - "loss": 1.1022, - "step": 7360 - }, - { - "epoch": 0.75, - "learning_rate": 5.025714873482823e-05, - "loss": 1.1195, - "step": 7380 - }, - { - "epoch": 0.75, - "learning_rate": 4.984571075910307e-05, - "loss": 1.125, - "step": 7400 - }, - { - "epoch": 0.76, - "learning_rate": 4.943427278337791e-05, - "loss": 1.0951, - "step": 7420 - }, - { - "epoch": 0.76, - "learning_rate": 4.902283480765275e-05, - "loss": 1.1049, - "step": 7440 - }, - { - "epoch": 0.76, - "learning_rate": 4.8611396831927594e-05, - "loss": 1.1609, - "step": 7460 - }, - { - "epoch": 0.76, - "learning_rate": 4.819995885620243e-05, - "loss": 1.0884, - "step": 7480 - }, - { - "epoch": 0.76, - "learning_rate": 4.778852088047727e-05, - "loss": 1.138, - "step": 7500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.0878923136903168e+17, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-7500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/adapter_model.bin deleted file mode 100644 index 2c7be42da245f3bae91d251fd4d7acb8f88808d5..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24e084699524f8db717f998fe9a904ca7ed88ddef707db986e94ad8b6910cbfc -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/optimizer.pt deleted file mode 100644 index 9be79ccc27003375a9a8cb600869876ba2e7e829..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1877b624efe46b2e20251926df026abb025b86eeb075639f6727023180961bb3 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/rng_state.pth deleted file mode 100644 index d05e2e2d4a77d44e39b262ff026e5a14403e101f..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e26d9b2d3c76aeaa7b51d7495e4d807469618f5b2c5e82d591d3f3a7c48775b -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/scheduler.pt deleted file mode 100644 index effbbcb203502bd7b42168b1e926627ea4920e19..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01571040686f221e9b20583dc5c22b24f83f120d1754bbb7f12a16a5a49e4b89 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/trainer_state.json deleted file mode 100644 index 64aa80aae7fd1814b427109cfb09004d373f6bb6..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/trainer_state.json +++ /dev/null @@ -1,2419 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.8144876999630936, - "eval_steps": 500, - "global_step": 8000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - }, - { - "epoch": 0.61, - "learning_rate": 7.823493108413908e-05, - "loss": 1.1911, - "step": 6020 - }, - { - "epoch": 0.61, - "learning_rate": 7.782349310841391e-05, - "loss": 1.0867, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 7.741205513268875e-05, - "loss": 1.1008, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 7.70006171569636e-05, - "loss": 1.1364, - "step": 6080 - }, - { - "epoch": 0.62, - "learning_rate": 7.658917918123843e-05, - "loss": 1.1343, - "step": 6100 - }, - { - "epoch": 0.62, - "learning_rate": 7.617774120551328e-05, - "loss": 1.1324, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 7.576630322978811e-05, - "loss": 1.1251, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 7.535486525406296e-05, - "loss": 1.1371, - "step": 6160 - }, - { - "epoch": 0.63, - "learning_rate": 7.49434272783378e-05, - "loss": 1.0772, - "step": 6180 - }, - { - "epoch": 0.63, - "learning_rate": 7.453198930261263e-05, - "loss": 1.147, - "step": 6200 - }, - { - "epoch": 0.63, - "learning_rate": 7.412055132688748e-05, - "loss": 1.1028, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 7.370911335116231e-05, - "loss": 1.1747, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 7.329767537543716e-05, - "loss": 1.1088, - "step": 6260 - }, - { - "epoch": 0.64, - "learning_rate": 7.2886237399712e-05, - "loss": 1.1395, - "step": 6280 - }, - { - "epoch": 0.64, - "learning_rate": 7.247479942398684e-05, - "loss": 1.1626, - "step": 6300 - }, - { - "epoch": 0.64, - "learning_rate": 7.206336144826168e-05, - "loss": 1.1437, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 7.165192347253651e-05, - "loss": 1.1134, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 7.124048549681136e-05, - "loss": 1.2004, - "step": 6360 - }, - { - "epoch": 0.65, - "learning_rate": 7.08290475210862e-05, - "loss": 1.0999, - "step": 6380 - }, - { - "epoch": 0.65, - "learning_rate": 7.041760954536104e-05, - "loss": 1.0905, - "step": 6400 - }, - { - "epoch": 0.65, - "learning_rate": 7.000617156963588e-05, - "loss": 1.0895, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 6.959473359391073e-05, - "loss": 1.1287, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 6.918329561818556e-05, - "loss": 1.1531, - "step": 6460 - }, - { - "epoch": 0.66, - "learning_rate": 6.87718576424604e-05, - "loss": 1.1333, - "step": 6480 - }, - { - "epoch": 0.66, - "learning_rate": 6.836041966673524e-05, - "loss": 1.1603, - "step": 6500 - }, - { - "epoch": 0.66, - "learning_rate": 6.794898169101008e-05, - "loss": 1.1131, - "step": 6520 - }, - { - "epoch": 0.67, - "learning_rate": 6.753754371528493e-05, - "loss": 1.1459, - "step": 6540 - }, - { - "epoch": 0.67, - "learning_rate": 6.712610573955976e-05, - "loss": 1.1554, - "step": 6560 - }, - { - "epoch": 0.67, - "learning_rate": 6.671466776383461e-05, - "loss": 1.0908, - "step": 6580 - }, - { - "epoch": 0.67, - "learning_rate": 6.630322978810944e-05, - "loss": 1.1529, - "step": 6600 - }, - { - "epoch": 0.67, - "learning_rate": 6.589179181238428e-05, - "loss": 1.1465, - "step": 6620 - }, - { - "epoch": 0.68, - "learning_rate": 6.548035383665913e-05, - "loss": 1.1097, - "step": 6640 - }, - { - "epoch": 0.68, - "learning_rate": 6.506891586093396e-05, - "loss": 1.1133, - "step": 6660 - }, - { - "epoch": 0.68, - "learning_rate": 6.465747788520881e-05, - "loss": 1.1193, - "step": 6680 - }, - { - "epoch": 0.68, - "learning_rate": 6.424603990948364e-05, - "loss": 1.0739, - "step": 6700 - }, - { - "epoch": 0.68, - "learning_rate": 6.383460193375849e-05, - "loss": 1.149, - "step": 6720 - }, - { - "epoch": 0.69, - "learning_rate": 6.342316395803333e-05, - "loss": 1.1588, - "step": 6740 - }, - { - "epoch": 0.69, - "learning_rate": 6.301172598230816e-05, - "loss": 1.1533, - "step": 6760 - }, - { - "epoch": 0.69, - "learning_rate": 6.260028800658301e-05, - "loss": 1.1436, - "step": 6780 - }, - { - "epoch": 0.69, - "learning_rate": 6.218885003085784e-05, - "loss": 1.162, - "step": 6800 - }, - { - "epoch": 0.69, - "learning_rate": 6.177741205513269e-05, - "loss": 1.1584, - "step": 6820 - }, - { - "epoch": 0.7, - "learning_rate": 6.136597407940753e-05, - "loss": 1.1081, - "step": 6840 - }, - { - "epoch": 0.7, - "learning_rate": 6.095453610368237e-05, - "loss": 1.1222, - "step": 6860 - }, - { - "epoch": 0.7, - "learning_rate": 6.054309812795721e-05, - "loss": 1.1222, - "step": 6880 - }, - { - "epoch": 0.7, - "learning_rate": 6.013166015223205e-05, - "loss": 1.0933, - "step": 6900 - }, - { - "epoch": 0.7, - "learning_rate": 5.972022217650689e-05, - "loss": 1.1053, - "step": 6920 - }, - { - "epoch": 0.71, - "learning_rate": 5.930878420078173e-05, - "loss": 1.0899, - "step": 6940 - }, - { - "epoch": 0.71, - "learning_rate": 5.889734622505657e-05, - "loss": 1.1284, - "step": 6960 - }, - { - "epoch": 0.71, - "learning_rate": 5.848590824933141e-05, - "loss": 1.1263, - "step": 6980 - }, - { - "epoch": 0.71, - "learning_rate": 5.807447027360625e-05, - "loss": 1.089, - "step": 7000 - }, - { - "epoch": 0.71, - "learning_rate": 5.766303229788109e-05, - "loss": 1.0758, - "step": 7020 - }, - { - "epoch": 0.72, - "learning_rate": 5.725159432215593e-05, - "loss": 1.1267, - "step": 7040 - }, - { - "epoch": 0.72, - "learning_rate": 5.6840156346430775e-05, - "loss": 1.1073, - "step": 7060 - }, - { - "epoch": 0.72, - "learning_rate": 5.6428718370705616e-05, - "loss": 1.1094, - "step": 7080 - }, - { - "epoch": 0.72, - "learning_rate": 5.6017280394980464e-05, - "loss": 1.1017, - "step": 7100 - }, - { - "epoch": 0.72, - "learning_rate": 5.5605842419255305e-05, - "loss": 1.1348, - "step": 7120 - }, - { - "epoch": 0.73, - "learning_rate": 5.519440444353015e-05, - "loss": 1.0678, - "step": 7140 - }, - { - "epoch": 0.73, - "learning_rate": 5.478296646780499e-05, - "loss": 1.1314, - "step": 7160 - }, - { - "epoch": 0.73, - "learning_rate": 5.437152849207983e-05, - "loss": 1.1256, - "step": 7180 - }, - { - "epoch": 0.73, - "learning_rate": 5.3960090516354664e-05, - "loss": 1.1377, - "step": 7200 - }, - { - "epoch": 0.74, - "learning_rate": 5.3548652540629505e-05, - "loss": 1.1295, - "step": 7220 - }, - { - "epoch": 0.74, - "learning_rate": 5.3137214564904347e-05, - "loss": 1.1023, - "step": 7240 - }, - { - "epoch": 0.74, - "learning_rate": 5.272577658917919e-05, - "loss": 1.1027, - "step": 7260 - }, - { - "epoch": 0.74, - "learning_rate": 5.231433861345403e-05, - "loss": 1.1238, - "step": 7280 - }, - { - "epoch": 0.74, - "learning_rate": 5.190290063772887e-05, - "loss": 1.0967, - "step": 7300 - }, - { - "epoch": 0.75, - "learning_rate": 5.149146266200371e-05, - "loss": 1.1286, - "step": 7320 - }, - { - "epoch": 0.75, - "learning_rate": 5.1080024686278546e-05, - "loss": 1.1418, - "step": 7340 - }, - { - "epoch": 0.75, - "learning_rate": 5.066858671055339e-05, - "loss": 1.1022, - "step": 7360 - }, - { - "epoch": 0.75, - "learning_rate": 5.025714873482823e-05, - "loss": 1.1195, - "step": 7380 - }, - { - "epoch": 0.75, - "learning_rate": 4.984571075910307e-05, - "loss": 1.125, - "step": 7400 - }, - { - "epoch": 0.76, - "learning_rate": 4.943427278337791e-05, - "loss": 1.0951, - "step": 7420 - }, - { - "epoch": 0.76, - "learning_rate": 4.902283480765275e-05, - "loss": 1.1049, - "step": 7440 - }, - { - "epoch": 0.76, - "learning_rate": 4.8611396831927594e-05, - "loss": 1.1609, - "step": 7460 - }, - { - "epoch": 0.76, - "learning_rate": 4.819995885620243e-05, - "loss": 1.0884, - "step": 7480 - }, - { - "epoch": 0.76, - "learning_rate": 4.778852088047727e-05, - "loss": 1.138, - "step": 7500 - }, - { - "epoch": 0.77, - "learning_rate": 4.737708290475211e-05, - "loss": 1.1537, - "step": 7520 - }, - { - "epoch": 0.77, - "learning_rate": 4.696564492902695e-05, - "loss": 1.1377, - "step": 7540 - }, - { - "epoch": 0.77, - "learning_rate": 4.6554206953301794e-05, - "loss": 1.0849, - "step": 7560 - }, - { - "epoch": 0.77, - "learning_rate": 4.6142768977576636e-05, - "loss": 1.1445, - "step": 7580 - }, - { - "epoch": 0.77, - "learning_rate": 4.573133100185148e-05, - "loss": 1.1293, - "step": 7600 - }, - { - "epoch": 0.78, - "learning_rate": 4.531989302612631e-05, - "loss": 1.1038, - "step": 7620 - }, - { - "epoch": 0.78, - "learning_rate": 4.490845505040115e-05, - "loss": 1.1, - "step": 7640 - }, - { - "epoch": 0.78, - "learning_rate": 4.4497017074675994e-05, - "loss": 1.1133, - "step": 7660 - }, - { - "epoch": 0.78, - "learning_rate": 4.4085579098950836e-05, - "loss": 1.1537, - "step": 7680 - }, - { - "epoch": 0.78, - "learning_rate": 4.367414112322568e-05, - "loss": 1.118, - "step": 7700 - }, - { - "epoch": 0.79, - "learning_rate": 4.326270314750052e-05, - "loss": 1.1545, - "step": 7720 - }, - { - "epoch": 0.79, - "learning_rate": 4.285126517177536e-05, - "loss": 1.1473, - "step": 7740 - }, - { - "epoch": 0.79, - "learning_rate": 4.2439827196050194e-05, - "loss": 1.1389, - "step": 7760 - }, - { - "epoch": 0.79, - "learning_rate": 4.2028389220325035e-05, - "loss": 1.1767, - "step": 7780 - }, - { - "epoch": 0.79, - "learning_rate": 4.161695124459988e-05, - "loss": 1.0442, - "step": 7800 - }, - { - "epoch": 0.8, - "learning_rate": 4.120551326887472e-05, - "loss": 1.1195, - "step": 7820 - }, - { - "epoch": 0.8, - "learning_rate": 4.079407529314956e-05, - "loss": 1.1136, - "step": 7840 - }, - { - "epoch": 0.8, - "learning_rate": 4.03826373174244e-05, - "loss": 1.0853, - "step": 7860 - }, - { - "epoch": 0.8, - "learning_rate": 3.997119934169924e-05, - "loss": 1.1172, - "step": 7880 - }, - { - "epoch": 0.8, - "learning_rate": 3.955976136597408e-05, - "loss": 1.1025, - "step": 7900 - }, - { - "epoch": 0.81, - "learning_rate": 3.914832339024892e-05, - "loss": 1.1266, - "step": 7920 - }, - { - "epoch": 0.81, - "learning_rate": 3.873688541452376e-05, - "loss": 1.0913, - "step": 7940 - }, - { - "epoch": 0.81, - "learning_rate": 3.83254474387986e-05, - "loss": 1.1448, - "step": 7960 - }, - { - "epoch": 0.81, - "learning_rate": 3.791400946307344e-05, - "loss": 1.1111, - "step": 7980 - }, - { - "epoch": 0.81, - "learning_rate": 3.750257148734828e-05, - "loss": 1.1456, - "step": 8000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.160457002658816e+17, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/adapter_model.bin deleted file mode 100644 index 7966097d8b6519e3cfd89668a9b0189fef29f957..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:643c26492c330a747e19b04e8674921d4511af945cc5f30edf9dcdfe37400741 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/optimizer.pt deleted file mode 100644 index a86f1cd9be0716bfb4dd1e1c55dffb76817cf2e4..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d42fb2bd384044a6ac691ef9eaf55ab842ce866148b9578427b9f8736a1786ba -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/rng_state.pth deleted file mode 100644 index 8e82d0b597ff48e50b5b75b945287b420af6462a..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b8850b161f1dc4653a2f2dca646cbadba4c84474fee6398da4f8e747a8064442 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/scheduler.pt deleted file mode 100644 index c3914ec13af5e463bb50d6538fb773d86be01caa..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19c9ef59db1a6a229c01ac0e490f265f76dfd873de6d8b7c6b50c23622f8b34d -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/trainer_state.json deleted file mode 100644 index 58559b5fda18c812cd4c904128bccb7ba02a83a5..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/trainer_state.json +++ /dev/null @@ -1,2569 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.8653931812107869, - "eval_steps": 500, - "global_step": 8500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - }, - { - "epoch": 0.61, - "learning_rate": 7.823493108413908e-05, - "loss": 1.1911, - "step": 6020 - }, - { - "epoch": 0.61, - "learning_rate": 7.782349310841391e-05, - "loss": 1.0867, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 7.741205513268875e-05, - "loss": 1.1008, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 7.70006171569636e-05, - "loss": 1.1364, - "step": 6080 - }, - { - "epoch": 0.62, - "learning_rate": 7.658917918123843e-05, - "loss": 1.1343, - "step": 6100 - }, - { - "epoch": 0.62, - "learning_rate": 7.617774120551328e-05, - "loss": 1.1324, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 7.576630322978811e-05, - "loss": 1.1251, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 7.535486525406296e-05, - "loss": 1.1371, - "step": 6160 - }, - { - "epoch": 0.63, - "learning_rate": 7.49434272783378e-05, - "loss": 1.0772, - "step": 6180 - }, - { - "epoch": 0.63, - "learning_rate": 7.453198930261263e-05, - "loss": 1.147, - "step": 6200 - }, - { - "epoch": 0.63, - "learning_rate": 7.412055132688748e-05, - "loss": 1.1028, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 7.370911335116231e-05, - "loss": 1.1747, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 7.329767537543716e-05, - "loss": 1.1088, - "step": 6260 - }, - { - "epoch": 0.64, - "learning_rate": 7.2886237399712e-05, - "loss": 1.1395, - "step": 6280 - }, - { - "epoch": 0.64, - "learning_rate": 7.247479942398684e-05, - "loss": 1.1626, - "step": 6300 - }, - { - "epoch": 0.64, - "learning_rate": 7.206336144826168e-05, - "loss": 1.1437, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 7.165192347253651e-05, - "loss": 1.1134, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 7.124048549681136e-05, - "loss": 1.2004, - "step": 6360 - }, - { - "epoch": 0.65, - "learning_rate": 7.08290475210862e-05, - "loss": 1.0999, - "step": 6380 - }, - { - "epoch": 0.65, - "learning_rate": 7.041760954536104e-05, - "loss": 1.0905, - "step": 6400 - }, - { - "epoch": 0.65, - "learning_rate": 7.000617156963588e-05, - "loss": 1.0895, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 6.959473359391073e-05, - "loss": 1.1287, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 6.918329561818556e-05, - "loss": 1.1531, - "step": 6460 - }, - { - "epoch": 0.66, - "learning_rate": 6.87718576424604e-05, - "loss": 1.1333, - "step": 6480 - }, - { - "epoch": 0.66, - "learning_rate": 6.836041966673524e-05, - "loss": 1.1603, - "step": 6500 - }, - { - "epoch": 0.66, - "learning_rate": 6.794898169101008e-05, - "loss": 1.1131, - "step": 6520 - }, - { - "epoch": 0.67, - "learning_rate": 6.753754371528493e-05, - "loss": 1.1459, - "step": 6540 - }, - { - "epoch": 0.67, - "learning_rate": 6.712610573955976e-05, - "loss": 1.1554, - "step": 6560 - }, - { - "epoch": 0.67, - "learning_rate": 6.671466776383461e-05, - "loss": 1.0908, - "step": 6580 - }, - { - "epoch": 0.67, - "learning_rate": 6.630322978810944e-05, - "loss": 1.1529, - "step": 6600 - }, - { - "epoch": 0.67, - "learning_rate": 6.589179181238428e-05, - "loss": 1.1465, - "step": 6620 - }, - { - "epoch": 0.68, - "learning_rate": 6.548035383665913e-05, - "loss": 1.1097, - "step": 6640 - }, - { - "epoch": 0.68, - "learning_rate": 6.506891586093396e-05, - "loss": 1.1133, - "step": 6660 - }, - { - "epoch": 0.68, - "learning_rate": 6.465747788520881e-05, - "loss": 1.1193, - "step": 6680 - }, - { - "epoch": 0.68, - "learning_rate": 6.424603990948364e-05, - "loss": 1.0739, - "step": 6700 - }, - { - "epoch": 0.68, - "learning_rate": 6.383460193375849e-05, - "loss": 1.149, - "step": 6720 - }, - { - "epoch": 0.69, - "learning_rate": 6.342316395803333e-05, - "loss": 1.1588, - "step": 6740 - }, - { - "epoch": 0.69, - "learning_rate": 6.301172598230816e-05, - "loss": 1.1533, - "step": 6760 - }, - { - "epoch": 0.69, - "learning_rate": 6.260028800658301e-05, - "loss": 1.1436, - "step": 6780 - }, - { - "epoch": 0.69, - "learning_rate": 6.218885003085784e-05, - "loss": 1.162, - "step": 6800 - }, - { - "epoch": 0.69, - "learning_rate": 6.177741205513269e-05, - "loss": 1.1584, - "step": 6820 - }, - { - "epoch": 0.7, - "learning_rate": 6.136597407940753e-05, - "loss": 1.1081, - "step": 6840 - }, - { - "epoch": 0.7, - "learning_rate": 6.095453610368237e-05, - "loss": 1.1222, - "step": 6860 - }, - { - "epoch": 0.7, - "learning_rate": 6.054309812795721e-05, - "loss": 1.1222, - "step": 6880 - }, - { - "epoch": 0.7, - "learning_rate": 6.013166015223205e-05, - "loss": 1.0933, - "step": 6900 - }, - { - "epoch": 0.7, - "learning_rate": 5.972022217650689e-05, - "loss": 1.1053, - "step": 6920 - }, - { - "epoch": 0.71, - "learning_rate": 5.930878420078173e-05, - "loss": 1.0899, - "step": 6940 - }, - { - "epoch": 0.71, - "learning_rate": 5.889734622505657e-05, - "loss": 1.1284, - "step": 6960 - }, - { - "epoch": 0.71, - "learning_rate": 5.848590824933141e-05, - "loss": 1.1263, - "step": 6980 - }, - { - "epoch": 0.71, - "learning_rate": 5.807447027360625e-05, - "loss": 1.089, - "step": 7000 - }, - { - "epoch": 0.71, - "learning_rate": 5.766303229788109e-05, - "loss": 1.0758, - "step": 7020 - }, - { - "epoch": 0.72, - "learning_rate": 5.725159432215593e-05, - "loss": 1.1267, - "step": 7040 - }, - { - "epoch": 0.72, - "learning_rate": 5.6840156346430775e-05, - "loss": 1.1073, - "step": 7060 - }, - { - "epoch": 0.72, - "learning_rate": 5.6428718370705616e-05, - "loss": 1.1094, - "step": 7080 - }, - { - "epoch": 0.72, - "learning_rate": 5.6017280394980464e-05, - "loss": 1.1017, - "step": 7100 - }, - { - "epoch": 0.72, - "learning_rate": 5.5605842419255305e-05, - "loss": 1.1348, - "step": 7120 - }, - { - "epoch": 0.73, - "learning_rate": 5.519440444353015e-05, - "loss": 1.0678, - "step": 7140 - }, - { - "epoch": 0.73, - "learning_rate": 5.478296646780499e-05, - "loss": 1.1314, - "step": 7160 - }, - { - "epoch": 0.73, - "learning_rate": 5.437152849207983e-05, - "loss": 1.1256, - "step": 7180 - }, - { - "epoch": 0.73, - "learning_rate": 5.3960090516354664e-05, - "loss": 1.1377, - "step": 7200 - }, - { - "epoch": 0.74, - "learning_rate": 5.3548652540629505e-05, - "loss": 1.1295, - "step": 7220 - }, - { - "epoch": 0.74, - "learning_rate": 5.3137214564904347e-05, - "loss": 1.1023, - "step": 7240 - }, - { - "epoch": 0.74, - "learning_rate": 5.272577658917919e-05, - "loss": 1.1027, - "step": 7260 - }, - { - "epoch": 0.74, - "learning_rate": 5.231433861345403e-05, - "loss": 1.1238, - "step": 7280 - }, - { - "epoch": 0.74, - "learning_rate": 5.190290063772887e-05, - "loss": 1.0967, - "step": 7300 - }, - { - "epoch": 0.75, - "learning_rate": 5.149146266200371e-05, - "loss": 1.1286, - "step": 7320 - }, - { - "epoch": 0.75, - "learning_rate": 5.1080024686278546e-05, - "loss": 1.1418, - "step": 7340 - }, - { - "epoch": 0.75, - "learning_rate": 5.066858671055339e-05, - "loss": 1.1022, - "step": 7360 - }, - { - "epoch": 0.75, - "learning_rate": 5.025714873482823e-05, - "loss": 1.1195, - "step": 7380 - }, - { - "epoch": 0.75, - "learning_rate": 4.984571075910307e-05, - "loss": 1.125, - "step": 7400 - }, - { - "epoch": 0.76, - "learning_rate": 4.943427278337791e-05, - "loss": 1.0951, - "step": 7420 - }, - { - "epoch": 0.76, - "learning_rate": 4.902283480765275e-05, - "loss": 1.1049, - "step": 7440 - }, - { - "epoch": 0.76, - "learning_rate": 4.8611396831927594e-05, - "loss": 1.1609, - "step": 7460 - }, - { - "epoch": 0.76, - "learning_rate": 4.819995885620243e-05, - "loss": 1.0884, - "step": 7480 - }, - { - "epoch": 0.76, - "learning_rate": 4.778852088047727e-05, - "loss": 1.138, - "step": 7500 - }, - { - "epoch": 0.77, - "learning_rate": 4.737708290475211e-05, - "loss": 1.1537, - "step": 7520 - }, - { - "epoch": 0.77, - "learning_rate": 4.696564492902695e-05, - "loss": 1.1377, - "step": 7540 - }, - { - "epoch": 0.77, - "learning_rate": 4.6554206953301794e-05, - "loss": 1.0849, - "step": 7560 - }, - { - "epoch": 0.77, - "learning_rate": 4.6142768977576636e-05, - "loss": 1.1445, - "step": 7580 - }, - { - "epoch": 0.77, - "learning_rate": 4.573133100185148e-05, - "loss": 1.1293, - "step": 7600 - }, - { - "epoch": 0.78, - "learning_rate": 4.531989302612631e-05, - "loss": 1.1038, - "step": 7620 - }, - { - "epoch": 0.78, - "learning_rate": 4.490845505040115e-05, - "loss": 1.1, - "step": 7640 - }, - { - "epoch": 0.78, - "learning_rate": 4.4497017074675994e-05, - "loss": 1.1133, - "step": 7660 - }, - { - "epoch": 0.78, - "learning_rate": 4.4085579098950836e-05, - "loss": 1.1537, - "step": 7680 - }, - { - "epoch": 0.78, - "learning_rate": 4.367414112322568e-05, - "loss": 1.118, - "step": 7700 - }, - { - "epoch": 0.79, - "learning_rate": 4.326270314750052e-05, - "loss": 1.1545, - "step": 7720 - }, - { - "epoch": 0.79, - "learning_rate": 4.285126517177536e-05, - "loss": 1.1473, - "step": 7740 - }, - { - "epoch": 0.79, - "learning_rate": 4.2439827196050194e-05, - "loss": 1.1389, - "step": 7760 - }, - { - "epoch": 0.79, - "learning_rate": 4.2028389220325035e-05, - "loss": 1.1767, - "step": 7780 - }, - { - "epoch": 0.79, - "learning_rate": 4.161695124459988e-05, - "loss": 1.0442, - "step": 7800 - }, - { - "epoch": 0.8, - "learning_rate": 4.120551326887472e-05, - "loss": 1.1195, - "step": 7820 - }, - { - "epoch": 0.8, - "learning_rate": 4.079407529314956e-05, - "loss": 1.1136, - "step": 7840 - }, - { - "epoch": 0.8, - "learning_rate": 4.03826373174244e-05, - "loss": 1.0853, - "step": 7860 - }, - { - "epoch": 0.8, - "learning_rate": 3.997119934169924e-05, - "loss": 1.1172, - "step": 7880 - }, - { - "epoch": 0.8, - "learning_rate": 3.955976136597408e-05, - "loss": 1.1025, - "step": 7900 - }, - { - "epoch": 0.81, - "learning_rate": 3.914832339024892e-05, - "loss": 1.1266, - "step": 7920 - }, - { - "epoch": 0.81, - "learning_rate": 3.873688541452376e-05, - "loss": 1.0913, - "step": 7940 - }, - { - "epoch": 0.81, - "learning_rate": 3.83254474387986e-05, - "loss": 1.1448, - "step": 7960 - }, - { - "epoch": 0.81, - "learning_rate": 3.791400946307344e-05, - "loss": 1.1111, - "step": 7980 - }, - { - "epoch": 0.81, - "learning_rate": 3.750257148734828e-05, - "loss": 1.1456, - "step": 8000 - }, - { - "epoch": 0.82, - "learning_rate": 3.7091133511623125e-05, - "loss": 1.1447, - "step": 8020 - }, - { - "epoch": 0.82, - "learning_rate": 3.6679695535897966e-05, - "loss": 1.0768, - "step": 8040 - }, - { - "epoch": 0.82, - "learning_rate": 3.626825756017281e-05, - "loss": 1.1319, - "step": 8060 - }, - { - "epoch": 0.82, - "learning_rate": 3.585681958444765e-05, - "loss": 1.1178, - "step": 8080 - }, - { - "epoch": 0.82, - "learning_rate": 3.544538160872249e-05, - "loss": 1.1002, - "step": 8100 - }, - { - "epoch": 0.83, - "learning_rate": 3.503394363299733e-05, - "loss": 1.1129, - "step": 8120 - }, - { - "epoch": 0.83, - "learning_rate": 3.462250565727217e-05, - "loss": 1.1278, - "step": 8140 - }, - { - "epoch": 0.83, - "learning_rate": 3.421106768154701e-05, - "loss": 1.13, - "step": 8160 - }, - { - "epoch": 0.83, - "learning_rate": 3.379962970582185e-05, - "loss": 1.1349, - "step": 8180 - }, - { - "epoch": 0.83, - "learning_rate": 3.338819173009669e-05, - "loss": 1.0957, - "step": 8200 - }, - { - "epoch": 0.84, - "learning_rate": 3.297675375437153e-05, - "loss": 1.1363, - "step": 8220 - }, - { - "epoch": 0.84, - "learning_rate": 3.256531577864637e-05, - "loss": 1.0767, - "step": 8240 - }, - { - "epoch": 0.84, - "learning_rate": 3.2153877802921214e-05, - "loss": 1.1356, - "step": 8260 - }, - { - "epoch": 0.84, - "learning_rate": 3.1742439827196055e-05, - "loss": 1.0676, - "step": 8280 - }, - { - "epoch": 0.85, - "learning_rate": 3.133100185147089e-05, - "loss": 1.1433, - "step": 8300 - }, - { - "epoch": 0.85, - "learning_rate": 3.091956387574573e-05, - "loss": 1.1239, - "step": 8320 - }, - { - "epoch": 0.85, - "learning_rate": 3.0508125900020572e-05, - "loss": 1.104, - "step": 8340 - }, - { - "epoch": 0.85, - "learning_rate": 3.0096687924295414e-05, - "loss": 1.1115, - "step": 8360 - }, - { - "epoch": 0.85, - "learning_rate": 2.9685249948570255e-05, - "loss": 1.0992, - "step": 8380 - }, - { - "epoch": 0.86, - "learning_rate": 2.9273811972845093e-05, - "loss": 1.087, - "step": 8400 - }, - { - "epoch": 0.86, - "learning_rate": 2.8862373997119934e-05, - "loss": 1.0881, - "step": 8420 - }, - { - "epoch": 0.86, - "learning_rate": 2.8450936021394776e-05, - "loss": 1.1266, - "step": 8440 - }, - { - "epoch": 0.86, - "learning_rate": 2.8060069944455876e-05, - "loss": 1.1355, - "step": 8460 - }, - { - "epoch": 0.86, - "learning_rate": 2.7648631968730714e-05, - "loss": 1.1368, - "step": 8480 - }, - { - "epoch": 0.87, - "learning_rate": 2.7237193993005555e-05, - "loss": 1.0835, - "step": 8500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.2323865726560256e+17, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-8500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/adapter_model.bin deleted file mode 100644 index 532687cdafb9bc31779ce7266c6bd0bdfa25eb11..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d3118cc24eb31d59762a753ed55afd206eb06ffbadc4ccdfd9246abba4545744 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/optimizer.pt deleted file mode 100644 index fe364e0d89dbf2be2eaf22e045d3daf6a571b7d8..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0441684e913f40451356d9a953f7d422d776767922127eb74b2b37b8b0080f11 -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/rng_state.pth deleted file mode 100644 index 9a8b72dae13e6b9e41b62d28b1bb935075950c94..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cdb5b215ae18709a220a8a18b4257e3c4036fea3e04496781e4963d6a9643c7b -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/scheduler.pt deleted file mode 100644 index be50251da9c3d53b38e34bb28867d1b88a5e4e7e..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ea4166c9bbb0879e53b678ba664ca9d4593877116ffdf746c14d04aac348c162 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/trainer_state.json deleted file mode 100644 index 4ef4739873702a0d2161c0f3657d13876e917af2..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/trainer_state.json +++ /dev/null @@ -1,2719 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9162986624584802, - "eval_steps": 500, - "global_step": 9000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - }, - { - "epoch": 0.61, - "learning_rate": 7.823493108413908e-05, - "loss": 1.1911, - "step": 6020 - }, - { - "epoch": 0.61, - "learning_rate": 7.782349310841391e-05, - "loss": 1.0867, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 7.741205513268875e-05, - "loss": 1.1008, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 7.70006171569636e-05, - "loss": 1.1364, - "step": 6080 - }, - { - "epoch": 0.62, - "learning_rate": 7.658917918123843e-05, - "loss": 1.1343, - "step": 6100 - }, - { - "epoch": 0.62, - "learning_rate": 7.617774120551328e-05, - "loss": 1.1324, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 7.576630322978811e-05, - "loss": 1.1251, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 7.535486525406296e-05, - "loss": 1.1371, - "step": 6160 - }, - { - "epoch": 0.63, - "learning_rate": 7.49434272783378e-05, - "loss": 1.0772, - "step": 6180 - }, - { - "epoch": 0.63, - "learning_rate": 7.453198930261263e-05, - "loss": 1.147, - "step": 6200 - }, - { - "epoch": 0.63, - "learning_rate": 7.412055132688748e-05, - "loss": 1.1028, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 7.370911335116231e-05, - "loss": 1.1747, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 7.329767537543716e-05, - "loss": 1.1088, - "step": 6260 - }, - { - "epoch": 0.64, - "learning_rate": 7.2886237399712e-05, - "loss": 1.1395, - "step": 6280 - }, - { - "epoch": 0.64, - "learning_rate": 7.247479942398684e-05, - "loss": 1.1626, - "step": 6300 - }, - { - "epoch": 0.64, - "learning_rate": 7.206336144826168e-05, - "loss": 1.1437, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 7.165192347253651e-05, - "loss": 1.1134, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 7.124048549681136e-05, - "loss": 1.2004, - "step": 6360 - }, - { - "epoch": 0.65, - "learning_rate": 7.08290475210862e-05, - "loss": 1.0999, - "step": 6380 - }, - { - "epoch": 0.65, - "learning_rate": 7.041760954536104e-05, - "loss": 1.0905, - "step": 6400 - }, - { - "epoch": 0.65, - "learning_rate": 7.000617156963588e-05, - "loss": 1.0895, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 6.959473359391073e-05, - "loss": 1.1287, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 6.918329561818556e-05, - "loss": 1.1531, - "step": 6460 - }, - { - "epoch": 0.66, - "learning_rate": 6.87718576424604e-05, - "loss": 1.1333, - "step": 6480 - }, - { - "epoch": 0.66, - "learning_rate": 6.836041966673524e-05, - "loss": 1.1603, - "step": 6500 - }, - { - "epoch": 0.66, - "learning_rate": 6.794898169101008e-05, - "loss": 1.1131, - "step": 6520 - }, - { - "epoch": 0.67, - "learning_rate": 6.753754371528493e-05, - "loss": 1.1459, - "step": 6540 - }, - { - "epoch": 0.67, - "learning_rate": 6.712610573955976e-05, - "loss": 1.1554, - "step": 6560 - }, - { - "epoch": 0.67, - "learning_rate": 6.671466776383461e-05, - "loss": 1.0908, - "step": 6580 - }, - { - "epoch": 0.67, - "learning_rate": 6.630322978810944e-05, - "loss": 1.1529, - "step": 6600 - }, - { - "epoch": 0.67, - "learning_rate": 6.589179181238428e-05, - "loss": 1.1465, - "step": 6620 - }, - { - "epoch": 0.68, - "learning_rate": 6.548035383665913e-05, - "loss": 1.1097, - "step": 6640 - }, - { - "epoch": 0.68, - "learning_rate": 6.506891586093396e-05, - "loss": 1.1133, - "step": 6660 - }, - { - "epoch": 0.68, - "learning_rate": 6.465747788520881e-05, - "loss": 1.1193, - "step": 6680 - }, - { - "epoch": 0.68, - "learning_rate": 6.424603990948364e-05, - "loss": 1.0739, - "step": 6700 - }, - { - "epoch": 0.68, - "learning_rate": 6.383460193375849e-05, - "loss": 1.149, - "step": 6720 - }, - { - "epoch": 0.69, - "learning_rate": 6.342316395803333e-05, - "loss": 1.1588, - "step": 6740 - }, - { - "epoch": 0.69, - "learning_rate": 6.301172598230816e-05, - "loss": 1.1533, - "step": 6760 - }, - { - "epoch": 0.69, - "learning_rate": 6.260028800658301e-05, - "loss": 1.1436, - "step": 6780 - }, - { - "epoch": 0.69, - "learning_rate": 6.218885003085784e-05, - "loss": 1.162, - "step": 6800 - }, - { - "epoch": 0.69, - "learning_rate": 6.177741205513269e-05, - "loss": 1.1584, - "step": 6820 - }, - { - "epoch": 0.7, - "learning_rate": 6.136597407940753e-05, - "loss": 1.1081, - "step": 6840 - }, - { - "epoch": 0.7, - "learning_rate": 6.095453610368237e-05, - "loss": 1.1222, - "step": 6860 - }, - { - "epoch": 0.7, - "learning_rate": 6.054309812795721e-05, - "loss": 1.1222, - "step": 6880 - }, - { - "epoch": 0.7, - "learning_rate": 6.013166015223205e-05, - "loss": 1.0933, - "step": 6900 - }, - { - "epoch": 0.7, - "learning_rate": 5.972022217650689e-05, - "loss": 1.1053, - "step": 6920 - }, - { - "epoch": 0.71, - "learning_rate": 5.930878420078173e-05, - "loss": 1.0899, - "step": 6940 - }, - { - "epoch": 0.71, - "learning_rate": 5.889734622505657e-05, - "loss": 1.1284, - "step": 6960 - }, - { - "epoch": 0.71, - "learning_rate": 5.848590824933141e-05, - "loss": 1.1263, - "step": 6980 - }, - { - "epoch": 0.71, - "learning_rate": 5.807447027360625e-05, - "loss": 1.089, - "step": 7000 - }, - { - "epoch": 0.71, - "learning_rate": 5.766303229788109e-05, - "loss": 1.0758, - "step": 7020 - }, - { - "epoch": 0.72, - "learning_rate": 5.725159432215593e-05, - "loss": 1.1267, - "step": 7040 - }, - { - "epoch": 0.72, - "learning_rate": 5.6840156346430775e-05, - "loss": 1.1073, - "step": 7060 - }, - { - "epoch": 0.72, - "learning_rate": 5.6428718370705616e-05, - "loss": 1.1094, - "step": 7080 - }, - { - "epoch": 0.72, - "learning_rate": 5.6017280394980464e-05, - "loss": 1.1017, - "step": 7100 - }, - { - "epoch": 0.72, - "learning_rate": 5.5605842419255305e-05, - "loss": 1.1348, - "step": 7120 - }, - { - "epoch": 0.73, - "learning_rate": 5.519440444353015e-05, - "loss": 1.0678, - "step": 7140 - }, - { - "epoch": 0.73, - "learning_rate": 5.478296646780499e-05, - "loss": 1.1314, - "step": 7160 - }, - { - "epoch": 0.73, - "learning_rate": 5.437152849207983e-05, - "loss": 1.1256, - "step": 7180 - }, - { - "epoch": 0.73, - "learning_rate": 5.3960090516354664e-05, - "loss": 1.1377, - "step": 7200 - }, - { - "epoch": 0.74, - "learning_rate": 5.3548652540629505e-05, - "loss": 1.1295, - "step": 7220 - }, - { - "epoch": 0.74, - "learning_rate": 5.3137214564904347e-05, - "loss": 1.1023, - "step": 7240 - }, - { - "epoch": 0.74, - "learning_rate": 5.272577658917919e-05, - "loss": 1.1027, - "step": 7260 - }, - { - "epoch": 0.74, - "learning_rate": 5.231433861345403e-05, - "loss": 1.1238, - "step": 7280 - }, - { - "epoch": 0.74, - "learning_rate": 5.190290063772887e-05, - "loss": 1.0967, - "step": 7300 - }, - { - "epoch": 0.75, - "learning_rate": 5.149146266200371e-05, - "loss": 1.1286, - "step": 7320 - }, - { - "epoch": 0.75, - "learning_rate": 5.1080024686278546e-05, - "loss": 1.1418, - "step": 7340 - }, - { - "epoch": 0.75, - "learning_rate": 5.066858671055339e-05, - "loss": 1.1022, - "step": 7360 - }, - { - "epoch": 0.75, - "learning_rate": 5.025714873482823e-05, - "loss": 1.1195, - "step": 7380 - }, - { - "epoch": 0.75, - "learning_rate": 4.984571075910307e-05, - "loss": 1.125, - "step": 7400 - }, - { - "epoch": 0.76, - "learning_rate": 4.943427278337791e-05, - "loss": 1.0951, - "step": 7420 - }, - { - "epoch": 0.76, - "learning_rate": 4.902283480765275e-05, - "loss": 1.1049, - "step": 7440 - }, - { - "epoch": 0.76, - "learning_rate": 4.8611396831927594e-05, - "loss": 1.1609, - "step": 7460 - }, - { - "epoch": 0.76, - "learning_rate": 4.819995885620243e-05, - "loss": 1.0884, - "step": 7480 - }, - { - "epoch": 0.76, - "learning_rate": 4.778852088047727e-05, - "loss": 1.138, - "step": 7500 - }, - { - "epoch": 0.77, - "learning_rate": 4.737708290475211e-05, - "loss": 1.1537, - "step": 7520 - }, - { - "epoch": 0.77, - "learning_rate": 4.696564492902695e-05, - "loss": 1.1377, - "step": 7540 - }, - { - "epoch": 0.77, - "learning_rate": 4.6554206953301794e-05, - "loss": 1.0849, - "step": 7560 - }, - { - "epoch": 0.77, - "learning_rate": 4.6142768977576636e-05, - "loss": 1.1445, - "step": 7580 - }, - { - "epoch": 0.77, - "learning_rate": 4.573133100185148e-05, - "loss": 1.1293, - "step": 7600 - }, - { - "epoch": 0.78, - "learning_rate": 4.531989302612631e-05, - "loss": 1.1038, - "step": 7620 - }, - { - "epoch": 0.78, - "learning_rate": 4.490845505040115e-05, - "loss": 1.1, - "step": 7640 - }, - { - "epoch": 0.78, - "learning_rate": 4.4497017074675994e-05, - "loss": 1.1133, - "step": 7660 - }, - { - "epoch": 0.78, - "learning_rate": 4.4085579098950836e-05, - "loss": 1.1537, - "step": 7680 - }, - { - "epoch": 0.78, - "learning_rate": 4.367414112322568e-05, - "loss": 1.118, - "step": 7700 - }, - { - "epoch": 0.79, - "learning_rate": 4.326270314750052e-05, - "loss": 1.1545, - "step": 7720 - }, - { - "epoch": 0.79, - "learning_rate": 4.285126517177536e-05, - "loss": 1.1473, - "step": 7740 - }, - { - "epoch": 0.79, - "learning_rate": 4.2439827196050194e-05, - "loss": 1.1389, - "step": 7760 - }, - { - "epoch": 0.79, - "learning_rate": 4.2028389220325035e-05, - "loss": 1.1767, - "step": 7780 - }, - { - "epoch": 0.79, - "learning_rate": 4.161695124459988e-05, - "loss": 1.0442, - "step": 7800 - }, - { - "epoch": 0.8, - "learning_rate": 4.120551326887472e-05, - "loss": 1.1195, - "step": 7820 - }, - { - "epoch": 0.8, - "learning_rate": 4.079407529314956e-05, - "loss": 1.1136, - "step": 7840 - }, - { - "epoch": 0.8, - "learning_rate": 4.03826373174244e-05, - "loss": 1.0853, - "step": 7860 - }, - { - "epoch": 0.8, - "learning_rate": 3.997119934169924e-05, - "loss": 1.1172, - "step": 7880 - }, - { - "epoch": 0.8, - "learning_rate": 3.955976136597408e-05, - "loss": 1.1025, - "step": 7900 - }, - { - "epoch": 0.81, - "learning_rate": 3.914832339024892e-05, - "loss": 1.1266, - "step": 7920 - }, - { - "epoch": 0.81, - "learning_rate": 3.873688541452376e-05, - "loss": 1.0913, - "step": 7940 - }, - { - "epoch": 0.81, - "learning_rate": 3.83254474387986e-05, - "loss": 1.1448, - "step": 7960 - }, - { - "epoch": 0.81, - "learning_rate": 3.791400946307344e-05, - "loss": 1.1111, - "step": 7980 - }, - { - "epoch": 0.81, - "learning_rate": 3.750257148734828e-05, - "loss": 1.1456, - "step": 8000 - }, - { - "epoch": 0.82, - "learning_rate": 3.7091133511623125e-05, - "loss": 1.1447, - "step": 8020 - }, - { - "epoch": 0.82, - "learning_rate": 3.6679695535897966e-05, - "loss": 1.0768, - "step": 8040 - }, - { - "epoch": 0.82, - "learning_rate": 3.626825756017281e-05, - "loss": 1.1319, - "step": 8060 - }, - { - "epoch": 0.82, - "learning_rate": 3.585681958444765e-05, - "loss": 1.1178, - "step": 8080 - }, - { - "epoch": 0.82, - "learning_rate": 3.544538160872249e-05, - "loss": 1.1002, - "step": 8100 - }, - { - "epoch": 0.83, - "learning_rate": 3.503394363299733e-05, - "loss": 1.1129, - "step": 8120 - }, - { - "epoch": 0.83, - "learning_rate": 3.462250565727217e-05, - "loss": 1.1278, - "step": 8140 - }, - { - "epoch": 0.83, - "learning_rate": 3.421106768154701e-05, - "loss": 1.13, - "step": 8160 - }, - { - "epoch": 0.83, - "learning_rate": 3.379962970582185e-05, - "loss": 1.1349, - "step": 8180 - }, - { - "epoch": 0.83, - "learning_rate": 3.338819173009669e-05, - "loss": 1.0957, - "step": 8200 - }, - { - "epoch": 0.84, - "learning_rate": 3.297675375437153e-05, - "loss": 1.1363, - "step": 8220 - }, - { - "epoch": 0.84, - "learning_rate": 3.256531577864637e-05, - "loss": 1.0767, - "step": 8240 - }, - { - "epoch": 0.84, - "learning_rate": 3.2153877802921214e-05, - "loss": 1.1356, - "step": 8260 - }, - { - "epoch": 0.84, - "learning_rate": 3.1742439827196055e-05, - "loss": 1.0676, - "step": 8280 - }, - { - "epoch": 0.85, - "learning_rate": 3.133100185147089e-05, - "loss": 1.1433, - "step": 8300 - }, - { - "epoch": 0.85, - "learning_rate": 3.091956387574573e-05, - "loss": 1.1239, - "step": 8320 - }, - { - "epoch": 0.85, - "learning_rate": 3.0508125900020572e-05, - "loss": 1.104, - "step": 8340 - }, - { - "epoch": 0.85, - "learning_rate": 3.0096687924295414e-05, - "loss": 1.1115, - "step": 8360 - }, - { - "epoch": 0.85, - "learning_rate": 2.9685249948570255e-05, - "loss": 1.0992, - "step": 8380 - }, - { - "epoch": 0.86, - "learning_rate": 2.9273811972845093e-05, - "loss": 1.087, - "step": 8400 - }, - { - "epoch": 0.86, - "learning_rate": 2.8862373997119934e-05, - "loss": 1.0881, - "step": 8420 - }, - { - "epoch": 0.86, - "learning_rate": 2.8450936021394776e-05, - "loss": 1.1266, - "step": 8440 - }, - { - "epoch": 0.86, - "learning_rate": 2.8060069944455876e-05, - "loss": 1.1355, - "step": 8460 - }, - { - "epoch": 0.86, - "learning_rate": 2.7648631968730714e-05, - "loss": 1.1368, - "step": 8480 - }, - { - "epoch": 0.87, - "learning_rate": 2.7237193993005555e-05, - "loss": 1.0835, - "step": 8500 - }, - { - "epoch": 0.87, - "learning_rate": 2.6825756017280397e-05, - "loss": 1.1137, - "step": 8520 - }, - { - "epoch": 0.87, - "learning_rate": 2.6414318041555235e-05, - "loss": 1.0783, - "step": 8540 - }, - { - "epoch": 0.87, - "learning_rate": 2.6002880065830076e-05, - "loss": 1.1114, - "step": 8560 - }, - { - "epoch": 0.87, - "learning_rate": 2.5591442090104917e-05, - "loss": 1.1286, - "step": 8580 - }, - { - "epoch": 0.88, - "learning_rate": 2.518000411437976e-05, - "loss": 1.1531, - "step": 8600 - }, - { - "epoch": 0.88, - "learning_rate": 2.47685661386546e-05, - "loss": 1.0784, - "step": 8620 - }, - { - "epoch": 0.88, - "learning_rate": 2.435712816292944e-05, - "loss": 1.1358, - "step": 8640 - }, - { - "epoch": 0.88, - "learning_rate": 2.3945690187204283e-05, - "loss": 1.1323, - "step": 8660 - }, - { - "epoch": 0.88, - "learning_rate": 2.353425221147912e-05, - "loss": 1.154, - "step": 8680 - }, - { - "epoch": 0.89, - "learning_rate": 2.3122814235753962e-05, - "loss": 1.103, - "step": 8700 - }, - { - "epoch": 0.89, - "learning_rate": 2.2711376260028803e-05, - "loss": 1.1319, - "step": 8720 - }, - { - "epoch": 0.89, - "learning_rate": 2.229993828430364e-05, - "loss": 1.1118, - "step": 8740 - }, - { - "epoch": 0.89, - "learning_rate": 2.1888500308578482e-05, - "loss": 1.0364, - "step": 8760 - }, - { - "epoch": 0.89, - "learning_rate": 2.1477062332853324e-05, - "loss": 1.155, - "step": 8780 - }, - { - "epoch": 0.9, - "learning_rate": 2.1065624357128165e-05, - "loss": 1.1201, - "step": 8800 - }, - { - "epoch": 0.9, - "learning_rate": 2.0654186381403003e-05, - "loss": 1.1045, - "step": 8820 - }, - { - "epoch": 0.9, - "learning_rate": 2.0242748405677844e-05, - "loss": 1.0828, - "step": 8840 - }, - { - "epoch": 0.9, - "learning_rate": 1.9831310429952686e-05, - "loss": 1.0452, - "step": 8860 - }, - { - "epoch": 0.9, - "learning_rate": 1.9419872454227524e-05, - "loss": 1.0463, - "step": 8880 - }, - { - "epoch": 0.91, - "learning_rate": 1.9008434478502365e-05, - "loss": 1.1709, - "step": 8900 - }, - { - "epoch": 0.91, - "learning_rate": 1.8596996502777206e-05, - "loss": 1.0854, - "step": 8920 - }, - { - "epoch": 0.91, - "learning_rate": 1.8185558527052048e-05, - "loss": 1.1362, - "step": 8940 - }, - { - "epoch": 0.91, - "learning_rate": 1.777412055132689e-05, - "loss": 1.1538, - "step": 8960 - }, - { - "epoch": 0.91, - "learning_rate": 1.736268257560173e-05, - "loss": 1.1125, - "step": 8980 - }, - { - "epoch": 0.92, - "learning_rate": 1.695124459987657e-05, - "loss": 1.1158, - "step": 9000 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.3047142652596224e+17, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/README.md b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/README.md deleted file mode 100644 index fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/README.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -library_name: peft ---- -## Training procedure - - -The following `bitsandbytes` quantization config was used during training: -- quant_method: QuantizationMethod.BITS_AND_BYTES -- load_in_8bit: False -- load_in_4bit: True -- llm_int8_threshold: 6.0 -- llm_int8_skip_modules: None -- llm_int8_enable_fp32_cpu_offload: False -- llm_int8_has_fp16_weight: False -- bnb_4bit_quant_type: nf4 -- bnb_4bit_use_double_quant: True -- bnb_4bit_compute_dtype: bfloat16 -### Framework versions - - -- PEFT 0.4.0.dev0 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/adapter_config.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/adapter_config.json deleted file mode 100644 index 9e6628421df1d22ee7942d475c44f6682641941d..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/adapter_config.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "base_model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf", - "bias": "none", - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layers_pattern": null, - "layers_to_transform": null, - "lora_alpha": 32, - "lora_dropout": 0.05, - "modules_to_save": null, - "peft_type": "LORA", - "r": 8, - "revision": null, - "target_modules": [ - "q_proj", - "k_proj", - "v_proj" - ], - "task_type": "CAUSAL_LM" -} \ No newline at end of file diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/adapter_model.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/adapter_model.bin deleted file mode 100644 index bc06e40fae415e8550b784bd8160b9a2d5f6f8de..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/adapter_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:13b55e95bf00fe977152e7afae49e86337b3ad5a9393b029264e85927daad122 -size 39407821 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/optimizer.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/optimizer.pt deleted file mode 100644 index c89d15444dd3c80019ef3cb472b32d570815a61b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2af63cad2780eed2819e9522944436e4f24656a540b537a4172a7ad2ff6312e -size 78844165 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/rng_state.pth b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/rng_state.pth deleted file mode 100644 index f16785215a6bf961a7f59fc9fde8b61e6d9b68b5..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68443aa7344925fc862fd23f2eefd699b0093c6c653bf835b86a60d9c479ec31 -size 14575 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/scheduler.pt b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/scheduler.pt deleted file mode 100644 index 8b5595c84503cba39d7b275bff9a23a93296f611..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c15e058da0b73cddea7ff3a17bb4f6b441a02341ae2741cf889640287ecd3a06 -size 627 diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/trainer_state.json b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/trainer_state.json deleted file mode 100644 index 5fd6623156127b767454f6d4e2b698df874abe69..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/trainer_state.json +++ /dev/null @@ -1,2869 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 0.9672041437061736, - "eval_steps": 500, - "global_step": 9500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 4e-05, - "loss": 3.5766, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 8e-05, - "loss": 2.9038, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 0.00012, - "loss": 1.9072, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 0.00016, - "loss": 1.7359, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 0.0002, - "loss": 1.5349, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019958856202427486, - "loss": 1.4908, - "step": 120 - }, - { - "epoch": 0.01, - "learning_rate": 0.00019917712404854968, - "loss": 1.4931, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019876568607282453, - "loss": 1.4201, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019835424809709937, - "loss": 1.4541, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019794281012137422, - "loss": 1.4102, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 0.00019753137214564904, - "loss": 1.3861, - "step": 220 - }, - { - "epoch": 0.02, - "learning_rate": 0.0001971199341699239, - "loss": 1.3939, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019670849619419874, - "loss": 1.3883, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019629705821847356, - "loss": 1.3257, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001958856202427484, - "loss": 1.386, - "step": 300 - }, - { - "epoch": 0.03, - "learning_rate": 0.00019547418226702326, - "loss": 1.3746, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 0.0001950627442912981, - "loss": 1.3266, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019465130631557293, - "loss": 1.3591, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019423986833984777, - "loss": 1.3464, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019382843036412262, - "loss": 1.3573, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 0.00019341699238839744, - "loss": 1.3455, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 0.0001930055544126723, - "loss": 1.3223, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019259411643694714, - "loss": 1.3293, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 0.000192182678461222, - "loss": 1.3033, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001917712404854968, - "loss": 1.3323, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 0.00019135980250977166, - "loss": 1.2838, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 0.0001909483645340465, - "loss": 1.332, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019053692655832133, - "loss": 1.3118, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 0.00019012548858259617, - "loss": 1.3237, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018971405060687102, - "loss": 1.2541, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00018930261263114587, - "loss": 1.28, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001888911746554207, - "loss": 1.231, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018847973667969554, - "loss": 1.2676, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001880682987039704, - "loss": 1.2909, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 0.0001876568607282452, - "loss": 1.2499, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 0.00018724542275252006, - "loss": 1.2679, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 0.0001868339847767949, - "loss": 1.2674, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018642254680106975, - "loss": 1.2736, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018601110882534457, - "loss": 1.2843, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018559967084961942, - "loss": 1.281, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00018518823287389427, - "loss": 1.3699, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001847767948981691, - "loss": 1.2705, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018436535692244394, - "loss": 1.2279, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 0.0001839539189467188, - "loss": 1.2779, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018354248097099364, - "loss": 1.2086, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 0.00018313104299526846, - "loss": 1.2999, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 0.0001827196050195433, - "loss": 1.2503, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018230816704381815, - "loss": 1.2466, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018189672906809297, - "loss": 1.2113, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018148529109236782, - "loss": 1.2356, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 0.00018107385311664267, - "loss": 1.2631, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018066241514091752, - "loss": 1.2443, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 0.00018025097716519234, - "loss": 1.2406, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 0.0001798395391894672, - "loss": 1.2308, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017942810121374204, - "loss": 1.2649, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 0.00017901666323801686, - "loss": 1.2263, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001786052252622917, - "loss": 1.2869, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017819378728656655, - "loss": 1.2255, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 0.0001777823493108414, - "loss": 1.2596, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017737091133511622, - "loss": 1.2748, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 0.00017695947335939107, - "loss": 1.2587, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017654803538366592, - "loss": 1.2651, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017613659740794074, - "loss": 1.2659, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 0.0001757251594322156, - "loss": 1.2077, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017531372145649044, - "loss": 1.25, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 0.00017490228348076528, - "loss": 1.2149, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001744908455050401, - "loss": 1.2417, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017407940752931498, - "loss": 1.1939, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017366796955358983, - "loss": 1.2688, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 0.00017325653157786465, - "loss": 1.2287, - "step": 1400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0001728450936021395, - "loss": 1.2931, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017243365562641435, - "loss": 1.2695, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017202221765068917, - "loss": 1.2228, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017161077967496401, - "loss": 1.2419, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 0.00017119934169923886, - "loss": 1.2483, - "step": 1500 - }, - { - "epoch": 0.15, - "learning_rate": 0.0001707879037235137, - "loss": 1.2144, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 0.00017037646574778853, - "loss": 1.2148, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016996502777206338, - "loss": 1.2196, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016955358979633823, - "loss": 1.2581, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 0.00016914215182061305, - "loss": 1.2414, - "step": 1600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0001687307138448879, - "loss": 1.2357, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016831927586916274, - "loss": 1.22, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 0.0001679078378934376, - "loss": 1.2234, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016749639991771241, - "loss": 1.2291, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 0.00016708496194198726, - "loss": 1.2344, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 0.0001666735239662621, - "loss": 1.1987, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016626208599053693, - "loss": 1.2232, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016585064801481178, - "loss": 1.2402, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016543921003908663, - "loss": 1.1475, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 0.00016502777206336148, - "loss": 1.272, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0001646163340876363, - "loss": 1.2369, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016420489611191114, - "loss": 1.212, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 0.000163793458136186, - "loss": 1.2191, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016338202016046081, - "loss": 1.1953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 0.00016297058218473566, - "loss": 1.2126, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 0.0001625591442090105, - "loss": 1.2396, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016214770623328536, - "loss": 1.1468, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016173626825756018, - "loss": 1.1847, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016132483028183503, - "loss": 1.2214, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 0.00016091339230610988, - "loss": 1.2138, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001605019543303847, - "loss": 1.2158, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 0.00016009051635465954, - "loss": 1.2065, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 0.0001596790783789344, - "loss": 1.2048, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015926764040320924, - "loss": 1.1904, - "step": 2080 - }, - { - "epoch": 0.21, - "learning_rate": 0.00015885620242748406, - "loss": 1.1872, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 0.0001584447644517589, - "loss": 1.2087, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015803332647603376, - "loss": 1.1909, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015762188850030858, - "loss": 1.2609, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015721045052458343, - "loss": 1.2083, - "step": 2180 - }, - { - "epoch": 0.22, - "learning_rate": 0.00015679901254885827, - "loss": 1.2205, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015638757457313312, - "loss": 1.1966, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015597613659740794, - "loss": 1.235, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 0.0001555646986216828, - "loss": 1.2488, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015515326064595764, - "loss": 1.2229, - "step": 2280 - }, - { - "epoch": 0.23, - "learning_rate": 0.00015474182267023246, - "loss": 1.1874, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 0.0001543303846945073, - "loss": 1.1724, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015391894671878216, - "loss": 1.1786, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 0.000153507508743057, - "loss": 1.1913, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015309607076733183, - "loss": 1.2138, - "step": 2380 - }, - { - "epoch": 0.24, - "learning_rate": 0.00015268463279160667, - "loss": 1.1954, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015227319481588152, - "loss": 1.1926, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015186175684015634, - "loss": 1.1893, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001514503188644312, - "loss": 1.2152, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 0.00015103888088870604, - "loss": 1.199, - "step": 2480 - }, - { - "epoch": 0.25, - "learning_rate": 0.0001506274429129809, - "loss": 1.1966, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001502160049372557, - "loss": 1.2042, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014980456696153056, - "loss": 1.1901, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 0.0001493931289858054, - "loss": 1.2023, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014898169101008023, - "loss": 1.1597, - "step": 2580 - }, - { - "epoch": 0.26, - "learning_rate": 0.00014857025303435507, - "loss": 1.1828, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014815881505862992, - "loss": 1.1907, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014774737708290477, - "loss": 1.1477, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001473359391071796, - "loss": 1.2146, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 0.00014692450113145444, - "loss": 1.2327, - "step": 2680 - }, - { - "epoch": 0.27, - "learning_rate": 0.0001465130631557293, - "loss": 1.1747, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001461016251800041, - "loss": 1.1745, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014569018720427896, - "loss": 1.1914, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 0.0001452787492285538, - "loss": 1.1781, - "step": 2760 - }, - { - "epoch": 0.28, - "learning_rate": 0.00014486731125282865, - "loss": 1.1819, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014445587327710347, - "loss": 1.1894, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014404443530137832, - "loss": 1.2198, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014363299732565317, - "loss": 1.1464, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 0.000143221559349928, - "loss": 1.2039, - "step": 2860 - }, - { - "epoch": 0.29, - "learning_rate": 0.00014281012137420284, - "loss": 1.1758, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001423986833984777, - "loss": 1.1958, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014198724542275254, - "loss": 1.2163, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014157580744702736, - "loss": 1.1724, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 0.0001411643694713022, - "loss": 1.1339, - "step": 2960 - }, - { - "epoch": 0.3, - "learning_rate": 0.00014075293149557705, - "loss": 1.1752, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 0.00014034149351985187, - "loss": 1.2221, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013993005554412672, - "loss": 1.1954, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013951861756840157, - "loss": 1.1904, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013910717959267642, - "loss": 1.1601, - "step": 3060 - }, - { - "epoch": 0.31, - "learning_rate": 0.00013869574161695124, - "loss": 1.148, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001382843036412261, - "loss": 1.2106, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013787286566550094, - "loss": 1.1367, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013746142768977576, - "loss": 1.1951, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 0.0001370499897140506, - "loss": 1.1706, - "step": 3160 - }, - { - "epoch": 0.32, - "learning_rate": 0.00013663855173832545, - "loss": 1.1872, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 0.0001362271137626003, - "loss": 1.1498, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013581567578687512, - "loss": 1.2111, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013540423781114997, - "loss": 1.1774, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013499279983542482, - "loss": 1.1927, - "step": 3260 - }, - { - "epoch": 0.33, - "learning_rate": 0.00013458136185969964, - "loss": 1.1903, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 0.0001341699238839745, - "loss": 1.1976, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013375848590824934, - "loss": 1.1687, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013334704793252418, - "loss": 1.1598, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 0.000132935609956799, - "loss": 1.1414, - "step": 3360 - }, - { - "epoch": 0.34, - "learning_rate": 0.00013252417198107385, - "loss": 1.169, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 0.0001321127340053487, - "loss": 1.1676, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013170129602962352, - "loss": 1.2179, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013128985805389837, - "loss": 1.2435, - "step": 3440 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013087842007817322, - "loss": 1.1457, - "step": 3460 - }, - { - "epoch": 0.35, - "learning_rate": 0.00013046698210244807, - "loss": 1.1818, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001300555441267229, - "loss": 1.1666, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012964410615099774, - "loss": 1.2004, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012923266817527258, - "loss": 1.1809, - "step": 3540 - }, - { - "epoch": 0.36, - "learning_rate": 0.0001288212301995474, - "loss": 1.1805, - "step": 3560 - }, - { - "epoch": 0.36, - "learning_rate": 0.00012840979222382225, - "loss": 1.1617, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001279983542480971, - "loss": 1.1938, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012758691627237195, - "loss": 1.1675, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012717547829664677, - "loss": 1.193, - "step": 3640 - }, - { - "epoch": 0.37, - "learning_rate": 0.00012678461221970788, - "loss": 1.1736, - "step": 3660 - }, - { - "epoch": 0.37, - "learning_rate": 0.0001263731742439827, - "loss": 1.1817, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012596173626825755, - "loss": 1.2049, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 0.0001255502982925324, - "loss": 1.1507, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012513886031680725, - "loss": 1.1715, - "step": 3740 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012472742234108207, - "loss": 1.1357, - "step": 3760 - }, - { - "epoch": 0.38, - "learning_rate": 0.00012431598436535692, - "loss": 1.1494, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012390454638963177, - "loss": 1.1395, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0001234931084139066, - "loss": 1.1631, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012308167043818144, - "loss": 1.1772, - "step": 3840 - }, - { - "epoch": 0.39, - "learning_rate": 0.00012267023246245628, - "loss": 1.1787, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012225879448673113, - "loss": 1.183, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012184735651100597, - "loss": 1.1284, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 0.0001214359185352808, - "loss": 1.1741, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012102448055955565, - "loss": 1.2094, - "step": 3940 - }, - { - "epoch": 0.4, - "learning_rate": 0.00012061304258383048, - "loss": 1.1491, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 0.00012020160460810534, - "loss": 1.1814, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011979016663238018, - "loss": 1.1348, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011937872865665503, - "loss": 1.1686, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011896729068092986, - "loss": 1.1556, - "step": 4040 - }, - { - "epoch": 0.41, - "learning_rate": 0.00011855585270520471, - "loss": 1.1986, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011814441472947954, - "loss": 1.1863, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011773297675375439, - "loss": 1.1557, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011732153877802923, - "loss": 1.182, - "step": 4120 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011691010080230406, - "loss": 1.2037, - "step": 4140 - }, - { - "epoch": 0.42, - "learning_rate": 0.00011649866282657891, - "loss": 1.1697, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011608722485085374, - "loss": 1.1584, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011567578687512859, - "loss": 1.1722, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011526434889940343, - "loss": 1.1419, - "step": 4220 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011485291092367828, - "loss": 1.1501, - "step": 4240 - }, - { - "epoch": 0.43, - "learning_rate": 0.00011444147294795311, - "loss": 1.1392, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011403003497222794, - "loss": 1.1603, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011361859699650279, - "loss": 1.1347, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011320715902077763, - "loss": 1.1792, - "step": 4320 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011279572104505248, - "loss": 1.2084, - "step": 4340 - }, - { - "epoch": 0.44, - "learning_rate": 0.00011238428306932731, - "loss": 1.1591, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011197284509360216, - "loss": 1.1681, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011156140711787699, - "loss": 1.1726, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011114996914215183, - "loss": 1.1315, - "step": 4420 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011073853116642668, - "loss": 1.1795, - "step": 4440 - }, - { - "epoch": 0.45, - "learning_rate": 0.00011032709319070151, - "loss": 1.1803, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010991565521497636, - "loss": 1.1413, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010950421723925119, - "loss": 1.1431, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010909277926352604, - "loss": 1.1048, - "step": 4520 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010868134128780088, - "loss": 1.1417, - "step": 4540 - }, - { - "epoch": 0.46, - "learning_rate": 0.00010826990331207571, - "loss": 1.1298, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010785846533635056, - "loss": 1.1241, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010744702736062539, - "loss": 1.1266, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010703558938490024, - "loss": 1.1663, - "step": 4620 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010662415140917507, - "loss": 1.1134, - "step": 4640 - }, - { - "epoch": 0.47, - "learning_rate": 0.00010621271343344992, - "loss": 1.1725, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010580127545772476, - "loss": 1.1919, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010538983748199959, - "loss": 1.1685, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010497839950627444, - "loss": 1.1561, - "step": 4720 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010456696153054927, - "loss": 1.0836, - "step": 4740 - }, - { - "epoch": 0.48, - "learning_rate": 0.00010415552355482412, - "loss": 1.1477, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010374408557909896, - "loss": 1.1177, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 0.0001033326476033738, - "loss": 1.1836, - "step": 4800 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010292120962764864, - "loss": 1.1369, - "step": 4820 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010250977165192347, - "loss": 1.1964, - "step": 4840 - }, - { - "epoch": 0.49, - "learning_rate": 0.00010209833367619832, - "loss": 1.1515, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010168689570047316, - "loss": 1.1509, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 0.000101275457724748, - "loss": 1.1646, - "step": 4900 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010086401974902284, - "loss": 1.1593, - "step": 4920 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010045258177329769, - "loss": 1.1583, - "step": 4940 - }, - { - "epoch": 0.5, - "learning_rate": 0.00010004114379757252, - "loss": 1.1392, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 9.962970582184736e-05, - "loss": 1.1157, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 9.92182678461222e-05, - "loss": 1.1485, - "step": 5000 - }, - { - "epoch": 0.51, - "learning_rate": 9.880682987039704e-05, - "loss": 1.1907, - "step": 5020 - }, - { - "epoch": 0.51, - "learning_rate": 9.839539189467189e-05, - "loss": 1.1597, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 9.798395391894672e-05, - "loss": 1.1622, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 9.757251594322157e-05, - "loss": 1.1307, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 9.71610779674964e-05, - "loss": 1.1226, - "step": 5100 - }, - { - "epoch": 0.52, - "learning_rate": 9.674963999177124e-05, - "loss": 1.1567, - "step": 5120 - }, - { - "epoch": 0.52, - "learning_rate": 9.633820201604609e-05, - "loss": 1.1353, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 9.592676404032092e-05, - "loss": 1.1414, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 9.551532606459577e-05, - "loss": 1.1383, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 9.51038880888706e-05, - "loss": 1.1997, - "step": 5200 - }, - { - "epoch": 0.53, - "learning_rate": 9.469245011314545e-05, - "loss": 1.187, - "step": 5220 - }, - { - "epoch": 0.53, - "learning_rate": 9.428101213742029e-05, - "loss": 1.1302, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 9.386957416169512e-05, - "loss": 1.1718, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 9.345813618596997e-05, - "loss": 1.1594, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 9.30466982102448e-05, - "loss": 1.163, - "step": 5300 - }, - { - "epoch": 0.54, - "learning_rate": 9.263526023451965e-05, - "loss": 1.1381, - "step": 5320 - }, - { - "epoch": 0.54, - "learning_rate": 9.222382225879449e-05, - "loss": 1.1134, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 9.181238428306934e-05, - "loss": 1.1548, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 9.140094630734417e-05, - "loss": 1.1535, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 9.0989508331619e-05, - "loss": 1.164, - "step": 5400 - }, - { - "epoch": 0.55, - "learning_rate": 9.057807035589385e-05, - "loss": 1.1448, - "step": 5420 - }, - { - "epoch": 0.55, - "learning_rate": 9.016663238016869e-05, - "loss": 1.1273, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 8.975519440444354e-05, - "loss": 1.1406, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 8.934375642871837e-05, - "loss": 1.0838, - "step": 5480 - }, - { - "epoch": 0.56, - "learning_rate": 8.893231845299322e-05, - "loss": 1.1039, - "step": 5500 - }, - { - "epoch": 0.56, - "learning_rate": 8.852088047726805e-05, - "loss": 1.0586, - "step": 5520 - }, - { - "epoch": 0.56, - "learning_rate": 8.810944250154289e-05, - "loss": 1.1216, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 8.769800452581774e-05, - "loss": 1.1089, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 8.728656655009257e-05, - "loss": 1.1512, - "step": 5580 - }, - { - "epoch": 0.57, - "learning_rate": 8.687512857436742e-05, - "loss": 1.1494, - "step": 5600 - }, - { - "epoch": 0.57, - "learning_rate": 8.646369059864225e-05, - "loss": 1.1401, - "step": 5620 - }, - { - "epoch": 0.57, - "learning_rate": 8.60522526229171e-05, - "loss": 1.1531, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 8.564081464719194e-05, - "loss": 1.1129, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 8.522937667146677e-05, - "loss": 1.1592, - "step": 5680 - }, - { - "epoch": 0.58, - "learning_rate": 8.481793869574162e-05, - "loss": 1.1192, - "step": 5700 - }, - { - "epoch": 0.58, - "learning_rate": 8.440650072001645e-05, - "loss": 1.1473, - "step": 5720 - }, - { - "epoch": 0.58, - "learning_rate": 8.39950627442913e-05, - "loss": 1.1218, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 8.358362476856614e-05, - "loss": 1.1342, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 8.317218679284098e-05, - "loss": 1.1129, - "step": 5780 - }, - { - "epoch": 0.59, - "learning_rate": 8.276074881711582e-05, - "loss": 1.1054, - "step": 5800 - }, - { - "epoch": 0.59, - "learning_rate": 8.234931084139065e-05, - "loss": 1.1019, - "step": 5820 - }, - { - "epoch": 0.59, - "learning_rate": 8.193787286566551e-05, - "loss": 1.1857, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 8.152643488994035e-05, - "loss": 1.165, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 8.11149969142152e-05, - "loss": 1.1514, - "step": 5880 - }, - { - "epoch": 0.6, - "learning_rate": 8.070355893849003e-05, - "loss": 1.1565, - "step": 5900 - }, - { - "epoch": 0.6, - "learning_rate": 8.029212096276487e-05, - "loss": 1.1252, - "step": 5920 - }, - { - "epoch": 0.6, - "learning_rate": 7.988068298703971e-05, - "loss": 1.129, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 7.946924501131455e-05, - "loss": 1.1471, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 7.90578070355894e-05, - "loss": 1.1697, - "step": 5980 - }, - { - "epoch": 0.61, - "learning_rate": 7.864636905986423e-05, - "loss": 1.0938, - "step": 6000 - }, - { - "epoch": 0.61, - "learning_rate": 7.823493108413908e-05, - "loss": 1.1911, - "step": 6020 - }, - { - "epoch": 0.61, - "learning_rate": 7.782349310841391e-05, - "loss": 1.0867, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 7.741205513268875e-05, - "loss": 1.1008, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 7.70006171569636e-05, - "loss": 1.1364, - "step": 6080 - }, - { - "epoch": 0.62, - "learning_rate": 7.658917918123843e-05, - "loss": 1.1343, - "step": 6100 - }, - { - "epoch": 0.62, - "learning_rate": 7.617774120551328e-05, - "loss": 1.1324, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 7.576630322978811e-05, - "loss": 1.1251, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 7.535486525406296e-05, - "loss": 1.1371, - "step": 6160 - }, - { - "epoch": 0.63, - "learning_rate": 7.49434272783378e-05, - "loss": 1.0772, - "step": 6180 - }, - { - "epoch": 0.63, - "learning_rate": 7.453198930261263e-05, - "loss": 1.147, - "step": 6200 - }, - { - "epoch": 0.63, - "learning_rate": 7.412055132688748e-05, - "loss": 1.1028, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 7.370911335116231e-05, - "loss": 1.1747, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 7.329767537543716e-05, - "loss": 1.1088, - "step": 6260 - }, - { - "epoch": 0.64, - "learning_rate": 7.2886237399712e-05, - "loss": 1.1395, - "step": 6280 - }, - { - "epoch": 0.64, - "learning_rate": 7.247479942398684e-05, - "loss": 1.1626, - "step": 6300 - }, - { - "epoch": 0.64, - "learning_rate": 7.206336144826168e-05, - "loss": 1.1437, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 7.165192347253651e-05, - "loss": 1.1134, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 7.124048549681136e-05, - "loss": 1.2004, - "step": 6360 - }, - { - "epoch": 0.65, - "learning_rate": 7.08290475210862e-05, - "loss": 1.0999, - "step": 6380 - }, - { - "epoch": 0.65, - "learning_rate": 7.041760954536104e-05, - "loss": 1.0905, - "step": 6400 - }, - { - "epoch": 0.65, - "learning_rate": 7.000617156963588e-05, - "loss": 1.0895, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 6.959473359391073e-05, - "loss": 1.1287, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 6.918329561818556e-05, - "loss": 1.1531, - "step": 6460 - }, - { - "epoch": 0.66, - "learning_rate": 6.87718576424604e-05, - "loss": 1.1333, - "step": 6480 - }, - { - "epoch": 0.66, - "learning_rate": 6.836041966673524e-05, - "loss": 1.1603, - "step": 6500 - }, - { - "epoch": 0.66, - "learning_rate": 6.794898169101008e-05, - "loss": 1.1131, - "step": 6520 - }, - { - "epoch": 0.67, - "learning_rate": 6.753754371528493e-05, - "loss": 1.1459, - "step": 6540 - }, - { - "epoch": 0.67, - "learning_rate": 6.712610573955976e-05, - "loss": 1.1554, - "step": 6560 - }, - { - "epoch": 0.67, - "learning_rate": 6.671466776383461e-05, - "loss": 1.0908, - "step": 6580 - }, - { - "epoch": 0.67, - "learning_rate": 6.630322978810944e-05, - "loss": 1.1529, - "step": 6600 - }, - { - "epoch": 0.67, - "learning_rate": 6.589179181238428e-05, - "loss": 1.1465, - "step": 6620 - }, - { - "epoch": 0.68, - "learning_rate": 6.548035383665913e-05, - "loss": 1.1097, - "step": 6640 - }, - { - "epoch": 0.68, - "learning_rate": 6.506891586093396e-05, - "loss": 1.1133, - "step": 6660 - }, - { - "epoch": 0.68, - "learning_rate": 6.465747788520881e-05, - "loss": 1.1193, - "step": 6680 - }, - { - "epoch": 0.68, - "learning_rate": 6.424603990948364e-05, - "loss": 1.0739, - "step": 6700 - }, - { - "epoch": 0.68, - "learning_rate": 6.383460193375849e-05, - "loss": 1.149, - "step": 6720 - }, - { - "epoch": 0.69, - "learning_rate": 6.342316395803333e-05, - "loss": 1.1588, - "step": 6740 - }, - { - "epoch": 0.69, - "learning_rate": 6.301172598230816e-05, - "loss": 1.1533, - "step": 6760 - }, - { - "epoch": 0.69, - "learning_rate": 6.260028800658301e-05, - "loss": 1.1436, - "step": 6780 - }, - { - "epoch": 0.69, - "learning_rate": 6.218885003085784e-05, - "loss": 1.162, - "step": 6800 - }, - { - "epoch": 0.69, - "learning_rate": 6.177741205513269e-05, - "loss": 1.1584, - "step": 6820 - }, - { - "epoch": 0.7, - "learning_rate": 6.136597407940753e-05, - "loss": 1.1081, - "step": 6840 - }, - { - "epoch": 0.7, - "learning_rate": 6.095453610368237e-05, - "loss": 1.1222, - "step": 6860 - }, - { - "epoch": 0.7, - "learning_rate": 6.054309812795721e-05, - "loss": 1.1222, - "step": 6880 - }, - { - "epoch": 0.7, - "learning_rate": 6.013166015223205e-05, - "loss": 1.0933, - "step": 6900 - }, - { - "epoch": 0.7, - "learning_rate": 5.972022217650689e-05, - "loss": 1.1053, - "step": 6920 - }, - { - "epoch": 0.71, - "learning_rate": 5.930878420078173e-05, - "loss": 1.0899, - "step": 6940 - }, - { - "epoch": 0.71, - "learning_rate": 5.889734622505657e-05, - "loss": 1.1284, - "step": 6960 - }, - { - "epoch": 0.71, - "learning_rate": 5.848590824933141e-05, - "loss": 1.1263, - "step": 6980 - }, - { - "epoch": 0.71, - "learning_rate": 5.807447027360625e-05, - "loss": 1.089, - "step": 7000 - }, - { - "epoch": 0.71, - "learning_rate": 5.766303229788109e-05, - "loss": 1.0758, - "step": 7020 - }, - { - "epoch": 0.72, - "learning_rate": 5.725159432215593e-05, - "loss": 1.1267, - "step": 7040 - }, - { - "epoch": 0.72, - "learning_rate": 5.6840156346430775e-05, - "loss": 1.1073, - "step": 7060 - }, - { - "epoch": 0.72, - "learning_rate": 5.6428718370705616e-05, - "loss": 1.1094, - "step": 7080 - }, - { - "epoch": 0.72, - "learning_rate": 5.6017280394980464e-05, - "loss": 1.1017, - "step": 7100 - }, - { - "epoch": 0.72, - "learning_rate": 5.5605842419255305e-05, - "loss": 1.1348, - "step": 7120 - }, - { - "epoch": 0.73, - "learning_rate": 5.519440444353015e-05, - "loss": 1.0678, - "step": 7140 - }, - { - "epoch": 0.73, - "learning_rate": 5.478296646780499e-05, - "loss": 1.1314, - "step": 7160 - }, - { - "epoch": 0.73, - "learning_rate": 5.437152849207983e-05, - "loss": 1.1256, - "step": 7180 - }, - { - "epoch": 0.73, - "learning_rate": 5.3960090516354664e-05, - "loss": 1.1377, - "step": 7200 - }, - { - "epoch": 0.74, - "learning_rate": 5.3548652540629505e-05, - "loss": 1.1295, - "step": 7220 - }, - { - "epoch": 0.74, - "learning_rate": 5.3137214564904347e-05, - "loss": 1.1023, - "step": 7240 - }, - { - "epoch": 0.74, - "learning_rate": 5.272577658917919e-05, - "loss": 1.1027, - "step": 7260 - }, - { - "epoch": 0.74, - "learning_rate": 5.231433861345403e-05, - "loss": 1.1238, - "step": 7280 - }, - { - "epoch": 0.74, - "learning_rate": 5.190290063772887e-05, - "loss": 1.0967, - "step": 7300 - }, - { - "epoch": 0.75, - "learning_rate": 5.149146266200371e-05, - "loss": 1.1286, - "step": 7320 - }, - { - "epoch": 0.75, - "learning_rate": 5.1080024686278546e-05, - "loss": 1.1418, - "step": 7340 - }, - { - "epoch": 0.75, - "learning_rate": 5.066858671055339e-05, - "loss": 1.1022, - "step": 7360 - }, - { - "epoch": 0.75, - "learning_rate": 5.025714873482823e-05, - "loss": 1.1195, - "step": 7380 - }, - { - "epoch": 0.75, - "learning_rate": 4.984571075910307e-05, - "loss": 1.125, - "step": 7400 - }, - { - "epoch": 0.76, - "learning_rate": 4.943427278337791e-05, - "loss": 1.0951, - "step": 7420 - }, - { - "epoch": 0.76, - "learning_rate": 4.902283480765275e-05, - "loss": 1.1049, - "step": 7440 - }, - { - "epoch": 0.76, - "learning_rate": 4.8611396831927594e-05, - "loss": 1.1609, - "step": 7460 - }, - { - "epoch": 0.76, - "learning_rate": 4.819995885620243e-05, - "loss": 1.0884, - "step": 7480 - }, - { - "epoch": 0.76, - "learning_rate": 4.778852088047727e-05, - "loss": 1.138, - "step": 7500 - }, - { - "epoch": 0.77, - "learning_rate": 4.737708290475211e-05, - "loss": 1.1537, - "step": 7520 - }, - { - "epoch": 0.77, - "learning_rate": 4.696564492902695e-05, - "loss": 1.1377, - "step": 7540 - }, - { - "epoch": 0.77, - "learning_rate": 4.6554206953301794e-05, - "loss": 1.0849, - "step": 7560 - }, - { - "epoch": 0.77, - "learning_rate": 4.6142768977576636e-05, - "loss": 1.1445, - "step": 7580 - }, - { - "epoch": 0.77, - "learning_rate": 4.573133100185148e-05, - "loss": 1.1293, - "step": 7600 - }, - { - "epoch": 0.78, - "learning_rate": 4.531989302612631e-05, - "loss": 1.1038, - "step": 7620 - }, - { - "epoch": 0.78, - "learning_rate": 4.490845505040115e-05, - "loss": 1.1, - "step": 7640 - }, - { - "epoch": 0.78, - "learning_rate": 4.4497017074675994e-05, - "loss": 1.1133, - "step": 7660 - }, - { - "epoch": 0.78, - "learning_rate": 4.4085579098950836e-05, - "loss": 1.1537, - "step": 7680 - }, - { - "epoch": 0.78, - "learning_rate": 4.367414112322568e-05, - "loss": 1.118, - "step": 7700 - }, - { - "epoch": 0.79, - "learning_rate": 4.326270314750052e-05, - "loss": 1.1545, - "step": 7720 - }, - { - "epoch": 0.79, - "learning_rate": 4.285126517177536e-05, - "loss": 1.1473, - "step": 7740 - }, - { - "epoch": 0.79, - "learning_rate": 4.2439827196050194e-05, - "loss": 1.1389, - "step": 7760 - }, - { - "epoch": 0.79, - "learning_rate": 4.2028389220325035e-05, - "loss": 1.1767, - "step": 7780 - }, - { - "epoch": 0.79, - "learning_rate": 4.161695124459988e-05, - "loss": 1.0442, - "step": 7800 - }, - { - "epoch": 0.8, - "learning_rate": 4.120551326887472e-05, - "loss": 1.1195, - "step": 7820 - }, - { - "epoch": 0.8, - "learning_rate": 4.079407529314956e-05, - "loss": 1.1136, - "step": 7840 - }, - { - "epoch": 0.8, - "learning_rate": 4.03826373174244e-05, - "loss": 1.0853, - "step": 7860 - }, - { - "epoch": 0.8, - "learning_rate": 3.997119934169924e-05, - "loss": 1.1172, - "step": 7880 - }, - { - "epoch": 0.8, - "learning_rate": 3.955976136597408e-05, - "loss": 1.1025, - "step": 7900 - }, - { - "epoch": 0.81, - "learning_rate": 3.914832339024892e-05, - "loss": 1.1266, - "step": 7920 - }, - { - "epoch": 0.81, - "learning_rate": 3.873688541452376e-05, - "loss": 1.0913, - "step": 7940 - }, - { - "epoch": 0.81, - "learning_rate": 3.83254474387986e-05, - "loss": 1.1448, - "step": 7960 - }, - { - "epoch": 0.81, - "learning_rate": 3.791400946307344e-05, - "loss": 1.1111, - "step": 7980 - }, - { - "epoch": 0.81, - "learning_rate": 3.750257148734828e-05, - "loss": 1.1456, - "step": 8000 - }, - { - "epoch": 0.82, - "learning_rate": 3.7091133511623125e-05, - "loss": 1.1447, - "step": 8020 - }, - { - "epoch": 0.82, - "learning_rate": 3.6679695535897966e-05, - "loss": 1.0768, - "step": 8040 - }, - { - "epoch": 0.82, - "learning_rate": 3.626825756017281e-05, - "loss": 1.1319, - "step": 8060 - }, - { - "epoch": 0.82, - "learning_rate": 3.585681958444765e-05, - "loss": 1.1178, - "step": 8080 - }, - { - "epoch": 0.82, - "learning_rate": 3.544538160872249e-05, - "loss": 1.1002, - "step": 8100 - }, - { - "epoch": 0.83, - "learning_rate": 3.503394363299733e-05, - "loss": 1.1129, - "step": 8120 - }, - { - "epoch": 0.83, - "learning_rate": 3.462250565727217e-05, - "loss": 1.1278, - "step": 8140 - }, - { - "epoch": 0.83, - "learning_rate": 3.421106768154701e-05, - "loss": 1.13, - "step": 8160 - }, - { - "epoch": 0.83, - "learning_rate": 3.379962970582185e-05, - "loss": 1.1349, - "step": 8180 - }, - { - "epoch": 0.83, - "learning_rate": 3.338819173009669e-05, - "loss": 1.0957, - "step": 8200 - }, - { - "epoch": 0.84, - "learning_rate": 3.297675375437153e-05, - "loss": 1.1363, - "step": 8220 - }, - { - "epoch": 0.84, - "learning_rate": 3.256531577864637e-05, - "loss": 1.0767, - "step": 8240 - }, - { - "epoch": 0.84, - "learning_rate": 3.2153877802921214e-05, - "loss": 1.1356, - "step": 8260 - }, - { - "epoch": 0.84, - "learning_rate": 3.1742439827196055e-05, - "loss": 1.0676, - "step": 8280 - }, - { - "epoch": 0.85, - "learning_rate": 3.133100185147089e-05, - "loss": 1.1433, - "step": 8300 - }, - { - "epoch": 0.85, - "learning_rate": 3.091956387574573e-05, - "loss": 1.1239, - "step": 8320 - }, - { - "epoch": 0.85, - "learning_rate": 3.0508125900020572e-05, - "loss": 1.104, - "step": 8340 - }, - { - "epoch": 0.85, - "learning_rate": 3.0096687924295414e-05, - "loss": 1.1115, - "step": 8360 - }, - { - "epoch": 0.85, - "learning_rate": 2.9685249948570255e-05, - "loss": 1.0992, - "step": 8380 - }, - { - "epoch": 0.86, - "learning_rate": 2.9273811972845093e-05, - "loss": 1.087, - "step": 8400 - }, - { - "epoch": 0.86, - "learning_rate": 2.8862373997119934e-05, - "loss": 1.0881, - "step": 8420 - }, - { - "epoch": 0.86, - "learning_rate": 2.8450936021394776e-05, - "loss": 1.1266, - "step": 8440 - }, - { - "epoch": 0.86, - "learning_rate": 2.8060069944455876e-05, - "loss": 1.1355, - "step": 8460 - }, - { - "epoch": 0.86, - "learning_rate": 2.7648631968730714e-05, - "loss": 1.1368, - "step": 8480 - }, - { - "epoch": 0.87, - "learning_rate": 2.7237193993005555e-05, - "loss": 1.0835, - "step": 8500 - }, - { - "epoch": 0.87, - "learning_rate": 2.6825756017280397e-05, - "loss": 1.1137, - "step": 8520 - }, - { - "epoch": 0.87, - "learning_rate": 2.6414318041555235e-05, - "loss": 1.0783, - "step": 8540 - }, - { - "epoch": 0.87, - "learning_rate": 2.6002880065830076e-05, - "loss": 1.1114, - "step": 8560 - }, - { - "epoch": 0.87, - "learning_rate": 2.5591442090104917e-05, - "loss": 1.1286, - "step": 8580 - }, - { - "epoch": 0.88, - "learning_rate": 2.518000411437976e-05, - "loss": 1.1531, - "step": 8600 - }, - { - "epoch": 0.88, - "learning_rate": 2.47685661386546e-05, - "loss": 1.0784, - "step": 8620 - }, - { - "epoch": 0.88, - "learning_rate": 2.435712816292944e-05, - "loss": 1.1358, - "step": 8640 - }, - { - "epoch": 0.88, - "learning_rate": 2.3945690187204283e-05, - "loss": 1.1323, - "step": 8660 - }, - { - "epoch": 0.88, - "learning_rate": 2.353425221147912e-05, - "loss": 1.154, - "step": 8680 - }, - { - "epoch": 0.89, - "learning_rate": 2.3122814235753962e-05, - "loss": 1.103, - "step": 8700 - }, - { - "epoch": 0.89, - "learning_rate": 2.2711376260028803e-05, - "loss": 1.1319, - "step": 8720 - }, - { - "epoch": 0.89, - "learning_rate": 2.229993828430364e-05, - "loss": 1.1118, - "step": 8740 - }, - { - "epoch": 0.89, - "learning_rate": 2.1888500308578482e-05, - "loss": 1.0364, - "step": 8760 - }, - { - "epoch": 0.89, - "learning_rate": 2.1477062332853324e-05, - "loss": 1.155, - "step": 8780 - }, - { - "epoch": 0.9, - "learning_rate": 2.1065624357128165e-05, - "loss": 1.1201, - "step": 8800 - }, - { - "epoch": 0.9, - "learning_rate": 2.0654186381403003e-05, - "loss": 1.1045, - "step": 8820 - }, - { - "epoch": 0.9, - "learning_rate": 2.0242748405677844e-05, - "loss": 1.0828, - "step": 8840 - }, - { - "epoch": 0.9, - "learning_rate": 1.9831310429952686e-05, - "loss": 1.0452, - "step": 8860 - }, - { - "epoch": 0.9, - "learning_rate": 1.9419872454227524e-05, - "loss": 1.0463, - "step": 8880 - }, - { - "epoch": 0.91, - "learning_rate": 1.9008434478502365e-05, - "loss": 1.1709, - "step": 8900 - }, - { - "epoch": 0.91, - "learning_rate": 1.8596996502777206e-05, - "loss": 1.0854, - "step": 8920 - }, - { - "epoch": 0.91, - "learning_rate": 1.8185558527052048e-05, - "loss": 1.1362, - "step": 8940 - }, - { - "epoch": 0.91, - "learning_rate": 1.777412055132689e-05, - "loss": 1.1538, - "step": 8960 - }, - { - "epoch": 0.91, - "learning_rate": 1.736268257560173e-05, - "loss": 1.1125, - "step": 8980 - }, - { - "epoch": 0.92, - "learning_rate": 1.695124459987657e-05, - "loss": 1.1158, - "step": 9000 - }, - { - "epoch": 0.92, - "learning_rate": 1.653980662415141e-05, - "loss": 1.0928, - "step": 9020 - }, - { - "epoch": 0.92, - "learning_rate": 1.612836864842625e-05, - "loss": 1.0934, - "step": 9040 - }, - { - "epoch": 0.92, - "learning_rate": 1.5716930672701092e-05, - "loss": 1.1428, - "step": 9060 - }, - { - "epoch": 0.92, - "learning_rate": 1.530549269697593e-05, - "loss": 1.1344, - "step": 9080 - }, - { - "epoch": 0.93, - "learning_rate": 1.4894054721250772e-05, - "loss": 1.1131, - "step": 9100 - }, - { - "epoch": 0.93, - "learning_rate": 1.4482616745525613e-05, - "loss": 1.1259, - "step": 9120 - }, - { - "epoch": 0.93, - "learning_rate": 1.4071178769800453e-05, - "loss": 1.113, - "step": 9140 - }, - { - "epoch": 0.93, - "learning_rate": 1.3659740794075294e-05, - "loss": 1.1277, - "step": 9160 - }, - { - "epoch": 0.93, - "learning_rate": 1.3248302818350133e-05, - "loss": 1.0949, - "step": 9180 - }, - { - "epoch": 0.94, - "learning_rate": 1.2836864842624973e-05, - "loss": 1.093, - "step": 9200 - }, - { - "epoch": 0.94, - "learning_rate": 1.2425426866899816e-05, - "loss": 1.1086, - "step": 9220 - }, - { - "epoch": 0.94, - "learning_rate": 1.2013988891174656e-05, - "loss": 1.0652, - "step": 9240 - }, - { - "epoch": 0.94, - "learning_rate": 1.1602550915449497e-05, - "loss": 1.1047, - "step": 9260 - }, - { - "epoch": 0.94, - "learning_rate": 1.1191112939724337e-05, - "loss": 1.1189, - "step": 9280 - }, - { - "epoch": 0.95, - "learning_rate": 1.0779674963999176e-05, - "loss": 1.1149, - "step": 9300 - }, - { - "epoch": 0.95, - "learning_rate": 1.036823698827402e-05, - "loss": 1.0873, - "step": 9320 - }, - { - "epoch": 0.95, - "learning_rate": 9.956799012548859e-06, - "loss": 1.1108, - "step": 9340 - }, - { - "epoch": 0.95, - "learning_rate": 9.545361036823699e-06, - "loss": 1.1246, - "step": 9360 - }, - { - "epoch": 0.95, - "learning_rate": 9.13392306109854e-06, - "loss": 1.1322, - "step": 9380 - }, - { - "epoch": 0.96, - "learning_rate": 8.72248508537338e-06, - "loss": 1.1567, - "step": 9400 - }, - { - "epoch": 0.96, - "learning_rate": 8.311047109648221e-06, - "loss": 1.1127, - "step": 9420 - }, - { - "epoch": 0.96, - "learning_rate": 7.89960913392306e-06, - "loss": 1.1136, - "step": 9440 - }, - { - "epoch": 0.96, - "learning_rate": 7.488171158197901e-06, - "loss": 1.0707, - "step": 9460 - }, - { - "epoch": 0.97, - "learning_rate": 7.076733182472743e-06, - "loss": 1.1256, - "step": 9480 - }, - { - "epoch": 0.97, - "learning_rate": 6.665295206747584e-06, - "loss": 1.1317, - "step": 9500 - } - ], - "logging_steps": 20, - "max_steps": 9822, - "num_train_epochs": 1, - "save_steps": 500, - "total_flos": 1.37720895035136e+17, - "trial_name": null, - "trial_params": null -} diff --git a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/training_args.bin b/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/training_args.bin deleted file mode 100644 index e1f4530bc1dde4c9a685eadf65f7a3651cf9588b..0000000000000000000000000000000000000000 --- a/https:/huggingface.co/AbineshMoonpai/CodeLlama-SQL-13b/tree/main/trainer_outputs/checkpoint-9500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8819fc087370c0c0dd1869922822cf7a5ebe84fa7a7194c69a0ec917ff22569b -size 4027