|
[WARNING|2025-02-12 13:09:13] logging.py:162 >> `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training. |
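For reference, this flag corresponds to the `ddp_find_unused_parameters` option of Hugging Face `TrainingArguments`. A minimal sketch of how it would be set for a LoRA + DDP run like this one (only the output directory is taken from this log; the other values are illustrative):

    from transformers import TrainingArguments

    # With LoRA the frozen base weights never receive gradients, so DDP must not
    # scan for unused parameters on every step; hence the flag is forced to False.
    args = TrainingArguments(
        output_dir="saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned",
        bf16=True,                          # matches the compute dtype reported below
        ddp_find_unused_parameters=False,   # the setting this warning refers to
    )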
|
|
|
[INFO|2025-02-12 13:09:13] parser.py:355 >> Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 |
|
|
|
[INFO|2025-02-12 13:09:13] parser.py:355 >> Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 |
|
|
|
[INFO|2025-02-12 13:09:13] parser.py:355 >> Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: torch.bfloat16 |
|
|
|
[INFO|2025-02-12 13:09:14] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/config.json |
|
|
|
[INFO|2025-02-12 13:09:14] configuration_utils.py:746 >> Model config LlamaConfig {
  "_name_or_path": "deepseek-ai/deepseek-coder-6.7b-base",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 32013,
  "eos_token_id": 32014,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 16384,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "factor": 4.0,
    "rope_type": "linear",
    "type": "linear"
  },
  "rope_theta": 100000,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.1",
  "use_cache": true,
  "vocab_size": 32256
}
|
|
|
|
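Note on the config above: the reported `max_position_embeddings` of 16384 is consistent with linear RoPE scaling by a factor of 4.0 over a 4096-token base context (4096 × 4.0 = 16384). A minimal sketch for inspecting these values (assumes the model is already in the local Hugging Face cache, as it is in this run):

    from transformers import AutoConfig

    cfg = AutoConfig.from_pretrained("deepseek-ai/deepseek-coder-6.7b-base")
    print(cfg.max_position_embeddings)   # 16384
    print(cfg.rope_scaling)              # {'factor': 4.0, 'rope_type': 'linear', 'type': 'linear'}
    # The 4096-token base context is an assumption about the pretrained checkpoint,
    # not something printed in this log:
    assert 4096 * cfg.rope_scaling["factor"] == cfg.max_position_embeddings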
|
[INFO|2025-02-12 13:09:14] tokenization_utils_base.py:2211 >> loading file tokenizer.model from cache at None |
|
|
|
[INFO|2025-02-12 13:09:14] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/tokenizer.json |
|
|
|
[INFO|2025-02-12 13:09:14] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None |
|
|
|
[INFO|2025-02-12 13:09:14] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at None |
|
|
|
[INFO|2025-02-12 13:09:14] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 13:09:14] tokenization_utils_base.py:2475 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
|
[INFO|2025-02-12 13:09:16] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/config.json |
|
|
|
[INFO|2025-02-12 13:09:16] configuration_utils.py:746 >> Model config LlamaConfig {
  "_name_or_path": "deepseek-ai/deepseek-coder-6.7b-base",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 32013,
  "eos_token_id": 32014,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 16384,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "factor": 4.0,
    "rope_type": "linear",
    "type": "linear"
  },
  "rope_theta": 100000,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.1",
  "use_cache": true,
  "vocab_size": 32256
}
|
|
|
|
|
[INFO|2025-02-12 13:09:17] tokenization_utils_base.py:2211 >> loading file tokenizer.model from cache at None |
|
|
|
[INFO|2025-02-12 13:09:17] tokenization_utils_base.py:2211 >> loading file tokenizer.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/tokenizer.json |
|
|
|
[INFO|2025-02-12 13:09:17] tokenization_utils_base.py:2211 >> loading file added_tokens.json from cache at None |
|
|
|
[INFO|2025-02-12 13:09:17] tokenization_utils_base.py:2211 >> loading file special_tokens_map.json from cache at None |
|
|
|
[INFO|2025-02-12 13:09:17] tokenization_utils_base.py:2211 >> loading file tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 13:09:17] tokenization_utils_base.py:2475 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
|
[INFO|2025-02-12 13:09:17] logging.py:157 >> Loading dataset new-datasets/solidity_v3.json... |
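The training data is a local JSON file; its schema is not shown in this log. A hypothetical quick check of the file (only the path is taken from the log; the assumption that the top level is a list of dict records is illustrative):

    import json

    with open("new-datasets/solidity_v3.json") as f:
        records = json.load(f)
    print(len(records))                # number of raw records in the file
    print(sorted(records[0].keys()))   # field names (assumes each record is a dict)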
|
|
|
[INFO|2025-02-12 13:09:24] configuration_utils.py:679 >> loading configuration file config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/config.json |
|
|
|
[INFO|2025-02-12 13:09:24] configuration_utils.py:746 >> Model config LlamaConfig {
  "_name_or_path": "deepseek-ai/deepseek-coder-6.7b-base",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 32013,
  "eos_token_id": 32014,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 16384,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "factor": 4.0,
    "rope_type": "linear",
    "type": "linear"
  },
  "rope_theta": 100000,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.1",
  "use_cache": true,
  "vocab_size": 32256
}
|
|
|
|
|
[INFO|2025-02-12 13:09:24] modeling_utils.py:3937 >> loading weights file model.safetensors from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/model.safetensors.index.json |
|
|
|
[INFO|2025-02-12 13:09:24] modeling_utils.py:1670 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. |
|
|
|
[INFO|2025-02-12 13:09:24] configuration_utils.py:1096 >> Generate config GenerationConfig {
  "bos_token_id": 32013,
  "eos_token_id": 32014
}
|
|
|
|
|
[INFO|2025-02-12 13:09:27] modeling_utils.py:4800 >> All model checkpoint weights were used when initializing LlamaForCausalLM. |
|
|
|
|
|
[INFO|2025-02-12 13:09:27] modeling_utils.py:4808 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at deepseek-ai/deepseek-coder-6.7b-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
|
|
|
[INFO|2025-02-12 13:09:28] configuration_utils.py:1051 >> loading configuration file generation_config.json from cache at /home/ubuntu/.cache/huggingface/hub/models--deepseek-ai--deepseek-coder-6.7b-base/snapshots/ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912/generation_config.json |
|
|
|
[INFO|2025-02-12 13:09:28] configuration_utils.py:1096 >> Generate config GenerationConfig {
  "bos_token_id": 32013,
  "eos_token_id": 32014
}
|
|
|
|
|
[INFO|2025-02-12 13:09:28] logging.py:157 >> Gradient checkpointing enabled. |
|
|
|
[INFO|2025-02-12 13:09:28] logging.py:157 >> Using torch SDPA for faster training and inference. |
|
|
|
[INFO|2025-02-12 13:09:28] logging.py:157 >> Upcasting trainable params to float32. |
|
|
|
[INFO|2025-02-12 13:09:28] logging.py:157 >> Fine-tuning method: LoRA |
|
|
|
[INFO|2025-02-12 13:09:28] logging.py:157 >> Found linear modules: o_proj,down_proj,gate_proj,q_proj,v_proj,up_proj,k_proj |
|
|
|
[INFO|2025-02-12 13:09:28] logging.py:157 >> trainable params: 39,976,960 || all params: 6,780,489,728 || trainable%: 0.5896 |
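The trainable-parameter count above is consistent with a LoRA rank of 16 applied to all seven projection matrices in each of the 32 layers (the rank itself is not printed in this log; it is inferred from the count). A back-of-the-envelope check using the sizes from the model config:

    hidden, inter, layers, rank = 4096, 11008, 32, 16   # rank is an assumption
    attn = 4 * (hidden + hidden)                         # q_proj, k_proj, v_proj, o_proj
    mlp  = 2 * (hidden + inter) + (inter + hidden)       # gate_proj, up_proj, down_proj
    print(rank * (attn + mlp) * layers)                  # 39976960, matching the log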
|
|
|
[INFO|2025-02-12 13:09:28] trainer.py:698 >> Using auto half precision backend |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2313 >> ***** Running training ***** |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2314 >> Num examples = 38,495 |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2315 >> Num Epochs = 2 |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2316 >> Instantaneous batch size per device = 16 |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2319 >> Total train batch size (w. parallel, distributed & accumulation) = 96 |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2320 >> Gradient Accumulation steps = 2 |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2321 >> Total optimization steps = 802 |
|
|
|
[INFO|2025-02-12 13:09:29] trainer.py:2322 >> Number of trainable parameters = 39,976,960 |
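The run summary is internally consistent: 16 examples per device × 3 processes (ranks 0–2) × 2 gradient-accumulation steps gives the total batch size of 96, and ceil(38495 / 96) = 401 optimization steps per epoch, i.e. 802 steps over 2 epochs. A quick check:

    import math

    per_device, world_size, grad_accum = 16, 3, 2
    examples, epochs = 38_495, 2
    total_batch = per_device * world_size * grad_accum
    steps = math.ceil(examples / total_batch) * epochs
    print(total_batch, steps)   # 96 802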
|
|
|
[INFO|2025-02-12 13:10:29] logging.py:157 >> {'loss': 0.4047, 'learning_rate': 4.9995e-05, 'epoch': 0.01} |
|
|
|
[INFO|2025-02-12 13:11:29] logging.py:157 >> {'loss': 0.3855, 'learning_rate': 4.9981e-05, 'epoch': 0.02} |
|
|
|
[INFO|2025-02-12 13:12:29] logging.py:157 >> {'loss': 0.3818, 'learning_rate': 4.9957e-05, 'epoch': 0.04} |
|
|
|
[INFO|2025-02-12 13:13:29] logging.py:157 >> {'loss': 0.3884, 'learning_rate': 4.9923e-05, 'epoch': 0.05} |
|
|
|
[INFO|2025-02-12 13:14:28] logging.py:157 >> {'loss': 0.3713, 'learning_rate': 4.9880e-05, 'epoch': 0.06} |
|
|
|
[INFO|2025-02-12 13:15:28] logging.py:157 >> {'loss': 0.3492, 'learning_rate': 4.9828e-05, 'epoch': 0.07} |
|
|
|
[INFO|2025-02-12 13:16:28] logging.py:157 >> {'loss': 0.3596, 'learning_rate': 4.9765e-05, 'epoch': 0.09} |
|
|
|
[INFO|2025-02-12 13:17:28] logging.py:157 >> {'loss': 0.3792, 'learning_rate': 4.9694e-05, 'epoch': 0.10} |
|
|
|
[INFO|2025-02-12 13:18:28] logging.py:157 >> {'loss': 0.3393, 'learning_rate': 4.9613e-05, 'epoch': 0.11} |
|
|
|
[INFO|2025-02-12 13:19:27] logging.py:157 >> {'loss': 0.3433, 'learning_rate': 4.9522e-05, 'epoch': 0.12} |
|
|
|
[INFO|2025-02-12 13:20:27] logging.py:157 >> {'loss': 0.3563, 'learning_rate': 4.9422e-05, 'epoch': 0.14} |
|
|
|
[INFO|2025-02-12 13:21:27] logging.py:157 >> {'loss': 0.3543, 'learning_rate': 4.9313e-05, 'epoch': 0.15} |
|
|
|
[INFO|2025-02-12 13:22:27] logging.py:157 >> {'loss': 0.3442, 'learning_rate': 4.9194e-05, 'epoch': 0.16} |
|
|
|
[INFO|2025-02-12 13:23:26] logging.py:157 >> {'loss': 0.3440, 'learning_rate': 4.9066e-05, 'epoch': 0.17} |
|
|
|
[INFO|2025-02-12 13:24:26] logging.py:157 >> {'loss': 0.3426, 'learning_rate': 4.8929e-05, 'epoch': 0.19} |
|
|
|
[INFO|2025-02-12 13:25:26] logging.py:157 >> {'loss': 0.3677, 'learning_rate': 4.8782e-05, 'epoch': 0.20} |
|
|
|
[INFO|2025-02-12 13:26:26] logging.py:157 >> {'loss': 0.3330, 'learning_rate': 4.8627e-05, 'epoch': 0.21} |
|
|
|
[INFO|2025-02-12 13:27:26] logging.py:157 >> {'loss': 0.3596, 'learning_rate': 4.8462e-05, 'epoch': 0.22} |
|
|
|
[INFO|2025-02-12 13:28:25] logging.py:157 >> {'loss': 0.3156, 'learning_rate': 4.8289e-05, 'epoch': 0.24} |
|
|
|
[INFO|2025-02-12 13:29:25] logging.py:157 >> {'loss': 0.3355, 'learning_rate': 4.8106e-05, 'epoch': 0.25} |
|
|
|
[INFO|2025-02-12 13:29:25] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-100 |
|
|
|
[INFO|2025-02-12 13:29:26] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-100/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 13:29:26] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-100/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 13:30:26] logging.py:157 >> {'loss': 0.3518, 'learning_rate': 4.7915e-05, 'epoch': 0.26} |
|
|
|
[INFO|2025-02-12 13:31:26] logging.py:157 >> {'loss': 0.3031, 'learning_rate': 4.7715e-05, 'epoch': 0.27} |
|
|
|
[INFO|2025-02-12 13:32:26] logging.py:157 >> {'loss': 0.3452, 'learning_rate': 4.7506e-05, 'epoch': 0.29} |
|
|
|
[INFO|2025-02-12 13:33:26] logging.py:157 >> {'loss': 0.3430, 'learning_rate': 4.7288e-05, 'epoch': 0.30} |
|
|
|
[INFO|2025-02-12 13:34:26] logging.py:157 >> {'loss': 0.3234, 'learning_rate': 4.7062e-05, 'epoch': 0.31} |
|
|
|
[INFO|2025-02-12 13:35:26] logging.py:157 >> {'loss': 0.3238, 'learning_rate': 4.6828e-05, 'epoch': 0.32} |
|
|
|
[INFO|2025-02-12 13:36:25] logging.py:157 >> {'loss': 0.3337, 'learning_rate': 4.6585e-05, 'epoch': 0.34} |
|
|
|
[INFO|2025-02-12 13:37:25] logging.py:157 >> {'loss': 0.3422, 'learning_rate': 4.6334e-05, 'epoch': 0.35} |
|
|
|
[INFO|2025-02-12 13:38:25] logging.py:157 >> {'loss': 0.3150, 'learning_rate': 4.6075e-05, 'epoch': 0.36} |
|
|
|
[INFO|2025-02-12 13:39:25] logging.py:157 >> {'loss': 0.3213, 'learning_rate': 4.5807e-05, 'epoch': 0.37} |
|
|
|
[INFO|2025-02-12 13:40:24] logging.py:157 >> {'loss': 0.3326, 'learning_rate': 4.5532e-05, 'epoch': 0.39} |
|
|
|
[INFO|2025-02-12 13:41:24] logging.py:157 >> {'loss': 0.3264, 'learning_rate': 4.5248e-05, 'epoch': 0.40} |
|
|
|
[INFO|2025-02-12 13:42:24] logging.py:157 >> {'loss': 0.3422, 'learning_rate': 4.4957e-05, 'epoch': 0.41} |
|
|
|
[INFO|2025-02-12 13:43:24] logging.py:157 >> {'loss': 0.3136, 'learning_rate': 4.4659e-05, 'epoch': 0.42} |
|
|
|
[INFO|2025-02-12 13:44:24] logging.py:157 >> {'loss': 0.3350, 'learning_rate': 4.4352e-05, 'epoch': 0.44} |
|
|
|
[INFO|2025-02-12 13:45:23] logging.py:157 >> {'loss': 0.3188, 'learning_rate': 4.4039e-05, 'epoch': 0.45} |
|
|
|
[INFO|2025-02-12 13:46:23] logging.py:157 >> {'loss': 0.3355, 'learning_rate': 4.3718e-05, 'epoch': 0.46} |
|
|
|
[INFO|2025-02-12 13:47:23] logging.py:157 >> {'loss': 0.3358, 'learning_rate': 4.3390e-05, 'epoch': 0.47} |
|
|
|
[INFO|2025-02-12 13:48:23] logging.py:157 >> {'loss': 0.2961, 'learning_rate': 4.3054e-05, 'epoch': 0.49} |
|
|
|
[INFO|2025-02-12 13:49:23] logging.py:157 >> {'loss': 0.3014, 'learning_rate': 4.2712e-05, 'epoch': 0.50} |
|
|
|
[INFO|2025-02-12 13:49:23] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-200 |
|
|
|
[INFO|2025-02-12 13:49:23] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-200/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 13:49:23] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-200/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 13:50:24] logging.py:157 >> {'loss': 0.3182, 'learning_rate': 4.2363e-05, 'epoch': 0.51} |
|
|
|
[INFO|2025-02-12 13:51:24] logging.py:157 >> {'loss': 0.3117, 'learning_rate': 4.2008e-05, 'epoch': 0.52} |
|
|
|
[INFO|2025-02-12 13:52:23] logging.py:157 >> {'loss': 0.2916, 'learning_rate': 4.1646e-05, 'epoch': 0.54} |
|
|
|
[INFO|2025-02-12 13:53:23] logging.py:157 >> {'loss': 0.3342, 'learning_rate': 4.1277e-05, 'epoch': 0.55} |
|
|
|
[INFO|2025-02-12 13:54:23] logging.py:157 >> {'loss': 0.3049, 'learning_rate': 4.0902e-05, 'epoch': 0.56} |
|
|
|
[INFO|2025-02-12 13:55:23] logging.py:157 >> {'loss': 0.3349, 'learning_rate': 4.0522e-05, 'epoch': 0.57} |
|
|
|
[INFO|2025-02-12 13:56:23] logging.py:157 >> {'loss': 0.3266, 'learning_rate': 4.0135e-05, 'epoch': 0.59} |
|
|
|
[INFO|2025-02-12 13:57:22] logging.py:157 >> {'loss': 0.3163, 'learning_rate': 3.9742e-05, 'epoch': 0.60} |
|
|
|
[INFO|2025-02-12 13:58:22] logging.py:157 >> {'loss': 0.2934, 'learning_rate': 3.9344e-05, 'epoch': 0.61} |
|
|
|
[INFO|2025-02-12 13:59:22] logging.py:157 >> {'loss': 0.2749, 'learning_rate': 3.8940e-05, 'epoch': 0.62} |
|
|
|
[INFO|2025-02-12 14:00:22] logging.py:157 >> {'loss': 0.3367, 'learning_rate': 3.8531e-05, 'epoch': 0.64} |
|
|
|
[INFO|2025-02-12 14:01:22] logging.py:157 >> {'loss': 0.3205, 'learning_rate': 3.8117e-05, 'epoch': 0.65} |
|
|
|
[INFO|2025-02-12 14:02:21] logging.py:157 >> {'loss': 0.3159, 'learning_rate': 3.7697e-05, 'epoch': 0.66} |
|
|
|
[INFO|2025-02-12 14:03:21] logging.py:157 >> {'loss': 0.2995, 'learning_rate': 3.7273e-05, 'epoch': 0.67} |
|
|
|
[INFO|2025-02-12 14:04:21] logging.py:157 >> {'loss': 0.3094, 'learning_rate': 3.6844e-05, 'epoch': 0.69} |
|
|
|
[INFO|2025-02-12 14:05:21] logging.py:157 >> {'loss': 0.3321, 'learning_rate': 3.6411e-05, 'epoch': 0.70} |
|
|
|
[INFO|2025-02-12 14:06:20] logging.py:157 >> {'loss': 0.3310, 'learning_rate': 3.5973e-05, 'epoch': 0.71} |
|
|
|
[INFO|2025-02-12 14:07:20] logging.py:157 >> {'loss': 0.3199, 'learning_rate': 3.5531e-05, 'epoch': 0.72} |
|
|
|
[INFO|2025-02-12 14:08:20] logging.py:157 >> {'loss': 0.3087, 'learning_rate': 3.5085e-05, 'epoch': 0.74} |
|
|
|
[INFO|2025-02-12 14:09:20] logging.py:157 >> {'loss': 0.2914, 'learning_rate': 3.4635e-05, 'epoch': 0.75} |
|
|
|
[INFO|2025-02-12 14:09:20] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-300 |
|
|
|
[INFO|2025-02-12 14:09:21] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-300/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 14:09:21] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-300/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 14:10:21] logging.py:157 >> {'loss': 0.3059, 'learning_rate': 3.4181e-05, 'epoch': 0.76} |
|
|
|
[INFO|2025-02-12 14:11:21] logging.py:157 >> {'loss': 0.3110, 'learning_rate': 3.3724e-05, 'epoch': 0.77} |
|
|
|
[INFO|2025-02-12 14:12:21] logging.py:157 >> {'loss': 0.3262, 'learning_rate': 3.3264e-05, 'epoch': 0.79} |
|
|
|
[INFO|2025-02-12 14:13:20] logging.py:157 >> {'loss': 0.3070, 'learning_rate': 3.2800e-05, 'epoch': 0.80} |
|
|
|
[INFO|2025-02-12 14:14:20] logging.py:157 >> {'loss': 0.3225, 'learning_rate': 3.2333e-05, 'epoch': 0.81} |
|
|
|
[INFO|2025-02-12 14:15:20] logging.py:157 >> {'loss': 0.3427, 'learning_rate': 3.1864e-05, 'epoch': 0.82} |
|
|
|
[INFO|2025-02-12 14:16:20] logging.py:157 >> {'loss': 0.3320, 'learning_rate': 3.1392e-05, 'epoch': 0.84} |
|
|
|
[INFO|2025-02-12 14:17:20] logging.py:157 >> {'loss': 0.3031, 'learning_rate': 3.0917e-05, 'epoch': 0.85} |
|
|
|
[INFO|2025-02-12 14:18:19] logging.py:157 >> {'loss': 0.3140, 'learning_rate': 3.0440e-05, 'epoch': 0.86} |
|
|
|
[INFO|2025-02-12 14:19:19] logging.py:157 >> {'loss': 0.2994, 'learning_rate': 2.9961e-05, 'epoch': 0.87} |
|
|
|
[INFO|2025-02-12 14:20:19] logging.py:157 >> {'loss': 0.2904, 'learning_rate': 2.9480e-05, 'epoch': 0.89} |
|
|
|
[INFO|2025-02-12 14:21:19] logging.py:157 >> {'loss': 0.3259, 'learning_rate': 2.8998e-05, 'epoch': 0.90} |
|
|
|
[INFO|2025-02-12 14:22:18] logging.py:157 >> {'loss': 0.2975, 'learning_rate': 2.8514e-05, 'epoch': 0.91} |
|
|
|
[INFO|2025-02-12 14:23:18] logging.py:157 >> {'loss': 0.3248, 'learning_rate': 2.8028e-05, 'epoch': 0.92} |
|
|
|
[INFO|2025-02-12 14:24:18] logging.py:157 >> {'loss': 0.3043, 'learning_rate': 2.7542e-05, 'epoch': 0.94} |
|
|
|
[INFO|2025-02-12 14:25:18] logging.py:157 >> {'loss': 0.3101, 'learning_rate': 2.7054e-05, 'epoch': 0.95} |
|
|
|
[INFO|2025-02-12 14:26:18] logging.py:157 >> {'loss': 0.2902, 'learning_rate': 2.6566e-05, 'epoch': 0.96} |
|
|
|
[INFO|2025-02-12 14:27:18] logging.py:157 >> {'loss': 0.3329, 'learning_rate': 2.6077e-05, 'epoch': 0.97} |
|
|
|
[INFO|2025-02-12 14:28:17] logging.py:157 >> {'loss': 0.3212, 'learning_rate': 2.5588e-05, 'epoch': 0.99} |
|
|
|
[INFO|2025-02-12 14:29:17] logging.py:157 >> {'loss': 0.2880, 'learning_rate': 2.5098e-05, 'epoch': 1.00} |
|
|
|
[INFO|2025-02-12 14:29:17] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-400 |
|
|
|
[INFO|2025-02-12 14:29:28] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-400/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 14:29:28] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-400/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 14:30:28] logging.py:157 >> {'loss': 0.2973, 'learning_rate': 2.4608e-05, 'epoch': 1.01} |
|
|
|
[INFO|2025-02-12 14:31:28] logging.py:157 >> {'loss': 0.2869, 'learning_rate': 2.4119e-05, 'epoch': 1.02} |
|
|
|
[INFO|2025-02-12 14:32:28] logging.py:157 >> {'loss': 0.3053, 'learning_rate': 2.3630e-05, 'epoch': 1.03} |
|
|
|
[INFO|2025-02-12 14:33:27] logging.py:157 >> {'loss': 0.2907, 'learning_rate': 2.3141e-05, 'epoch': 1.05} |
|
|
|
[INFO|2025-02-12 14:34:27] logging.py:157 >> {'loss': 0.2691, 'learning_rate': 2.2653e-05, 'epoch': 1.06} |
|
|
|
[INFO|2025-02-12 14:35:27] logging.py:157 >> {'loss': 0.2969, 'learning_rate': 2.2166e-05, 'epoch': 1.07} |
|
|
|
[INFO|2025-02-12 14:36:27] logging.py:157 >> {'loss': 0.3006, 'learning_rate': 2.1680e-05, 'epoch': 1.08} |
|
|
|
[INFO|2025-02-12 14:37:26] logging.py:157 >> {'loss': 0.2777, 'learning_rate': 2.1196e-05, 'epoch': 1.10} |
|
|
|
[INFO|2025-02-12 14:38:26] logging.py:157 >> {'loss': 0.3153, 'learning_rate': 2.0712e-05, 'epoch': 1.11} |
|
|
|
[INFO|2025-02-12 14:39:26] logging.py:157 >> {'loss': 0.3023, 'learning_rate': 2.0231e-05, 'epoch': 1.12} |
|
|
|
[INFO|2025-02-12 14:40:26] logging.py:157 >> {'loss': 0.3058, 'learning_rate': 1.9751e-05, 'epoch': 1.13} |
|
|
|
[INFO|2025-02-12 14:41:25] logging.py:157 >> {'loss': 0.3056, 'learning_rate': 1.9273e-05, 'epoch': 1.15} |
|
|
|
[INFO|2025-02-12 14:42:25] logging.py:157 >> {'loss': 0.2994, 'learning_rate': 1.8798e-05, 'epoch': 1.16} |
|
|
|
[INFO|2025-02-12 14:43:25] logging.py:157 >> {'loss': 0.3146, 'learning_rate': 1.8325e-05, 'epoch': 1.17} |
|
|
|
[INFO|2025-02-12 14:44:25] logging.py:157 >> {'loss': 0.3107, 'learning_rate': 1.7854e-05, 'epoch': 1.18} |
|
|
|
[INFO|2025-02-12 14:45:25] logging.py:157 >> {'loss': 0.3146, 'learning_rate': 1.7386e-05, 'epoch': 1.20} |
|
|
|
[INFO|2025-02-12 14:46:24] logging.py:157 >> {'loss': 0.3102, 'learning_rate': 1.6922e-05, 'epoch': 1.21} |
|
|
|
[INFO|2025-02-12 14:47:24] logging.py:157 >> {'loss': 0.2748, 'learning_rate': 1.6460e-05, 'epoch': 1.22} |
|
|
|
[INFO|2025-02-12 14:48:24] logging.py:157 >> {'loss': 0.3133, 'learning_rate': 1.6001e-05, 'epoch': 1.23} |
|
|
|
[INFO|2025-02-12 14:49:24] logging.py:157 >> {'loss': 0.3062, 'learning_rate': 1.5546e-05, 'epoch': 1.25} |
|
|
|
[INFO|2025-02-12 14:49:24] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-500 |
|
|
|
[INFO|2025-02-12 14:49:25] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-500/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 14:49:25] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-500/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 14:50:25] logging.py:157 >> {'loss': 0.2781, 'learning_rate': 1.5095e-05, 'epoch': 1.26} |
|
|
|
[INFO|2025-02-12 14:51:25] logging.py:157 >> {'loss': 0.2935, 'learning_rate': 1.4647e-05, 'epoch': 1.27} |
|
|
|
[INFO|2025-02-12 14:52:24] logging.py:157 >> {'loss': 0.3056, 'learning_rate': 1.4203e-05, 'epoch': 1.28} |
|
|
|
[INFO|2025-02-12 14:53:24] logging.py:157 >> {'loss': 0.2948, 'learning_rate': 1.3764e-05, 'epoch': 1.30} |
|
|
|
[INFO|2025-02-12 14:54:24] logging.py:157 >> {'loss': 0.3089, 'learning_rate': 1.3329e-05, 'epoch': 1.31} |
|
|
|
[INFO|2025-02-12 14:55:24] logging.py:157 >> {'loss': 0.3152, 'learning_rate': 1.2898e-05, 'epoch': 1.32} |
|
|
|
[INFO|2025-02-12 14:56:24] logging.py:157 >> {'loss': 0.3105, 'learning_rate': 1.2472e-05, 'epoch': 1.33} |
|
|
|
[INFO|2025-02-12 14:57:23] logging.py:157 >> {'loss': 0.3053, 'learning_rate': 1.2050e-05, 'epoch': 1.35} |
|
|
|
[INFO|2025-02-12 14:58:23] logging.py:157 >> {'loss': 0.2892, 'learning_rate': 1.1634e-05, 'epoch': 1.36} |
|
|
|
[INFO|2025-02-12 14:59:23] logging.py:157 >> {'loss': 0.2919, 'learning_rate': 1.1223e-05, 'epoch': 1.37} |
|
|
|
[INFO|2025-02-12 15:00:23] logging.py:157 >> {'loss': 0.2886, 'learning_rate': 1.0817e-05, 'epoch': 1.38} |
|
|
|
[INFO|2025-02-12 15:01:23] logging.py:157 >> {'loss': 0.3008, 'learning_rate': 1.0417e-05, 'epoch': 1.40} |
|
|
|
[INFO|2025-02-12 15:02:22] logging.py:157 >> {'loss': 0.2882, 'learning_rate': 1.0022e-05, 'epoch': 1.41} |
|
|
|
[INFO|2025-02-12 15:03:22] logging.py:157 >> {'loss': 0.3124, 'learning_rate': 9.6325e-06, 'epoch': 1.42} |
|
|
|
[INFO|2025-02-12 15:04:22] logging.py:157 >> {'loss': 0.3374, 'learning_rate': 9.2492e-06, 'epoch': 1.43} |
|
|
|
[INFO|2025-02-12 15:05:22] logging.py:157 >> {'loss': 0.3153, 'learning_rate': 8.8720e-06, 'epoch': 1.45} |
|
|
|
[INFO|2025-02-12 15:06:22] logging.py:157 >> {'loss': 0.2844, 'learning_rate': 8.5010e-06, 'epoch': 1.46} |
|
|
|
[INFO|2025-02-12 15:07:21] logging.py:157 >> {'loss': 0.2733, 'learning_rate': 8.1363e-06, 'epoch': 1.47} |
|
|
|
[INFO|2025-02-12 15:08:21] logging.py:157 >> {'loss': 0.3047, 'learning_rate': 7.7781e-06, 'epoch': 1.48} |
|
|
|
[INFO|2025-02-12 15:09:21] logging.py:157 >> {'loss': 0.3215, 'learning_rate': 7.4265e-06, 'epoch': 1.50} |
|
|
|
[INFO|2025-02-12 15:09:21] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-600 |
|
|
|
[INFO|2025-02-12 15:09:22] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-600/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 15:09:22] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-600/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 15:10:22] logging.py:157 >> {'loss': 0.2956, 'learning_rate': 7.0816e-06, 'epoch': 1.51} |
|
|
|
[INFO|2025-02-12 15:11:22] logging.py:157 >> {'loss': 0.3137, 'learning_rate': 6.7436e-06, 'epoch': 1.52} |
|
|
|
[INFO|2025-02-12 15:12:22] logging.py:157 >> {'loss': 0.3049, 'learning_rate': 6.4126e-06, 'epoch': 1.53} |
|
|
|
[INFO|2025-02-12 15:13:22] logging.py:157 >> {'loss': 0.2976, 'learning_rate': 6.0888e-06, 'epoch': 1.55} |
|
|
|
[INFO|2025-02-12 15:14:22] logging.py:157 >> {'loss': 0.3089, 'learning_rate': 5.7722e-06, 'epoch': 1.56} |
|
|
|
[INFO|2025-02-12 15:15:21] logging.py:157 >> {'loss': 0.2935, 'learning_rate': 5.4629e-06, 'epoch': 1.57} |
|
|
|
[INFO|2025-02-12 15:16:21] logging.py:157 >> {'loss': 0.2789, 'learning_rate': 5.1612e-06, 'epoch': 1.58} |
|
|
|
[INFO|2025-02-12 15:17:21] logging.py:157 >> {'loss': 0.3113, 'learning_rate': 4.8671e-06, 'epoch': 1.60} |
|
|
|
[INFO|2025-02-12 15:18:21] logging.py:157 >> {'loss': 0.2980, 'learning_rate': 4.5807e-06, 'epoch': 1.61} |
|
|
|
[INFO|2025-02-12 15:19:21] logging.py:157 >> {'loss': 0.2944, 'learning_rate': 4.3021e-06, 'epoch': 1.62} |
|
|
|
[INFO|2025-02-12 15:20:20] logging.py:157 >> {'loss': 0.3276, 'learning_rate': 4.0315e-06, 'epoch': 1.63} |
|
|
|
[INFO|2025-02-12 15:21:20] logging.py:157 >> {'loss': 0.2884, 'learning_rate': 3.7689e-06, 'epoch': 1.65} |
|
|
|
[INFO|2025-02-12 15:22:20] logging.py:157 >> {'loss': 0.3036, 'learning_rate': 3.5144e-06, 'epoch': 1.66} |
|
|
|
[INFO|2025-02-12 15:23:20] logging.py:157 >> {'loss': 0.2983, 'learning_rate': 3.2682e-06, 'epoch': 1.67} |
|
|
|
[INFO|2025-02-12 15:24:20] logging.py:157 >> {'loss': 0.3029, 'learning_rate': 3.0304e-06, 'epoch': 1.68} |
|
|
|
[INFO|2025-02-12 15:25:19] logging.py:157 >> {'loss': 0.3161, 'learning_rate': 2.8009e-06, 'epoch': 1.70} |
|
|
|
[INFO|2025-02-12 15:26:19] logging.py:157 >> {'loss': 0.2799, 'learning_rate': 2.5800e-06, 'epoch': 1.71} |
|
|
|
[INFO|2025-02-12 15:27:19] logging.py:157 >> {'loss': 0.3199, 'learning_rate': 2.3677e-06, 'epoch': 1.72} |
|
|
|
[INFO|2025-02-12 15:28:19] logging.py:157 >> {'loss': 0.2765, 'learning_rate': 2.1640e-06, 'epoch': 1.73} |
|
|
|
[INFO|2025-02-12 15:29:19] logging.py:157 >> {'loss': 0.2986, 'learning_rate': 1.9691e-06, 'epoch': 1.75} |
|
|
|
[INFO|2025-02-12 15:29:19] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-700 |
|
|
|
[INFO|2025-02-12 15:29:20] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-700/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 15:29:20] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-700/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 15:30:20] logging.py:157 >> {'loss': 0.3137, 'learning_rate': 1.7831e-06, 'epoch': 1.76} |
|
|
|
[INFO|2025-02-12 15:31:20] logging.py:157 >> {'loss': 0.3068, 'learning_rate': 1.6059e-06, 'epoch': 1.77} |
|
|
|
[INFO|2025-02-12 15:32:20] logging.py:157 >> {'loss': 0.2971, 'learning_rate': 1.4378e-06, 'epoch': 1.78} |
|
|
|
[INFO|2025-02-12 15:33:20] logging.py:157 >> {'loss': 0.2810, 'learning_rate': 1.2786e-06, 'epoch': 1.80} |
|
|
|
[INFO|2025-02-12 15:34:19] logging.py:157 >> {'loss': 0.3003, 'learning_rate': 1.1286e-06, 'epoch': 1.81} |
|
|
|
[INFO|2025-02-12 15:35:19] logging.py:157 >> {'loss': 0.2982, 'learning_rate': 9.8775e-07, 'epoch': 1.82} |
|
|
|
[INFO|2025-02-12 15:36:19] logging.py:157 >> {'loss': 0.2919, 'learning_rate': 8.5608e-07, 'epoch': 1.83} |
|
|
|
[INFO|2025-02-12 15:37:19] logging.py:157 >> {'loss': 0.2787, 'learning_rate': 7.3368e-07, 'epoch': 1.85} |
|
|
|
[INFO|2025-02-12 15:38:19] logging.py:157 >> {'loss': 0.2953, 'learning_rate': 6.2059e-07, 'epoch': 1.86} |
|
|
|
[INFO|2025-02-12 15:39:18] logging.py:157 >> {'loss': 0.3198, 'learning_rate': 5.1685e-07, 'epoch': 1.87} |
|
|
|
[INFO|2025-02-12 15:40:18] logging.py:157 >> {'loss': 0.2757, 'learning_rate': 4.2250e-07, 'epoch': 1.88} |
|
|
|
[INFO|2025-02-12 15:41:18] logging.py:157 >> {'loss': 0.2687, 'learning_rate': 3.3758e-07, 'epoch': 1.90} |
|
|
|
[INFO|2025-02-12 15:42:18] logging.py:157 >> {'loss': 0.2904, 'learning_rate': 2.6212e-07, 'epoch': 1.91} |
|
|
|
[INFO|2025-02-12 15:43:18] logging.py:157 >> {'loss': 0.2968, 'learning_rate': 1.9615e-07, 'epoch': 1.92} |
|
|
|
[INFO|2025-02-12 15:44:17] logging.py:157 >> {'loss': 0.2820, 'learning_rate': 1.3970e-07, 'epoch': 1.93} |
|
|
|
[INFO|2025-02-12 15:45:17] logging.py:157 >> {'loss': 0.3100, 'learning_rate': 9.2776e-08, 'epoch': 1.95} |
|
|
|
[INFO|2025-02-12 15:46:17] logging.py:157 >> {'loss': 0.2805, 'learning_rate': 5.5411e-08, 'epoch': 1.96} |
|
|
|
[INFO|2025-02-12 15:47:17] logging.py:157 >> {'loss': 0.2869, 'learning_rate': 2.7615e-08, 'epoch': 1.97} |
|
|
|
[INFO|2025-02-12 15:48:17] logging.py:157 >> {'loss': 0.3079, 'learning_rate': 9.3979e-09, 'epoch': 1.98} |
|
|
|
[INFO|2025-02-12 15:49:16] logging.py:157 >> {'loss': 0.3110, 'learning_rate': 7.6722e-10, 'epoch': 2.00} |
|
|
|
[INFO|2025-02-12 15:49:16] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-800 |
|
|
|
[INFO|2025-02-12 15:49:17] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-800/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 15:49:17] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-800/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 15:49:42] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-802 |
|
|
|
[INFO|2025-02-12 15:49:42] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-802/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 15:49:42] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/checkpoint-802/special_tokens_map.json |
|
|
|
[INFO|2025-02-12 15:49:43] trainer.py:2584 >>

Training completed. Do not forget to share your model on huggingface.co/models =)
|
|
|
|
|
|
|
[INFO|2025-02-12 15:49:43] trainer.py:3801 >> Saving model checkpoint to saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned |
|
|
|
[INFO|2025-02-12 15:49:44] tokenization_utils_base.py:2646 >> tokenizer config file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/tokenizer_config.json |
|
|
|
[INFO|2025-02-12 15:49:44] tokenization_utils_base.py:2655 >> Special tokens file saved in saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned/special_tokens_map.json |
|
|
|
[WARNING|2025-02-12 15:49:44] logging.py:162 >> No metric eval_loss to plot. |
|
|
|
[WARNING|2025-02-12 15:49:44] logging.py:162 >> No metric eval_accuracy to plot. |
|
|
|
[INFO|2025-02-12 15:49:44] modelcard.py:449 >> Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
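No eval_loss or eval_accuracy was logged, which indicates that no evaluation split was configured for this run; only the training-loss trajectory above is available. A minimal sketch for loading the finished LoRA adapter for inference (assumes the `peft` package is installed; the model name and adapter path are the ones shown in this log):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    adapter_dir = "saves/DeepSeek-Coder-6.7B-Base/lora/solidity_lora_finetuned"
    base = AutoModelForCausalLM.from_pretrained(
        "deepseek-ai/deepseek-coder-6.7b-base", torch_dtype=torch.bfloat16
    )
    model = PeftModel.from_pretrained(base, adapter_dir)
    tokenizer = AutoTokenizer.from_pretrained(adapter_dir)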