diff --git "a/run.log" "b/run.log" new file mode 100644--- /dev/null +++ "b/run.log" @@ -0,0 +1,1649 @@ +[2022-12-18 23:20:39,793] [WARNING] [runner.py:179:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. +[2022-12-18 23:20:39,805] [INFO] [runner.py:508:main] cmd = /home/milan/hf_env/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 run_speech_recognition_seq2seq_streaming.py --deepspeed=ds_config.json --model_name_or_path=openai/whisper-large-v2 --dataset_name=mozilla-foundation/common_voice_11_0 --dataset_config_name=hu --language=hungarian --train_split_name=train+validation --eval_split_name=test --model_index_name=Whisper Large-v2 Hungarian CV11 --max_steps=5000 --output_dir=./ --per_device_train_batch_size=32 --per_device_eval_batch_size=8 --gradient_accumulation_steps=2 --logging_steps=25 --learning_rate=1e-5 --warmup_steps=500 --evaluation_strategy=steps --eval_steps=1000 --save_strategy=steps --save_steps=1000 --generation_max_length=225 --length_column_name=input_length --max_duration_in_seconds=30 --text_column_name=sentence --freeze_feature_encoder=False --report_to=tensorboard --metric_for_best_model=wer --greater_is_better=False --load_best_model_at_end --gradient_checkpointing --fp16 --overwrite_output_dir --do_train --do_eval --predict_with_generate --do_normalize_eval --streaming=False --use_auth_token --push_to_hub +[2022-12-18 23:20:41,392] [INFO] [launch.py:142:main] WORLD INFO DICT: {'localhost': [0]} +[2022-12-18 23:20:41,392] [INFO] [launch.py:148:main] nnodes=1, num_local_procs=1, node_rank=0 +[2022-12-18 23:20:41,392] [INFO] [launch.py:161:main] global_rank_mapping=defaultdict(, {'localhost': [0]}) +[2022-12-18 23:20:41,392] [INFO] [launch.py:162:main] dist_world_size=1 +[2022-12-18 23:20:41,392] [INFO] [launch.py:164:main] Setting CUDA_VISIBLE_DEVICES=0 +[2022-12-18 23:20:42,403] [INFO] [launch.py:318:sigkill_handler] Killing subprocess 1700873 +[2022-12-18 23:20:42,403] [ERROR] [launch.py:324:sigkill_handler] ['/home/milan/hf_env/bin/python3', '-u', 'run_speech_recognition_seq2seq_streaming.py', '--local_rank=0', '--deepspeed=ds_config.json', '--model_name_or_path=openai/whisper-large-v2', '--dataset_name=mozilla-foundation/common_voice_11_0', '--dataset_config_name=hu', '--language=hungarian', '--train_split_name=train+validation', '--eval_split_name=test', '--model_index_name=Whisper Large-v2 Hungarian CV11', '--max_steps=5000', '--output_dir=./', '--per_device_train_batch_size=32', '--per_device_eval_batch_size=8', '--gradient_accumulation_steps=2', '--logging_steps=25', '--learning_rate=1e-5', '--warmup_steps=500', '--evaluation_strategy=steps', '--eval_steps=1000', '--save_strategy=steps', '--save_steps=1000', '--generation_max_length=225', '--length_column_name=input_length', '--max_duration_in_seconds=30', '--text_column_name=sentence', '--freeze_feature_encoder=False', '--report_to=tensorboard', '--metric_for_best_model=wer', '--greater_is_better=False', '--load_best_model_at_end', '--gradient_checkpointing', '--fp16', '--overwrite_output_dir', '--do_train', '--do_eval', '--predict_with_generate', '--do_normalize_eval', '--streaming=False', '--use_auth_token', '--push_to_hub'] exits with return code = 2 +[2022-12-18 23:21:24,489] [WARNING] [runner.py:179:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only. +[2022-12-18 23:21:24,500] [INFO] [runner.py:508:main] cmd = /home/milan/hf_env/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 run_speech_recognition_seq2seq_streaming.py --deepspeed=ds_config.json --model_name_or_path=openai/whisper-large-v2 --dataset_name=mozilla-foundation/common_voice_11_0 --dataset_config_name=hu --language=hungarian --train_split_name=train+validation --eval_split_name=test --model_index_name=Whisper Large-v2 Hungarian CV11 --max_steps=5000 --output_dir=./ --per_device_train_batch_size=32 --per_device_eval_batch_size=8 --gradient_accumulation_steps=2 --logging_steps=25 --learning_rate=1e-5 --warmup_steps=500 --evaluation_strategy=steps --eval_steps=1000 --save_strategy=steps --save_steps=1000 --generation_max_length=225 --length_column_name=input_length --max_duration_in_seconds=30 --text_column_name=sentence --freeze_feature_encoder=False --report_to=tensorboard --metric_for_best_model=wer --greater_is_better=False --load_best_model_at_end --gradient_checkpointing --fp16 --overwrite_output_dir --do_train --do_eval --predict_with_generate --do_normalize_eval --streaming=False --use_auth_token --push_to_hub +[2022-12-18 23:21:26,072] [INFO] [launch.py:142:main] WORLD INFO DICT: {'localhost': [0]} +[2022-12-18 23:21:26,073] [INFO] [launch.py:148:main] nnodes=1, num_local_procs=1, node_rank=0 +[2022-12-18 23:21:26,073] [INFO] [launch.py:161:main] global_rank_mapping=defaultdict(, {'localhost': [0]}) +[2022-12-18 23:21:26,073] [INFO] [launch.py:162:main] dist_world_size=1 +[2022-12-18 23:21:26,073] [INFO] [launch.py:164:main] Setting CUDA_VISIBLE_DEVICES=0 +[2022-12-18 23:21:30,550] [INFO] [comm.py:654:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +12/18/2022 23:21:30 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: True +12/18/2022 23:21:30 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=ds_config.json, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generation_max_length=225, +generation_num_beams=None, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +greater_is_better=False, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=1e-05, +length_column_name=input_length, +load_best_model_at_end=True, +local_rank=0, +log_level=passive, +log_level_replica=passive, +log_on_each_node=True, +logging_dir=./runs/Dec18_23-21-30_129-146-123-136, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=25, +logging_strategy=steps, +lr_scheduler_type=linear, +max_grad_norm=1.0, +max_steps=5000, +metric_for_best_model=wer, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=adamw_hf, +optim_args=None, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=32, +predict_with_generate=True, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +sortish_sampler=False, +tf32=None, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +use_mps_device=False, +warmup_ratio=0.0, +warmup_steps=500, +weight_decay=0.0, +xpu_backend=None, +) +12/18/2022 23:21:30 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +_n_gpu=1, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=ds_config.json, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generation_max_length=225, +generation_num_beams=None, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +greater_is_better=False, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=1e-05, +length_column_name=input_length, +load_best_model_at_end=True, +local_rank=0, +log_level=passive, +log_level_replica=passive, +log_on_each_node=True, +logging_dir=./runs/Dec18_23-21-30_129-146-123-136, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=25, +logging_strategy=steps, +lr_scheduler_type=linear, +max_grad_norm=1.0, +max_steps=5000, +metric_for_best_model=wer, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=adamw_hf, +optim_args=None, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=32, +predict_with_generate=True, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['tensorboard'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +sortish_sampler=False, +tf32=None, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +use_mps_device=False, +warmup_ratio=0.0, +warmup_steps=500, +weight_decay=0.0, +xpu_backend=None, +) +12/18/2022 23:21:32 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/18/2022 23:21:32 - INFO - datasets.builder - Generating dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) +Downloading and preparing dataset common_voice_11_0/hu to /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f... +12/18/2022 23:21:32 - INFO - datasets.builder - Dataset not on Hf google storage. Downloading and preparing it from source +12/18/2022 23:21:32 - INFO - datasets.download.download_manager - Downloading took 0.0 min +12/18/2022 23:21:32 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min +12/18/2022 23:21:33 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/train/hu_train_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpn9e2qg3x +12/18/2022 23:21:36 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/train/hu_train_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/7834691dc3252612415601745623f11a98f4b1bffa5bfbef1775a6a125bf96d5 +12/18/2022 23:21:36 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/7834691dc3252612415601745623f11a98f4b1bffa5bfbef1775a6a125bf96d5 +12/18/2022 23:21:36 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/dev/hu_dev_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpbckid5hj +12/18/2022 23:21:39 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/dev/hu_dev_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/f4f7f59bff00cd3b0b6b94ffedd3f53e06e4c3d38e577ece03f351ffe08a8825 +12/18/2022 23:21:39 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/f4f7f59bff00cd3b0b6b94ffedd3f53e06e4c3d38e577ece03f351ffe08a8825 +12/18/2022 23:21:39 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/test/hu_test_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmphjnydhgg +12/18/2022 23:21:41 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/test/hu_test_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/31d615ab9a0ef7361f97cf9d3083d5561690aead7af67e08a4ef18c5b7926f04 +12/18/2022 23:21:41 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/31d615ab9a0ef7361f97cf9d3083d5561690aead7af67e08a4ef18c5b7926f04 +12/18/2022 23:21:41 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/other/hu_other_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmp3lrwbg7l +12/18/2022 23:21:43 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/other/hu_other_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/99e33b35c4b81364a2c4ef5f17b9fb8fc28d87152465e864d0a680627170ab2d +12/18/2022 23:21:43 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/99e33b35c4b81364a2c4ef5f17b9fb8fc28d87152465e864d0a680627170ab2d +12/18/2022 23:21:43 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/invalidated/hu_invalidated_0.tar not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpg6qcc5ou +12/18/2022 23:21:44 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/audio/hu/invalidated/hu_invalidated_0.tar in cache at /home/milan/.cache/huggingface/datasets/downloads/a540babc5e249d63db76dea5c2bd5c7553411f4dc9144a980f49ebcdd4eeb388 +12/18/2022 23:21:44 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/a540babc5e249d63db76dea5c2bd5c7553411f4dc9144a980f49ebcdd4eeb388 +12/18/2022 23:21:44 - INFO - datasets.download.download_manager - Downloading took 0.0 min +12/18/2022 23:21:44 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min +12/18/2022 23:21:47 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/train.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpxe8j68i2 +12/18/2022 23:21:48 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/train.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/1915e72959d2cb0a4b6532a7fdb8857bb29f1e69e5b1476dbce7c18a3372609b +12/18/2022 23:21:48 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/1915e72959d2cb0a4b6532a7fdb8857bb29f1e69e5b1476dbce7c18a3372609b +12/18/2022 23:21:48 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/dev.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpqmu0mwf4 +12/18/2022 23:21:49 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/dev.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/d789b00e1e46cb8ce815f3f23eb950e38ba07c6a4a354c231b781e239afa5dff +12/18/2022 23:21:49 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/d789b00e1e46cb8ce815f3f23eb950e38ba07c6a4a354c231b781e239afa5dff +12/18/2022 23:21:49 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/test.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpug_du8ny +12/18/2022 23:21:49 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/test.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/209e8710991230d77e369b85cd3e5b7baccf6623929d9b164f1020dd0d5d74b3 +12/18/2022 23:21:49 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/209e8710991230d77e369b85cd3e5b7baccf6623929d9b164f1020dd0d5d74b3 +12/18/2022 23:21:49 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/other.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmpy9ri4851 +12/18/2022 23:21:50 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/other.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/767ea79b4879066f497bc041f562b7cf7d3f9d63dded84079eab8f7378e39bb8 +12/18/2022 23:21:50 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/767ea79b4879066f497bc041f562b7cf7d3f9d63dded84079eab8f7378e39bb8 +12/18/2022 23:21:50 - INFO - datasets.utils.file_utils - https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/invalidated.tsv not found in cache or force_download set to True, downloading to /home/milan/.cache/huggingface/datasets/downloads/tmphpulzdsv +12/18/2022 23:21:51 - INFO - datasets.utils.file_utils - storing https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0/resolve/streaming/transcript/hu/invalidated.tsv in cache at /home/milan/.cache/huggingface/datasets/downloads/e809ce8b81700f5ad6edef088b29cb6a39b5ec201977c3bbd5679994e0259f0a +12/18/2022 23:21:51 - INFO - datasets.utils.file_utils - creating metadata file for /home/milan/.cache/huggingface/datasets/downloads/e809ce8b81700f5ad6edef088b29cb6a39b5ec201977c3bbd5679994e0259f0a +12/18/2022 23:21:51 - INFO - datasets.download.download_manager - Downloading took 0.0 min +12/18/2022 23:21:51 - INFO - datasets.download.download_manager - Checksum Computation took 0.0 min +12/18/2022 23:21:51 - INFO - datasets.utils.info_utils - Unable to verify checksums. +12/18/2022 23:21:51 - INFO - datasets.builder - Generating train split +12/18/2022 23:21:53 - INFO - datasets.builder - Generating validation split +12/18/2022 23:21:54 - INFO - datasets.builder - Generating test split +12/18/2022 23:21:56 - INFO - datasets.builder - Generating other split +12/18/2022 23:21:56 - INFO - datasets.builder - Generating invalidated split +12/18/2022 23:21:56 - INFO - datasets.utils.info_utils - Unable to verify splits sizes. +Dataset common_voice_11_0 downloaded and prepared to /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f. Subsequent calls will reuse this data. +12/18/2022 23:21:58 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/18/2022 23:21:58 - INFO - datasets.builder - Overwrite dataset info from restored data version. +12/18/2022 23:21:58 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/18/2022 23:21:58 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) +12/18/2022 23:21:58 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/18/2022 23:22:00 - INFO - datasets.info - Loading Dataset Infos from /home/milan/.cache/huggingface/modules/datasets_modules/datasets/mozilla-foundation--common_voice_11_0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/18/2022 23:22:00 - INFO - datasets.builder - Overwrite dataset info from restored data version. +12/18/2022 23:22:00 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/18/2022 23:22:00 - WARNING - datasets.builder - Found cached dataset common_voice_11_0 (/home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f) +12/18/2022 23:22:00 - INFO - datasets.info - Loading Dataset info from /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f +12/18/2022 23:22:19 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-49c0230e0aca00fb.arrow +12/18/2022 23:43:21 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-74d1a56461b6f9a2.arrow +12/18/2022 23:55:29 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/milan/.cache/huggingface/datasets/mozilla-foundation___common_voice_11_0/hu/11.0.0/f8e47235d9b4e68fa24ed71d63266a02018ccf7194b2a8c9c598a5f3ab304d9f/cache-dddd3ddd0dbc3a22.arrow +12/18/2022 23:55:31 - WARNING - huggingface_hub.repository - /home/milan/whisper-large2-hu-cv11/./ is already a clone of https://huggingface.co/mikr/whisper-large2-hu-cv11. Make sure you pull the latest changes with `repo.git_pull()`. +[2022-12-18 23:55:35,322] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.7, git-hash=unknown, git-branch=unknown +[2022-12-18 23:55:36,509] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +[2022-12-18 23:55:37,681] [WARNING] [cpu_adam.py:83:__init__] FP16 params for CPUAdam may not work on AMD CPUs +Installed CUDA version 11.6 does not match the version torch was compiled with 11.7 but since the APIs are compatible, accepting this combination +[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/includes -I/usr/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/TH -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/THC -isystem /usr/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 --use_fast_math -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -c /home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/common/custom_cuda_kernel.cu -o custom_cuda_kernel.cuda.o +[2/3] c++ -MMD -MF cpu_adam.o.d -DTORCH_EXTENSION_NAME=cpu_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/includes -I/usr/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/TH -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/THC -isystem /usr/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -O3 -std=c++14 -g -Wno-reorder -L/usr/lib64 -lcudart -lcublas -g -march=native -fopenmp -D__AVX256__ -c /home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/adam/cpu_adam.cpp -o cpu_adam.o +[3/3] c++ cpu_adam.o custom_cuda_kernel.cuda.o -shared -lcurand -L/home/milan/hf_env/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/lib64 -lcudart -o cpu_adam.so +Time to load cpu_adam op: 28.829350471496582 seconds +Adam Optimizer #0 is created with AVX2 arithmetic capability. +Config: alpha=0.000010, betas=(0.900000, 0.999000), weight_decay=0.000000, adam_w=1 +[2022-12-18 23:56:08,279] [INFO] [logging.py:68:log_dist] [Rank 0] Using DeepSpeed Optimizer param name adamw as basic optimizer +[2022-12-18 23:56:08,583] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = DeepSpeedCPUAdam +[2022-12-18 23:56:08,583] [INFO] [utils.py:52:is_zero_supported_optimizer] Checking ZeRO support for optimizer=DeepSpeedCPUAdam type= +[2022-12-18 23:56:08,583] [INFO] [logging.py:68:log_dist] [Rank 0] Creating fp16 ZeRO stage 2 optimizer +[2022-12-18 23:56:08,583] [INFO] [stage_1_and_2.py:140:__init__] Reduce bucket size 200000000 +[2022-12-18 23:56:08,584] [INFO] [stage_1_and_2.py:141:__init__] Allgather bucket size 200000000 +[2022-12-18 23:56:08,584] [INFO] [stage_1_and_2.py:142:__init__] CPU Offload: True +[2022-12-18 23:56:08,584] [INFO] [stage_1_and_2.py:143:__init__] Round robin gradient partitioning: False +[1/2] c++ -MMD -MF flatten_unflatten.o.d -DTORCH_EXTENSION_NAME=utils -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/TH -isystem /home/milan/hf_env/lib/python3.8/site-packages/torch/include/THC -isystem /usr/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -c /home/milan/hf_env/lib/python3.8/site-packages/deepspeed/ops/csrc/utils/flatten_unflatten.cpp -o flatten_unflatten.o +[2/2] c++ flatten_unflatten.o -shared -L/home/milan/hf_env/lib/python3.8/site-packages/torch/lib -lc10 -ltorch_cpu -ltorch -ltorch_python -o utils.so +Time to load utils op: 15.238850355148315 seconds +Rank: 0 partition count [1] and sizes[(1543304960, False)] +[2022-12-18 23:56:27,203] [INFO] [utils.py:827:see_memory_usage] Before initializing optimizer states +[2022-12-18 23:56:27,203] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB +[2022-12-18 23:56:27,204] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 15.46 GB, percent = 7.9% +[2022-12-18 23:56:31,112] [INFO] [utils.py:827:see_memory_usage] After initializing optimizer states +[2022-12-18 23:56:31,112] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB +[2022-12-18 23:56:31,113] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.12 GB, percent = 17.9% +[2022-12-18 23:56:31,113] [INFO] [stage_1_and_2.py:525:__init__] optimizer state initialized +[2022-12-18 23:56:31,183] [INFO] [utils.py:827:see_memory_usage] After initializing ZeRO optimizer +[2022-12-18 23:56:31,183] [INFO] [utils.py:828:see_memory_usage] MA 3.0 GB Max_MA 3.0 GB CA 5.99 GB Max_CA 6 GB +[2022-12-18 23:56:31,184] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.12 GB, percent = 17.9% +[2022-12-18 23:56:31,208] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = adamw +[2022-12-18 23:56:31,208] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using configured LR scheduler = WarmupDecayLR +[2022-12-18 23:56:31,208] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +[2022-12-18 23:56:31,208] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-18 23:56:31,210] [INFO] [config.py:1020:print] DeepSpeedEngine configuration: +[2022-12-18 23:56:31,210] [INFO] [config.py:1024:print] activation_checkpointing_config { + "partition_activations": false, + "contiguous_memory_optimization": false, + "cpu_checkpointing": false, + "number_checkpoints": null, + "synchronize_checkpoint_boundary": false, + "profile": false +} +[2022-12-18 23:56:31,210] [INFO] [config.py:1024:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +[2022-12-18 23:56:31,210] [INFO] [config.py:1024:print] amp_enabled .................. False +[2022-12-18 23:56:31,210] [INFO] [config.py:1024:print] amp_params ................... False +[2022-12-18 23:56:31,210] [INFO] [config.py:1024:print] autotuning_config ............ { + "enabled": false, + "start_step": null, + "end_step": null, + "metric_path": null, + "arg_mappings": null, + "metric": "throughput", + "model_info": null, + "results_dir": "autotuning_results", + "exps_dir": "autotuning_exps", + "overwrite": true, + "fast": true, + "start_profile_step": 3, + "end_profile_step": 5, + "tuner_type": "gridsearch", + "tuner_early_stopping": 5, + "tuner_num_trials": 50, + "model_info_path": null, + "mp_size": 1, + "max_train_batch_size": null, + "min_train_batch_size": 1, + "max_train_micro_batch_size_per_gpu": 1.024000e+03, + "min_train_micro_batch_size_per_gpu": 1, + "num_tuning_micro_batch_sizes": 3 +} +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] bfloat16_enabled ............. False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] checkpoint_parallel_write_pipeline False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] checkpoint_tag_validation_enabled True +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] checkpoint_tag_validation_fail False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] comms_config ................. +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] communication_data_type ...... None +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] curriculum_enabled ........... False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] curriculum_params ............ False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] dataloader_drop_last ......... False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] disable_allgather ............ False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] dump_state ................... False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] dynamic_loss_scale_args ...... {'init_scale': 65536, 'scale_window': 1000, 'delayed_shift': 2, 'min_scale': 1} +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_enabled ........... False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_gas_boundary_resolution 1 +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_layer_name ........ bert.encoder.layer +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_layer_num ......... 0 +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_max_iter .......... 100 +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_stability ......... 1e-06 +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_tol ............... 0.01 +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] eigenvalue_verbose ........... False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] elasticity_enabled ........... False +[2022-12-18 23:56:31,211] [INFO] [config.py:1024:print] flops_profiler_config ........ { + "enabled": false, + "profile_step": 1, + "module_depth": -1, + "top_modules": 1, + "detailed": true, + "output_file": null +} +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] fp16_auto_cast ............... False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] fp16_enabled ................. True +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] fp16_master_weights_and_gradients False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] global_rank .................. 0 +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] grad_accum_dtype ............. None +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] gradient_accumulation_steps .. 2 +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] gradient_clipping ............ 1.0 +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] gradient_predivide_factor .... 1.0 +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] initial_dynamic_scale ........ 65536 +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] load_universal_checkpoint .... False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] loss_scale ................... 0 +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] memory_breakdown ............. False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] monitor_config ............... +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] nebula_config ................ { + "enabled": false, + "persistent_storage_path": null, + "persistent_time_interval": 100, + "num_of_version_in_retention": 2, + "enable_nebula_load": true, + "load_path": null +} +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] optimizer_legacy_fusion ...... False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] optimizer_name ............... adamw +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] optimizer_params ............. {'lr': 1e-05, 'betas': [0.9, 0.999], 'eps': 1e-08, 'weight_decay': 0.0} +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] pld_enabled .................. False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] pld_params ................... False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] prescale_gradients ........... False +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] scheduler_name ............... WarmupDecayLR +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] scheduler_params ............. {'last_batch_iteration': -1, 'total_num_steps': 5000, 'warmup_min_lr': 0, 'warmup_max_lr': 1e-05, 'warmup_num_steps': 500} +[2022-12-18 23:56:31,212] [INFO] [config.py:1024:print] sparse_attention ............. None +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] sparse_gradients_enabled ..... False +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] steps_per_print .............. 10 +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] train_batch_size ............. 64 +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] train_micro_batch_size_per_gpu 32 +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] use_node_local_storage ....... False +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] wall_clock_breakdown ......... False +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] world_size ................... 1 +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] zero_allow_untested_optimizer False +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=200000000 allgather_partitions=True allgather_bucket_size=200000000 overlap_comm=True load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=DeepSpeedZeroOffloadOptimizerConfig(device='cpu', nvme_path=None, buffer_count=4, pin_memory=True, pipeline=False, pipeline_read=False, pipeline_write=False, fast_init=False) sub_group_size=1,000,000,000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50,000,000 param_persistence_threshold=100,000 model_persistence_threshold=sys.maxsize max_live_parameters=1,000,000,000 max_reuse_distance=1,000,000,000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] zero_enabled ................. True +[2022-12-18 23:56:31,213] [INFO] [config.py:1024:print] zero_optimization_stage ...... 2 +[2022-12-18 23:56:31,213] [INFO] [config.py:1009:print_user_config] json = { + "fp16": { + "enabled": true, + "loss_scale": 0, + "loss_scale_window": 1000, + "initial_scale_power": 16, + "hysteresis": 2, + "min_loss_scale": 1 + }, + "optimizer": { + "type": "AdamW", + "params": { + "lr": 1e-05, + "betas": [0.9, 0.999], + "eps": 1e-08, + "weight_decay": 0.0 + } + }, + "scheduler": { + "type": "WarmupDecayLR", + "params": { + "last_batch_iteration": -1, + "total_num_steps": 5.000000e+03, + "warmup_min_lr": 0, + "warmup_max_lr": 1e-05, + "warmup_num_steps": 500 + } + }, + "zero_optimization": { + "stage": 2, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true + }, + "allgather_partitions": true, + "allgather_bucket_size": 2.000000e+08, + "overlap_comm": true, + "reduce_scatter": true, + "reduce_bucket_size": 2.000000e+08, + "contiguous_gradients": true + }, + "gradient_accumulation_steps": 2, + "gradient_clipping": 1.0, + "train_batch_size": 64, + "train_micro_batch_size_per_gpu": 32 +} +Time to load utils op: 0.0003571510314941406 seconds +[2022-12-18 23:56:55,627] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 65536 +[2022-12-18 23:56:55,629] [INFO] [timer.py:197:stop] 0/4, RunningAvgSamplesPerSec=6.763630714125663, CurrSamplesPerSec=6.400820417656072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:57:06,985] [INFO] [timer.py:197:stop] 0/6, RunningAvgSamplesPerSec=6.536430141688239, CurrSamplesPerSec=5.682596666322877, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:57:18,338] [INFO] [timer.py:197:stop] 0/8, RunningAvgSamplesPerSec=6.460811070153207, CurrSamplesPerSec=5.669483377217449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:57:29,033] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536, reducing to 32768.0 +[2022-12-18 23:57:29,035] [INFO] [timer.py:197:stop] 0/10, RunningAvgSamplesPerSec=6.52917405857395, CurrSamplesPerSec=6.383157841872113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:57:39,764] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 32768.0, reducing to 16384.0 +[2022-12-18 23:57:39,766] [INFO] [timer.py:197:stop] 0/12, RunningAvgSamplesPerSec=6.568214533908828, CurrSamplesPerSec=6.393844122568189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:57:51,114] [INFO] [timer.py:197:stop] 0/14, RunningAvgSamplesPerSec=6.525088580840918, CurrSamplesPerSec=5.684102939821898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:58:02,429] [INFO] [timer.py:197:stop] 0/16, RunningAvgSamplesPerSec=6.497422112749595, CurrSamplesPerSec=5.714115760953486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:58:13,702] [INFO] [timer.py:197:stop] 0/18, RunningAvgSamplesPerSec=6.476673258035022, CurrSamplesPerSec=5.712435261056795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:58:25,074] [INFO] [logging.py:68:log_dist] [Rank 0] step=10, skipped=3, lr=[3.131187225706726e-06], mom=[[0.9, 0.999]] +[2022-12-18 23:58:25,076] [INFO] [timer.py:197:stop] 0/20, RunningAvgSamplesPerSec=6.457550487934793, CurrSamplesPerSec=5.672483307478059, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:58:36,426] [INFO] [timer.py:197:stop] 0/22, RunningAvgSamplesPerSec=6.44415601612862, CurrSamplesPerSec=5.681215514560827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:58:47,757] [INFO] [timer.py:197:stop] 0/24, RunningAvgSamplesPerSec=6.43271076727295, CurrSamplesPerSec=5.699260773577131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:58:59,101] [INFO] [timer.py:197:stop] 0/26, RunningAvgSamplesPerSec=6.422076700162281, CurrSamplesPerSec=5.681345374840111, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:59:10,506] [INFO] [timer.py:197:stop] 0/28, RunningAvgSamplesPerSec=6.411644554384639, CurrSamplesPerSec=5.650441250855927, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:59:21,916] [INFO] [timer.py:197:stop] 0/30, RunningAvgSamplesPerSec=6.404704339537071, CurrSamplesPerSec=5.6816339744942725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:59:33,244] [INFO] [timer.py:197:stop] 0/32, RunningAvgSamplesPerSec=6.398182268942303, CurrSamplesPerSec=5.66514939224817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:59:44,592] [INFO] [timer.py:197:stop] 0/34, RunningAvgSamplesPerSec=6.393101858742206, CurrSamplesPerSec=5.696323843911464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-18 23:59:56,041] [INFO] [timer.py:197:stop] 0/36, RunningAvgSamplesPerSec=6.38533580992348, CurrSamplesPerSec=5.614370152964268, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:00:07,832] [INFO] [timer.py:197:stop] 0/38, RunningAvgSamplesPerSec=6.379980867946317, CurrSamplesPerSec=5.682436676189912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:00:19,630] [INFO] [logging.py:68:log_dist] [Rank 0] step=20, skipped=3, lr=[4.558957377820063e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:00:19,631] [INFO] [timer.py:197:stop] 0/40, RunningAvgSamplesPerSec=6.375262110305903, CurrSamplesPerSec=5.6558136022639145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:00:31,589] [INFO] [timer.py:197:stop] 0/42, RunningAvgSamplesPerSec=6.371413619674249, CurrSamplesPerSec=5.687795349133654, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:00:43,045] [INFO] [timer.py:197:stop] 0/44, RunningAvgSamplesPerSec=6.369337824657779, CurrSamplesPerSec=5.695208352808133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:00:54,450] [INFO] [timer.py:197:stop] 0/46, RunningAvgSamplesPerSec=6.365879927224, CurrSamplesPerSec=5.663550381260192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:01:05,809] [INFO] [timer.py:197:stop] 0/48, RunningAvgSamplesPerSec=6.3628448782739975, CurrSamplesPerSec=5.668740594174487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:01:17,133] [INFO] [timer.py:197:stop] 0/50, RunningAvgSamplesPerSec=6.3610764096707095, CurrSamplesPerSec=5.702925414016152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.469, 'learning_rate': 4.973833272194737e-06, 'epoch': 0.19} +[2022-12-19 00:01:28,495] [INFO] [timer.py:197:stop] 0/52, RunningAvgSamplesPerSec=6.360192297523213, CurrSamplesPerSec=5.717501892447633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:01:39,874] [INFO] [timer.py:197:stop] 0/54, RunningAvgSamplesPerSec=6.357957980419815, CurrSamplesPerSec=5.6773392751122245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:01:51,182] [INFO] [timer.py:197:stop] 0/56, RunningAvgSamplesPerSec=6.357177728437714, CurrSamplesPerSec=5.704344538382278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:02:02,727] [INFO] [timer.py:197:stop] 0/58, RunningAvgSamplesPerSec=6.355616580687708, CurrSamplesPerSec=5.6876068669078315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:02:14,121] [INFO] [logging.py:68:log_dist] [Rank 0] step=30, skipped=3, lr=[5.303370403744525e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:02:14,122] [INFO] [timer.py:197:stop] 0/60, RunningAvgSamplesPerSec=6.354439303923271, CurrSamplesPerSec=5.698230253920275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:02:25,514] [INFO] [timer.py:197:stop] 0/62, RunningAvgSamplesPerSec=6.353114078967978, CurrSamplesPerSec=5.673747966682336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:02:36,883] [INFO] [timer.py:197:stop] 0/64, RunningAvgSamplesPerSec=6.3523286363618405, CurrSamplesPerSec=5.693366030373531, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:02:48,304] [INFO] [timer.py:197:stop] 0/66, RunningAvgSamplesPerSec=6.351188060029837, CurrSamplesPerSec=5.692320259040157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:02:59,845] [INFO] [timer.py:197:stop] 0/68, RunningAvgSamplesPerSec=6.349831784979684, CurrSamplesPerSec=5.662888713661619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:03:11,147] [INFO] [timer.py:197:stop] 0/70, RunningAvgSamplesPerSec=6.349548670035175, CurrSamplesPerSec=5.7110026315224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:03:22,556] [INFO] [timer.py:197:stop] 0/72, RunningAvgSamplesPerSec=6.3490761334381425, CurrSamplesPerSec=5.730710295607525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:03:33,954] [INFO] [timer.py:197:stop] 0/74, RunningAvgSamplesPerSec=6.3478368588085825, CurrSamplesPerSec=5.664371886193913, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:03:45,417] [INFO] [timer.py:197:stop] 0/76, RunningAvgSamplesPerSec=6.346795782912525, CurrSamplesPerSec=5.668257482610842, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:03:56,804] [INFO] [timer.py:197:stop] 0/78, RunningAvgSamplesPerSec=6.34552787901348, CurrSamplesPerSec=5.67239604425034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:04:08,235] [INFO] [logging.py:68:log_dist] [Rank 0] step=40, skipped=3, lr=[5.810371073215365e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:04:08,237] [INFO] [timer.py:197:stop] 0/80, RunningAvgSamplesPerSec=6.344626667174682, CurrSamplesPerSec=5.6718181130139245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:04:19,591] [INFO] [timer.py:197:stop] 0/82, RunningAvgSamplesPerSec=6.343983756791708, CurrSamplesPerSec=5.684003283073533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:04:31,082] [INFO] [timer.py:197:stop] 0/84, RunningAvgSamplesPerSec=6.343322138412069, CurrSamplesPerSec=5.682808396438611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:04:42,408] [INFO] [timer.py:197:stop] 0/86, RunningAvgSamplesPerSec=6.342770646234098, CurrSamplesPerSec=5.688240814577332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:04:53,901] [INFO] [timer.py:197:stop] 0/88, RunningAvgSamplesPerSec=6.342187782305656, CurrSamplesPerSec=5.697844177243397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:05:05,287] [INFO] [timer.py:197:stop] 0/90, RunningAvgSamplesPerSec=6.341244291943437, CurrSamplesPerSec=5.6740868875432495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:05:16,777] [INFO] [timer.py:197:stop] 0/92, RunningAvgSamplesPerSec=6.340564634082798, CurrSamplesPerSec=5.670572042474326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:05:28,156] [INFO] [timer.py:197:stop] 0/94, RunningAvgSamplesPerSec=6.340309004792443, CurrSamplesPerSec=5.712390769212697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:05:39,483] [INFO] [timer.py:197:stop] 0/96, RunningAvgSamplesPerSec=6.340256485181379, CurrSamplesPerSec=5.717315576498292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:05:50,971] [INFO] [timer.py:197:stop] 0/98, RunningAvgSamplesPerSec=6.33952480296046, CurrSamplesPerSec=5.674263679743229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:06:02,415] [INFO] [logging.py:68:log_dist] [Rank 0] step=50, skipped=3, lr=[6.195318418690893e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:06:02,417] [INFO] [timer.py:197:stop] 0/100, RunningAvgSamplesPerSec=6.33897692276685, CurrSamplesPerSec=5.695520114560736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.2768, 'learning_rate': 6.195318418690893e-06, 'epoch': 0.37} +[2022-12-19 00:06:13,756] [INFO] [timer.py:197:stop] 0/102, RunningAvgSamplesPerSec=6.3385488318911065, CurrSamplesPerSec=5.68085770806229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:06:25,338] [INFO] [timer.py:197:stop] 0/104, RunningAvgSamplesPerSec=6.337702098624332, CurrSamplesPerSec=5.671691803620073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:06:36,705] [INFO] [timer.py:197:stop] 0/106, RunningAvgSamplesPerSec=6.337090033703842, CurrSamplesPerSec=5.67942428907718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:06:48,243] [INFO] [timer.py:197:stop] 0/108, RunningAvgSamplesPerSec=6.336622589288899, CurrSamplesPerSec=5.6761032599704375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:06:59,796] [INFO] [timer.py:197:stop] 0/110, RunningAvgSamplesPerSec=6.33521692888461, CurrSamplesPerSec=5.662787171287403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:07:11,136] [INFO] [timer.py:197:stop] 0/112, RunningAvgSamplesPerSec=6.33498229466653, CurrSamplesPerSec=5.682813689893659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:07:22,432] [INFO] [timer.py:197:stop] 0/114, RunningAvgSamplesPerSec=6.335179424223348, CurrSamplesPerSec=5.717447092378461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:07:33,770] [INFO] [timer.py:197:stop] 0/116, RunningAvgSamplesPerSec=6.334813993604455, CurrSamplesPerSec=5.681002219911839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:07:45,324] [INFO] [timer.py:197:stop] 0/118, RunningAvgSamplesPerSec=6.334159186301398, CurrSamplesPerSec=5.66402432491068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:07:56,812] [INFO] [logging.py:68:log_dist] [Rank 0] step=60, skipped=3, lr=[6.505722008216461e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:07:56,813] [INFO] [timer.py:197:stop] 0/120, RunningAvgSamplesPerSec=6.334148983596415, CurrSamplesPerSec=5.70464639030498, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:08:08,316] [INFO] [timer.py:197:stop] 0/122, RunningAvgSamplesPerSec=6.333515012729501, CurrSamplesPerSec=5.648025915880862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:08:19,714] [INFO] [timer.py:197:stop] 0/124, RunningAvgSamplesPerSec=6.33315550170096, CurrSamplesPerSec=5.683986914666378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:08:31,032] [INFO] [timer.py:197:stop] 0/126, RunningAvgSamplesPerSec=6.332929766520779, CurrSamplesPerSec=5.703466076508138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:08:42,352] [INFO] [timer.py:197:stop] 0/128, RunningAvgSamplesPerSec=6.332797306552546, CurrSamplesPerSec=5.687579391002574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:08:53,644] [INFO] [timer.py:197:stop] 0/130, RunningAvgSamplesPerSec=6.332691472812845, CurrSamplesPerSec=5.692608526146744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:09:04,933] [INFO] [timer.py:197:stop] 0/132, RunningAvgSamplesPerSec=6.332611746493174, CurrSamplesPerSec=5.691202474082477, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:09:16,262] [INFO] [timer.py:197:stop] 0/134, RunningAvgSamplesPerSec=6.3325367852403724, CurrSamplesPerSec=5.688610642467863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:09:27,607] [INFO] [timer.py:197:stop] 0/136, RunningAvgSamplesPerSec=6.332240924170941, CurrSamplesPerSec=5.695987821193032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:09:38,967] [INFO] [timer.py:197:stop] 0/138, RunningAvgSamplesPerSec=6.3317661562480065, CurrSamplesPerSec=5.666736625998635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:09:50,308] [INFO] [logging.py:68:log_dist] [Rank 0] step=70, skipped=3, lr=[6.765821034569313e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:09:50,309] [INFO] [timer.py:197:stop] 0/140, RunningAvgSamplesPerSec=6.33173963301883, CurrSamplesPerSec=5.688135709298204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:10:01,649] [INFO] [timer.py:197:stop] 0/142, RunningAvgSamplesPerSec=6.331692178123189, CurrSamplesPerSec=5.699286184381356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:10:12,985] [INFO] [timer.py:197:stop] 0/144, RunningAvgSamplesPerSec=6.3315197039139335, CurrSamplesPerSec=5.6873140452636, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:10:24,349] [INFO] [timer.py:197:stop] 0/146, RunningAvgSamplesPerSec=6.331157679958395, CurrSamplesPerSec=5.658794546738779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:10:35,686] [INFO] [timer.py:197:stop] 0/148, RunningAvgSamplesPerSec=6.330903280401342, CurrSamplesPerSec=5.6861688358950335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:10:47,000] [INFO] [timer.py:197:stop] 0/150, RunningAvgSamplesPerSec=6.330923666571888, CurrSamplesPerSec=5.68941194044744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.2426, 'learning_rate': 6.881634451095711e-06, 'epoch': 0.56} +[2022-12-19 00:10:58,301] [INFO] [timer.py:197:stop] 0/152, RunningAvgSamplesPerSec=6.331214712690136, CurrSamplesPerSec=5.724838499410061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:11:09,625] [INFO] [timer.py:197:stop] 0/154, RunningAvgSamplesPerSec=6.331224620981721, CurrSamplesPerSec=5.69813155264911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:11:20,972] [INFO] [timer.py:197:stop] 0/156, RunningAvgSamplesPerSec=6.331098653418828, CurrSamplesPerSec=5.69718777348036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:11:32,289] [INFO] [timer.py:197:stop] 0/158, RunningAvgSamplesPerSec=6.331127713740547, CurrSamplesPerSec=5.687857295508174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:11:43,679] [INFO] [logging.py:68:log_dist] [Rank 0] step=80, skipped=3, lr=[6.9896691039239e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:11:43,681] [INFO] [timer.py:197:stop] 0/160, RunningAvgSamplesPerSec=6.330809560610306, CurrSamplesPerSec=5.673481751513187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:11:55,122] [INFO] [timer.py:197:stop] 0/162, RunningAvgSamplesPerSec=6.3299476836731685, CurrSamplesPerSec=5.5857973350859265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:12:06,570] [INFO] [timer.py:197:stop] 0/164, RunningAvgSamplesPerSec=6.3290526826189835, CurrSamplesPerSec=5.600629157171996, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:12:17,860] [INFO] [timer.py:197:stop] 0/166, RunningAvgSamplesPerSec=6.329082340849397, CurrSamplesPerSec=5.701795467246513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:12:29,171] [INFO] [timer.py:197:stop] 0/168, RunningAvgSamplesPerSec=6.328855084166909, CurrSamplesPerSec=5.6554294390015665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:12:40,492] [INFO] [timer.py:197:stop] 0/170, RunningAvgSamplesPerSec=6.328759333849463, CurrSamplesPerSec=5.682248067814558, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:12:51,842] [INFO] [timer.py:197:stop] 0/172, RunningAvgSamplesPerSec=6.32877886699518, CurrSamplesPerSec=5.699245769208656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:13:03,165] [INFO] [timer.py:197:stop] 0/174, RunningAvgSamplesPerSec=6.328900436940702, CurrSamplesPerSec=5.718768188351172, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:13:14,464] [INFO] [timer.py:197:stop] 0/176, RunningAvgSamplesPerSec=6.328756221210079, CurrSamplesPerSec=5.694242591398553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:13:25,810] [INFO] [timer.py:197:stop] 0/178, RunningAvgSamplesPerSec=6.328672475411587, CurrSamplesPerSec=5.678090078488593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:13:37,168] [INFO] [logging.py:68:log_dist] [Rank 0] step=90, skipped=3, lr=[7.186146009413563e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:13:37,170] [INFO] [timer.py:197:stop] 0/180, RunningAvgSamplesPerSec=6.328216223080796, CurrSamplesPerSec=5.67523899346736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:13:48,530] [INFO] [timer.py:197:stop] 0/182, RunningAvgSamplesPerSec=6.327964880274314, CurrSamplesPerSec=5.66142064582372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:13:59,901] [INFO] [timer.py:197:stop] 0/184, RunningAvgSamplesPerSec=6.3276568043461445, CurrSamplesPerSec=5.6757672183922665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:14:11,195] [INFO] [timer.py:197:stop] 0/186, RunningAvgSamplesPerSec=6.32755591494186, CurrSamplesPerSec=5.684328504353401, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:14:22,576] [INFO] [timer.py:197:stop] 0/188, RunningAvgSamplesPerSec=6.327390350075128, CurrSamplesPerSec=5.682272605440604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:14:33,954] [INFO] [timer.py:197:stop] 0/190, RunningAvgSamplesPerSec=6.327054551930756, CurrSamplesPerSec=5.669776521185562, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:14:45,644] [INFO] [timer.py:197:stop] 0/192, RunningAvgSamplesPerSec=6.326579129921109, CurrSamplesPerSec=5.653444632792826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:14:57,344] [INFO] [timer.py:197:stop] 0/194, RunningAvgSamplesPerSec=6.325978009508057, CurrSamplesPerSec=5.6384486702391445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:15:09,240] [INFO] [timer.py:197:stop] 0/196, RunningAvgSamplesPerSec=6.325550947850442, CurrSamplesPerSec=5.655988065840153, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:15:20,778] [INFO] [timer.py:197:stop] 0/198, RunningAvgSamplesPerSec=6.325389163849206, CurrSamplesPerSec=5.687417433165994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:15:32,255] [INFO] [logging.py:68:log_dist] [Rank 0] step=100, skipped=3, lr=[7.361221988663844e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:15:32,255] [INFO] [timer.py:197:stop] 0/200, RunningAvgSamplesPerSec=6.324926355796128, CurrSamplesPerSec=5.656474332607332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.2244, 'learning_rate': 7.361221988663844e-06, 'epoch': 0.75} +[2022-12-19 00:15:43,680] [INFO] [timer.py:197:stop] 0/202, RunningAvgSamplesPerSec=6.32453033793769, CurrSamplesPerSec=5.638279076686136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:15:55,010] [INFO] [timer.py:197:stop] 0/204, RunningAvgSamplesPerSec=6.324453419893308, CurrSamplesPerSec=5.681493038000189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:16:06,556] [INFO] [timer.py:197:stop] 0/206, RunningAvgSamplesPerSec=6.324486996475307, CurrSamplesPerSec=5.691360786182658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:16:17,887] [INFO] [timer.py:197:stop] 0/208, RunningAvgSamplesPerSec=6.324509560193469, CurrSamplesPerSec=5.712404627253756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:16:29,421] [INFO] [timer.py:197:stop] 0/210, RunningAvgSamplesPerSec=6.324302705124975, CurrSamplesPerSec=5.645062238684859, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:16:40,802] [INFO] [timer.py:197:stop] 0/212, RunningAvgSamplesPerSec=6.324122228524852, CurrSamplesPerSec=5.669440509847704, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:16:52,142] [INFO] [timer.py:197:stop] 0/214, RunningAvgSamplesPerSec=6.324259002477085, CurrSamplesPerSec=5.714291893712712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:17:03,615] [INFO] [timer.py:197:stop] 0/216, RunningAvgSamplesPerSec=6.32430448459279, CurrSamplesPerSec=5.696339558174375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:17:14,994] [INFO] [timer.py:197:stop] 0/218, RunningAvgSamplesPerSec=6.324204261308274, CurrSamplesPerSec=5.680810821437903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:17:26,480] [INFO] [logging.py:68:log_dist] [Rank 0] step=110, skipped=3, lr=[7.5191046007362515e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:17:26,482] [INFO] [timer.py:197:stop] 0/220, RunningAvgSamplesPerSec=6.324113089030932, CurrSamplesPerSec=5.690471118949904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:17:37,346] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 16384.0, reducing to 8192.0 +[2022-12-19 00:17:37,348] [INFO] [timer.py:197:stop] 0/222, RunningAvgSamplesPerSec=6.327646311552264, CurrSamplesPerSec=6.392320018044726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:17:48,819] [INFO] [timer.py:197:stop] 0/224, RunningAvgSamplesPerSec=6.3274537514777744, CurrSamplesPerSec=5.669941068705244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:18:00,164] [INFO] [timer.py:197:stop] 0/226, RunningAvgSamplesPerSec=6.327342475748278, CurrSamplesPerSec=5.693449834266986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:18:11,544] [INFO] [timer.py:197:stop] 0/228, RunningAvgSamplesPerSec=6.327154317339907, CurrSamplesPerSec=5.678557569498987, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:18:22,915] [INFO] [timer.py:197:stop] 0/230, RunningAvgSamplesPerSec=6.327103220844448, CurrSamplesPerSec=5.686582002582689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:18:34,385] [INFO] [timer.py:197:stop] 0/232, RunningAvgSamplesPerSec=6.326879425525643, CurrSamplesPerSec=5.673301650733854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:18:45,742] [INFO] [timer.py:197:stop] 0/234, RunningAvgSamplesPerSec=6.326778545660543, CurrSamplesPerSec=5.673341938597102, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:18:57,269] [INFO] [timer.py:197:stop] 0/236, RunningAvgSamplesPerSec=6.326404490464032, CurrSamplesPerSec=5.697001811848725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:19:08,626] [INFO] [timer.py:197:stop] 0/238, RunningAvgSamplesPerSec=6.326351983297122, CurrSamplesPerSec=5.681701559150151, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:19:20,130] [INFO] [logging.py:68:log_dist] [Rank 0] step=120, skipped=4, lr=[7.649058662787184e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:19:20,132] [INFO] [timer.py:197:stop] 0/240, RunningAvgSamplesPerSec=6.326111817729406, CurrSamplesPerSec=5.663624945048574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:19:31,444] [INFO] [timer.py:197:stop] 0/242, RunningAvgSamplesPerSec=6.3261978165459585, CurrSamplesPerSec=5.6900899525945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:19:42,809] [INFO] [timer.py:197:stop] 0/244, RunningAvgSamplesPerSec=6.3261069382885795, CurrSamplesPerSec=5.673907467917805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:19:54,332] [INFO] [timer.py:197:stop] 0/246, RunningAvgSamplesPerSec=6.325872059251878, CurrSamplesPerSec=5.662446971142885, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:20:05,683] [INFO] [timer.py:197:stop] 0/248, RunningAvgSamplesPerSec=6.3258389816017795, CurrSamplesPerSec=5.705126509772607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:20:17,028] [INFO] [timer.py:197:stop] 0/250, RunningAvgSamplesPerSec=6.325799453129848, CurrSamplesPerSec=5.664985840039145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.2247, 'learning_rate': 7.716963756434345e-06, 'epoch': 0.94} +[2022-12-19 00:20:28,473] [INFO] [timer.py:197:stop] 0/252, RunningAvgSamplesPerSec=6.325563266341907, CurrSamplesPerSec=5.671827460611994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:20:39,976] [INFO] [timer.py:197:stop] 0/254, RunningAvgSamplesPerSec=6.325461516386534, CurrSamplesPerSec=5.6731141278529895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:20:51,316] [INFO] [timer.py:197:stop] 0/256, RunningAvgSamplesPerSec=6.325444122591116, CurrSamplesPerSec=5.6907811558264605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:21:02,778] [INFO] [timer.py:197:stop] 0/258, RunningAvgSamplesPerSec=6.325442583698493, CurrSamplesPerSec=5.695032186259812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:21:14,145] [INFO] [logging.py:68:log_dist] [Rank 0] step=130, skipped=4, lr=[7.782118888847307e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:21:14,147] [INFO] [timer.py:197:stop] 0/260, RunningAvgSamplesPerSec=6.325243906111729, CurrSamplesPerSec=5.662732937205337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:21:25,617] [INFO] [timer.py:197:stop] 0/262, RunningAvgSamplesPerSec=6.325108230507205, CurrSamplesPerSec=5.668189738662974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:21:36,989] [INFO] [timer.py:197:stop] 0/264, RunningAvgSamplesPerSec=6.3250036158640786, CurrSamplesPerSec=5.690783809984471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:21:48,328] [INFO] [timer.py:197:stop] 0/266, RunningAvgSamplesPerSec=6.324874077498679, CurrSamplesPerSec=5.657228926707887, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:21:58,736] [INFO] [timer.py:197:stop] 0/268, RunningAvgSamplesPerSec=6.328685879831648, CurrSamplesPerSec=5.699145338776445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:22:10,078] [INFO] [timer.py:197:stop] 0/270, RunningAvgSamplesPerSec=6.328592651378525, CurrSamplesPerSec=5.701187070786945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:22:21,600] [INFO] [timer.py:197:stop] 0/272, RunningAvgSamplesPerSec=6.3283978797948786, CurrSamplesPerSec=5.677870293029516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:22:32,908] [INFO] [timer.py:197:stop] 0/274, RunningAvgSamplesPerSec=6.328381746136948, CurrSamplesPerSec=5.686675003479556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:22:44,249] [INFO] [timer.py:197:stop] 0/276, RunningAvgSamplesPerSec=6.328281744558154, CurrSamplesPerSec=5.695803388164962, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:22:55,672] [INFO] [timer.py:197:stop] 0/278, RunningAvgSamplesPerSec=6.328182902409602, CurrSamplesPerSec=5.677010771022341, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:23:07,023] [INFO] [logging.py:68:log_dist] [Rank 0] step=140, skipped=4, lr=[7.905011559752758e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:23:07,024] [INFO] [timer.py:197:stop] 0/280, RunningAvgSamplesPerSec=6.328057150363153, CurrSamplesPerSec=5.680155451498959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:23:18,360] [INFO] [timer.py:197:stop] 0/282, RunningAvgSamplesPerSec=6.3279484577468885, CurrSamplesPerSec=5.688211162924068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:23:29,711] [INFO] [timer.py:197:stop] 0/284, RunningAvgSamplesPerSec=6.327916346392182, CurrSamplesPerSec=5.692176377840019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:23:41,046] [INFO] [timer.py:197:stop] 0/286, RunningAvgSamplesPerSec=6.327953084244207, CurrSamplesPerSec=5.706254866097372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:23:52,428] [INFO] [timer.py:197:stop] 0/288, RunningAvgSamplesPerSec=6.327738478754996, CurrSamplesPerSec=5.672490499622081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:24:03,757] [INFO] [timer.py:197:stop] 0/290, RunningAvgSamplesPerSec=6.32769283480951, CurrSamplesPerSec=5.696189430066696, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:24:15,155] [INFO] [timer.py:197:stop] 0/292, RunningAvgSamplesPerSec=6.327483742968983, CurrSamplesPerSec=5.67373885259688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:24:26,544] [INFO] [timer.py:197:stop] 0/294, RunningAvgSamplesPerSec=6.327302278086137, CurrSamplesPerSec=5.670200963063309, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:24:37,904] [INFO] [timer.py:197:stop] 0/296, RunningAvgSamplesPerSec=6.327141303932303, CurrSamplesPerSec=5.677278758344328, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:24:49,254] [INFO] [timer.py:197:stop] 0/298, RunningAvgSamplesPerSec=6.327068725258702, CurrSamplesPerSec=5.679531716333437, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:25:00,570] [INFO] [logging.py:68:log_dist] [Rank 0] step=150, skipped=4, lr=[8.019180844200955e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:25:00,572] [INFO] [timer.py:197:stop] 0/300, RunningAvgSamplesPerSec=6.327055388465258, CurrSamplesPerSec=5.691934258615557, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.156, 'learning_rate': 8.019180844200955e-06, 'epoch': 1.13} +[2022-12-19 00:25:11,937] [INFO] [timer.py:197:stop] 0/302, RunningAvgSamplesPerSec=6.326950257178029, CurrSamplesPerSec=5.681354513379643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:25:23,300] [INFO] [timer.py:197:stop] 0/304, RunningAvgSamplesPerSec=6.326869748973422, CurrSamplesPerSec=5.693791837691181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:25:34,640] [INFO] [timer.py:197:stop] 0/306, RunningAvgSamplesPerSec=6.32679394074768, CurrSamplesPerSec=5.678414142951313, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:25:46,013] [INFO] [timer.py:197:stop] 0/308, RunningAvgSamplesPerSec=6.326659795175516, CurrSamplesPerSec=5.668974039823971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:25:57,321] [INFO] [timer.py:197:stop] 0/310, RunningAvgSamplesPerSec=6.326684211766849, CurrSamplesPerSec=5.698074946109996, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:26:08,668] [INFO] [timer.py:197:stop] 0/312, RunningAvgSamplesPerSec=6.326634615929587, CurrSamplesPerSec=5.702059501331119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:26:20,030] [INFO] [timer.py:197:stop] 0/314, RunningAvgSamplesPerSec=6.326579766010395, CurrSamplesPerSec=5.67752083315708, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:26:31,372] [INFO] [timer.py:197:stop] 0/316, RunningAvgSamplesPerSec=6.326458371297783, CurrSamplesPerSec=5.6651921947508415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:26:42,734] [INFO] [timer.py:197:stop] 0/318, RunningAvgSamplesPerSec=6.326282034641276, CurrSamplesPerSec=5.650637983067839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:26:54,070] [INFO] [logging.py:68:log_dist] [Rank 0] step=160, skipped=4, lr=[8.125783520495252e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:26:54,072] [INFO] [timer.py:197:stop] 0/320, RunningAvgSamplesPerSec=6.3262625622797435, CurrSamplesPerSec=5.677650764759339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:27:05,444] [INFO] [timer.py:197:stop] 0/322, RunningAvgSamplesPerSec=6.326144083145681, CurrSamplesPerSec=5.688546991999535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:27:16,841] [INFO] [timer.py:197:stop] 0/324, RunningAvgSamplesPerSec=6.32597403277753, CurrSamplesPerSec=5.6585590759436, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:27:28,245] [INFO] [timer.py:197:stop] 0/326, RunningAvgSamplesPerSec=6.3257032208764565, CurrSamplesPerSec=5.644172745055525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:27:39,596] [INFO] [timer.py:197:stop] 0/328, RunningAvgSamplesPerSec=6.325608758756463, CurrSamplesPerSec=5.678952570507874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:27:51,006] [INFO] [timer.py:197:stop] 0/330, RunningAvgSamplesPerSec=6.325326156655979, CurrSamplesPerSec=5.634799687918671, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:28:02,361] [INFO] [timer.py:197:stop] 0/332, RunningAvgSamplesPerSec=6.3252073965144975, CurrSamplesPerSec=5.669204631170644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:28:13,791] [INFO] [timer.py:197:stop] 0/334, RunningAvgSamplesPerSec=6.324794313794967, CurrSamplesPerSec=5.586366238664078, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:28:25,155] [INFO] [timer.py:197:stop] 0/336, RunningAvgSamplesPerSec=6.324742033512023, CurrSamplesPerSec=5.667610749573001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:28:36,456] [INFO] [timer.py:197:stop] 0/338, RunningAvgSamplesPerSec=6.324778784386403, CurrSamplesPerSec=5.707499680324429, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:28:47,764] [INFO] [logging.py:68:log_dist] [Rank 0] step=170, skipped=4, lr=[8.225760510392298e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:28:47,766] [INFO] [timer.py:197:stop] 0/340, RunningAvgSamplesPerSec=6.324802333232001, CurrSamplesPerSec=5.682101568162961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:28:59,116] [INFO] [timer.py:197:stop] 0/342, RunningAvgSamplesPerSec=6.324733283338533, CurrSamplesPerSec=5.692450868910799, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:29:10,453] [INFO] [timer.py:197:stop] 0/344, RunningAvgSamplesPerSec=6.3246292325159965, CurrSamplesPerSec=5.679481967635714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:29:21,899] [INFO] [timer.py:197:stop] 0/346, RunningAvgSamplesPerSec=6.324397729817437, CurrSamplesPerSec=5.648484903577274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:29:33,812] [INFO] [timer.py:197:stop] 0/348, RunningAvgSamplesPerSec=6.32420868006303, CurrSamplesPerSec=5.651256815401826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:29:45,616] [INFO] [timer.py:197:stop] 0/350, RunningAvgSamplesPerSec=6.3242070956410465, CurrSamplesPerSec=5.699356367779722, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.1278, 'learning_rate': 8.27351214279797e-06, 'epoch': 1.31} +[2022-12-19 00:29:57,281] [INFO] [timer.py:197:stop] 0/352, RunningAvgSamplesPerSec=6.324083302896628, CurrSamplesPerSec=5.681366537818554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:30:08,710] [INFO] [timer.py:197:stop] 0/354, RunningAvgSamplesPerSec=6.323959804275063, CurrSamplesPerSec=5.659636628479426, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:30:20,253] [INFO] [timer.py:197:stop] 0/356, RunningAvgSamplesPerSec=6.323825506537739, CurrSamplesPerSec=5.675835143588331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:30:31,710] [INFO] [timer.py:197:stop] 0/358, RunningAvgSamplesPerSec=6.32375855750815, CurrSamplesPerSec=5.67932864138317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:30:43,099] [INFO] [logging.py:68:log_dist] [Rank 0] step=180, skipped=4, lr=[8.31988745412743e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:30:43,101] [INFO] [timer.py:197:stop] 0/360, RunningAvgSamplesPerSec=6.323504983802095, CurrSamplesPerSec=5.63266976909398, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:30:54,557] [INFO] [timer.py:197:stop] 0/362, RunningAvgSamplesPerSec=6.32343982359596, CurrSamplesPerSec=5.671728952857804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:31:05,889] [INFO] [timer.py:197:stop] 0/364, RunningAvgSamplesPerSec=6.323428401977776, CurrSamplesPerSec=5.677055914068001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:31:17,276] [INFO] [timer.py:197:stop] 0/366, RunningAvgSamplesPerSec=6.323384431083906, CurrSamplesPerSec=5.67629362117504, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:31:28,785] [INFO] [timer.py:197:stop] 0/368, RunningAvgSamplesPerSec=6.323434262245826, CurrSamplesPerSec=5.70523515407605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:31:40,153] [INFO] [timer.py:197:stop] 0/370, RunningAvgSamplesPerSec=6.32337652578926, CurrSamplesPerSec=5.676626363854563, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:31:51,613] [INFO] [timer.py:197:stop] 0/372, RunningAvgSamplesPerSec=6.323447379362817, CurrSamplesPerSec=5.696824567133537, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:32:03,153] [INFO] [timer.py:197:stop] 0/374, RunningAvgSamplesPerSec=6.323490219781556, CurrSamplesPerSec=5.694769285809873, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:32:14,534] [INFO] [timer.py:197:stop] 0/376, RunningAvgSamplesPerSec=6.323369584243994, CurrSamplesPerSec=5.650268556480972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:32:25,876] [INFO] [timer.py:197:stop] 0/378, RunningAvgSamplesPerSec=6.323304923151756, CurrSamplesPerSec=5.6597320913591185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:32:37,173] [INFO] [logging.py:68:log_dist] [Rank 0] step=190, skipped=4, lr=[8.408811289387583e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:32:37,175] [INFO] [timer.py:197:stop] 0/380, RunningAvgSamplesPerSec=6.323406777084629, CurrSamplesPerSec=5.7226377858585264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:32:48,540] [INFO] [timer.py:197:stop] 0/382, RunningAvgSamplesPerSec=6.323297263414538, CurrSamplesPerSec=5.668962067784482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:33:00,073] [INFO] [timer.py:197:stop] 0/384, RunningAvgSamplesPerSec=6.323267466569768, CurrSamplesPerSec=5.682340445862556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:33:11,430] [INFO] [timer.py:197:stop] 0/386, RunningAvgSamplesPerSec=6.32315065138806, CurrSamplesPerSec=5.6789946206884006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:33:22,740] [INFO] [timer.py:197:stop] 0/388, RunningAvgSamplesPerSec=6.323156288131074, CurrSamplesPerSec=5.700271330203468, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:33:34,129] [INFO] [timer.py:197:stop] 0/390, RunningAvgSamplesPerSec=6.322983044550158, CurrSamplesPerSec=5.6641335606902645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:33:45,528] [INFO] [timer.py:197:stop] 0/392, RunningAvgSamplesPerSec=6.322861784436344, CurrSamplesPerSec=5.654990050723027, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:33:56,985] [INFO] [timer.py:197:stop] 0/394, RunningAvgSamplesPerSec=6.3225094264140544, CurrSamplesPerSec=5.604591817666873, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:34:08,559] [INFO] [timer.py:197:stop] 0/396, RunningAvgSamplesPerSec=6.322321804464229, CurrSamplesPerSec=5.650546395025515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:34:19,969] [INFO] [timer.py:197:stop] 0/398, RunningAvgSamplesPerSec=6.322231207848553, CurrSamplesPerSec=5.662744166203131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:34:31,349] [INFO] [logging.py:68:log_dist] [Rank 0] step=200, skipped=4, lr=[8.49307723936858e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:34:31,351] [INFO] [timer.py:197:stop] 0/400, RunningAvgSamplesPerSec=6.322110726538844, CurrSamplesPerSec=5.658764724125701, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.1276, 'learning_rate': 8.49307723936858e-06, 'epoch': 1.5} +[2022-12-19 00:34:42,926] [INFO] [timer.py:197:stop] 0/402, RunningAvgSamplesPerSec=6.321981469962649, CurrSamplesPerSec=5.6540464545364975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:34:54,329] [INFO] [timer.py:197:stop] 0/404, RunningAvgSamplesPerSec=6.321788711822746, CurrSamplesPerSec=5.6213456603519765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:35:05,696] [INFO] [timer.py:197:stop] 0/406, RunningAvgSamplesPerSec=6.321723107311507, CurrSamplesPerSec=5.674815716520611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:35:17,050] [INFO] [timer.py:197:stop] 0/408, RunningAvgSamplesPerSec=6.321691644638525, CurrSamplesPerSec=5.695411356584488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:35:28,467] [INFO] [timer.py:197:stop] 0/410, RunningAvgSamplesPerSec=6.321686939549397, CurrSamplesPerSec=5.687924787257544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:35:39,902] [INFO] [timer.py:197:stop] 0/412, RunningAvgSamplesPerSec=6.321612949819477, CurrSamplesPerSec=5.685645657888461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:35:51,290] [INFO] [timer.py:197:stop] 0/414, RunningAvgSamplesPerSec=6.321465406676154, CurrSamplesPerSec=5.630122462060419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:36:02,870] [INFO] [timer.py:197:stop] 0/416, RunningAvgSamplesPerSec=6.321325864365297, CurrSamplesPerSec=5.662598431608348, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:36:14,252] [INFO] [timer.py:197:stop] 0/418, RunningAvgSamplesPerSec=6.321178347260513, CurrSamplesPerSec=5.649072355016578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:36:25,590] [INFO] [logging.py:68:log_dist] [Rank 0] step=210, skipped=4, lr=[8.573149077803088e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:36:25,592] [INFO] [timer.py:197:stop] 0/420, RunningAvgSamplesPerSec=6.321159177303354, CurrSamplesPerSec=5.677153166153527, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:36:36,968] [INFO] [timer.py:197:stop] 0/422, RunningAvgSamplesPerSec=6.321009637171506, CurrSamplesPerSec=5.652632959889085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:36:48,457] [INFO] [timer.py:197:stop] 0/424, RunningAvgSamplesPerSec=6.320906470732945, CurrSamplesPerSec=5.689232756123447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:36:59,774] [INFO] [timer.py:197:stop] 0/426, RunningAvgSamplesPerSec=6.32096035849853, CurrSamplesPerSec=5.697304338195877, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:37:11,122] [INFO] [timer.py:197:stop] 0/428, RunningAvgSamplesPerSec=6.3208982453379585, CurrSamplesPerSec=5.677523234796336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:37:22,458] [INFO] [timer.py:197:stop] 0/430, RunningAvgSamplesPerSec=6.320848728617728, CurrSamplesPerSec=5.681654418081767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:37:33,837] [INFO] [timer.py:197:stop] 0/432, RunningAvgSamplesPerSec=6.320680960808628, CurrSamplesPerSec=5.648207267718699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:37:45,200] [INFO] [timer.py:197:stop] 0/434, RunningAvgSamplesPerSec=6.320654285102583, CurrSamplesPerSec=5.663154652477177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:37:56,541] [INFO] [timer.py:197:stop] 0/436, RunningAvgSamplesPerSec=6.320597654109512, CurrSamplesPerSec=5.679820612469546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:38:07,937] [INFO] [timer.py:197:stop] 0/438, RunningAvgSamplesPerSec=6.320499502263493, CurrSamplesPerSec=5.669001336263085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:38:19,332] [INFO] [logging.py:68:log_dist] [Rank 0] step=220, skipped=4, lr=[8.64942458567722e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:38:19,334] [INFO] [timer.py:197:stop] 0/440, RunningAvgSamplesPerSec=6.3203861817805675, CurrSamplesPerSec=5.6623002963329085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:38:30,695] [INFO] [timer.py:197:stop] 0/442, RunningAvgSamplesPerSec=6.320332690166886, CurrSamplesPerSec=5.669673773648277, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:38:42,055] [INFO] [timer.py:197:stop] 0/444, RunningAvgSamplesPerSec=6.32031966390362, CurrSamplesPerSec=5.680336468504409, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:38:53,544] [INFO] [timer.py:197:stop] 0/446, RunningAvgSamplesPerSec=6.320290405265886, CurrSamplesPerSec=5.687557458672238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:39:05,003] [INFO] [timer.py:197:stop] 0/448, RunningAvgSamplesPerSec=6.3203053824383435, CurrSamplesPerSec=5.679599491324056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:39:16,341] [INFO] [timer.py:197:stop] 0/450, RunningAvgSamplesPerSec=6.3203527615007316, CurrSamplesPerSec=5.69232919150211, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.1281, 'learning_rate': 8.686247975778677e-06, 'epoch': 1.69} +[2022-12-19 00:39:27,692] [INFO] [timer.py:197:stop] 0/452, RunningAvgSamplesPerSec=6.320273727186458, CurrSamplesPerSec=5.680921907926451, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:39:39,064] [INFO] [timer.py:197:stop] 0/454, RunningAvgSamplesPerSec=6.320116458786455, CurrSamplesPerSec=5.694291391097104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:39:50,443] [INFO] [timer.py:197:stop] 0/456, RunningAvgSamplesPerSec=6.320100873827638, CurrSamplesPerSec=5.683626352197276, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:40:01,851] [INFO] [timer.py:197:stop] 0/458, RunningAvgSamplesPerSec=6.319997103671828, CurrSamplesPerSec=5.649200274442475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:40:13,216] [INFO] [logging.py:68:log_dist] [Rank 0] step=230, skipped=4, lr=[8.722247506883805e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:40:13,217] [INFO] [timer.py:197:stop] 0/460, RunningAvgSamplesPerSec=6.319995105649749, CurrSamplesPerSec=5.693608996003589, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:40:24,778] [INFO] [timer.py:197:stop] 0/462, RunningAvgSamplesPerSec=6.319954386340108, CurrSamplesPerSec=5.664920804358087, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:40:36,134] [INFO] [timer.py:197:stop] 0/464, RunningAvgSamplesPerSec=6.319953448551577, CurrSamplesPerSec=5.682989582576036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:40:47,495] [INFO] [timer.py:197:stop] 0/466, RunningAvgSamplesPerSec=6.319850342084705, CurrSamplesPerSec=5.673877485798156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:40:58,887] [INFO] [timer.py:197:stop] 0/468, RunningAvgSamplesPerSec=6.31987292974135, CurrSamplesPerSec=5.691756122229372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:41:10,220] [INFO] [timer.py:197:stop] 0/470, RunningAvgSamplesPerSec=6.319830973205626, CurrSamplesPerSec=5.66767991567336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:41:21,552] [INFO] [timer.py:197:stop] 0/472, RunningAvgSamplesPerSec=6.319829689531446, CurrSamplesPerSec=5.683424909642516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:41:33,093] [INFO] [timer.py:197:stop] 0/474, RunningAvgSamplesPerSec=6.319750643940945, CurrSamplesPerSec=5.671027992840407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:41:44,649] [INFO] [timer.py:197:stop] 0/476, RunningAvgSamplesPerSec=6.319549105580965, CurrSamplesPerSec=5.640950412424254, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:41:55,993] [INFO] [timer.py:197:stop] 0/478, RunningAvgSamplesPerSec=6.319514624812288, CurrSamplesPerSec=5.671870843457645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:42:07,309] [INFO] [logging.py:68:log_dist] [Rank 0] step=240, skipped=4, lr=[8.79191691333329e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:42:07,311] [INFO] [timer.py:197:stop] 0/480, RunningAvgSamplesPerSec=6.319601782661979, CurrSamplesPerSec=5.711505695751001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:42:18,609] [INFO] [timer.py:197:stop] 0/482, RunningAvgSamplesPerSec=6.3197343890317885, CurrSamplesPerSec=5.704929360030577, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:42:29,977] [INFO] [timer.py:197:stop] 0/484, RunningAvgSamplesPerSec=6.319707372523856, CurrSamplesPerSec=5.692903342052109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:42:41,333] [INFO] [timer.py:197:stop] 0/486, RunningAvgSamplesPerSec=6.319773976031884, CurrSamplesPerSec=5.699332166412615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:42:52,687] [INFO] [timer.py:197:stop] 0/488, RunningAvgSamplesPerSec=6.319784253710412, CurrSamplesPerSec=5.710645436820501, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:43:04,227] [INFO] [timer.py:197:stop] 0/490, RunningAvgSamplesPerSec=6.3197963137288236, CurrSamplesPerSec=5.686959325603847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:43:15,598] [INFO] [timer.py:197:stop] 0/492, RunningAvgSamplesPerSec=6.319719178697002, CurrSamplesPerSec=5.671861495716578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:43:26,930] [INFO] [timer.py:197:stop] 0/494, RunningAvgSamplesPerSec=6.31966065634711, CurrSamplesPerSec=5.682334912717824, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:43:38,308] [INFO] [timer.py:197:stop] 0/496, RunningAvgSamplesPerSec=6.319628259143031, CurrSamplesPerSec=5.69025399261467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:43:49,622] [INFO] [timer.py:197:stop] 0/498, RunningAvgSamplesPerSec=6.3196567565582455, CurrSamplesPerSec=5.701364344885284, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:44:01,045] [INFO] [logging.py:68:log_dist] [Rank 0] step=250, skipped=4, lr=[8.858694625217149e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:44:01,046] [INFO] [timer.py:197:stop] 0/500, RunningAvgSamplesPerSec=6.319445590061159, CurrSamplesPerSec=5.670494420814168, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.1256, 'learning_rate': 8.858694625217149e-06, 'epoch': 1.88} +[2022-12-19 00:44:12,381] [INFO] [timer.py:197:stop] 0/502, RunningAvgSamplesPerSec=6.319467580761168, CurrSamplesPerSec=5.6888558547442925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:44:23,796] [INFO] [timer.py:197:stop] 0/504, RunningAvgSamplesPerSec=6.319435192509141, CurrSamplesPerSec=5.6658453120372965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:44:35,191] [INFO] [timer.py:197:stop] 0/506, RunningAvgSamplesPerSec=6.319352589227274, CurrSamplesPerSec=5.649429735511968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:44:46,632] [INFO] [timer.py:197:stop] 0/508, RunningAvgSamplesPerSec=6.3192937352392375, CurrSamplesPerSec=5.6509520217547236, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:44:58,011] [INFO] [timer.py:197:stop] 0/510, RunningAvgSamplesPerSec=6.3192807023787605, CurrSamplesPerSec=5.696140839572408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:45:09,378] [INFO] [timer.py:197:stop] 0/512, RunningAvgSamplesPerSec=6.319277059254272, CurrSamplesPerSec=5.676434059756074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:45:20,765] [INFO] [timer.py:197:stop] 0/514, RunningAvgSamplesPerSec=6.3192279699869065, CurrSamplesPerSec=5.675098853971721, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:45:32,232] [INFO] [timer.py:197:stop] 0/516, RunningAvgSamplesPerSec=6.319209292569757, CurrSamplesPerSec=5.68089569881787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:45:43,723] [INFO] [timer.py:197:stop] 0/518, RunningAvgSamplesPerSec=6.319173952639591, CurrSamplesPerSec=5.686136796905354, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:45:55,092] [INFO] [logging.py:68:log_dist] [Rank 0] step=260, skipped=4, lr=[8.922811151820517e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:45:55,094] [INFO] [timer.py:197:stop] 0/520, RunningAvgSamplesPerSec=6.319140726211304, CurrSamplesPerSec=5.679437987628754, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:46:06,511] [INFO] [timer.py:197:stop] 0/522, RunningAvgSamplesPerSec=6.319064093557645, CurrSamplesPerSec=5.674828433105231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:46:18,067] [INFO] [timer.py:197:stop] 0/524, RunningAvgSamplesPerSec=6.318941947109328, CurrSamplesPerSec=5.663134341803495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:46:29,444] [INFO] [timer.py:197:stop] 0/526, RunningAvgSamplesPerSec=6.318919080123138, CurrSamplesPerSec=5.672882499055666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:46:40,816] [INFO] [timer.py:197:stop] 0/528, RunningAvgSamplesPerSec=6.318897867858227, CurrSamplesPerSec=5.669731493834369, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:46:52,235] [INFO] [timer.py:197:stop] 0/530, RunningAvgSamplesPerSec=6.3188783359901946, CurrSamplesPerSec=5.682390485226022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:47:03,591] [INFO] [timer.py:197:stop] 0/532, RunningAvgSamplesPerSec=6.318838528980661, CurrSamplesPerSec=5.677959165974818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:47:14,013] [INFO] [timer.py:197:stop] 0/534, RunningAvgSamplesPerSec=6.320788890088867, CurrSamplesPerSec=6.659617335939272, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:47:25,363] [INFO] [timer.py:197:stop] 0/536, RunningAvgSamplesPerSec=6.320844322352108, CurrSamplesPerSec=5.717355274172742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:47:37,096] [INFO] [timer.py:197:stop] 0/538, RunningAvgSamplesPerSec=6.320376506280682, CurrSamplesPerSec=5.4944077632686446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:47:48,467] [INFO] [logging.py:68:log_dist] [Rank 0] step=270, skipped=4, lr=[8.984470493319244e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:47:48,468] [INFO] [timer.py:197:stop] 0/540, RunningAvgSamplesPerSec=6.32036588035628, CurrSamplesPerSec=5.688256243202118, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:47:59,824] [INFO] [timer.py:197:stop] 0/542, RunningAvgSamplesPerSec=6.320436921654575, CurrSamplesPerSec=5.705787412416891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:48:11,212] [INFO] [timer.py:197:stop] 0/544, RunningAvgSamplesPerSec=6.320454577403126, CurrSamplesPerSec=5.69246197464763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:48:22,607] [INFO] [timer.py:197:stop] 0/546, RunningAvgSamplesPerSec=6.32036708144675, CurrSamplesPerSec=5.642555899959179, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:48:34,030] [INFO] [timer.py:197:stop] 0/548, RunningAvgSamplesPerSec=6.320323704117513, CurrSamplesPerSec=5.674647527055657, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:48:45,493] [INFO] [timer.py:197:stop] 0/550, RunningAvgSamplesPerSec=6.320330352988736, CurrSamplesPerSec=5.676665258402395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:48:56,826] [INFO] [timer.py:197:stop] 0/552, RunningAvgSamplesPerSec=6.3203738826027624, CurrSamplesPerSec=5.696218197963952, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.1083, 'learning_rate': 9.020362953730323e-06, 'epoch': 2.07} +[2022-12-19 00:49:08,314] [INFO] [timer.py:197:stop] 0/554, RunningAvgSamplesPerSec=6.320345104895774, CurrSamplesPerSec=5.672650409668064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:49:19,652] [INFO] [timer.py:197:stop] 0/556, RunningAvgSamplesPerSec=6.320329448586411, CurrSamplesPerSec=5.678675054933501, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:49:31,070] [INFO] [timer.py:197:stop] 0/558, RunningAvgSamplesPerSec=6.3202936505881135, CurrSamplesPerSec=5.667847453017027, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:49:42,444] [INFO] [logging.py:68:log_dist] [Rank 0] step=280, skipped=4, lr=[9.043854055968706e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:49:42,446] [INFO] [timer.py:197:stop] 0/560, RunningAvgSamplesPerSec=6.320316517487314, CurrSamplesPerSec=5.696863980761914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:49:53,798] [INFO] [timer.py:197:stop] 0/562, RunningAvgSamplesPerSec=6.320314246239234, CurrSamplesPerSec=5.668374063442858, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:50:05,171] [INFO] [timer.py:197:stop] 0/564, RunningAvgSamplesPerSec=6.320295849098979, CurrSamplesPerSec=5.689046107739862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:50:16,610] [INFO] [timer.py:197:stop] 0/566, RunningAvgSamplesPerSec=6.320233403134386, CurrSamplesPerSec=5.679131588451052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:50:28,160] [INFO] [timer.py:197:stop] 0/568, RunningAvgSamplesPerSec=6.320158012531864, CurrSamplesPerSec=5.676459267416466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:50:39,541] [INFO] [timer.py:197:stop] 0/570, RunningAvgSamplesPerSec=6.320134681569571, CurrSamplesPerSec=5.676231926422744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:50:50,985] [INFO] [timer.py:197:stop] 0/572, RunningAvgSamplesPerSec=6.320096534587018, CurrSamplesPerSec=5.69315616920031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:51:02,426] [INFO] [timer.py:197:stop] 0/574, RunningAvgSamplesPerSec=6.320148404746433, CurrSamplesPerSec=5.712264104970907, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:51:13,799] [INFO] [timer.py:197:stop] 0/576, RunningAvgSamplesPerSec=6.320137746983063, CurrSamplesPerSec=5.682742710294148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:51:25,139] [INFO] [timer.py:197:stop] 0/578, RunningAvgSamplesPerSec=6.320093385678466, CurrSamplesPerSec=5.659379610683581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:51:36,491] [INFO] [logging.py:68:log_dist] [Rank 0] step=290, skipped=4, lr=[9.10112387015335e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:51:36,493] [INFO] [timer.py:197:stop] 0/580, RunningAvgSamplesPerSec=6.320056277682747, CurrSamplesPerSec=5.678806000461184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:51:47,842] [INFO] [timer.py:197:stop] 0/582, RunningAvgSamplesPerSec=6.3199956995218045, CurrSamplesPerSec=5.685229255815799, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:51:59,186] [INFO] [timer.py:197:stop] 0/584, RunningAvgSamplesPerSec=6.3200186748889315, CurrSamplesPerSec=5.70428368696425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:52:10,538] [INFO] [timer.py:197:stop] 0/586, RunningAvgSamplesPerSec=6.319900223803636, CurrSamplesPerSec=5.69582804300266, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:52:21,887] [INFO] [timer.py:197:stop] 0/588, RunningAvgSamplesPerSec=6.319919982329858, CurrSamplesPerSec=5.702778088537659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:52:33,225] [INFO] [timer.py:197:stop] 0/590, RunningAvgSamplesPerSec=6.319943779386075, CurrSamplesPerSec=5.687501303036996, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:52:44,603] [INFO] [timer.py:197:stop] 0/592, RunningAvgSamplesPerSec=6.3198457896600235, CurrSamplesPerSec=5.643944897479634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:52:55,943] [INFO] [timer.py:197:stop] 0/594, RunningAvgSamplesPerSec=6.319856297433864, CurrSamplesPerSec=5.680393925185212, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:53:07,248] [INFO] [timer.py:197:stop] 0/596, RunningAvgSamplesPerSec=6.319840349532547, CurrSamplesPerSec=5.692354057692489, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:53:18,597] [INFO] [timer.py:197:stop] 0/598, RunningAvgSamplesPerSec=6.319815416165195, CurrSamplesPerSec=5.690553872569714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:53:29,925] [INFO] [logging.py:68:log_dist] [Rank 0] step=300, skipped=4, lr=[9.156425255148058e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:53:29,926] [INFO] [timer.py:197:stop] 0/600, RunningAvgSamplesPerSec=6.319814986271745, CurrSamplesPerSec=5.681727054135516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:53:41,253] [INFO] [timer.py:197:stop] 0/602, RunningAvgSamplesPerSec=6.319860543595978, CurrSamplesPerSec=5.6961142481105895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0598, 'learning_rate': 9.161852281961698e-06, 'epoch': 2.25} +[2022-12-19 00:53:52,640] [INFO] [timer.py:197:stop] 0/604, RunningAvgSamplesPerSec=6.319816884425725, CurrSamplesPerSec=5.683669675042479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:54:03,959] [INFO] [timer.py:197:stop] 0/606, RunningAvgSamplesPerSec=6.319839531991534, CurrSamplesPerSec=5.694245248786347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:54:15,332] [INFO] [timer.py:197:stop] 0/608, RunningAvgSamplesPerSec=6.319810550595329, CurrSamplesPerSec=5.662503827591686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:54:26,672] [INFO] [timer.py:197:stop] 0/610, RunningAvgSamplesPerSec=6.3198110898553725, CurrSamplesPerSec=5.6773870650588725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:54:38,028] [INFO] [timer.py:197:stop] 0/612, RunningAvgSamplesPerSec=6.3197758122270375, CurrSamplesPerSec=5.679294516603755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:54:49,400] [INFO] [timer.py:197:stop] 0/614, RunningAvgSamplesPerSec=6.319767573686687, CurrSamplesPerSec=5.683025676812096, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:55:00,771] [INFO] [timer.py:197:stop] 0/616, RunningAvgSamplesPerSec=6.319750107435414, CurrSamplesPerSec=5.679651405176822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:55:12,146] [INFO] [timer.py:197:stop] 0/618, RunningAvgSamplesPerSec=6.319709566145599, CurrSamplesPerSec=5.675407457980364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:55:23,511] [INFO] [logging.py:68:log_dist] [Rank 0] step=310, skipped=4, lr=[9.209889040960644e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:55:23,512] [INFO] [timer.py:197:stop] 0/620, RunningAvgSamplesPerSec=6.319705553102689, CurrSamplesPerSec=5.6907237301048665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:55:34,920] [INFO] [timer.py:197:stop] 0/622, RunningAvgSamplesPerSec=6.319562927733921, CurrSamplesPerSec=5.69239244379155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:55:46,210] [INFO] [timer.py:197:stop] 0/624, RunningAvgSamplesPerSec=6.3196183156998735, CurrSamplesPerSec=5.691242775260119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:55:57,573] [INFO] [timer.py:197:stop] 0/626, RunningAvgSamplesPerSec=6.319590430642667, CurrSamplesPerSec=5.681516126093696, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:56:08,941] [INFO] [timer.py:197:stop] 0/628, RunningAvgSamplesPerSec=6.319582780848161, CurrSamplesPerSec=5.679966273412695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:56:20,248] [INFO] [timer.py:197:stop] 0/630, RunningAvgSamplesPerSec=6.319579604732076, CurrSamplesPerSec=5.698700099493294, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:56:31,629] [INFO] [timer.py:197:stop] 0/632, RunningAvgSamplesPerSec=6.319548177013225, CurrSamplesPerSec=5.675483294273378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:56:42,997] [INFO] [timer.py:197:stop] 0/634, RunningAvgSamplesPerSec=6.319542135253894, CurrSamplesPerSec=5.691982535979329, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:56:54,303] [INFO] [timer.py:197:stop] 0/636, RunningAvgSamplesPerSec=6.319527501736275, CurrSamplesPerSec=5.694565361369183, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:57:05,638] [INFO] [timer.py:197:stop] 0/638, RunningAvgSamplesPerSec=6.319541868477244, CurrSamplesPerSec=5.690798769830471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:57:16,990] [INFO] [logging.py:68:log_dist] [Rank 0] step=320, skipped=4, lr=[9.261633432763397e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:57:16,992] [INFO] [timer.py:197:stop] 0/640, RunningAvgSamplesPerSec=6.319518687891041, CurrSamplesPerSec=5.67143488801959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:57:28,592] [INFO] [timer.py:197:stop] 0/642, RunningAvgSamplesPerSec=6.319465908527522, CurrSamplesPerSec=5.64993699384335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:57:40,476] [INFO] [timer.py:197:stop] 0/644, RunningAvgSamplesPerSec=6.319390943798744, CurrSamplesPerSec=5.668378372477213, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:57:52,243] [INFO] [timer.py:197:stop] 0/646, RunningAvgSamplesPerSec=6.3193639859566435, CurrSamplesPerSec=5.680033337645695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:58:03,735] [INFO] [timer.py:197:stop] 0/648, RunningAvgSamplesPerSec=6.319346198603127, CurrSamplesPerSec=5.697315704720048, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:58:15,088] [INFO] [timer.py:197:stop] 0/650, RunningAvgSamplesPerSec=6.319315472059445, CurrSamplesPerSec=5.665524832890148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:58:26,580] [INFO] [timer.py:197:stop] 0/652, RunningAvgSamplesPerSec=6.319297685088794, CurrSamplesPerSec=5.670232583173057, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.063, 'learning_rate': 9.29189975311636e-06, 'epoch': 2.44} +[2022-12-19 00:58:38,053] [INFO] [timer.py:197:stop] 0/654, RunningAvgSamplesPerSec=6.319277747601288, CurrSamplesPerSec=5.687855126157084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:58:49,431] [INFO] [timer.py:197:stop] 0/656, RunningAvgSamplesPerSec=6.319245498514143, CurrSamplesPerSec=5.668320679838351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:59:00,802] [INFO] [timer.py:197:stop] 0/658, RunningAvgSamplesPerSec=6.319197370830487, CurrSamplesPerSec=5.671775450008826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:59:12,252] [INFO] [logging.py:68:log_dist] [Rank 0] step=330, skipped=4, lr=[9.311765584761373e-06], mom=[[0.9, 0.999]] +[2022-12-19 00:59:12,254] [INFO] [timer.py:197:stop] 0/660, RunningAvgSamplesPerSec=6.319190796656681, CurrSamplesPerSec=5.67663380659717, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:59:23,576] [INFO] [timer.py:197:stop] 0/662, RunningAvgSamplesPerSec=6.31916084556495, CurrSamplesPerSec=5.678397806718739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:59:35,169] [INFO] [timer.py:197:stop] 0/664, RunningAvgSamplesPerSec=6.319083573895808, CurrSamplesPerSec=5.662233172447872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:59:46,486] [INFO] [timer.py:197:stop] 0/666, RunningAvgSamplesPerSec=6.319091632760076, CurrSamplesPerSec=5.680942346389958, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 00:59:57,979] [INFO] [timer.py:197:stop] 0/668, RunningAvgSamplesPerSec=6.319017800660259, CurrSamplesPerSec=5.660799346909331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:00:09,328] [INFO] [timer.py:197:stop] 0/670, RunningAvgSamplesPerSec=6.3190160982731856, CurrSamplesPerSec=5.678764914150066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:00:20,717] [INFO] [timer.py:197:stop] 0/672, RunningAvgSamplesPerSec=6.318926060512557, CurrSamplesPerSec=5.646853233351514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:00:32,070] [INFO] [timer.py:197:stop] 0/674, RunningAvgSamplesPerSec=6.318884045785445, CurrSamplesPerSec=5.676124623958836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:00:43,441] [INFO] [timer.py:197:stop] 0/676, RunningAvgSamplesPerSec=6.318818520642245, CurrSamplesPerSec=5.657839425000005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:00:54,800] [INFO] [timer.py:197:stop] 0/678, RunningAvgSamplesPerSec=6.3188269070500445, CurrSamplesPerSec=5.682595944542649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:01:06,321] [INFO] [logging.py:68:log_dist] [Rank 0] step=340, skipped=4, lr=[9.360382936198493e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:01:06,323] [INFO] [timer.py:197:stop] 0/680, RunningAvgSamplesPerSec=6.318803423863587, CurrSamplesPerSec=5.678195773729671, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:01:17,676] [INFO] [timer.py:197:stop] 0/682, RunningAvgSamplesPerSec=6.318788339294621, CurrSamplesPerSec=5.6738146443034605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:01:29,144] [INFO] [timer.py:197:stop] 0/684, RunningAvgSamplesPerSec=6.318843082261069, CurrSamplesPerSec=5.70212829954872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:01:40,484] [INFO] [timer.py:197:stop] 0/686, RunningAvgSamplesPerSec=6.318877418981399, CurrSamplesPerSec=5.707351390334674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:01:52,010] [INFO] [timer.py:197:stop] 0/688, RunningAvgSamplesPerSec=6.318834208561251, CurrSamplesPerSec=5.678378107269179, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:02:03,349] [INFO] [timer.py:197:stop] 0/690, RunningAvgSamplesPerSec=6.318792311453393, CurrSamplesPerSec=5.665985234079397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:02:14,745] [INFO] [timer.py:197:stop] 0/692, RunningAvgSamplesPerSec=6.318835723815898, CurrSamplesPerSec=5.695138271531494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:02:26,244] [INFO] [timer.py:197:stop] 0/694, RunningAvgSamplesPerSec=6.318826920348641, CurrSamplesPerSec=5.658641858275141, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:02:37,669] [INFO] [timer.py:197:stop] 0/696, RunningAvgSamplesPerSec=6.318804790379597, CurrSamplesPerSec=5.675080137234234, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:02:49,030] [INFO] [timer.py:197:stop] 0/698, RunningAvgSamplesPerSec=6.318774542107063, CurrSamplesPerSec=5.670887102818869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:03:00,447] [INFO] [logging.py:68:log_dist] [Rank 0] step=350, skipped=4, lr=[9.407574351377137e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:03:00,448] [INFO] [timer.py:197:stop] 0/700, RunningAvgSamplesPerSec=6.318746771411151, CurrSamplesPerSec=5.685630965973367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:03:11,800] [INFO] [timer.py:197:stop] 0/702, RunningAvgSamplesPerSec=6.318741795333457, CurrSamplesPerSec=5.681823023160319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0622, 'learning_rate': 9.412218256259678e-06, 'epoch': 2.63} +[2022-12-19 01:03:23,174] [INFO] [timer.py:197:stop] 0/704, RunningAvgSamplesPerSec=6.318707761718272, CurrSamplesPerSec=5.669857476413636, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:03:34,662] [INFO] [timer.py:197:stop] 0/706, RunningAvgSamplesPerSec=6.318680061373025, CurrSamplesPerSec=5.683818421837514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:03:46,225] [INFO] [timer.py:197:stop] 0/708, RunningAvgSamplesPerSec=6.318588820848263, CurrSamplesPerSec=5.6424873457028735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:03:57,614] [INFO] [timer.py:197:stop] 0/710, RunningAvgSamplesPerSec=6.31855631018623, CurrSamplesPerSec=5.667990585621616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:04:08,965] [INFO] [timer.py:197:stop] 0/712, RunningAvgSamplesPerSec=6.318527528178411, CurrSamplesPerSec=5.661748781060366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:04:20,320] [INFO] [timer.py:197:stop] 0/714, RunningAvgSamplesPerSec=6.31855001651633, CurrSamplesPerSec=5.698780431002723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:04:31,005] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 8192.0, reducing to 4096.0 +[2022-12-19 01:04:31,007] [INFO] [timer.py:197:stop] 0/716, RunningAvgSamplesPerSec=6.319610920055632, CurrSamplesPerSec=6.373794873163514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:04:42,331] [INFO] [timer.py:197:stop] 0/718, RunningAvgSamplesPerSec=6.31961222646377, CurrSamplesPerSec=5.68433909693796, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:04:53,753] [INFO] [logging.py:68:log_dist] [Rank 0] step=360, skipped=5, lr=[9.44889475969735e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:04:53,755] [INFO] [timer.py:197:stop] 0/720, RunningAvgSamplesPerSec=6.319594889338643, CurrSamplesPerSec=5.675628972838387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:05:05,101] [INFO] [timer.py:197:stop] 0/722, RunningAvgSamplesPerSec=6.319598071660983, CurrSamplesPerSec=5.689632137987318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:05:16,579] [INFO] [timer.py:197:stop] 0/724, RunningAvgSamplesPerSec=6.319450696538225, CurrSamplesPerSec=5.677897915403727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:05:28,088] [INFO] [timer.py:197:stop] 0/726, RunningAvgSamplesPerSec=6.319403493486101, CurrSamplesPerSec=5.682042633652292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:05:39,565] [INFO] [timer.py:197:stop] 0/728, RunningAvgSamplesPerSec=6.319410687188341, CurrSamplesPerSec=5.72953678288705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:05:50,922] [INFO] [timer.py:197:stop] 0/730, RunningAvgSamplesPerSec=6.319406220205962, CurrSamplesPerSec=5.698383634739568, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:06:02,290] [INFO] [timer.py:197:stop] 0/732, RunningAvgSamplesPerSec=6.319386981148773, CurrSamplesPerSec=5.684696138275903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:06:13,615] [INFO] [timer.py:197:stop] 0/734, RunningAvgSamplesPerSec=6.319375963701701, CurrSamplesPerSec=5.689230585723028, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:06:24,899] [INFO] [timer.py:197:stop] 0/736, RunningAvgSamplesPerSec=6.319415544190312, CurrSamplesPerSec=5.696515322069078, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:06:36,246] [INFO] [timer.py:197:stop] 0/738, RunningAvgSamplesPerSec=6.3194329538262926, CurrSamplesPerSec=5.699937988449255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:06:47,589] [INFO] [logging.py:68:log_dist] [Rank 0] step=370, skipped=5, lr=[9.493595187571683e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:06:47,590] [INFO] [timer.py:197:stop] 0/740, RunningAvgSamplesPerSec=6.31943826834621, CurrSamplesPerSec=5.6954609013696835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:06:58,917] [INFO] [timer.py:197:stop] 0/742, RunningAvgSamplesPerSec=6.319459772220872, CurrSamplesPerSec=5.711683126014281, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:07:10,261] [INFO] [timer.py:197:stop] 0/744, RunningAvgSamplesPerSec=6.319469109357799, CurrSamplesPerSec=5.6932674975392175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:07:21,616] [INFO] [timer.py:197:stop] 0/746, RunningAvgSamplesPerSec=6.319434855078194, CurrSamplesPerSec=5.665744141942675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:07:32,969] [INFO] [timer.py:197:stop] 0/748, RunningAvgSamplesPerSec=6.319418016141363, CurrSamplesPerSec=5.678815611379994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:07:44,316] [INFO] [timer.py:197:stop] 0/750, RunningAvgSamplesPerSec=6.319421415241671, CurrSamplesPerSec=5.689901318263332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:07:55,634] [INFO] [timer.py:197:stop] 0/752, RunningAvgSamplesPerSec=6.3194597168832, CurrSamplesPerSec=5.712570685852921, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0652, 'learning_rate': 9.519831289296397e-06, 'epoch': 2.82} +[2022-12-19 01:08:06,913] [INFO] [timer.py:197:stop] 0/754, RunningAvgSamplesPerSec=6.319525749954582, CurrSamplesPerSec=5.7049080211463465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:08:18,248] [INFO] [timer.py:197:stop] 0/756, RunningAvgSamplesPerSec=6.319546004304064, CurrSamplesPerSec=5.692388822439334, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:08:29,604] [INFO] [timer.py:197:stop] 0/758, RunningAvgSamplesPerSec=6.319534862240435, CurrSamplesPerSec=5.682529300958316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:08:40,905] [INFO] [logging.py:68:log_dist] [Rank 0] step=380, skipped=5, lr=[9.53708734662638e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:08:40,906] [INFO] [timer.py:197:stop] 0/760, RunningAvgSamplesPerSec=6.319604730318235, CurrSamplesPerSec=5.717760562955475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:08:52,215] [INFO] [timer.py:197:stop] 0/762, RunningAvgSamplesPerSec=6.319657120004174, CurrSamplesPerSec=5.709749244940733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:09:03,553] [INFO] [timer.py:197:stop] 0/764, RunningAvgSamplesPerSec=6.319688978421141, CurrSamplesPerSec=5.688741323272288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:09:14,877] [INFO] [timer.py:197:stop] 0/766, RunningAvgSamplesPerSec=6.319741276214747, CurrSamplesPerSec=5.717071557504972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:09:26,193] [INFO] [timer.py:197:stop] 0/768, RunningAvgSamplesPerSec=6.319783373051262, CurrSamplesPerSec=5.699022891038163, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:09:37,497] [INFO] [timer.py:197:stop] 0/770, RunningAvgSamplesPerSec=6.3198730313567, CurrSamplesPerSec=5.726977370333311, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:09:48,757] [INFO] [timer.py:197:stop] 0/772, RunningAvgSamplesPerSec=6.31995469276013, CurrSamplesPerSec=5.714096299436316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:10:00,099] [INFO] [timer.py:197:stop] 0/774, RunningAvgSamplesPerSec=6.319947080734109, CurrSamplesPerSec=5.685517527677231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:10:11,371] [INFO] [timer.py:197:stop] 0/776, RunningAvgSamplesPerSec=6.320053388805825, CurrSamplesPerSec=5.748864513648623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:10:22,700] [INFO] [timer.py:197:stop] 0/778, RunningAvgSamplesPerSec=6.320085880488491, CurrSamplesPerSec=5.699564265989264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:10:34,026] [INFO] [logging.py:68:log_dist] [Rank 0] step=390, skipped=5, lr=[9.57943484127219e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:10:34,028] [INFO] [timer.py:197:stop] 0/780, RunningAvgSamplesPerSec=6.3200970319439085, CurrSamplesPerSec=5.69235864468338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:10:45,324] [INFO] [timer.py:197:stop] 0/782, RunningAvgSamplesPerSec=6.320140295527232, CurrSamplesPerSec=5.68812245088063, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:10:56,613] [INFO] [timer.py:197:stop] 0/784, RunningAvgSamplesPerSec=6.320181813428936, CurrSamplesPerSec=5.711024259030779, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:11:07,921] [INFO] [timer.py:197:stop] 0/786, RunningAvgSamplesPerSec=6.320240390498994, CurrSamplesPerSec=5.7178124460165165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:11:19,247] [INFO] [timer.py:197:stop] 0/788, RunningAvgSamplesPerSec=6.320257443701773, CurrSamplesPerSec=5.674380027974973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:11:30,579] [INFO] [timer.py:197:stop] 0/790, RunningAvgSamplesPerSec=6.320281969155325, CurrSamplesPerSec=5.6852624886749235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:11:41,832] [INFO] [timer.py:197:stop] 0/792, RunningAvgSamplesPerSec=6.320364834849939, CurrSamplesPerSec=5.7186095661235115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:11:53,155] [INFO] [timer.py:197:stop] 0/794, RunningAvgSamplesPerSec=6.32039959699047, CurrSamplesPerSec=5.690001664381216, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:12:04,476] [INFO] [timer.py:197:stop] 0/796, RunningAvgSamplesPerSec=6.320444255259934, CurrSamplesPerSec=5.700093639913698, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:12:16,468] [INFO] [timer.py:197:stop] 0/798, RunningAvgSamplesPerSec=6.320384566034095, CurrSamplesPerSec=5.644303528380221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:12:28,042] [INFO] [logging.py:68:log_dist] [Rank 0] step=400, skipped=5, lr=[9.620696382156558e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:12:28,043] [INFO] [timer.py:197:stop] 0/800, RunningAvgSamplesPerSec=6.320384545227413, CurrSamplesPerSec=5.670629301789308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:12:38,647] [INFO] [timer.py:197:stop] 0/802, RunningAvgSamplesPerSec=6.321694783422982, CurrSamplesPerSec=5.697203976174061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0661, 'learning_rate': 9.624764935335318e-06, 'epoch': 3.01} +[2022-12-19 01:12:49,942] [INFO] [timer.py:197:stop] 0/804, RunningAvgSamplesPerSec=6.321769207023047, CurrSamplesPerSec=5.717762024437264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:13:01,306] [INFO] [timer.py:197:stop] 0/806, RunningAvgSamplesPerSec=6.321793827491138, CurrSamplesPerSec=5.708415317168403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:13:12,608] [INFO] [timer.py:197:stop] 0/808, RunningAvgSamplesPerSec=6.321874553130169, CurrSamplesPerSec=5.710777374965387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:13:24,100] [INFO] [timer.py:197:stop] 0/810, RunningAvgSamplesPerSec=6.32190001086698, CurrSamplesPerSec=5.698906255990863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:13:35,455] [INFO] [timer.py:197:stop] 0/812, RunningAvgSamplesPerSec=6.321901583642028, CurrSamplesPerSec=5.692360817471119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:13:46,764] [INFO] [timer.py:197:stop] 0/814, RunningAvgSamplesPerSec=6.321962138410091, CurrSamplesPerSec=5.709352619831969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:13:58,112] [INFO] [timer.py:197:stop] 0/816, RunningAvgSamplesPerSec=6.322025629227291, CurrSamplesPerSec=5.717471691390685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:14:09,608] [INFO] [timer.py:197:stop] 0/818, RunningAvgSamplesPerSec=6.322055882105773, CurrSamplesPerSec=5.6919250860090225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:14:20,948] [INFO] [logging.py:68:log_dist] [Rank 0] step=410, skipped=5, lr=[9.660926275674324e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:14:20,950] [INFO] [timer.py:197:stop] 0/820, RunningAvgSamplesPerSec=6.322071366508863, CurrSamplesPerSec=5.689165956780512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:14:32,299] [INFO] [timer.py:197:stop] 0/822, RunningAvgSamplesPerSec=6.322091012799124, CurrSamplesPerSec=5.680982742844537, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:14:43,785] [INFO] [timer.py:197:stop] 0/824, RunningAvgSamplesPerSec=6.322089750056571, CurrSamplesPerSec=5.686235805776385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:14:55,136] [INFO] [timer.py:197:stop] 0/826, RunningAvgSamplesPerSec=6.322084835751599, CurrSamplesPerSec=5.694642193744664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:15:06,446] [INFO] [timer.py:197:stop] 0/828, RunningAvgSamplesPerSec=6.322095494181754, CurrSamplesPerSec=5.693213402531381, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:15:17,770] [INFO] [timer.py:197:stop] 0/830, RunningAvgSamplesPerSec=6.322150099578422, CurrSamplesPerSec=5.700337906393788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:15:29,044] [INFO] [timer.py:197:stop] 0/832, RunningAvgSamplesPerSec=6.322206975503775, CurrSamplesPerSec=5.705411709894755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:15:40,424] [INFO] [timer.py:197:stop] 0/834, RunningAvgSamplesPerSec=6.3222355995601465, CurrSamplesPerSec=5.702684317896509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:15:51,780] [INFO] [timer.py:197:stop] 0/836, RunningAvgSamplesPerSec=6.322215416889453, CurrSamplesPerSec=5.672825433995187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:16:03,249] [INFO] [timer.py:197:stop] 0/838, RunningAvgSamplesPerSec=6.322242856868077, CurrSamplesPerSec=5.705001622437774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:16:14,724] [INFO] [logging.py:68:log_dist] [Rank 0] step=420, skipped=5, lr=[9.700174853763023e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:16:14,726] [INFO] [timer.py:197:stop] 0/840, RunningAvgSamplesPerSec=6.322253108652342, CurrSamplesPerSec=5.690793461489017, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:16:26,062] [INFO] [timer.py:197:stop] 0/842, RunningAvgSamplesPerSec=6.322262189911131, CurrSamplesPerSec=5.688311208358119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:16:37,452] [INFO] [timer.py:197:stop] 0/844, RunningAvgSamplesPerSec=6.322193084837088, CurrSamplesPerSec=5.617880313851533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:16:48,950] [INFO] [timer.py:197:stop] 0/846, RunningAvgSamplesPerSec=6.322181426493689, CurrSamplesPerSec=5.690715043962175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:17:00,262] [INFO] [timer.py:197:stop] 0/848, RunningAvgSamplesPerSec=6.322283976168414, CurrSamplesPerSec=5.720835223687506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:17:11,066] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 4096.0, reducing to 2048.0 +[2022-12-19 01:17:11,068] [INFO] [timer.py:197:stop] 0/850, RunningAvgSamplesPerSec=6.323227806105633, CurrSamplesPerSec=6.389358543624319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:17:22,524] [INFO] [timer.py:197:stop] 0/852, RunningAvgSamplesPerSec=6.323247375252913, CurrSamplesPerSec=5.700473968753976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0342, 'learning_rate': 9.719445885591654e-06, 'epoch': 3.19} +[2022-12-19 01:17:33,816] [INFO] [timer.py:197:stop] 0/854, RunningAvgSamplesPerSec=6.323299274109773, CurrSamplesPerSec=5.7191917113865145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:17:45,115] [INFO] [timer.py:197:stop] 0/856, RunningAvgSamplesPerSec=6.32335214957532, CurrSamplesPerSec=5.700958955586349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:17:56,451] [INFO] [timer.py:197:stop] 0/858, RunningAvgSamplesPerSec=6.323402909213706, CurrSamplesPerSec=5.723904893377566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:18:07,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=430, skipped=6, lr=[9.734698245522364e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:18:07,895] [INFO] [timer.py:197:stop] 0/860, RunningAvgSamplesPerSec=6.323411764163691, CurrSamplesPerSec=5.699367258461975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:18:19,276] [INFO] [timer.py:197:stop] 0/862, RunningAvgSamplesPerSec=6.32344425337955, CurrSamplesPerSec=5.711898730772658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:18:30,599] [INFO] [timer.py:197:stop] 0/864, RunningAvgSamplesPerSec=6.323450454321044, CurrSamplesPerSec=5.712499933497748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:18:42,093] [INFO] [timer.py:197:stop] 0/866, RunningAvgSamplesPerSec=6.32334593858172, CurrSamplesPerSec=5.618560669677461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:18:53,481] [INFO] [timer.py:197:stop] 0/868, RunningAvgSamplesPerSec=6.32340933639266, CurrSamplesPerSec=5.698487425498509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:19:04,854] [INFO] [timer.py:197:stop] 0/870, RunningAvgSamplesPerSec=6.323423345732062, CurrSamplesPerSec=5.697551026143254, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:19:16,313] [INFO] [timer.py:197:stop] 0/872, RunningAvgSamplesPerSec=6.32338831580146, CurrSamplesPerSec=5.650067330898288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:19:27,612] [INFO] [timer.py:197:stop] 0/874, RunningAvgSamplesPerSec=6.323402354094561, CurrSamplesPerSec=5.688818239689118, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:19:39,042] [INFO] [timer.py:197:stop] 0/876, RunningAvgSamplesPerSec=6.323316905278167, CurrSamplesPerSec=5.6311098282192935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:19:50,355] [INFO] [timer.py:197:stop] 0/878, RunningAvgSamplesPerSec=6.323354232497454, CurrSamplesPerSec=5.697809345830576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:20:01,724] [INFO] [logging.py:68:log_dist] [Rank 0] step=440, skipped=6, lr=[9.7722083805128e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:20:01,725] [INFO] [timer.py:197:stop] 0/880, RunningAvgSamplesPerSec=6.32338878115084, CurrSamplesPerSec=5.700413684095496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:20:13,158] [INFO] [timer.py:197:stop] 0/882, RunningAvgSamplesPerSec=6.323436968059735, CurrSamplesPerSec=5.703073958759219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:20:24,649] [INFO] [timer.py:197:stop] 0/884, RunningAvgSamplesPerSec=6.323396519975937, CurrSamplesPerSec=5.6685287139997245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:20:36,012] [INFO] [timer.py:197:stop] 0/886, RunningAvgSamplesPerSec=6.323400921048054, CurrSamplesPerSec=5.666818929995109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:20:47,383] [INFO] [timer.py:197:stop] 0/888, RunningAvgSamplesPerSec=6.323362386669525, CurrSamplesPerSec=5.668898616819385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:20:58,706] [INFO] [timer.py:197:stop] 0/890, RunningAvgSamplesPerSec=6.323378027172579, CurrSamplesPerSec=5.702674868313015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:21:10,066] [INFO] [timer.py:197:stop] 0/892, RunningAvgSamplesPerSec=6.32336703470946, CurrSamplesPerSec=5.667214692479574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:21:21,400] [INFO] [timer.py:197:stop] 0/894, RunningAvgSamplesPerSec=6.3233560277876695, CurrSamplesPerSec=5.6769286509397885, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:21:32,744] [INFO] [timer.py:197:stop] 0/896, RunningAvgSamplesPerSec=6.323348702379982, CurrSamplesPerSec=5.688389077480841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:21:44,108] [INFO] [timer.py:197:stop] 0/898, RunningAvgSamplesPerSec=6.323314143836581, CurrSamplesPerSec=5.672014658742165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:21:55,445] [INFO] [logging.py:68:log_dist] [Rank 0] step=450, skipped=6, lr=[9.808863995752003e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:21:55,447] [INFO] [timer.py:197:stop] 0/900, RunningAvgSamplesPerSec=6.323313724616391, CurrSamplesPerSec=5.653171985653622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:22:06,755] [INFO] [timer.py:197:stop] 0/902, RunningAvgSamplesPerSec=6.323335546365318, CurrSamplesPerSec=5.674087607163524, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.035, 'learning_rate': 9.812484046603779e-06, 'epoch': 3.38} +[2022-12-19 01:22:18,059] [INFO] [timer.py:197:stop] 0/904, RunningAvgSamplesPerSec=6.323352218581553, CurrSamplesPerSec=5.690782362261613, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:22:29,388] [INFO] [timer.py:197:stop] 0/906, RunningAvgSamplesPerSec=6.323354588910296, CurrSamplesPerSec=5.698235092305796, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:22:40,707] [INFO] [timer.py:197:stop] 0/908, RunningAvgSamplesPerSec=6.323370207784674, CurrSamplesPerSec=5.699608074182279, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:22:52,073] [INFO] [timer.py:197:stop] 0/910, RunningAvgSamplesPerSec=6.323344722975836, CurrSamplesPerSec=5.67844849739414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:23:03,420] [INFO] [timer.py:197:stop] 0/912, RunningAvgSamplesPerSec=6.323322930840565, CurrSamplesPerSec=5.684461155249277, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:23:14,743] [INFO] [timer.py:197:stop] 0/914, RunningAvgSamplesPerSec=6.323304140316191, CurrSamplesPerSec=5.674361555875228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:23:26,064] [INFO] [timer.py:197:stop] 0/916, RunningAvgSamplesPerSec=6.323299847573087, CurrSamplesPerSec=5.698749459550104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:23:37,377] [INFO] [timer.py:197:stop] 0/918, RunningAvgSamplesPerSec=6.323292844634116, CurrSamplesPerSec=5.689933882120396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:23:48,716] [INFO] [logging.py:68:log_dist] [Rank 0] step=460, skipped=6, lr=[9.844703159310488e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:23:48,718] [INFO] [timer.py:197:stop] 0/920, RunningAvgSamplesPerSec=6.323303208444164, CurrSamplesPerSec=5.695446400367719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:24:00,065] [INFO] [timer.py:197:stop] 0/922, RunningAvgSamplesPerSec=6.323269289437433, CurrSamplesPerSec=5.669692454741702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:24:11,407] [INFO] [timer.py:197:stop] 0/924, RunningAvgSamplesPerSec=6.323259471754332, CurrSamplesPerSec=5.679810517433488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:24:22,740] [INFO] [timer.py:197:stop] 0/926, RunningAvgSamplesPerSec=6.323281103949813, CurrSamplesPerSec=5.700637397434466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:24:34,081] [INFO] [timer.py:197:stop] 0/928, RunningAvgSamplesPerSec=6.3232830899059795, CurrSamplesPerSec=5.674602182386966, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:24:45,472] [INFO] [timer.py:197:stop] 0/930, RunningAvgSamplesPerSec=6.323205054575337, CurrSamplesPerSec=5.6307578326207715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:24:56,791] [INFO] [timer.py:197:stop] 0/932, RunningAvgSamplesPerSec=6.323213342725284, CurrSamplesPerSec=5.684693249022368, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:25:08,121] [INFO] [timer.py:197:stop] 0/934, RunningAvgSamplesPerSec=6.3232047024702505, CurrSamplesPerSec=5.675793380201075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:25:19,441] [INFO] [timer.py:197:stop] 0/936, RunningAvgSamplesPerSec=6.323231242771201, CurrSamplesPerSec=5.717024801752569, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:25:30,788] [INFO] [timer.py:197:stop] 0/938, RunningAvgSamplesPerSec=6.323211393401402, CurrSamplesPerSec=5.680549232323541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:25:42,101] [INFO] [logging.py:68:log_dist] [Rank 0] step=470, skipped=6, lr=[9.879761450742313e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:25:42,102] [INFO] [timer.py:197:stop] 0/940, RunningAvgSamplesPerSec=6.3232060179743215, CurrSamplesPerSec=5.676508242939549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:25:53,427] [INFO] [timer.py:197:stop] 0/942, RunningAvgSamplesPerSec=6.323238049010313, CurrSamplesPerSec=5.6956842261249525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:26:04,757] [INFO] [timer.py:197:stop] 0/944, RunningAvgSamplesPerSec=6.323240741546512, CurrSamplesPerSec=5.70115195632424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:26:16,089] [INFO] [timer.py:197:stop] 0/946, RunningAvgSamplesPerSec=6.323283876059346, CurrSamplesPerSec=5.732785974417554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:26:27,424] [INFO] [timer.py:197:stop] 0/948, RunningAvgSamplesPerSec=6.323312334629156, CurrSamplesPerSec=5.704214351871154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:26:38,764] [INFO] [timer.py:197:stop] 0/950, RunningAvgSamplesPerSec=6.323316030948574, CurrSamplesPerSec=5.692720074519824, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:26:50,113] [INFO] [timer.py:197:stop] 0/952, RunningAvgSamplesPerSec=6.3232855482566075, CurrSamplesPerSec=5.666921574084832, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0359, 'learning_rate': 9.900435550016748e-06, 'epoch': 3.57} +[2022-12-19 01:27:01,958] [INFO] [timer.py:197:stop] 0/954, RunningAvgSamplesPerSec=6.323210304497367, CurrSamplesPerSec=5.653149365456727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:27:13,917] [INFO] [timer.py:197:stop] 0/956, RunningAvgSamplesPerSec=6.323053201249547, CurrSamplesPerSec=5.644260803578544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:27:25,649] [INFO] [timer.py:197:stop] 0/958, RunningAvgSamplesPerSec=6.323061985415543, CurrSamplesPerSec=5.697902472435686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:27:37,082] [INFO] [logging.py:68:log_dist] [Rank 0] step=480, skipped=6, lr=[9.91407217336734e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:27:37,083] [INFO] [timer.py:197:stop] 0/960, RunningAvgSamplesPerSec=6.323074868472379, CurrSamplesPerSec=5.677153886551778, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:27:48,423] [INFO] [timer.py:197:stop] 0/962, RunningAvgSamplesPerSec=6.323072695087377, CurrSamplesPerSec=5.680810340553457, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:27:59,870] [INFO] [timer.py:197:stop] 0/964, RunningAvgSamplesPerSec=6.323061451196268, CurrSamplesPerSec=5.6770407862652705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:28:11,187] [INFO] [timer.py:197:stop] 0/966, RunningAvgSamplesPerSec=6.323070853307474, CurrSamplesPerSec=5.683402046694541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:28:22,565] [INFO] [timer.py:197:stop] 0/968, RunningAvgSamplesPerSec=6.323063793453601, CurrSamplesPerSec=5.69253199007323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:28:34,005] [INFO] [timer.py:197:stop] 0/970, RunningAvgSamplesPerSec=6.323000504824408, CurrSamplesPerSec=5.649307512477133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:28:45,469] [INFO] [timer.py:197:stop] 0/972, RunningAvgSamplesPerSec=6.322978679455104, CurrSamplesPerSec=5.665946485745061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:28:56,797] [INFO] [timer.py:197:stop] 0/974, RunningAvgSamplesPerSec=6.323024918882465, CurrSamplesPerSec=5.723629557285836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:29:08,196] [INFO] [timer.py:197:stop] 0/976, RunningAvgSamplesPerSec=6.322984791446119, CurrSamplesPerSec=5.6574342397492785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:29:19,660] [INFO] [timer.py:197:stop] 0/978, RunningAvgSamplesPerSec=6.322970862618648, CurrSamplesPerSec=5.695188536548119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:29:30,980] [INFO] [logging.py:68:log_dist] [Rank 0] step=490, skipped=6, lr=[9.947666544389474e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:29:30,981] [INFO] [timer.py:197:stop] 0/980, RunningAvgSamplesPerSec=6.322983041075797, CurrSamplesPerSec=5.683181368537643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:29:42,438] [INFO] [timer.py:197:stop] 0/982, RunningAvgSamplesPerSec=6.322988408088883, CurrSamplesPerSec=5.6952731191076085, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:29:53,980] [INFO] [timer.py:197:stop] 0/984, RunningAvgSamplesPerSec=6.322996048898094, CurrSamplesPerSec=5.676496239056689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:30:05,278] [INFO] [timer.py:197:stop] 0/986, RunningAvgSamplesPerSec=6.3230382437678, CurrSamplesPerSec=5.7131319040698605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:30:16,608] [INFO] [timer.py:197:stop] 0/988, RunningAvgSamplesPerSec=6.323051433503477, CurrSamplesPerSec=5.711673403522599, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:30:27,992] [INFO] [timer.py:197:stop] 0/990, RunningAvgSamplesPerSec=6.323072542187161, CurrSamplesPerSec=5.701948071120495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:30:39,463] [INFO] [timer.py:197:stop] 0/992, RunningAvgSamplesPerSec=6.323062899615077, CurrSamplesPerSec=5.682492250688604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:30:51,010] [INFO] [timer.py:197:stop] 0/994, RunningAvgSamplesPerSec=6.323011157687023, CurrSamplesPerSec=5.651232544913681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:31:02,366] [INFO] [timer.py:197:stop] 0/996, RunningAvgSamplesPerSec=6.322978668959763, CurrSamplesPerSec=5.673821599972912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:31:13,759] [INFO] [timer.py:197:stop] 0/998, RunningAvgSamplesPerSec=6.322917247758531, CurrSamplesPerSec=5.663485617511615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:31:25,101] [INFO] [logging.py:68:log_dist] [Rank 0] step=500, skipped=6, lr=[9.98057386557113e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:31:25,103] [INFO] [timer.py:197:stop] 0/1000, RunningAvgSamplesPerSec=6.322900554029162, CurrSamplesPerSec=5.689454869188859, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:31:36,430] [INFO] [timer.py:197:stop] 0/1002, RunningAvgSamplesPerSec=6.322925039750919, CurrSamplesPerSec=5.6937348342671745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0369, 'learning_rate': 9.98382788472848e-06, 'epoch': 3.76} +[2022-12-19 01:31:47,789] [INFO] [timer.py:197:stop] 0/1004, RunningAvgSamplesPerSec=6.32291374621284, CurrSamplesPerSec=5.698999418456137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:31:59,234] [INFO] [timer.py:197:stop] 0/1006, RunningAvgSamplesPerSec=6.322916256308939, CurrSamplesPerSec=5.7036088323887855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:32:10,597] [INFO] [timer.py:197:stop] 0/1008, RunningAvgSamplesPerSec=6.3228884449951215, CurrSamplesPerSec=5.678974917097706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:32:21,979] [INFO] [timer.py:197:stop] 0/1010, RunningAvgSamplesPerSec=6.322889333406262, CurrSamplesPerSec=5.6726719874260825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:32:33,271] [INFO] [timer.py:197:stop] 0/1012, RunningAvgSamplesPerSec=6.322960707822512, CurrSamplesPerSec=5.72160489343192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:32:44,686] [INFO] [timer.py:197:stop] 0/1014, RunningAvgSamplesPerSec=6.3229558689351535, CurrSamplesPerSec=5.689121103228353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:32:56,034] [INFO] [timer.py:197:stop] 0/1016, RunningAvgSamplesPerSec=6.322939953575906, CurrSamplesPerSec=5.68339747412702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:33:07,392] [INFO] [timer.py:197:stop] 0/1018, RunningAvgSamplesPerSec=6.322930569069281, CurrSamplesPerSec=5.681929097959649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:33:18,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=510, skipped=6, lr=[9.993333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:33:18,736] [INFO] [timer.py:197:stop] 0/1020, RunningAvgSamplesPerSec=6.32292375774324, CurrSamplesPerSec=5.6816339744942725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:33:30,010] [INFO] [timer.py:197:stop] 0/1022, RunningAvgSamplesPerSec=6.322945414488586, CurrSamplesPerSec=5.696318283500739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:33:41,334] [INFO] [timer.py:197:stop] 0/1024, RunningAvgSamplesPerSec=6.322984246719093, CurrSamplesPerSec=5.705652794140159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:33:52,658] [INFO] [timer.py:197:stop] 0/1026, RunningAvgSamplesPerSec=6.323030763936265, CurrSamplesPerSec=5.712995966314802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:34:04,035] [INFO] [timer.py:197:stop] 0/1028, RunningAvgSamplesPerSec=6.322984603066996, CurrSamplesPerSec=5.640822866337973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:34:15,328] [INFO] [timer.py:197:stop] 0/1030, RunningAvgSamplesPerSec=6.32303087310678, CurrSamplesPerSec=5.70957922130605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:34:26,770] [INFO] [timer.py:197:stop] 0/1032, RunningAvgSamplesPerSec=6.32305841440387, CurrSamplesPerSec=5.690779708104953, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:34:38,112] [INFO] [timer.py:197:stop] 0/1034, RunningAvgSamplesPerSec=6.32306724241664, CurrSamplesPerSec=5.691750812101744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:34:49,406] [INFO] [timer.py:197:stop] 0/1036, RunningAvgSamplesPerSec=6.32310307845081, CurrSamplesPerSec=5.708598382672515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:35:00,711] [INFO] [timer.py:197:stop] 0/1038, RunningAvgSamplesPerSec=6.3231051157395095, CurrSamplesPerSec=5.68638010955131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:35:12,152] [INFO] [logging.py:68:log_dist] [Rank 0] step=520, skipped=6, lr=[9.97111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:35:12,154] [INFO] [timer.py:197:stop] 0/1040, RunningAvgSamplesPerSec=6.32315279765852, CurrSamplesPerSec=5.699839470064783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:35:23,483] [INFO] [timer.py:197:stop] 0/1042, RunningAvgSamplesPerSec=6.323164489487023, CurrSamplesPerSec=5.692513641036533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:35:34,762] [INFO] [timer.py:197:stop] 0/1044, RunningAvgSamplesPerSec=6.32322771359841, CurrSamplesPerSec=5.718281384936492, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:35:46,247] [INFO] [timer.py:197:stop] 0/1046, RunningAvgSamplesPerSec=6.323294008773126, CurrSamplesPerSec=5.7246219166991565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:35:57,534] [INFO] [timer.py:197:stop] 0/1048, RunningAvgSamplesPerSec=6.323331923594489, CurrSamplesPerSec=5.71667781030241, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:36:08,805] [INFO] [timer.py:197:stop] 0/1050, RunningAvgSamplesPerSec=6.323393744630139, CurrSamplesPerSec=5.723526069008761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:36:20,121] [INFO] [timer.py:197:stop] 0/1052, RunningAvgSamplesPerSec=6.323404218137753, CurrSamplesPerSec=5.694187753131443, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0384, 'learning_rate': 9.957777777777779e-06, 'epoch': 3.94} +[2022-12-19 01:36:31,566] [INFO] [timer.py:197:stop] 0/1054, RunningAvgSamplesPerSec=6.323466996820582, CurrSamplesPerSec=5.719525846604936, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:36:43,097] [INFO] [timer.py:197:stop] 0/1056, RunningAvgSamplesPerSec=6.3234886046669905, CurrSamplesPerSec=5.690465811219692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:36:54,569] [INFO] [timer.py:197:stop] 0/1058, RunningAvgSamplesPerSec=6.323547134916665, CurrSamplesPerSec=5.709135993011103, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:37:06,006] [INFO] [logging.py:68:log_dist] [Rank 0] step=530, skipped=6, lr=[9.94888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:37:06,008] [INFO] [timer.py:197:stop] 0/1060, RunningAvgSamplesPerSec=6.3235982996251545, CurrSamplesPerSec=5.694189202587467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:37:17,489] [INFO] [timer.py:197:stop] 0/1062, RunningAvgSamplesPerSec=6.323633751245574, CurrSamplesPerSec=5.715643417021246, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:37:28,779] [INFO] [timer.py:197:stop] 0/1064, RunningAvgSamplesPerSec=6.323661512678611, CurrSamplesPerSec=5.698340329223768, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:37:40,093] [INFO] [timer.py:197:stop] 0/1066, RunningAvgSamplesPerSec=6.323673481889273, CurrSamplesPerSec=5.695503196380515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:37:50,533] [INFO] [timer.py:197:stop] 0/1068, RunningAvgSamplesPerSec=6.324634287150376, CurrSamplesPerSec=6.682176428972918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:38:01,884] [INFO] [timer.py:197:stop] 0/1070, RunningAvgSamplesPerSec=6.32467691094481, CurrSamplesPerSec=5.700260678157333, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:38:13,456] [INFO] [timer.py:197:stop] 0/1072, RunningAvgSamplesPerSec=6.3245939777038425, CurrSamplesPerSec=5.699422196538105, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:38:24,747] [INFO] [timer.py:197:stop] 0/1074, RunningAvgSamplesPerSec=6.3246355004200465, CurrSamplesPerSec=5.718520390670772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:38:36,077] [INFO] [timer.py:197:stop] 0/1076, RunningAvgSamplesPerSec=6.324659660489291, CurrSamplesPerSec=5.70697038592648, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:38:47,579] [INFO] [timer.py:197:stop] 0/1078, RunningAvgSamplesPerSec=6.324683406855854, CurrSamplesPerSec=5.711681424575847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:38:58,908] [INFO] [logging.py:68:log_dist] [Rank 0] step=540, skipped=6, lr=[9.926666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:38:58,910] [INFO] [timer.py:197:stop] 0/1080, RunningAvgSamplesPerSec=6.3246894487748575, CurrSamplesPerSec=5.69167478181544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:39:10,232] [INFO] [timer.py:197:stop] 0/1082, RunningAvgSamplesPerSec=6.324724402554547, CurrSamplesPerSec=5.725545498402413, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:39:21,519] [INFO] [timer.py:197:stop] 0/1084, RunningAvgSamplesPerSec=6.32476391106619, CurrSamplesPerSec=5.698932147591169, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:39:32,830] [INFO] [timer.py:197:stop] 0/1086, RunningAvgSamplesPerSec=6.3247929630953355, CurrSamplesPerSec=5.703348290109618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:39:44,213] [INFO] [timer.py:197:stop] 0/1088, RunningAvgSamplesPerSec=6.324850666063967, CurrSamplesPerSec=5.721373434210019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:39:55,587] [INFO] [timer.py:197:stop] 0/1090, RunningAvgSamplesPerSec=6.324884669639123, CurrSamplesPerSec=5.724564294296037, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:40:06,880] [INFO] [timer.py:197:stop] 0/1092, RunningAvgSamplesPerSec=6.324903443332988, CurrSamplesPerSec=5.708613921929661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:40:18,455] [INFO] [timer.py:197:stop] 0/1094, RunningAvgSamplesPerSec=6.324914008563583, CurrSamplesPerSec=5.695235177477344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:40:29,906] [INFO] [timer.py:197:stop] 0/1096, RunningAvgSamplesPerSec=6.324924544190334, CurrSamplesPerSec=5.699242865146464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:40:41,231] [INFO] [timer.py:197:stop] 0/1098, RunningAvgSamplesPerSec=6.324970888911037, CurrSamplesPerSec=5.710541202707478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:40:52,558] [INFO] [logging.py:68:log_dist] [Rank 0] step=550, skipped=6, lr=[9.904444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:40:52,560] [INFO] [timer.py:197:stop] 0/1100, RunningAvgSamplesPerSec=6.324971563388574, CurrSamplesPerSec=5.69697932316346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:41:03,909] [INFO] [timer.py:197:stop] 0/1102, RunningAvgSamplesPerSec=6.324976909037321, CurrSamplesPerSec=5.691620234353525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:41:15,246] [INFO] [timer.py:197:stop] 0/1104, RunningAvgSamplesPerSec=6.324959432707775, CurrSamplesPerSec=5.658989713678185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0274, 'learning_rate': 9.9e-06, 'epoch': 4.13} +[2022-12-19 01:41:26,705] [INFO] [timer.py:197:stop] 0/1106, RunningAvgSamplesPerSec=6.32494071497359, CurrSamplesPerSec=5.666268687087448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:41:38,069] [INFO] [timer.py:197:stop] 0/1108, RunningAvgSamplesPerSec=6.324912812331334, CurrSamplesPerSec=5.6722661133204575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:41:49,397] [INFO] [timer.py:197:stop] 0/1110, RunningAvgSamplesPerSec=6.324924197247109, CurrSamplesPerSec=5.705353260948113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:42:00,730] [INFO] [timer.py:197:stop] 0/1112, RunningAvgSamplesPerSec=6.324923475715562, CurrSamplesPerSec=5.696250108912601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:42:12,075] [INFO] [timer.py:197:stop] 0/1114, RunningAvgSamplesPerSec=6.324928165247696, CurrSamplesPerSec=5.68334476980095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:42:23,365] [INFO] [timer.py:197:stop] 0/1116, RunningAvgSamplesPerSec=6.324958701477878, CurrSamplesPerSec=5.695486519986947, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:42:34,741] [INFO] [timer.py:197:stop] 0/1118, RunningAvgSamplesPerSec=6.324939926456509, CurrSamplesPerSec=5.680064586753917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:42:46,083] [INFO] [logging.py:68:log_dist] [Rank 0] step=560, skipped=6, lr=[9.882222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:42:46,085] [INFO] [timer.py:197:stop] 0/1120, RunningAvgSamplesPerSec=6.324918290930486, CurrSamplesPerSec=5.67784651398351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:42:57,608] [INFO] [timer.py:197:stop] 0/1122, RunningAvgSamplesPerSec=6.324764020850468, CurrSamplesPerSec=5.547015492313604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:43:08,935] [INFO] [timer.py:197:stop] 0/1124, RunningAvgSamplesPerSec=6.324796596619278, CurrSamplesPerSec=5.713381423831785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:43:20,277] [INFO] [timer.py:197:stop] 0/1126, RunningAvgSamplesPerSec=6.3248052723591925, CurrSamplesPerSec=5.690979500636292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:43:31,681] [INFO] [timer.py:197:stop] 0/1128, RunningAvgSamplesPerSec=6.3247958738284344, CurrSamplesPerSec=5.6889424194851825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:43:43,057] [INFO] [timer.py:197:stop] 0/1130, RunningAvgSamplesPerSec=6.3248224663505415, CurrSamplesPerSec=5.689105911056516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:43:54,370] [INFO] [timer.py:197:stop] 0/1132, RunningAvgSamplesPerSec=6.324841766909368, CurrSamplesPerSec=5.702669537792552, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:44:05,742] [INFO] [timer.py:197:stop] 0/1134, RunningAvgSamplesPerSec=6.324824531402452, CurrSamplesPerSec=5.663527199969653, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:44:17,080] [INFO] [timer.py:197:stop] 0/1136, RunningAvgSamplesPerSec=6.324830973021693, CurrSamplesPerSec=5.6934302717541225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:44:28,419] [INFO] [timer.py:197:stop] 0/1138, RunningAvgSamplesPerSec=6.3248227899266665, CurrSamplesPerSec=5.690067518444597, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:44:39,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=570, skipped=6, lr=[9.86e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:44:39,737] [INFO] [timer.py:197:stop] 0/1140, RunningAvgSamplesPerSec=6.324844393903044, CurrSamplesPerSec=5.700690665103304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:44:51,047] [INFO] [timer.py:197:stop] 0/1142, RunningAvgSamplesPerSec=6.324839321410804, CurrSamplesPerSec=5.696933620898516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:45:02,419] [INFO] [timer.py:197:stop] 0/1144, RunningAvgSamplesPerSec=6.324853029775292, CurrSamplesPerSec=5.709043955873813, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:45:13,720] [INFO] [timer.py:197:stop] 0/1146, RunningAvgSamplesPerSec=6.324889788383863, CurrSamplesPerSec=5.7113647316193985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:45:25,076] [INFO] [timer.py:197:stop] 0/1148, RunningAvgSamplesPerSec=6.324897604613154, CurrSamplesPerSec=5.705412437482551, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:45:36,460] [INFO] [timer.py:197:stop] 0/1150, RunningAvgSamplesPerSec=6.324880461634712, CurrSamplesPerSec=5.681721041146652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:45:47,778] [INFO] [timer.py:197:stop] 0/1152, RunningAvgSamplesPerSec=6.3248681661360395, CurrSamplesPerSec=5.6859392712596915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:45:59,199] [INFO] [timer.py:197:stop] 0/1154, RunningAvgSamplesPerSec=6.32486256081392, CurrSamplesPerSec=5.6809637468211225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0236, 'learning_rate': 9.844444444444446e-06, 'epoch': 4.32} +[2022-12-19 01:46:10,526] [INFO] [timer.py:197:stop] 0/1156, RunningAvgSamplesPerSec=6.324858652698689, CurrSamplesPerSec=5.693544026025647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:46:21,972] [INFO] [timer.py:197:stop] 0/1158, RunningAvgSamplesPerSec=6.32485053124946, CurrSamplesPerSec=5.68506165253418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:46:33,471] [INFO] [logging.py:68:log_dist] [Rank 0] step=580, skipped=6, lr=[9.837777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:46:33,472] [INFO] [timer.py:197:stop] 0/1160, RunningAvgSamplesPerSec=6.324843769577704, CurrSamplesPerSec=5.6795591145391215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:46:44,777] [INFO] [timer.py:197:stop] 0/1162, RunningAvgSamplesPerSec=6.324900878253531, CurrSamplesPerSec=5.7268080295969455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:46:56,140] [INFO] [timer.py:197:stop] 0/1164, RunningAvgSamplesPerSec=6.32492556450326, CurrSamplesPerSec=5.691554102992158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:47:07,485] [INFO] [timer.py:197:stop] 0/1166, RunningAvgSamplesPerSec=6.324901005215093, CurrSamplesPerSec=5.673412683551026, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:47:18,816] [INFO] [timer.py:197:stop] 0/1168, RunningAvgSamplesPerSec=6.324909920715825, CurrSamplesPerSec=5.690371238777734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:47:30,174] [INFO] [timer.py:197:stop] 0/1170, RunningAvgSamplesPerSec=6.3249256623144525, CurrSamplesPerSec=5.711775734406699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:47:41,661] [INFO] [timer.py:197:stop] 0/1172, RunningAvgSamplesPerSec=6.32490763089565, CurrSamplesPerSec=5.678518649015999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:47:53,117] [INFO] [timer.py:197:stop] 0/1174, RunningAvgSamplesPerSec=6.324886759410804, CurrSamplesPerSec=5.682959745020392, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:48:04,480] [INFO] [timer.py:197:stop] 0/1176, RunningAvgSamplesPerSec=6.324882111428021, CurrSamplesPerSec=5.692186034068705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:48:15,949] [INFO] [timer.py:197:stop] 0/1178, RunningAvgSamplesPerSec=6.324891442697314, CurrSamplesPerSec=5.705409769661542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:48:27,280] [INFO] [logging.py:68:log_dist] [Rank 0] step=590, skipped=6, lr=[9.815555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:48:27,281] [INFO] [timer.py:197:stop] 0/1180, RunningAvgSamplesPerSec=6.3249009669893, CurrSamplesPerSec=5.69699189746022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:48:38,768] [INFO] [timer.py:197:stop] 0/1182, RunningAvgSamplesPerSec=6.324901413016358, CurrSamplesPerSec=5.682886596209592, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:48:50,057] [INFO] [timer.py:197:stop] 0/1184, RunningAvgSamplesPerSec=6.324928789090779, CurrSamplesPerSec=5.695716131091108, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:49:01,381] [INFO] [timer.py:197:stop] 0/1186, RunningAvgSamplesPerSec=6.324937469834437, CurrSamplesPerSec=5.69275580955522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:49:12,701] [INFO] [timer.py:197:stop] 0/1188, RunningAvgSamplesPerSec=6.324954616801356, CurrSamplesPerSec=5.70224991172664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:49:24,185] [INFO] [timer.py:197:stop] 0/1190, RunningAvgSamplesPerSec=6.324946494106825, CurrSamplesPerSec=5.684085607962462, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:49:35,633] [INFO] [timer.py:197:stop] 0/1192, RunningAvgSamplesPerSec=6.324946190116836, CurrSamplesPerSec=5.685711170302852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:49:47,012] [INFO] [timer.py:197:stop] 0/1194, RunningAvgSamplesPerSec=6.3249330850943775, CurrSamplesPerSec=5.676195918432251, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:49:58,318] [INFO] [timer.py:197:stop] 0/1196, RunningAvgSamplesPerSec=6.324939638226609, CurrSamplesPerSec=5.694087017745471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:50:09,617] [INFO] [timer.py:197:stop] 0/1198, RunningAvgSamplesPerSec=6.324974933378967, CurrSamplesPerSec=5.707543125168769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:50:20,959] [INFO] [logging.py:68:log_dist] [Rank 0] step=600, skipped=6, lr=[9.793333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:50:20,961] [INFO] [timer.py:197:stop] 0/1200, RunningAvgSamplesPerSec=6.32499206618706, CurrSamplesPerSec=5.693619864732031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:50:32,282] [INFO] [timer.py:197:stop] 0/1202, RunningAvgSamplesPerSec=6.325001612094914, CurrSamplesPerSec=5.697246055241894, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:50:43,566] [INFO] [timer.py:197:stop] 0/1204, RunningAvgSamplesPerSec=6.325050703353141, CurrSamplesPerSec=5.7203190561748505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0246, 'learning_rate': 9.78888888888889e-06, 'epoch': 4.51} +[2022-12-19 01:50:54,880] [INFO] [timer.py:197:stop] 0/1206, RunningAvgSamplesPerSec=6.325062201120487, CurrSamplesPerSec=5.6983712962294675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:51:06,240] [INFO] [timer.py:197:stop] 0/1208, RunningAvgSamplesPerSec=6.325048528672647, CurrSamplesPerSec=5.674052825725717, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:51:17,590] [INFO] [timer.py:197:stop] 0/1210, RunningAvgSamplesPerSec=6.325051108575158, CurrSamplesPerSec=5.693770582043694, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:51:28,899] [INFO] [timer.py:197:stop] 0/1212, RunningAvgSamplesPerSec=6.325073728314405, CurrSamplesPerSec=5.714869023518775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:51:40,218] [INFO] [timer.py:197:stop] 0/1214, RunningAvgSamplesPerSec=6.325086425815481, CurrSamplesPerSec=5.703932422683251, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:51:51,559] [INFO] [timer.py:197:stop] 0/1216, RunningAvgSamplesPerSec=6.325093477784379, CurrSamplesPerSec=5.688901427496772, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:52:02,896] [INFO] [timer.py:197:stop] 0/1218, RunningAvgSamplesPerSec=6.325131467261908, CurrSamplesPerSec=5.706784755713206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:52:14,192] [INFO] [logging.py:68:log_dist] [Rank 0] step=610, skipped=6, lr=[9.771111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:52:14,194] [INFO] [timer.py:197:stop] 0/1220, RunningAvgSamplesPerSec=6.3251471976647045, CurrSamplesPerSec=5.702073067045441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:52:25,485] [INFO] [timer.py:197:stop] 0/1222, RunningAvgSamplesPerSec=6.325166622369134, CurrSamplesPerSec=5.693566970644037, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:52:36,876] [INFO] [timer.py:197:stop] 0/1224, RunningAvgSamplesPerSec=6.325120746866488, CurrSamplesPerSec=5.645968875302027, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:52:48,231] [INFO] [timer.py:197:stop] 0/1226, RunningAvgSamplesPerSec=6.3251296745900145, CurrSamplesPerSec=5.706563956413903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:52:59,590] [INFO] [timer.py:197:stop] 0/1228, RunningAvgSamplesPerSec=6.32510891943198, CurrSamplesPerSec=5.7023630496254185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:53:10,916] [INFO] [timer.py:197:stop] 0/1230, RunningAvgSamplesPerSec=6.325104260058115, CurrSamplesPerSec=5.682448464629102, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:53:22,259] [INFO] [timer.py:197:stop] 0/1232, RunningAvgSamplesPerSec=6.325117850544401, CurrSamplesPerSec=5.701988040151647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:53:33,603] [INFO] [timer.py:197:stop] 0/1234, RunningAvgSamplesPerSec=6.325133320435429, CurrSamplesPerSec=5.712330961596245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:53:44,961] [INFO] [timer.py:197:stop] 0/1236, RunningAvgSamplesPerSec=6.325110017499254, CurrSamplesPerSec=5.692100818981726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:53:56,339] [INFO] [timer.py:197:stop] 0/1238, RunningAvgSamplesPerSec=6.325094500754509, CurrSamplesPerSec=5.690047496718268, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:54:07,677] [INFO] [logging.py:68:log_dist] [Rank 0] step=620, skipped=6, lr=[9.74888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:54:07,678] [INFO] [timer.py:197:stop] 0/1240, RunningAvgSamplesPerSec=6.325107528450415, CurrSamplesPerSec=5.706040414941849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:54:19,039] [INFO] [timer.py:197:stop] 0/1242, RunningAvgSamplesPerSec=6.325086624742035, CurrSamplesPerSec=5.682877212123439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:54:30,385] [INFO] [timer.py:197:stop] 0/1244, RunningAvgSamplesPerSec=6.325093076058267, CurrSamplesPerSec=5.716175051816756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:54:41,713] [INFO] [timer.py:197:stop] 0/1246, RunningAvgSamplesPerSec=6.3251015763933935, CurrSamplesPerSec=5.706320126491812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:54:53,045] [INFO] [timer.py:197:stop] 0/1248, RunningAvgSamplesPerSec=6.32512076106113, CurrSamplesPerSec=5.704397390468149, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:55:04,341] [INFO] [timer.py:197:stop] 0/1250, RunningAvgSamplesPerSec=6.325156152355773, CurrSamplesPerSec=5.70754361058976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:55:15,602] [INFO] [timer.py:197:stop] 0/1252, RunningAvgSamplesPerSec=6.325193247669528, CurrSamplesPerSec=5.702770819385449, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:55:26,935] [INFO] [timer.py:197:stop] 0/1254, RunningAvgSamplesPerSec=6.3252061087681595, CurrSamplesPerSec=5.711705001741585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0238, 'learning_rate': 9.733333333333334e-06, 'epoch': 4.7} +[2022-12-19 01:55:38,225] [INFO] [timer.py:197:stop] 0/1256, RunningAvgSamplesPerSec=6.325240632893343, CurrSamplesPerSec=5.71181389674396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:55:49,529] [INFO] [timer.py:197:stop] 0/1258, RunningAvgSamplesPerSec=6.32526863635693, CurrSamplesPerSec=5.699918139262459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:56:00,841] [INFO] [logging.py:68:log_dist] [Rank 0] step=630, skipped=6, lr=[9.726666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:56:00,843] [INFO] [timer.py:197:stop] 0/1260, RunningAvgSamplesPerSec=6.325295558883351, CurrSamplesPerSec=5.718118161105232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:56:12,501] [INFO] [timer.py:197:stop] 0/1262, RunningAvgSamplesPerSec=6.325251124463035, CurrSamplesPerSec=5.651728465684707, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:56:24,406] [INFO] [timer.py:197:stop] 0/1264, RunningAvgSamplesPerSec=6.325246251904512, CurrSamplesPerSec=5.6759393147153565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:56:36,290] [INFO] [timer.py:197:stop] 0/1266, RunningAvgSamplesPerSec=6.325183560356296, CurrSamplesPerSec=5.651439564595148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:56:47,872] [INFO] [timer.py:197:stop] 0/1268, RunningAvgSamplesPerSec=6.325224706749831, CurrSamplesPerSec=5.731369062859605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:56:59,366] [INFO] [timer.py:197:stop] 0/1270, RunningAvgSamplesPerSec=6.3252277813152284, CurrSamplesPerSec=5.700204997655028, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:57:10,635] [INFO] [timer.py:197:stop] 0/1272, RunningAvgSamplesPerSec=6.325287703851555, CurrSamplesPerSec=5.719149794917583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:57:22,023] [INFO] [timer.py:197:stop] 0/1274, RunningAvgSamplesPerSec=6.325255172337922, CurrSamplesPerSec=5.639411472787362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:57:33,373] [INFO] [timer.py:197:stop] 0/1276, RunningAvgSamplesPerSec=6.325258944356415, CurrSamplesPerSec=5.692668887277734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:57:44,711] [INFO] [timer.py:197:stop] 0/1278, RunningAvgSamplesPerSec=6.325280754930628, CurrSamplesPerSec=5.707937556946912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:57:56,186] [INFO] [logging.py:68:log_dist] [Rank 0] step=640, skipped=6, lr=[9.704444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:57:56,187] [INFO] [timer.py:197:stop] 0/1280, RunningAvgSamplesPerSec=6.32531295051093, CurrSamplesPerSec=5.706398246845059, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:58:07,621] [INFO] [timer.py:197:stop] 0/1282, RunningAvgSamplesPerSec=6.325295195887252, CurrSamplesPerSec=5.665619298721056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:58:19,018] [INFO] [timer.py:197:stop] 0/1284, RunningAvgSamplesPerSec=6.325301561560688, CurrSamplesPerSec=5.67833510528705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:58:30,307] [INFO] [timer.py:197:stop] 0/1286, RunningAvgSamplesPerSec=6.325317038768302, CurrSamplesPerSec=5.701475025697852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:58:41,597] [INFO] [timer.py:197:stop] 0/1288, RunningAvgSamplesPerSec=6.325362471717625, CurrSamplesPerSec=5.718513568619511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:58:52,942] [INFO] [timer.py:197:stop] 0/1290, RunningAvgSamplesPerSec=6.325373169120482, CurrSamplesPerSec=5.688314342368322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:59:04,385] [INFO] [timer.py:197:stop] 0/1292, RunningAvgSamplesPerSec=6.325377462346586, CurrSamplesPerSec=5.707404055455203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:59:15,888] [INFO] [timer.py:197:stop] 0/1294, RunningAvgSamplesPerSec=6.325391194090459, CurrSamplesPerSec=5.6950611841192424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:59:27,209] [INFO] [timer.py:197:stop] 0/1296, RunningAvgSamplesPerSec=6.325429844742659, CurrSamplesPerSec=5.7111965559310365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:59:38,744] [INFO] [timer.py:197:stop] 0/1298, RunningAvgSamplesPerSec=6.32535981605339, CurrSamplesPerSec=5.616333253130525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 01:59:50,192] [INFO] [logging.py:68:log_dist] [Rank 0] step=650, skipped=6, lr=[9.682222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-19 01:59:50,194] [INFO] [timer.py:197:stop] 0/1300, RunningAvgSamplesPerSec=6.325359474569833, CurrSamplesPerSec=5.683351267541748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:00:01,501] [INFO] [timer.py:197:stop] 0/1302, RunningAvgSamplesPerSec=6.3253607890454315, CurrSamplesPerSec=5.7196599016118315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:00:12,944] [INFO] [timer.py:197:stop] 0/1304, RunningAvgSamplesPerSec=6.325376961895668, CurrSamplesPerSec=5.705195624531413, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0239, 'learning_rate': 9.677777777777778e-06, 'epoch': 4.88} +[2022-12-19 02:00:24,284] [INFO] [timer.py:197:stop] 0/1306, RunningAvgSamplesPerSec=6.32538559015186, CurrSamplesPerSec=5.71115840184258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:00:35,691] [INFO] [timer.py:197:stop] 0/1308, RunningAvgSamplesPerSec=6.325362792814117, CurrSamplesPerSec=5.6685567243685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:00:46,991] [INFO] [timer.py:197:stop] 0/1310, RunningAvgSamplesPerSec=6.325388366129746, CurrSamplesPerSec=5.696379206853834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:00:58,317] [INFO] [timer.py:197:stop] 0/1312, RunningAvgSamplesPerSec=6.325374814942083, CurrSamplesPerSec=5.681701078114891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:01:09,785] [INFO] [timer.py:197:stop] 0/1314, RunningAvgSamplesPerSec=6.32534065932617, CurrSamplesPerSec=5.653513691700786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:01:21,161] [INFO] [timer.py:197:stop] 0/1316, RunningAvgSamplesPerSec=6.325353465877035, CurrSamplesPerSec=5.696653378070349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:01:32,470] [INFO] [timer.py:197:stop] 0/1318, RunningAvgSamplesPerSec=6.325380742160595, CurrSamplesPerSec=5.691755156750885, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:01:43,973] [INFO] [logging.py:68:log_dist] [Rank 0] step=660, skipped=6, lr=[9.66e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:01:43,975] [INFO] [timer.py:197:stop] 0/1320, RunningAvgSamplesPerSec=6.325358185107902, CurrSamplesPerSec=5.673640278438335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:01:55,322] [INFO] [timer.py:197:stop] 0/1322, RunningAvgSamplesPerSec=6.325343054374821, CurrSamplesPerSec=5.671462447813697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:02:06,772] [INFO] [timer.py:197:stop] 0/1324, RunningAvgSamplesPerSec=6.325350374130517, CurrSamplesPerSec=5.688341102134533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:02:18,127] [INFO] [timer.py:197:stop] 0/1326, RunningAvgSamplesPerSec=6.325337390775681, CurrSamplesPerSec=5.670405781393834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:02:29,572] [INFO] [timer.py:197:stop] 0/1328, RunningAvgSamplesPerSec=6.325329397145289, CurrSamplesPerSec=5.69145852899555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:02:40,951] [INFO] [timer.py:197:stop] 0/1330, RunningAvgSamplesPerSec=6.325278941667035, CurrSamplesPerSec=5.7062065889222895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:02:52,284] [INFO] [timer.py:197:stop] 0/1332, RunningAvgSamplesPerSec=6.325288222566702, CurrSamplesPerSec=5.693684594907549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:03:03,759] [INFO] [timer.py:197:stop] 0/1334, RunningAvgSamplesPerSec=6.325271247113316, CurrSamplesPerSec=5.685391329881376, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:03:14,161] [INFO] [timer.py:197:stop] 0/1336, RunningAvgSamplesPerSec=6.3260235870858965, CurrSamplesPerSec=5.679462741319367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:03:25,503] [INFO] [timer.py:197:stop] 0/1338, RunningAvgSamplesPerSec=6.326021446884541, CurrSamplesPerSec=5.683061530873832, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:03:36,882] [INFO] [logging.py:68:log_dist] [Rank 0] step=670, skipped=6, lr=[9.637777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:03:36,884] [INFO] [timer.py:197:stop] 0/1340, RunningAvgSamplesPerSec=6.326016064731757, CurrSamplesPerSec=5.696931686427287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:03:48,341] [INFO] [timer.py:197:stop] 0/1342, RunningAvgSamplesPerSec=6.326019465694644, CurrSamplesPerSec=5.689551581817069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:03:59,747] [INFO] [timer.py:197:stop] 0/1344, RunningAvgSamplesPerSec=6.325974472962758, CurrSamplesPerSec=5.6777204163106045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:04:11,186] [INFO] [timer.py:197:stop] 0/1346, RunningAvgSamplesPerSec=6.325981009495647, CurrSamplesPerSec=5.690097430683778, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:04:22,604] [INFO] [timer.py:197:stop] 0/1348, RunningAvgSamplesPerSec=6.325975026369539, CurrSamplesPerSec=5.696325536212535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:04:34,031] [INFO] [timer.py:197:stop] 0/1350, RunningAvgSamplesPerSec=6.325996807819703, CurrSamplesPerSec=5.719779825233946, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:04:45,388] [INFO] [timer.py:197:stop] 0/1352, RunningAvgSamplesPerSec=6.3259849220790665, CurrSamplesPerSec=5.685303187415706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:04:56,736] [INFO] [timer.py:197:stop] 0/1354, RunningAvgSamplesPerSec=6.3259316249294715, CurrSamplesPerSec=5.639803653185618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0204, 'learning_rate': 9.622222222222222e-06, 'epoch': 5.07} +[2022-12-19 02:05:08,029] [INFO] [timer.py:197:stop] 0/1356, RunningAvgSamplesPerSec=6.325972569272529, CurrSamplesPerSec=5.730181091217941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:05:19,358] [INFO] [timer.py:197:stop] 0/1358, RunningAvgSamplesPerSec=6.325991540198074, CurrSamplesPerSec=5.728088718850003, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:05:30,704] [INFO] [logging.py:68:log_dist] [Rank 0] step=680, skipped=6, lr=[9.615555555555558e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:05:30,706] [INFO] [timer.py:197:stop] 0/1360, RunningAvgSamplesPerSec=6.325995144047257, CurrSamplesPerSec=5.692229728913043, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:05:42,056] [INFO] [timer.py:197:stop] 0/1362, RunningAvgSamplesPerSec=6.325994995404326, CurrSamplesPerSec=5.705406374256594, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:05:53,345] [INFO] [timer.py:197:stop] 0/1364, RunningAvgSamplesPerSec=6.3260101476904165, CurrSamplesPerSec=5.704354235939349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:06:04,671] [INFO] [timer.py:197:stop] 0/1366, RunningAvgSamplesPerSec=6.32601471723045, CurrSamplesPerSec=5.703956420729281, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:06:15,986] [INFO] [timer.py:197:stop] 0/1368, RunningAvgSamplesPerSec=6.326009693836045, CurrSamplesPerSec=5.686034659980005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:06:27,308] [INFO] [timer.py:197:stop] 0/1370, RunningAvgSamplesPerSec=6.326006313683808, CurrSamplesPerSec=5.693818407473715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:06:38,658] [INFO] [timer.py:197:stop] 0/1372, RunningAvgSamplesPerSec=6.325990283395943, CurrSamplesPerSec=5.690380406376861, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:06:49,979] [INFO] [timer.py:197:stop] 0/1374, RunningAvgSamplesPerSec=6.325993190789163, CurrSamplesPerSec=5.689745981734343, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:07:01,326] [INFO] [timer.py:197:stop] 0/1376, RunningAvgSamplesPerSec=6.3259889124295565, CurrSamplesPerSec=5.703390459989499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:07:12,691] [INFO] [timer.py:197:stop] 0/1378, RunningAvgSamplesPerSec=6.325969040929874, CurrSamplesPerSec=5.681856456739923, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:07:24,025] [INFO] [logging.py:68:log_dist] [Rank 0] step=690, skipped=6, lr=[9.593333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:07:24,026] [INFO] [timer.py:197:stop] 0/1380, RunningAvgSamplesPerSec=6.325971148816176, CurrSamplesPerSec=5.686399864561237, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:07:35,332] [INFO] [timer.py:197:stop] 0/1382, RunningAvgSamplesPerSec=6.325999826421312, CurrSamplesPerSec=5.709484983995986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:07:46,616] [INFO] [timer.py:197:stop] 0/1384, RunningAvgSamplesPerSec=6.326018551744674, CurrSamplesPerSec=5.71132851954344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:07:57,958] [INFO] [timer.py:197:stop] 0/1386, RunningAvgSamplesPerSec=6.326009808995005, CurrSamplesPerSec=5.695271669099709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:08:09,275] [INFO] [timer.py:197:stop] 0/1388, RunningAvgSamplesPerSec=6.326024737831474, CurrSamplesPerSec=5.69747169670837, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:08:20,624] [INFO] [timer.py:197:stop] 0/1390, RunningAvgSamplesPerSec=6.326001563249644, CurrSamplesPerSec=5.673787781188459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:08:31,978] [INFO] [timer.py:197:stop] 0/1392, RunningAvgSamplesPerSec=6.326002101638358, CurrSamplesPerSec=5.704283444530918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:08:43,303] [INFO] [timer.py:197:stop] 0/1394, RunningAvgSamplesPerSec=6.326007086245346, CurrSamplesPerSec=5.709605209977455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:08:54,663] [INFO] [timer.py:197:stop] 0/1396, RunningAvgSamplesPerSec=6.325976996367318, CurrSamplesPerSec=5.67580754128076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:09:06,061] [INFO] [timer.py:197:stop] 0/1398, RunningAvgSamplesPerSec=6.3259534631195535, CurrSamplesPerSec=5.664270768711359, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:09:17,415] [INFO] [logging.py:68:log_dist] [Rank 0] step=700, skipped=6, lr=[9.571111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:09:17,417] [INFO] [timer.py:197:stop] 0/1400, RunningAvgSamplesPerSec=6.325942329797055, CurrSamplesPerSec=5.692385201091724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:09:28,768] [INFO] [timer.py:197:stop] 0/1402, RunningAvgSamplesPerSec=6.325922639188353, CurrSamplesPerSec=5.6779937551164466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:09:40,089] [INFO] [timer.py:197:stop] 0/1404, RunningAvgSamplesPerSec=6.3259300216188254, CurrSamplesPerSec=5.695464768316012, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0146, 'learning_rate': 9.566666666666668e-06, 'epoch': 5.26} +[2022-12-19 02:09:51,414] [INFO] [timer.py:197:stop] 0/1406, RunningAvgSamplesPerSec=6.325946166681824, CurrSamplesPerSec=5.699702953903435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:10:02,782] [INFO] [timer.py:197:stop] 0/1408, RunningAvgSamplesPerSec=6.325905383744529, CurrSamplesPerSec=5.637545394202437, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:10:14,134] [INFO] [timer.py:197:stop] 0/1410, RunningAvgSamplesPerSec=6.3258835375422535, CurrSamplesPerSec=5.680435275482285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:10:25,472] [INFO] [timer.py:197:stop] 0/1412, RunningAvgSamplesPerSec=6.325894852324781, CurrSamplesPerSec=5.713314299413726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:10:36,835] [INFO] [timer.py:197:stop] 0/1414, RunningAvgSamplesPerSec=6.325883353898903, CurrSamplesPerSec=5.686369268445658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:10:48,197] [INFO] [timer.py:197:stop] 0/1416, RunningAvgSamplesPerSec=6.325873910552596, CurrSamplesPerSec=5.69080697365038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:10:59,981] [INFO] [timer.py:197:stop] 0/1418, RunningAvgSamplesPerSec=6.325839118282026, CurrSamplesPerSec=5.662679420891574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:11:11,860] [INFO] [logging.py:68:log_dist] [Rank 0] step=710, skipped=6, lr=[9.54888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:11:11,861] [INFO] [timer.py:197:stop] 0/1420, RunningAvgSamplesPerSec=6.325789873694106, CurrSamplesPerSec=5.651609712814878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:11:23,685] [INFO] [timer.py:197:stop] 0/1422, RunningAvgSamplesPerSec=6.3257407152584335, CurrSamplesPerSec=5.660914904869334, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:11:35,206] [INFO] [timer.py:197:stop] 0/1424, RunningAvgSamplesPerSec=6.32572380394987, CurrSamplesPerSec=5.683606135095532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:11:46,531] [INFO] [timer.py:197:stop] 0/1426, RunningAvgSamplesPerSec=6.325727512231239, CurrSamplesPerSec=5.686158236490062, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:11:57,997] [INFO] [timer.py:197:stop] 0/1428, RunningAvgSamplesPerSec=6.325718381177641, CurrSamplesPerSec=5.67422889614679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:12:09,344] [INFO] [timer.py:197:stop] 0/1430, RunningAvgSamplesPerSec=6.325703349002757, CurrSamplesPerSec=5.684987968011892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:12:20,752] [INFO] [timer.py:197:stop] 0/1432, RunningAvgSamplesPerSec=6.325704946902877, CurrSamplesPerSec=5.7040150834697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:12:32,089] [INFO] [timer.py:197:stop] 0/1434, RunningAvgSamplesPerSec=6.325683975887975, CurrSamplesPerSec=5.679657413785873, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:12:43,432] [INFO] [timer.py:197:stop] 0/1436, RunningAvgSamplesPerSec=6.325679520445578, CurrSamplesPerSec=5.659775050705646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:12:54,930] [INFO] [timer.py:197:stop] 0/1438, RunningAvgSamplesPerSec=6.325646700713397, CurrSamplesPerSec=5.664626727825863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:13:06,289] [INFO] [logging.py:68:log_dist] [Rank 0] step=720, skipped=6, lr=[9.526666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:13:06,291] [INFO] [timer.py:197:stop] 0/1440, RunningAvgSamplesPerSec=6.325638282584947, CurrSamplesPerSec=5.6699123260919695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:13:17,859] [INFO] [timer.py:197:stop] 0/1442, RunningAvgSamplesPerSec=6.3255749779462835, CurrSamplesPerSec=5.617487649751459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:13:29,233] [INFO] [timer.py:197:stop] 0/1444, RunningAvgSamplesPerSec=6.325593953770169, CurrSamplesPerSec=5.722883500338872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:13:40,683] [INFO] [timer.py:197:stop] 0/1446, RunningAvgSamplesPerSec=6.325594187741323, CurrSamplesPerSec=5.69692636663818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:13:52,154] [INFO] [timer.py:197:stop] 0/1448, RunningAvgSamplesPerSec=6.325575935958007, CurrSamplesPerSec=5.674743976856191, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:14:03,517] [INFO] [timer.py:197:stop] 0/1450, RunningAvgSamplesPerSec=6.3255565955518085, CurrSamplesPerSec=5.663227533152615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:14:14,843] [INFO] [timer.py:197:stop] 0/1452, RunningAvgSamplesPerSec=6.325584850421658, CurrSamplesPerSec=5.7131411451538705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:14:26,207] [INFO] [timer.py:197:stop] 0/1454, RunningAvgSamplesPerSec=6.325590370650787, CurrSamplesPerSec=5.703681788492748, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0159, 'learning_rate': 9.511111111111112e-06, 'epoch': 5.45} +[2022-12-19 02:14:37,594] [INFO] [timer.py:197:stop] 0/1456, RunningAvgSamplesPerSec=6.325554718829132, CurrSamplesPerSec=5.652222093218099, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:14:49,097] [INFO] [timer.py:197:stop] 0/1458, RunningAvgSamplesPerSec=6.325545425297475, CurrSamplesPerSec=5.662100362617642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:15:00,498] [INFO] [logging.py:68:log_dist] [Rank 0] step=730, skipped=6, lr=[9.504444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:15:00,500] [INFO] [timer.py:197:stop] 0/1460, RunningAvgSamplesPerSec=6.325526949863666, CurrSamplesPerSec=5.670537543696346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:15:11,884] [INFO] [timer.py:197:stop] 0/1462, RunningAvgSamplesPerSec=6.325526579423683, CurrSamplesPerSec=5.691162414794898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:15:23,188] [INFO] [timer.py:197:stop] 0/1464, RunningAvgSamplesPerSec=6.325554018392089, CurrSamplesPerSec=5.703167499876349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:15:34,499] [INFO] [timer.py:197:stop] 0/1466, RunningAvgSamplesPerSec=6.325567539019158, CurrSamplesPerSec=5.6901553262336755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:15:45,974] [INFO] [timer.py:197:stop] 0/1468, RunningAvgSamplesPerSec=6.325562001152822, CurrSamplesPerSec=5.687878024946484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:15:57,285] [INFO] [timer.py:197:stop] 0/1470, RunningAvgSamplesPerSec=6.325575268581615, CurrSamplesPerSec=5.701323658051909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:16:08,677] [INFO] [timer.py:197:stop] 0/1472, RunningAvgSamplesPerSec=6.325601845452154, CurrSamplesPerSec=5.720548479549482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:16:20,033] [INFO] [timer.py:197:stop] 0/1474, RunningAvgSamplesPerSec=6.325589536878379, CurrSamplesPerSec=5.676665498493852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:16:31,533] [INFO] [timer.py:197:stop] 0/1476, RunningAvgSamplesPerSec=6.3255580968111, CurrSamplesPerSec=5.661283575845675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:16:42,888] [INFO] [timer.py:197:stop] 0/1478, RunningAvgSamplesPerSec=6.325540940915712, CurrSamplesPerSec=5.686236528481549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:16:54,233] [INFO] [logging.py:68:log_dist] [Rank 0] step=740, skipped=6, lr=[9.482222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:16:54,234] [INFO] [timer.py:197:stop] 0/1480, RunningAvgSamplesPerSec=6.325526796553384, CurrSamplesPerSec=5.688321092555874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:17:05,614] [INFO] [timer.py:197:stop] 0/1482, RunningAvgSamplesPerSec=6.325530489114055, CurrSamplesPerSec=5.709013601268204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:17:17,036] [INFO] [timer.py:197:stop] 0/1484, RunningAvgSamplesPerSec=6.325499610822527, CurrSamplesPerSec=5.665652541876707, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:17:28,392] [INFO] [timer.py:197:stop] 0/1486, RunningAvgSamplesPerSec=6.325476841720645, CurrSamplesPerSec=5.678753621525439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:17:39,881] [INFO] [timer.py:197:stop] 0/1488, RunningAvgSamplesPerSec=6.325445538504084, CurrSamplesPerSec=5.680460999517352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:17:51,238] [INFO] [timer.py:197:stop] 0/1490, RunningAvgSamplesPerSec=6.32541570068041, CurrSamplesPerSec=5.679860031525407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:18:02,709] [INFO] [timer.py:197:stop] 0/1492, RunningAvgSamplesPerSec=6.325397274117322, CurrSamplesPerSec=5.680096076587714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:18:14,212] [INFO] [timer.py:197:stop] 0/1494, RunningAvgSamplesPerSec=6.325386249492268, CurrSamplesPerSec=5.668855518860688, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:18:25,564] [INFO] [timer.py:197:stop] 0/1496, RunningAvgSamplesPerSec=6.3253606197968955, CurrSamplesPerSec=5.662164616955378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:18:36,886] [INFO] [timer.py:197:stop] 0/1498, RunningAvgSamplesPerSec=6.325354537006481, CurrSamplesPerSec=5.6885477152923105, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:18:48,265] [INFO] [logging.py:68:log_dist] [Rank 0] step=750, skipped=6, lr=[9.460000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:18:48,266] [INFO] [timer.py:197:stop] 0/1500, RunningAvgSamplesPerSec=6.325339923746025, CurrSamplesPerSec=5.6820378227258015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:18:59,749] [INFO] [timer.py:197:stop] 0/1502, RunningAvgSamplesPerSec=6.3253589293965415, CurrSamplesPerSec=5.704349387156692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:19:11,076] [INFO] [timer.py:197:stop] 0/1504, RunningAvgSamplesPerSec=6.32537638650134, CurrSamplesPerSec=5.70284302711964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0167, 'learning_rate': 9.455555555555557e-06, 'epoch': 5.64} +[2022-12-19 02:19:22,392] [INFO] [timer.py:197:stop] 0/1506, RunningAvgSamplesPerSec=6.325383304052213, CurrSamplesPerSec=5.7053251282720945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:19:33,733] [INFO] [timer.py:197:stop] 0/1508, RunningAvgSamplesPerSec=6.325384086421241, CurrSamplesPerSec=5.697731460462139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:19:45,058] [INFO] [timer.py:197:stop] 0/1510, RunningAvgSamplesPerSec=6.325395211325426, CurrSamplesPerSec=5.700483895265787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:19:56,375] [INFO] [timer.py:197:stop] 0/1512, RunningAvgSamplesPerSec=6.325385845716689, CurrSamplesPerSec=5.689374559205892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:20:07,694] [INFO] [timer.py:197:stop] 0/1514, RunningAvgSamplesPerSec=6.325411697148107, CurrSamplesPerSec=5.711216240723798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:20:18,967] [INFO] [timer.py:197:stop] 0/1516, RunningAvgSamplesPerSec=6.325465119639401, CurrSamplesPerSec=5.7294550927893315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:20:30,284] [INFO] [timer.py:197:stop] 0/1518, RunningAvgSamplesPerSec=6.325485745584025, CurrSamplesPerSec=5.713452684450866, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:20:41,635] [INFO] [logging.py:68:log_dist] [Rank 0] step=760, skipped=6, lr=[9.437777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:20:41,636] [INFO] [timer.py:197:stop] 0/1520, RunningAvgSamplesPerSec=6.325484895874861, CurrSamplesPerSec=5.702245308780833, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:20:52,950] [INFO] [timer.py:197:stop] 0/1522, RunningAvgSamplesPerSec=6.325485427164552, CurrSamplesPerSec=5.687078122844323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:21:04,380] [INFO] [timer.py:197:stop] 0/1524, RunningAvgSamplesPerSec=6.3254233996255325, CurrSamplesPerSec=5.605694566585853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:21:15,732] [INFO] [timer.py:197:stop] 0/1526, RunningAvgSamplesPerSec=6.325413754162813, CurrSamplesPerSec=5.679617757201075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:21:27,149] [INFO] [timer.py:197:stop] 0/1528, RunningAvgSamplesPerSec=6.3253569762311175, CurrSamplesPerSec=5.6295462648154535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:21:38,521] [INFO] [timer.py:197:stop] 0/1530, RunningAvgSamplesPerSec=6.325317329565446, CurrSamplesPerSec=5.651307498563991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:21:49,868] [INFO] [timer.py:197:stop] 0/1532, RunningAvgSamplesPerSec=6.325315798593318, CurrSamplesPerSec=5.692157065480932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:22:01,206] [INFO] [timer.py:197:stop] 0/1534, RunningAvgSamplesPerSec=6.3253135865072885, CurrSamplesPerSec=5.700478810950541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:22:12,512] [INFO] [timer.py:197:stop] 0/1536, RunningAvgSamplesPerSec=6.3253266475622505, CurrSamplesPerSec=5.704682275228334, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:22:23,812] [INFO] [timer.py:197:stop] 0/1538, RunningAvgSamplesPerSec=6.3253494266803765, CurrSamplesPerSec=5.706959708828725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:22:35,107] [INFO] [logging.py:68:log_dist] [Rank 0] step=770, skipped=6, lr=[9.415555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:22:35,109] [INFO] [timer.py:197:stop] 0/1540, RunningAvgSamplesPerSec=6.325347090700444, CurrSamplesPerSec=5.67853570669287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:22:46,408] [INFO] [timer.py:197:stop] 0/1542, RunningAvgSamplesPerSec=6.325361356934764, CurrSamplesPerSec=5.700227269725416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:22:57,762] [INFO] [timer.py:197:stop] 0/1544, RunningAvgSamplesPerSec=6.3253531798553215, CurrSamplesPerSec=5.672072906007402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:23:09,175] [INFO] [timer.py:197:stop] 0/1546, RunningAvgSamplesPerSec=6.325296778479612, CurrSamplesPerSec=5.653853771898734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:23:20,512] [INFO] [timer.py:197:stop] 0/1548, RunningAvgSamplesPerSec=6.325296549805419, CurrSamplesPerSec=5.685531978164002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:23:31,873] [INFO] [timer.py:197:stop] 0/1550, RunningAvgSamplesPerSec=6.3252743759482435, CurrSamplesPerSec=5.680035260657811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:23:43,186] [INFO] [timer.py:197:stop] 0/1552, RunningAvgSamplesPerSec=6.325286683540324, CurrSamplesPerSec=5.696729299633806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:23:54,504] [INFO] [timer.py:197:stop] 0/1554, RunningAvgSamplesPerSec=6.325283556792846, CurrSamplesPerSec=5.6884356071073165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0154, 'learning_rate': 9.4e-06, 'epoch': 5.82} +[2022-12-19 02:24:05,796] [INFO] [timer.py:197:stop] 0/1556, RunningAvgSamplesPerSec=6.325296142420349, CurrSamplesPerSec=5.687520583816793, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:24:17,110] [INFO] [timer.py:197:stop] 0/1558, RunningAvgSamplesPerSec=6.325307812381287, CurrSamplesPerSec=5.722122756745596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:24:28,435] [INFO] [logging.py:68:log_dist] [Rank 0] step=780, skipped=6, lr=[9.393333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:24:28,437] [INFO] [timer.py:197:stop] 0/1560, RunningAvgSamplesPerSec=6.32531039592822, CurrSamplesPerSec=5.6997230436373805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:24:39,792] [INFO] [timer.py:197:stop] 0/1562, RunningAvgSamplesPerSec=6.325303100967628, CurrSamplesPerSec=5.681589480135922, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:24:51,121] [INFO] [timer.py:197:stop] 0/1564, RunningAvgSamplesPerSec=6.325310072132719, CurrSamplesPerSec=5.683654030605526, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:25:02,518] [INFO] [timer.py:197:stop] 0/1566, RunningAvgSamplesPerSec=6.325255663678181, CurrSamplesPerSec=5.686427569989322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:25:13,851] [INFO] [timer.py:197:stop] 0/1568, RunningAvgSamplesPerSec=6.325272184808881, CurrSamplesPerSec=5.71290112985152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:25:25,186] [INFO] [timer.py:197:stop] 0/1570, RunningAvgSamplesPerSec=6.325260929817044, CurrSamplesPerSec=5.673587275258184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:25:36,601] [INFO] [timer.py:197:stop] 0/1572, RunningAvgSamplesPerSec=6.325239951309191, CurrSamplesPerSec=5.655233326143737, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:25:48,497] [INFO] [timer.py:197:stop] 0/1574, RunningAvgSamplesPerSec=6.325200312012156, CurrSamplesPerSec=5.666222758598824, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:26:00,277] [INFO] [timer.py:197:stop] 0/1576, RunningAvgSamplesPerSec=6.325160012079392, CurrSamplesPerSec=5.681873775007791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:26:11,749] [INFO] [timer.py:197:stop] 0/1578, RunningAvgSamplesPerSec=6.325158573437108, CurrSamplesPerSec=5.71841757433845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:26:23,254] [INFO] [logging.py:68:log_dist] [Rank 0] step=790, skipped=6, lr=[9.371111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:26:23,255] [INFO] [timer.py:197:stop] 0/1580, RunningAvgSamplesPerSec=6.325146316912055, CurrSamplesPerSec=5.688942660616274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:26:34,595] [INFO] [timer.py:197:stop] 0/1582, RunningAvgSamplesPerSec=6.325152542720042, CurrSamplesPerSec=5.693014419026782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:26:46,045] [INFO] [timer.py:197:stop] 0/1584, RunningAvgSamplesPerSec=6.325102719512724, CurrSamplesPerSec=5.619288477684616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:26:57,460] [INFO] [timer.py:197:stop] 0/1586, RunningAvgSamplesPerSec=6.325082437519121, CurrSamplesPerSec=5.684310208163963, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:27:08,788] [INFO] [timer.py:197:stop] 0/1588, RunningAvgSamplesPerSec=6.325094838420507, CurrSamplesPerSec=5.706207316712834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:27:20,158] [INFO] [timer.py:197:stop] 0/1590, RunningAvgSamplesPerSec=6.3251074104796, CurrSamplesPerSec=5.709533073780991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:27:31,680] [INFO] [timer.py:197:stop] 0/1592, RunningAvgSamplesPerSec=6.325076136652226, CurrSamplesPerSec=5.664126150690167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:27:43,015] [INFO] [timer.py:197:stop] 0/1594, RunningAvgSamplesPerSec=6.325066510248264, CurrSamplesPerSec=5.6844806561751735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:27:54,382] [INFO] [timer.py:197:stop] 0/1596, RunningAvgSamplesPerSec=6.325067674676402, CurrSamplesPerSec=5.705342104853738, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:28:05,781] [INFO] [timer.py:197:stop] 0/1598, RunningAvgSamplesPerSec=6.325027181698902, CurrSamplesPerSec=5.651239921216458, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:28:17,146] [INFO] [logging.py:68:log_dist] [Rank 0] step=800, skipped=6, lr=[9.348888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:28:17,148] [INFO] [timer.py:197:stop] 0/1600, RunningAvgSamplesPerSec=6.32501249041159, CurrSamplesPerSec=5.68100558633207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:28:27,665] [INFO] [timer.py:197:stop] 0/1602, RunningAvgSamplesPerSec=6.325645315988089, CurrSamplesPerSec=6.654803377278157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:28:39,158] [INFO] [timer.py:197:stop] 0/1604, RunningAvgSamplesPerSec=6.325621578162325, CurrSamplesPerSec=5.670118321260013, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:28:50,557] [INFO] [timer.py:197:stop] 0/1606, RunningAvgSamplesPerSec=6.325600692649266, CurrSamplesPerSec=5.672696442416951, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0149, 'learning_rate': 9.342222222222223e-06, 'epoch': 6.01} +[2022-12-19 02:29:01,903] [INFO] [timer.py:197:stop] 0/1608, RunningAvgSamplesPerSec=6.325625393612252, CurrSamplesPerSec=5.689381794246565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:29:13,354] [INFO] [timer.py:197:stop] 0/1610, RunningAvgSamplesPerSec=6.325636374601636, CurrSamplesPerSec=5.696881390636447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:29:24,697] [INFO] [timer.py:197:stop] 0/1612, RunningAvgSamplesPerSec=6.325636938945637, CurrSamplesPerSec=5.701277643880583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:29:36,257] [INFO] [timer.py:197:stop] 0/1614, RunningAvgSamplesPerSec=6.325583033474992, CurrSamplesPerSec=5.637129615058002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:29:47,627] [INFO] [timer.py:197:stop] 0/1616, RunningAvgSamplesPerSec=6.3255813139357375, CurrSamplesPerSec=5.677452147134369, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:29:58,959] [INFO] [timer.py:197:stop] 0/1618, RunningAvgSamplesPerSec=6.3255863262793675, CurrSamplesPerSec=5.705448817108916, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:30:10,421] [INFO] [logging.py:68:log_dist] [Rank 0] step=810, skipped=6, lr=[9.326666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:30:10,423] [INFO] [timer.py:197:stop] 0/1620, RunningAvgSamplesPerSec=6.3255948828713215, CurrSamplesPerSec=5.700696718310479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:30:21,722] [INFO] [timer.py:197:stop] 0/1622, RunningAvgSamplesPerSec=6.325620317873395, CurrSamplesPerSec=5.722774182850378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:30:33,099] [INFO] [timer.py:197:stop] 0/1624, RunningAvgSamplesPerSec=6.325656950420237, CurrSamplesPerSec=5.715720332420203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:30:44,564] [INFO] [timer.py:197:stop] 0/1626, RunningAvgSamplesPerSec=6.325680769220093, CurrSamplesPerSec=5.704646147840817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:30:55,879] [INFO] [timer.py:197:stop] 0/1628, RunningAvgSamplesPerSec=6.325698925432605, CurrSamplesPerSec=5.698962153104518, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:31:07,227] [INFO] [timer.py:197:stop] 0/1630, RunningAvgSamplesPerSec=6.325683709263267, CurrSamplesPerSec=5.670111374667464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:31:18,597] [INFO] [timer.py:197:stop] 0/1632, RunningAvgSamplesPerSec=6.325685383388617, CurrSamplesPerSec=5.673938889519243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:31:29,945] [INFO] [timer.py:197:stop] 0/1634, RunningAvgSamplesPerSec=6.3256888907837325, CurrSamplesPerSec=5.695137304905255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:31:41,475] [INFO] [timer.py:197:stop] 0/1636, RunningAvgSamplesPerSec=6.325681431225448, CurrSamplesPerSec=5.6871410175103945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:31:52,726] [INFO] [timer.py:197:stop] 0/1638, RunningAvgSamplesPerSec=6.3257232715578295, CurrSamplesPerSec=5.726327185825193, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:32:04,025] [INFO] [logging.py:68:log_dist] [Rank 0] step=820, skipped=6, lr=[9.304444444444444e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:32:04,027] [INFO] [timer.py:197:stop] 0/1640, RunningAvgSamplesPerSec=6.325740082961842, CurrSamplesPerSec=5.7176721446971275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:32:15,501] [INFO] [timer.py:197:stop] 0/1642, RunningAvgSamplesPerSec=6.325819022242892, CurrSamplesPerSec=5.753343860876287, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:32:26,796] [INFO] [timer.py:197:stop] 0/1644, RunningAvgSamplesPerSec=6.325840567805974, CurrSamplesPerSec=5.710349266393622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:32:38,126] [INFO] [timer.py:197:stop] 0/1646, RunningAvgSamplesPerSec=6.325834868772187, CurrSamplesPerSec=5.6911592776455855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:32:49,590] [INFO] [timer.py:197:stop] 0/1648, RunningAvgSamplesPerSec=6.325838228256779, CurrSamplesPerSec=5.704953608956427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:33:00,960] [INFO] [timer.py:197:stop] 0/1650, RunningAvgSamplesPerSec=6.325823450903929, CurrSamplesPerSec=5.680258819690628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:33:12,573] [INFO] [timer.py:197:stop] 0/1652, RunningAvgSamplesPerSec=6.325804218173235, CurrSamplesPerSec=5.679047484656009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:33:23,893] [INFO] [timer.py:197:stop] 0/1654, RunningAvgSamplesPerSec=6.32579980520833, CurrSamplesPerSec=5.700284645317121, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:33:35,313] [INFO] [timer.py:197:stop] 0/1656, RunningAvgSamplesPerSec=6.325790308407279, CurrSamplesPerSec=5.678851172062457, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0107, 'learning_rate': 9.286666666666667e-06, 'epoch': 6.2} +[2022-12-19 02:33:46,832] [INFO] [timer.py:197:stop] 0/1658, RunningAvgSamplesPerSec=6.325774841378826, CurrSamplesPerSec=5.688787376425617, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:33:58,384] [INFO] [logging.py:68:log_dist] [Rank 0] step=830, skipped=6, lr=[9.282222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:33:58,386] [INFO] [timer.py:197:stop] 0/1660, RunningAvgSamplesPerSec=6.325765133165544, CurrSamplesPerSec=5.691613234976137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:34:09,852] [INFO] [timer.py:197:stop] 0/1662, RunningAvgSamplesPerSec=6.325790063809963, CurrSamplesPerSec=5.723681058742841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:34:21,445] [INFO] [timer.py:197:stop] 0/1664, RunningAvgSamplesPerSec=6.325726072017138, CurrSamplesPerSec=5.675937394475658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:34:32,936] [INFO] [timer.py:197:stop] 0/1666, RunningAvgSamplesPerSec=6.325747487353222, CurrSamplesPerSec=5.727400154986362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:34:44,481] [INFO] [timer.py:197:stop] 0/1668, RunningAvgSamplesPerSec=6.325736716605278, CurrSamplesPerSec=5.686584411893967, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:34:55,846] [INFO] [timer.py:197:stop] 0/1670, RunningAvgSamplesPerSec=6.325709496133252, CurrSamplesPerSec=5.670677218422788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:35:07,233] [INFO] [timer.py:197:stop] 0/1672, RunningAvgSamplesPerSec=6.325692081907887, CurrSamplesPerSec=5.669443623098537, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:35:18,607] [INFO] [timer.py:197:stop] 0/1674, RunningAvgSamplesPerSec=6.325669740927831, CurrSamplesPerSec=5.670831755082847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:35:30,032] [INFO] [timer.py:197:stop] 0/1676, RunningAvgSamplesPerSec=6.32565147997511, CurrSamplesPerSec=5.685375916794034, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:35:41,367] [INFO] [timer.py:197:stop] 0/1678, RunningAvgSamplesPerSec=6.3256527981282185, CurrSamplesPerSec=5.694455431858697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:35:52,673] [INFO] [logging.py:68:log_dist] [Rank 0] step=840, skipped=6, lr=[9.260000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:35:52,675] [INFO] [timer.py:197:stop] 0/1680, RunningAvgSamplesPerSec=6.3256940710872165, CurrSamplesPerSec=5.719125912553076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:36:03,996] [INFO] [timer.py:197:stop] 0/1682, RunningAvgSamplesPerSec=6.325705088917455, CurrSamplesPerSec=5.707339741038364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:36:15,447] [INFO] [timer.py:197:stop] 0/1684, RunningAvgSamplesPerSec=6.325731287157256, CurrSamplesPerSec=5.697480645345056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:36:26,815] [INFO] [timer.py:197:stop] 0/1686, RunningAvgSamplesPerSec=6.325751333721884, CurrSamplesPerSec=5.714498693268473, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:36:38,126] [INFO] [timer.py:197:stop] 0/1688, RunningAvgSamplesPerSec=6.325778904785094, CurrSamplesPerSec=5.72610511603857, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:36:49,425] [INFO] [timer.py:197:stop] 0/1690, RunningAvgSamplesPerSec=6.325807550331122, CurrSamplesPerSec=5.7276685212131735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:37:00,693] [INFO] [timer.py:197:stop] 0/1692, RunningAvgSamplesPerSec=6.325853586564008, CurrSamplesPerSec=5.729620432047673, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:37:12,023] [INFO] [timer.py:197:stop] 0/1694, RunningAvgSamplesPerSec=6.325859705877085, CurrSamplesPerSec=5.695849314014534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:37:23,290] [INFO] [timer.py:197:stop] 0/1696, RunningAvgSamplesPerSec=6.325888818823823, CurrSamplesPerSec=5.716556555853943, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:37:34,576] [INFO] [timer.py:197:stop] 0/1698, RunningAvgSamplesPerSec=6.3258855149655515, CurrSamplesPerSec=5.69654409325819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:37:45,931] [INFO] [logging.py:68:log_dist] [Rank 0] step=850, skipped=6, lr=[9.237777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:37:45,933] [INFO] [timer.py:197:stop] 0/1700, RunningAvgSamplesPerSec=6.32589653935007, CurrSamplesPerSec=5.69615606933963, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:37:57,348] [INFO] [timer.py:197:stop] 0/1702, RunningAvgSamplesPerSec=6.325929302907298, CurrSamplesPerSec=5.716632278410412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:38:08,900] [INFO] [timer.py:197:stop] 0/1704, RunningAvgSamplesPerSec=6.325942865495223, CurrSamplesPerSec=5.695347312163867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:38:20,206] [INFO] [timer.py:197:stop] 0/1706, RunningAvgSamplesPerSec=6.325962562735163, CurrSamplesPerSec=5.703012165144726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0107, 'learning_rate': 9.231111111111111e-06, 'epoch': 6.39} +[2022-12-19 02:38:31,605] [INFO] [timer.py:197:stop] 0/1708, RunningAvgSamplesPerSec=6.325945546495006, CurrSamplesPerSec=5.682345497873678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:38:43,024] [INFO] [timer.py:197:stop] 0/1710, RunningAvgSamplesPerSec=6.3259615202519575, CurrSamplesPerSec=5.694182438465663, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:38:54,319] [INFO] [timer.py:197:stop] 0/1712, RunningAvgSamplesPerSec=6.326003940697372, CurrSamplesPerSec=5.730686805961176, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:39:05,779] [INFO] [timer.py:197:stop] 0/1714, RunningAvgSamplesPerSec=6.326035083565533, CurrSamplesPerSec=5.728669372586125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:39:17,081] [INFO] [timer.py:197:stop] 0/1716, RunningAvgSamplesPerSec=6.326062416380829, CurrSamplesPerSec=5.712293035463973, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:39:28,435] [INFO] [timer.py:197:stop] 0/1718, RunningAvgSamplesPerSec=6.326086066699801, CurrSamplesPerSec=5.708741395591852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:39:39,741] [INFO] [logging.py:68:log_dist] [Rank 0] step=860, skipped=6, lr=[9.215555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:39:39,742] [INFO] [timer.py:197:stop] 0/1720, RunningAvgSamplesPerSec=6.326086014131042, CurrSamplesPerSec=5.68582293025891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:39:51,073] [INFO] [timer.py:197:stop] 0/1722, RunningAvgSamplesPerSec=6.32609881723683, CurrSamplesPerSec=5.704984162896505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:40:02,377] [INFO] [timer.py:197:stop] 0/1724, RunningAvgSamplesPerSec=6.326118499130401, CurrSamplesPerSec=5.690084645575323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:40:13,672] [INFO] [timer.py:197:stop] 0/1726, RunningAvgSamplesPerSec=6.326140877664618, CurrSamplesPerSec=5.721628796504794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:40:24,889] [INFO] [timer.py:197:stop] 0/1728, RunningAvgSamplesPerSec=6.32619310797244, CurrSamplesPerSec=5.738877753763001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:40:36,201] [INFO] [timer.py:197:stop] 0/1730, RunningAvgSamplesPerSec=6.326237576628503, CurrSamplesPerSec=5.728241266578177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:40:47,473] [INFO] [timer.py:197:stop] 0/1732, RunningAvgSamplesPerSec=6.326262172904851, CurrSamplesPerSec=5.730709806237929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:40:58,837] [INFO] [timer.py:197:stop] 0/1734, RunningAvgSamplesPerSec=6.326255019400872, CurrSamplesPerSec=5.733883656497388, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:41:10,136] [INFO] [timer.py:197:stop] 0/1736, RunningAvgSamplesPerSec=6.326269407190393, CurrSamplesPerSec=5.707664482986645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:41:21,517] [INFO] [timer.py:197:stop] 0/1738, RunningAvgSamplesPerSec=6.326293877908873, CurrSamplesPerSec=5.706402613884454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:41:32,827] [INFO] [logging.py:68:log_dist] [Rank 0] step=870, skipped=6, lr=[9.193333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:41:32,828] [INFO] [timer.py:197:stop] 0/1740, RunningAvgSamplesPerSec=6.3262987345584145, CurrSamplesPerSec=5.704881347765542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:41:44,314] [INFO] [timer.py:197:stop] 0/1742, RunningAvgSamplesPerSec=6.32632362263264, CurrSamplesPerSec=5.731097413139867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:41:55,637] [INFO] [timer.py:197:stop] 0/1744, RunningAvgSamplesPerSec=6.32633615446047, CurrSamplesPerSec=5.703901637606746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:42:07,101] [INFO] [timer.py:197:stop] 0/1746, RunningAvgSamplesPerSec=6.326362263159016, CurrSamplesPerSec=5.713981479183356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:42:18,482] [INFO] [timer.py:197:stop] 0/1748, RunningAvgSamplesPerSec=6.326385100761066, CurrSamplesPerSec=5.709265918559529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:42:29,775] [INFO] [timer.py:197:stop] 0/1750, RunningAvgSamplesPerSec=6.3263899011333615, CurrSamplesPerSec=5.690672578757388, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:42:41,118] [INFO] [timer.py:197:stop] 0/1752, RunningAvgSamplesPerSec=6.326382952351564, CurrSamplesPerSec=5.673505973662093, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:42:52,555] [INFO] [timer.py:197:stop] 0/1754, RunningAvgSamplesPerSec=6.326335881692181, CurrSamplesPerSec=5.618049387935346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:43:03,893] [INFO] [timer.py:197:stop] 0/1756, RunningAvgSamplesPerSec=6.326334393712957, CurrSamplesPerSec=5.6876541068041035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0108, 'learning_rate': 9.175555555555557e-06, 'epoch': 6.58} +[2022-12-19 02:43:15,226] [INFO] [timer.py:197:stop] 0/1758, RunningAvgSamplesPerSec=6.3263368233047546, CurrSamplesPerSec=5.693883625263997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:43:26,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=880, skipped=6, lr=[9.171111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:43:26,562] [INFO] [timer.py:197:stop] 0/1760, RunningAvgSamplesPerSec=6.3263320887501004, CurrSamplesPerSec=5.6971232054522165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:43:37,884] [INFO] [timer.py:197:stop] 0/1762, RunningAvgSamplesPerSec=6.326337614422194, CurrSamplesPerSec=5.689881056495939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:43:49,200] [INFO] [timer.py:197:stop] 0/1764, RunningAvgSamplesPerSec=6.326341517798706, CurrSamplesPerSec=5.6905075495471955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:44:00,621] [INFO] [timer.py:197:stop] 0/1766, RunningAvgSamplesPerSec=6.326333758352931, CurrSamplesPerSec=5.678917729401463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:44:12,101] [INFO] [timer.py:197:stop] 0/1768, RunningAvgSamplesPerSec=6.32628541309422, CurrSamplesPerSec=5.621093756380219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:44:23,452] [INFO] [timer.py:197:stop] 0/1770, RunningAvgSamplesPerSec=6.3262677174406745, CurrSamplesPerSec=5.67610229979496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:44:34,782] [INFO] [timer.py:197:stop] 0/1772, RunningAvgSamplesPerSec=6.326270866523503, CurrSamplesPerSec=5.692841768640046, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:44:46,153] [INFO] [timer.py:197:stop] 0/1774, RunningAvgSamplesPerSec=6.326288454194321, CurrSamplesPerSec=5.708204830629907, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:44:57,488] [INFO] [timer.py:197:stop] 0/1776, RunningAvgSamplesPerSec=6.326286738967611, CurrSamplesPerSec=5.691912051303222, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:45:08,815] [INFO] [timer.py:197:stop] 0/1778, RunningAvgSamplesPerSec=6.326285530290613, CurrSamplesPerSec=5.703175254718095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:45:20,177] [INFO] [logging.py:68:log_dist] [Rank 0] step=890, skipped=6, lr=[9.14888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:45:20,178] [INFO] [timer.py:197:stop] 0/1780, RunningAvgSamplesPerSec=6.3263004923385475, CurrSamplesPerSec=5.714217936093594, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:45:31,559] [INFO] [timer.py:197:stop] 0/1782, RunningAvgSamplesPerSec=6.326297856203596, CurrSamplesPerSec=5.6967970020979015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:45:43,019] [INFO] [timer.py:197:stop] 0/1784, RunningAvgSamplesPerSec=6.326290658807318, CurrSamplesPerSec=5.692538508838428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:45:54,358] [INFO] [timer.py:197:stop] 0/1786, RunningAvgSamplesPerSec=6.3262856842098865, CurrSamplesPerSec=5.697424777289617, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:46:05,658] [INFO] [timer.py:197:stop] 0/1788, RunningAvgSamplesPerSec=6.326306571710764, CurrSamplesPerSec=5.718891486053244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:46:17,122] [INFO] [timer.py:197:stop] 0/1790, RunningAvgSamplesPerSec=6.326285683438104, CurrSamplesPerSec=5.663022994110601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:46:28,493] [INFO] [timer.py:197:stop] 0/1792, RunningAvgSamplesPerSec=6.326273363068191, CurrSamplesPerSec=5.679099869012911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:46:39,834] [INFO] [timer.py:197:stop] 0/1794, RunningAvgSamplesPerSec=6.326264734650878, CurrSamplesPerSec=5.688326637364776, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:46:51,160] [INFO] [timer.py:197:stop] 0/1796, RunningAvgSamplesPerSec=6.326274303607092, CurrSamplesPerSec=5.713274171438841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:47:02,510] [INFO] [timer.py:197:stop] 0/1798, RunningAvgSamplesPerSec=6.326277232252197, CurrSamplesPerSec=5.703496856883192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:47:13,952] [INFO] [logging.py:68:log_dist] [Rank 0] step=900, skipped=6, lr=[9.126666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:47:13,954] [INFO] [timer.py:197:stop] 0/1800, RunningAvgSamplesPerSec=6.326290437167872, CurrSamplesPerSec=5.705799297939293, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:47:25,315] [INFO] [timer.py:197:stop] 0/1802, RunningAvgSamplesPerSec=6.326289168268624, CurrSamplesPerSec=5.698488635203304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:47:36,792] [INFO] [timer.py:197:stop] 0/1804, RunningAvgSamplesPerSec=6.32627364734703, CurrSamplesPerSec=5.67324385783393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:47:48,200] [INFO] [timer.py:197:stop] 0/1806, RunningAvgSamplesPerSec=6.326275620745158, CurrSamplesPerSec=5.6986081565692555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0123, 'learning_rate': 9.12e-06, 'epoch': 6.76} +[2022-12-19 02:47:59,543] [INFO] [timer.py:197:stop] 0/1808, RunningAvgSamplesPerSec=6.326266962597003, CurrSamplesPerSec=5.709048812640666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:48:11,019] [INFO] [timer.py:197:stop] 0/1810, RunningAvgSamplesPerSec=6.326251483715628, CurrSamplesPerSec=5.674714945601174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:48:22,345] [INFO] [timer.py:197:stop] 0/1812, RunningAvgSamplesPerSec=6.326247948283754, CurrSamplesPerSec=5.681360044615221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:48:33,717] [INFO] [timer.py:197:stop] 0/1814, RunningAvgSamplesPerSec=6.3262437806871485, CurrSamplesPerSec=5.697264918474079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:48:45,242] [INFO] [timer.py:197:stop] 0/1816, RunningAvgSamplesPerSec=6.326224041904885, CurrSamplesPerSec=5.669574622213104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:48:56,523] [INFO] [timer.py:197:stop] 0/1818, RunningAvgSamplesPerSec=6.32624519155112, CurrSamplesPerSec=5.709860251548659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:49:07,836] [INFO] [logging.py:68:log_dist] [Rank 0] step=910, skipped=6, lr=[9.104444444444444e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:49:07,838] [INFO] [timer.py:197:stop] 0/1820, RunningAvgSamplesPerSec=6.326258283689595, CurrSamplesPerSec=5.690251580192909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:49:19,161] [INFO] [timer.py:197:stop] 0/1822, RunningAvgSamplesPerSec=6.326272794780122, CurrSamplesPerSec=5.680835827541261, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:49:30,458] [INFO] [timer.py:197:stop] 0/1824, RunningAvgSamplesPerSec=6.326289842706597, CurrSamplesPerSec=5.686278445695439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:49:41,771] [INFO] [timer.py:197:stop] 0/1826, RunningAvgSamplesPerSec=6.3263006088827645, CurrSamplesPerSec=5.69110522269296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:49:53,109] [INFO] [timer.py:197:stop] 0/1828, RunningAvgSamplesPerSec=6.326305863765418, CurrSamplesPerSec=5.686967759321349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:50:04,455] [INFO] [timer.py:197:stop] 0/1830, RunningAvgSamplesPerSec=6.326295565006565, CurrSamplesPerSec=5.687589513673625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:50:15,833] [INFO] [timer.py:197:stop] 0/1832, RunningAvgSamplesPerSec=6.326279534779694, CurrSamplesPerSec=5.6756726538654725, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:50:27,102] [INFO] [timer.py:197:stop] 0/1834, RunningAvgSamplesPerSec=6.32628702156709, CurrSamplesPerSec=5.707074732393959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:50:38,443] [INFO] [timer.py:197:stop] 0/1836, RunningAvgSamplesPerSec=6.326280456792154, CurrSamplesPerSec=5.6858901328649205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:50:49,764] [INFO] [timer.py:197:stop] 0/1838, RunningAvgSamplesPerSec=6.326293705340914, CurrSamplesPerSec=5.700904230026162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:51:01,110] [INFO] [logging.py:68:log_dist] [Rank 0] step=920, skipped=6, lr=[9.082222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:51:01,112] [INFO] [timer.py:197:stop] 0/1840, RunningAvgSamplesPerSec=6.326294305995962, CurrSamplesPerSec=5.69232943292039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:51:12,423] [INFO] [timer.py:197:stop] 0/1842, RunningAvgSamplesPerSec=6.326313116856321, CurrSamplesPerSec=5.705744479411721, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:51:23,741] [INFO] [timer.py:197:stop] 0/1844, RunningAvgSamplesPerSec=6.32631566493016, CurrSamplesPerSec=5.700933772012223, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:51:35,094] [INFO] [timer.py:197:stop] 0/1846, RunningAvgSamplesPerSec=6.326311378399368, CurrSamplesPerSec=5.69037003251689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:51:46,431] [INFO] [timer.py:197:stop] 0/1848, RunningAvgSamplesPerSec=6.326334065030902, CurrSamplesPerSec=5.71306527187534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:51:57,727] [INFO] [timer.py:197:stop] 0/1850, RunningAvgSamplesPerSec=6.326353006935345, CurrSamplesPerSec=5.700662094138969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:52:09,037] [INFO] [timer.py:197:stop] 0/1852, RunningAvgSamplesPerSec=6.326371952645956, CurrSamplesPerSec=5.706567595824296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:52:20,373] [INFO] [timer.py:197:stop] 0/1854, RunningAvgSamplesPerSec=6.326381012581383, CurrSamplesPerSec=5.707263051024697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:52:31,697] [INFO] [timer.py:197:stop] 0/1856, RunningAvgSamplesPerSec=6.326385405224525, CurrSamplesPerSec=5.7047732018864235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0113, 'learning_rate': 9.064444444444447e-06, 'epoch': 6.95} +[2022-12-19 02:52:43,074] [INFO] [timer.py:197:stop] 0/1858, RunningAvgSamplesPerSec=6.326360052318767, CurrSamplesPerSec=5.62437144846066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:52:54,427] [INFO] [logging.py:68:log_dist] [Rank 0] step=930, skipped=6, lr=[9.060000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:52:54,429] [INFO] [timer.py:197:stop] 0/1860, RunningAvgSamplesPerSec=6.326355866282421, CurrSamplesPerSec=5.6719077553516275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:53:05,747] [INFO] [timer.py:197:stop] 0/1862, RunningAvgSamplesPerSec=6.326375388595798, CurrSamplesPerSec=5.70627912629274, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:53:17,021] [INFO] [timer.py:197:stop] 0/1864, RunningAvgSamplesPerSec=6.326393589824482, CurrSamplesPerSec=5.701821627330498, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:53:28,389] [INFO] [timer.py:197:stop] 0/1866, RunningAvgSamplesPerSec=6.326390856696775, CurrSamplesPerSec=5.694816161501644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:53:39,718] [INFO] [timer.py:197:stop] 0/1868, RunningAvgSamplesPerSec=6.326391776659984, CurrSamplesPerSec=5.700212018180171, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:53:50,140] [INFO] [timer.py:197:stop] 0/1870, RunningAvgSamplesPerSec=6.326924106132798, CurrSamplesPerSec=5.671705944240296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:54:01,529] [INFO] [timer.py:197:stop] 0/1872, RunningAvgSamplesPerSec=6.326892313786539, CurrSamplesPerSec=5.644772119484631, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:54:12,878] [INFO] [timer.py:197:stop] 0/1874, RunningAvgSamplesPerSec=6.326871510060623, CurrSamplesPerSec=5.669993764253113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:54:24,195] [INFO] [timer.py:197:stop] 0/1876, RunningAvgSamplesPerSec=6.32687887416701, CurrSamplesPerSec=5.675905710708204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:54:35,525] [INFO] [timer.py:197:stop] 0/1878, RunningAvgSamplesPerSec=6.326893190924998, CurrSamplesPerSec=5.704296293525905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:54:46,865] [INFO] [logging.py:68:log_dist] [Rank 0] step=940, skipped=6, lr=[9.037777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:54:46,867] [INFO] [timer.py:197:stop] 0/1880, RunningAvgSamplesPerSec=6.326893723872482, CurrSamplesPerSec=5.686018520754404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:54:58,219] [INFO] [timer.py:197:stop] 0/1882, RunningAvgSamplesPerSec=6.326882888179903, CurrSamplesPerSec=5.674984155904965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:55:09,747] [INFO] [timer.py:197:stop] 0/1884, RunningAvgSamplesPerSec=6.326854854129708, CurrSamplesPerSec=5.647342920570599, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:55:21,604] [INFO] [timer.py:197:stop] 0/1886, RunningAvgSamplesPerSec=6.3268220619883175, CurrSamplesPerSec=5.651489537135508, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:55:33,286] [INFO] [timer.py:197:stop] 0/1888, RunningAvgSamplesPerSec=6.326784679478798, CurrSamplesPerSec=5.65706368557152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:55:44,857] [INFO] [timer.py:197:stop] 0/1890, RunningAvgSamplesPerSec=6.326785351859433, CurrSamplesPerSec=5.689782161845206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:55:56,230] [INFO] [timer.py:197:stop] 0/1892, RunningAvgSamplesPerSec=6.326768671987495, CurrSamplesPerSec=5.67753668401372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:56:07,707] [INFO] [timer.py:197:stop] 0/1894, RunningAvgSamplesPerSec=6.32673878130999, CurrSamplesPerSec=5.653865203876641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:56:19,060] [INFO] [timer.py:197:stop] 0/1896, RunningAvgSamplesPerSec=6.326736501509452, CurrSamplesPerSec=5.696846329192205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:56:30,555] [INFO] [timer.py:197:stop] 0/1898, RunningAvgSamplesPerSec=6.32674735316769, CurrSamplesPerSec=5.70047929517065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:56:41,901] [INFO] [logging.py:68:log_dist] [Rank 0] step=950, skipped=6, lr=[9.015555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:56:41,902] [INFO] [timer.py:197:stop] 0/1900, RunningAvgSamplesPerSec=6.3267513569449605, CurrSamplesPerSec=5.689371906362256, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:56:53,290] [INFO] [timer.py:197:stop] 0/1902, RunningAvgSamplesPerSec=6.3267480362392785, CurrSamplesPerSec=5.687192346372233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:57:04,582] [INFO] [timer.py:197:stop] 0/1904, RunningAvgSamplesPerSec=6.326770316587786, CurrSamplesPerSec=5.708168658612521, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:57:15,866] [INFO] [timer.py:197:stop] 0/1906, RunningAvgSamplesPerSec=6.326787952251769, CurrSamplesPerSec=5.702025587327711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0086, 'learning_rate': 9.008888888888889e-06, 'epoch': 7.14} +[2022-12-19 02:57:27,382] [INFO] [timer.py:197:stop] 0/1908, RunningAvgSamplesPerSec=6.326766090302302, CurrSamplesPerSec=5.660147153665728, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:57:38,708] [INFO] [timer.py:197:stop] 0/1910, RunningAvgSamplesPerSec=6.326780368445381, CurrSamplesPerSec=5.694250080406871, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:57:50,025] [INFO] [timer.py:197:stop] 0/1912, RunningAvgSamplesPerSec=6.326786052597571, CurrSamplesPerSec=5.697235656333974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:58:01,406] [INFO] [timer.py:197:stop] 0/1914, RunningAvgSamplesPerSec=6.3267708296819265, CurrSamplesPerSec=5.665626712628445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:58:12,849] [INFO] [timer.py:197:stop] 0/1916, RunningAvgSamplesPerSec=6.326775654447717, CurrSamplesPerSec=5.69344597005626, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:58:24,285] [INFO] [timer.py:197:stop] 0/1918, RunningAvgSamplesPerSec=6.3267440628330505, CurrSamplesPerSec=5.641554793761365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:58:35,776] [INFO] [logging.py:68:log_dist] [Rank 0] step=960, skipped=6, lr=[8.993333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-19 02:58:35,777] [INFO] [timer.py:197:stop] 0/1920, RunningAvgSamplesPerSec=6.3267433567481985, CurrSamplesPerSec=5.6891596868866126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:58:47,244] [INFO] [timer.py:197:stop] 0/1922, RunningAvgSamplesPerSec=6.326729414038707, CurrSamplesPerSec=5.666347628416906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:58:58,611] [INFO] [timer.py:197:stop] 0/1924, RunningAvgSamplesPerSec=6.326715766360123, CurrSamplesPerSec=5.680182374944682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:59:10,086] [INFO] [timer.py:197:stop] 0/1926, RunningAvgSamplesPerSec=6.326698267238547, CurrSamplesPerSec=5.690238311909788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:59:21,478] [INFO] [timer.py:197:stop] 0/1928, RunningAvgSamplesPerSec=6.326682552091262, CurrSamplesPerSec=5.671927409932752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:59:33,005] [INFO] [timer.py:197:stop] 0/1930, RunningAvgSamplesPerSec=6.3266662201263, CurrSamplesPerSec=5.673839588852302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:59:44,394] [INFO] [timer.py:197:stop] 0/1932, RunningAvgSamplesPerSec=6.326647452117153, CurrSamplesPerSec=5.67752227414039, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 02:59:55,928] [INFO] [timer.py:197:stop] 0/1934, RunningAvgSamplesPerSec=6.326632547130804, CurrSamplesPerSec=5.696782010699283, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:00:07,308] [INFO] [timer.py:197:stop] 0/1936, RunningAvgSamplesPerSec=6.326612853608152, CurrSamplesPerSec=5.679080645283451, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:00:18,847] [INFO] [timer.py:197:stop] 0/1938, RunningAvgSamplesPerSec=6.326582143690067, CurrSamplesPerSec=5.660932812000199, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:00:30,250] [INFO] [logging.py:68:log_dist] [Rank 0] step=970, skipped=6, lr=[8.971111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-19 03:00:30,252] [INFO] [timer.py:197:stop] 0/1940, RunningAvgSamplesPerSec=6.3265648412872775, CurrSamplesPerSec=5.666948850761994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:00:41,691] [INFO] [timer.py:197:stop] 0/1942, RunningAvgSamplesPerSec=6.326569243293468, CurrSamplesPerSec=5.711902863153607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:00:53,099] [INFO] [timer.py:197:stop] 0/1944, RunningAvgSamplesPerSec=6.326572842640973, CurrSamplesPerSec=5.695367371204844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:01:04,422] [INFO] [timer.py:197:stop] 0/1946, RunningAvgSamplesPerSec=6.32657294453668, CurrSamplesPerSec=5.70359210849502, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:01:15,862] [INFO] [timer.py:197:stop] 0/1948, RunningAvgSamplesPerSec=6.326541206508122, CurrSamplesPerSec=5.651977410741608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:01:27,324] [INFO] [timer.py:197:stop] 0/1950, RunningAvgSamplesPerSec=6.3265403664385085, CurrSamplesPerSec=5.702550815146744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:01:38,645] [INFO] [timer.py:197:stop] 0/1952, RunningAvgSamplesPerSec=6.326553403004653, CurrSamplesPerSec=5.705305484067998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:01:49,928] [INFO] [timer.py:197:stop] 0/1954, RunningAvgSamplesPerSec=6.326570279442877, CurrSamplesPerSec=5.703029370291108, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:02:01,286] [INFO] [timer.py:197:stop] 0/1956, RunningAvgSamplesPerSec=6.32656451884155, CurrSamplesPerSec=5.677128432591143, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0077, 'learning_rate': 8.953333333333335e-06, 'epoch': 7.33} +[2022-12-19 03:02:12,657] [INFO] [timer.py:197:stop] 0/1958, RunningAvgSamplesPerSec=6.326569271199113, CurrSamplesPerSec=5.707258197295713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:02:24,099] [INFO] [logging.py:68:log_dist] [Rank 0] step=980, skipped=6, lr=[8.94888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 03:02:24,101] [INFO] [timer.py:197:stop] 0/1960, RunningAvgSamplesPerSec=6.326584482872275, CurrSamplesPerSec=5.7166067126927596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:02:35,402] [INFO] [timer.py:197:stop] 0/1962, RunningAvgSamplesPerSec=6.326595460253956, CurrSamplesPerSec=5.7105045151488145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:02:46,761] [INFO] [timer.py:197:stop] 0/1964, RunningAvgSamplesPerSec=6.326610964543361, CurrSamplesPerSec=5.704105988860188, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:02:58,277] [INFO] [timer.py:197:stop] 0/1966, RunningAvgSamplesPerSec=6.326613509952827, CurrSamplesPerSec=5.690958265981202, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:03:09,639] [INFO] [timer.py:197:stop] 0/1968, RunningAvgSamplesPerSec=6.326608995818242, CurrSamplesPerSec=5.677601049007638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:03:20,990] [INFO] [timer.py:197:stop] 0/1970, RunningAvgSamplesPerSec=6.326593487791063, CurrSamplesPerSec=5.678826663976843, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:03:32,292] [INFO] [timer.py:197:stop] 0/1972, RunningAvgSamplesPerSec=6.326593152000837, CurrSamplesPerSec=5.699780167028214, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:03:43,656] [INFO] [timer.py:197:stop] 0/1974, RunningAvgSamplesPerSec=6.326590465498463, CurrSamplesPerSec=5.681751106218245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:03:54,964] [INFO] [timer.py:197:stop] 0/1976, RunningAvgSamplesPerSec=6.32659258352864, CurrSamplesPerSec=5.6757112953345175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:04:06,280] [INFO] [timer.py:197:stop] 0/1978, RunningAvgSamplesPerSec=6.3266049487473435, CurrSamplesPerSec=5.706643781880916, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:04:17,582] [INFO] [logging.py:68:log_dist] [Rank 0] step=990, skipped=6, lr=[8.926666666666669e-06], mom=[[0.9, 0.999]] +[2022-12-19 03:04:17,584] [INFO] [timer.py:197:stop] 0/1980, RunningAvgSamplesPerSec=6.3266198016964, CurrSamplesPerSec=5.708158219793414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:04:28,954] [INFO] [timer.py:197:stop] 0/1982, RunningAvgSamplesPerSec=6.326592497307985, CurrSamplesPerSec=5.714028914957275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:04:40,380] [INFO] [timer.py:197:stop] 0/1984, RunningAvgSamplesPerSec=6.326541972860564, CurrSamplesPerSec=5.600063420060266, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:04:51,717] [INFO] [timer.py:197:stop] 0/1986, RunningAvgSamplesPerSec=6.326534796318091, CurrSamplesPerSec=5.69550633832067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:05:03,067] [INFO] [timer.py:197:stop] 0/1988, RunningAvgSamplesPerSec=6.326535161708194, CurrSamplesPerSec=5.678836274965597, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:05:14,353] [INFO] [timer.py:197:stop] 0/1990, RunningAvgSamplesPerSec=6.32654844691735, CurrSamplesPerSec=5.72301112403001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:05:25,711] [INFO] [timer.py:197:stop] 0/1992, RunningAvgSamplesPerSec=6.326516071676392, CurrSamplesPerSec=5.706936898799184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:05:36,991] [INFO] [timer.py:197:stop] 0/1994, RunningAvgSamplesPerSec=6.326542117678622, CurrSamplesPerSec=5.71591116943807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:05:48,312] [INFO] [timer.py:197:stop] 0/1996, RunningAvgSamplesPerSec=6.326543655886002, CurrSamplesPerSec=5.698899722633566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:05:59,647] [INFO] [timer.py:197:stop] 0/1998, RunningAvgSamplesPerSec=6.326541233446142, CurrSamplesPerSec=5.6959965234396215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:06:11,045] [INFO] [logging.py:68:log_dist] [Rank 0] step=1000, skipped=6, lr=[8.904444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-19 03:06:11,047] [INFO] [timer.py:197:stop] 0/2000, RunningAvgSamplesPerSec=6.3265184925558104, CurrSamplesPerSec=5.647131924107018, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:06:22,401] [INFO] [timer.py:197:stop] 0/2002, RunningAvgSamplesPerSec=6.3265128000256885, CurrSamplesPerSec=5.694423782610695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:06:33,753] [INFO] [timer.py:197:stop] 0/2004, RunningAvgSamplesPerSec=6.326514866019954, CurrSamplesPerSec=5.705248977420315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 03:06:45,054] [INFO] [timer.py:197:stop] 0/2006, RunningAvgSamplesPerSec=6.326516704682161, CurrSamplesPerSec=5.684222339947914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0076, 'learning_rate': 8.897777777777779e-06, 'epoch': 7.52} +{'eval_loss': 0.2607421875, 'eval_wer': 16.033204862140526, 'eval_runtime': 1390.3082, 'eval_samples_per_second': 3.331, 'eval_steps_per_second': 0.416, 'epoch': 7.52} +[2022-12-19 03:30:04,054] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step1003 is begin to save! +[2022-12-19 03:30:04,065] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-1000/global_step1003/mp_rank_00_model_states.pt +[2022-12-19 03:30:04,065] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1003/mp_rank_00_model_states.pt... +[2022-12-19 03:30:07,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1003/mp_rank_00_model_states.pt. +[2022-12-19 03:30:07,700] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-1000/global_step1003/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-19 03:30:24,042] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-1000/global_step1003/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-19 03:30:24,042] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-1000/global_step1003/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-19 03:30:24,042] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1003 is ready now!