diff --git "a/log/debug_0.log" "b/log/debug_0.log" new file mode 100644--- /dev/null +++ "b/log/debug_0.log" @@ -0,0 +1,1311 @@ +02/24/2022 01:15:12 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +02/24/2022 01:15:13 - WARNING - huggingface_hub.repository - Revision `gallant-salad-7` does not exist. Created and checked out branch `gallant-salad-7`. +02/24/2022 01:15:13 - WARNING - huggingface_hub.repository - +02/24/2022 01:19:48 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +02/24/2022 01:19:49 - WARNING - huggingface_hub.repository - Revision `splendid-planet-8` does not exist. Created and checked out branch `splendid-planet-8`. +02/24/2022 01:19:49 - WARNING - huggingface_hub.repository - +02/24/2022 01:21:44 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +02/24/2022 01:21:44 - WARNING - huggingface_hub.repository - Revision `rich-lake-9` does not exist. Created and checked out branch `rich-lake-9`. +02/24/2022 01:21:44 - WARNING - huggingface_hub.repository - +02/24/2022 01:21:57 - WARNING - datasets.builder - Using custom data configuration lvwerra--codeparrot-clean-train-a1efdd1059bd841d +02/24/2022 01:21:59 - WARNING - datasets.builder - Using custom data configuration lvwerra--codeparrot-clean-valid-a800eb55c299abc0 +02/24/2022 01:22:07 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 16, 'steps': 0, 'loss/train': 10.91077709197998} +02/24/2022 01:22:19 - INFO - codeparrot_training - Step 2: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 10.942980766296387} +02/24/2022 01:22:29 - INFO - codeparrot_training - Step 3: {'lr': 0.0, 'samples': 48, 'steps': 0, 'loss/train': 11.002518653869629} +02/24/2022 01:22:30 - INFO - codeparrot_training - Step 4: {'lr': 0.0, 'samples': 64, 'steps': 0, 'loss/train': 10.910642623901367} +02/24/2022 01:22:30 - INFO - codeparrot_training - Step 5: {'lr': 0.0, 'samples': 80, 'steps': 0, 'loss/train': 10.943111419677734} +02/24/2022 01:22:30 - INFO - codeparrot_training - Step 6: {'lr': 0.0, 'samples': 96, 'steps': 0, 'loss/train': 10.949981689453125} +02/24/2022 01:22:30 - INFO - codeparrot_training - Step 7: {'lr': 0.0, 'samples': 112, 'steps': 0, 'loss/train': 10.960782051086426} +02/24/2022 01:22:30 - INFO - codeparrot_training - Step 8: {'lr': 0.0, 'samples': 128, 'steps': 0, 'loss/train': 10.970269203186035} +02/24/2022 01:22:30 - INFO - codeparrot_training - Step 9: {'lr': 0.0, 'samples': 144, 'steps': 0, 'loss/train': 10.96359634399414} +02/24/2022 01:22:30 - INFO - codeparrot_training - Step 10: {'lr': 0.0, 'samples': 160, 'steps': 0, 'loss/train': 10.989349365234375} +02/24/2022 01:22:31 - INFO - codeparrot_training - Step 11: {'lr': 0.0, 'samples': 176, 'steps': 0, 'loss/train': 10.927996635437012} +02/24/2022 01:22:31 - INFO - codeparrot_training - Step 12: {'lr': 0.0, 'samples': 192, 'steps': 0, 'loss/train': 10.922099113464355} +02/24/2022 01:22:31 - INFO - codeparrot_training - Step 13: {'lr': 0.0, 'samples': 208, 'steps': 0, 'loss/train': 10.948519706726074} +02/24/2022 01:22:31 - INFO - codeparrot_training - Step 14: {'lr': 0.0, 'samples': 224, 'steps': 0, 'loss/train': 10.906710624694824} +02/24/2022 01:22:31 - INFO - codeparrot_training - Step 15: {'lr': 0.0, 'samples': 
240, 'steps': 0, 'loss/train': 11.033504486083984} +02/24/2022 01:22:31 - INFO - codeparrot_training - Step 16: {'lr': 0.0, 'samples': 256, 'steps': 0, 'loss/train': 10.973214149475098} +02/24/2022 01:22:32 - INFO - codeparrot_training - Step 17: {'lr': 0.0, 'samples': 272, 'steps': 0, 'loss/train': 10.971339225769043} +02/24/2022 01:22:32 - INFO - codeparrot_training - Step 18: {'lr': 0.0, 'samples': 288, 'steps': 0, 'loss/train': 10.91922378540039} +02/24/2022 01:22:32 - INFO - codeparrot_training - Step 19: {'lr': 0.0, 'samples': 304, 'steps': 0, 'loss/train': 10.92506217956543} +02/24/2022 01:22:32 - INFO - codeparrot_training - Step 20: {'lr': 0.0, 'samples': 320, 'steps': 0, 'loss/train': 10.96170711517334} +02/24/2022 01:22:32 - INFO - codeparrot_training - Step 21: {'lr': 0.0, 'samples': 336, 'steps': 0, 'loss/train': 10.995943069458008} +02/24/2022 01:22:32 - INFO - codeparrot_training - Step 22: {'lr': 0.0, 'samples': 352, 'steps': 0, 'loss/train': 10.80260944366455} +02/24/2022 01:22:33 - INFO - codeparrot_training - Step 23: {'lr': 0.0, 'samples': 368, 'steps': 0, 'loss/train': 11.354506492614746} +02/24/2022 01:22:33 - INFO - codeparrot_training - Step 24: {'lr': 0.0, 'samples': 384, 'steps': 0, 'loss/train': 10.87732219696045} +02/24/2022 01:22:33 - INFO - codeparrot_training - Step 25: {'lr': 0.0, 'samples': 400, 'steps': 0, 'loss/train': 10.90416145324707} +02/24/2022 01:22:33 - INFO - codeparrot_training - Step 26: {'lr': 0.0, 'samples': 416, 'steps': 0, 'loss/train': 10.979859352111816} +02/24/2022 01:22:33 - INFO - codeparrot_training - Step 27: {'lr': 0.0, 'samples': 432, 'steps': 0, 'loss/train': 10.968029022216797} +02/24/2022 01:22:33 - INFO - codeparrot_training - Step 28: {'lr': 0.0, 'samples': 448, 'steps': 0, 'loss/train': 10.952177047729492} +02/24/2022 01:22:33 - INFO - codeparrot_training - Step 29: {'lr': 0.0, 'samples': 464, 'steps': 0, 'loss/train': 10.939998626708984} +02/24/2022 01:22:34 - INFO - codeparrot_training - Step 30: {'lr': 0.0, 'samples': 480, 'steps': 0, 'loss/train': 10.979020118713379} +02/24/2022 01:22:34 - INFO - codeparrot_training - Step 31: {'lr': 0.0, 'samples': 496, 'steps': 0, 'loss/train': 10.940269470214844} +02/24/2022 01:22:34 - INFO - codeparrot_training - Step 32: {'lr': 0.0, 'samples': 512, 'steps': 0, 'loss/train': 11.01601791381836} +02/24/2022 01:25:47 - INFO - codeparrot_training - Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +02/24/2022 01:25:48 - WARNING - huggingface_hub.repository - Revision `misty-fire-10` does not exist. Created and checked out branch `misty-fire-10`. 
+02/24/2022 01:25:48 - WARNING - huggingface_hub.repository - +02/24/2022 01:26:00 - WARNING - datasets.builder - Using custom data configuration lvwerra--codeparrot-clean-train-a1efdd1059bd841d +02/24/2022 01:26:02 - WARNING - datasets.builder - Using custom data configuration lvwerra--codeparrot-clean-valid-a800eb55c299abc0 +02/24/2022 01:26:10 - INFO - codeparrot_training - Step 1: {'lr': 0.0, 'samples': 16, 'steps': 0, 'loss/train': 10.91077709197998} +02/24/2022 01:26:23 - INFO - codeparrot_training - Step 2: {'lr': 0.0, 'samples': 32, 'steps': 0, 'loss/train': 10.942980766296387} +02/24/2022 01:26:33 - INFO - codeparrot_training - Step 3: {'lr': 0.0, 'samples': 48, 'steps': 0, 'loss/train': 11.002518653869629} +02/24/2022 01:26:33 - INFO - codeparrot_training - Step 4: {'lr': 0.0, 'samples': 64, 'steps': 0, 'loss/train': 10.910642623901367} +02/24/2022 01:26:34 - INFO - codeparrot_training - Step 5: {'lr': 0.0, 'samples': 80, 'steps': 0, 'loss/train': 10.943111419677734} +02/24/2022 01:26:34 - INFO - codeparrot_training - Step 6: {'lr': 0.0, 'samples': 96, 'steps': 0, 'loss/train': 10.949981689453125} +02/24/2022 01:26:34 - INFO - codeparrot_training - Step 7: {'lr': 0.0, 'samples': 112, 'steps': 0, 'loss/train': 10.960782051086426} +02/24/2022 01:26:34 - INFO - codeparrot_training - Step 8: {'lr': 0.0, 'samples': 128, 'steps': 0, 'loss/train': 10.970269203186035} +02/24/2022 01:26:34 - INFO - codeparrot_training - Step 9: {'lr': 0.0, 'samples': 144, 'steps': 0, 'loss/train': 10.96359634399414} +02/24/2022 01:26:34 - INFO - codeparrot_training - Step 10: {'lr': 0.0, 'samples': 160, 'steps': 0, 'loss/train': 10.989349365234375} +02/24/2022 01:26:35 - INFO - codeparrot_training - Step 11: {'lr': 0.0, 'samples': 176, 'steps': 0, 'loss/train': 10.927996635437012} +02/24/2022 01:26:35 - INFO - codeparrot_training - Step 12: {'lr': 0.0, 'samples': 192, 'steps': 0, 'loss/train': 10.922099113464355} +02/24/2022 01:26:35 - INFO - codeparrot_training - Step 13: {'lr': 0.0, 'samples': 208, 'steps': 0, 'loss/train': 10.948519706726074} +02/24/2022 01:26:35 - INFO - codeparrot_training - Step 14: {'lr': 0.0, 'samples': 224, 'steps': 0, 'loss/train': 10.906710624694824} +02/24/2022 01:26:35 - INFO - codeparrot_training - Step 15: {'lr': 0.0, 'samples': 240, 'steps': 0, 'loss/train': 11.033504486083984} +02/24/2022 01:26:35 - INFO - codeparrot_training - Step 16: {'lr': 0.0, 'samples': 256, 'steps': 0, 'loss/train': 10.973214149475098} +02/24/2022 01:26:36 - INFO - codeparrot_training - Step 17: {'lr': 0.0, 'samples': 272, 'steps': 0, 'loss/train': 10.971339225769043} +02/24/2022 01:26:36 - INFO - codeparrot_training - Step 18: {'lr': 0.0, 'samples': 288, 'steps': 0, 'loss/train': 10.91922378540039} +02/24/2022 01:26:36 - INFO - codeparrot_training - Step 19: {'lr': 0.0, 'samples': 304, 'steps': 0, 'loss/train': 10.92506217956543} +02/24/2022 01:26:36 - INFO - codeparrot_training - Step 20: {'lr': 0.0, 'samples': 320, 'steps': 0, 'loss/train': 10.96170711517334} +02/24/2022 01:26:36 - INFO - codeparrot_training - Step 21: {'lr': 0.0, 'samples': 336, 'steps': 0, 'loss/train': 10.995943069458008} +02/24/2022 01:26:36 - INFO - codeparrot_training - Step 22: {'lr': 0.0, 'samples': 352, 'steps': 0, 'loss/train': 10.80260944366455} +02/24/2022 01:26:36 - INFO - codeparrot_training - Step 23: {'lr': 0.0, 'samples': 368, 'steps': 0, 'loss/train': 11.354506492614746} +02/24/2022 01:26:37 - INFO - codeparrot_training - Step 24: {'lr': 0.0, 'samples': 384, 'steps': 0, 'loss/train': 10.87732219696045} 
+02/24/2022 01:26:37 - INFO - codeparrot_training - Step 25: {'lr': 0.0, 'samples': 400, 'steps': 0, 'loss/train': 10.90416145324707} +02/24/2022 01:26:37 - INFO - codeparrot_training - Step 26: {'lr': 0.0, 'samples': 416, 'steps': 0, 'loss/train': 10.979859352111816} +02/24/2022 01:26:37 - INFO - codeparrot_training - Step 27: {'lr': 0.0, 'samples': 432, 'steps': 0, 'loss/train': 10.968029022216797} +02/24/2022 01:26:37 - INFO - codeparrot_training - Step 28: {'lr': 0.0, 'samples': 448, 'steps': 0, 'loss/train': 10.952177047729492} +02/24/2022 01:26:37 - INFO - codeparrot_training - Step 29: {'lr': 0.0, 'samples': 464, 'steps': 0, 'loss/train': 10.939998626708984} +02/24/2022 01:26:38 - INFO - codeparrot_training - Step 30: {'lr': 0.0, 'samples': 480, 'steps': 0, 'loss/train': 10.979020118713379} +02/24/2022 01:26:38 - INFO - codeparrot_training - Step 31: {'lr': 0.0, 'samples': 496, 'steps': 0, 'loss/train': 10.940269470214844} +02/24/2022 01:26:38 - INFO - codeparrot_training - Step 32: {'lr': 0.0, 'samples': 512, 'steps': 0, 'loss/train': 11.01601791381836} +02/24/2022 01:27:44 - INFO - codeparrot_training - Step 33: {'lr': 2.5e-07, 'samples': 528, 'steps': 1, 'loss/train': 10.974133491516113} +02/24/2022 01:27:44 - INFO - codeparrot_training - Step 34: {'lr': 2.5e-07, 'samples': 544, 'steps': 1, 'loss/train': 10.982958793640137} +02/24/2022 01:27:44 - INFO - codeparrot_training - Step 35: {'lr': 2.5e-07, 'samples': 560, 'steps': 1, 'loss/train': 10.95070743560791} +02/24/2022 01:27:45 - INFO - codeparrot_training - Step 36: {'lr': 2.5e-07, 'samples': 576, 'steps': 1, 'loss/train': 10.997182846069336} +02/24/2022 01:27:45 - INFO - codeparrot_training - Step 37: {'lr': 2.5e-07, 'samples': 592, 'steps': 1, 'loss/train': 10.971476554870605} +02/24/2022 01:27:45 - INFO - codeparrot_training - Step 38: {'lr': 2.5e-07, 'samples': 608, 'steps': 1, 'loss/train': 10.988201141357422} +02/24/2022 01:27:45 - INFO - codeparrot_training - Step 39: {'lr': 2.5e-07, 'samples': 624, 'steps': 1, 'loss/train': 10.913816452026367} +02/24/2022 01:27:45 - INFO - codeparrot_training - Step 40: {'lr': 2.5e-07, 'samples': 640, 'steps': 1, 'loss/train': 10.960057258605957} +02/24/2022 01:27:45 - INFO - codeparrot_training - Step 41: {'lr': 2.5e-07, 'samples': 656, 'steps': 1, 'loss/train': 10.945222854614258} +02/24/2022 01:27:46 - INFO - codeparrot_training - Step 42: {'lr': 2.5e-07, 'samples': 672, 'steps': 1, 'loss/train': 10.931300163269043} +02/24/2022 01:27:46 - INFO - codeparrot_training - Step 43: {'lr': 2.5e-07, 'samples': 688, 'steps': 1, 'loss/train': 10.93020248413086} +02/24/2022 01:27:46 - INFO - codeparrot_training - Step 44: {'lr': 2.5e-07, 'samples': 704, 'steps': 1, 'loss/train': 10.922496795654297} +02/24/2022 01:27:46 - INFO - codeparrot_training - Step 45: {'lr': 2.5e-07, 'samples': 720, 'steps': 1, 'loss/train': 10.934232711791992} +02/24/2022 01:27:46 - INFO - codeparrot_training - Step 46: {'lr': 2.5e-07, 'samples': 736, 'steps': 1, 'loss/train': 10.906913757324219} +02/24/2022 01:27:46 - INFO - codeparrot_training - Step 47: {'lr': 2.5e-07, 'samples': 752, 'steps': 1, 'loss/train': 10.988995552062988} +02/24/2022 01:27:47 - INFO - codeparrot_training - Step 48: {'lr': 2.5e-07, 'samples': 768, 'steps': 1, 'loss/train': 10.946273803710938} +02/24/2022 01:27:47 - INFO - codeparrot_training - Step 49: {'lr': 2.5e-07, 'samples': 784, 'steps': 1, 'loss/train': 10.94522476196289} +02/24/2022 01:27:47 - INFO - codeparrot_training - Step 50: {'lr': 2.5e-07, 'samples': 800, 'steps': 1, 
'loss/train': 10.973355293273926} +02/24/2022 01:27:47 - INFO - codeparrot_training - Step 51: {'lr': 2.5e-07, 'samples': 816, 'steps': 1, 'loss/train': 10.965099334716797} +02/24/2022 01:27:47 - INFO - codeparrot_training - Step 52: {'lr': 2.5e-07, 'samples': 832, 'steps': 1, 'loss/train': 10.938562393188477} +02/24/2022 01:27:47 - INFO - codeparrot_training - Step 53: {'lr': 2.5e-07, 'samples': 848, 'steps': 1, 'loss/train': 10.90954303741455} +02/24/2022 01:27:47 - INFO - codeparrot_training - Step 54: {'lr': 2.5e-07, 'samples': 864, 'steps': 1, 'loss/train': 10.997761726379395} +02/24/2022 01:27:48 - INFO - codeparrot_training - Step 55: {'lr': 2.5e-07, 'samples': 880, 'steps': 1, 'loss/train': 10.994376182556152} +02/24/2022 01:27:48 - INFO - codeparrot_training - Step 56: {'lr': 2.5e-07, 'samples': 896, 'steps': 1, 'loss/train': 10.929353713989258} +02/24/2022 01:27:48 - INFO - codeparrot_training - Step 57: {'lr': 2.5e-07, 'samples': 912, 'steps': 1, 'loss/train': 10.877440452575684} +02/24/2022 01:27:48 - INFO - codeparrot_training - Step 58: {'lr': 2.5e-07, 'samples': 928, 'steps': 1, 'loss/train': 10.93001651763916} +02/24/2022 01:27:48 - INFO - codeparrot_training - Step 59: {'lr': 2.5e-07, 'samples': 944, 'steps': 1, 'loss/train': 10.937539100646973} +02/24/2022 01:27:48 - INFO - codeparrot_training - Step 60: {'lr': 2.5e-07, 'samples': 960, 'steps': 1, 'loss/train': 10.957945823669434} +02/24/2022 01:27:49 - INFO - codeparrot_training - Step 61: {'lr': 2.5e-07, 'samples': 976, 'steps': 1, 'loss/train': 11.001383781433105} +02/24/2022 01:27:49 - INFO - codeparrot_training - Step 62: {'lr': 2.5e-07, 'samples': 992, 'steps': 1, 'loss/train': 10.79994010925293} +02/24/2022 01:27:49 - INFO - codeparrot_training - Step 63: {'lr': 2.5e-07, 'samples': 1008, 'steps': 1, 'loss/train': 11.036500930786133} +02/24/2022 01:27:51 - INFO - codeparrot_training - Step 64: {'lr': 2.5e-07, 'samples': 1024, 'steps': 1, 'loss/train': 10.936944961547852} +02/24/2022 01:29:01 - INFO - codeparrot_training - Step 65: {'lr': 5e-07, 'samples': 1040, 'steps': 2, 'loss/train': 10.962884902954102} +02/24/2022 01:29:01 - INFO - codeparrot_training - Step 66: {'lr': 5e-07, 'samples': 1056, 'steps': 2, 'loss/train': 10.924057960510254} +02/24/2022 01:29:01 - INFO - codeparrot_training - Step 67: {'lr': 5e-07, 'samples': 1072, 'steps': 2, 'loss/train': 11.00478744506836} +02/24/2022 01:29:01 - INFO - codeparrot_training - Step 68: {'lr': 5e-07, 'samples': 1088, 'steps': 2, 'loss/train': 10.938624382019043} +02/24/2022 01:29:02 - INFO - codeparrot_training - Step 69: {'lr': 5e-07, 'samples': 1104, 'steps': 2, 'loss/train': 10.946737289428711} +02/24/2022 01:29:02 - INFO - codeparrot_training - Step 70: {'lr': 5e-07, 'samples': 1120, 'steps': 2, 'loss/train': 10.97043514251709} +02/24/2022 01:29:02 - INFO - codeparrot_training - Step 71: {'lr': 5e-07, 'samples': 1136, 'steps': 2, 'loss/train': 10.919570922851562} +02/24/2022 01:29:02 - INFO - codeparrot_training - Step 72: {'lr': 5e-07, 'samples': 1152, 'steps': 2, 'loss/train': 10.94793701171875} +02/24/2022 01:29:02 - INFO - codeparrot_training - Step 73: {'lr': 5e-07, 'samples': 1168, 'steps': 2, 'loss/train': 10.879217147827148} +02/24/2022 01:29:02 - INFO - codeparrot_training - Step 74: {'lr': 5e-07, 'samples': 1184, 'steps': 2, 'loss/train': 10.92875862121582} +02/24/2022 01:29:03 - INFO - codeparrot_training - Step 75: {'lr': 5e-07, 'samples': 1200, 'steps': 2, 'loss/train': 10.892864227294922} +02/24/2022 01:29:03 - INFO - codeparrot_training - Step 76: 
{'lr': 5e-07, 'samples': 1216, 'steps': 2, 'loss/train': 10.862300872802734} +02/24/2022 01:29:03 - INFO - codeparrot_training - Step 77: {'lr': 5e-07, 'samples': 1232, 'steps': 2, 'loss/train': 10.956947326660156} +02/24/2022 01:29:03 - INFO - codeparrot_training - Step 78: {'lr': 5e-07, 'samples': 1248, 'steps': 2, 'loss/train': 10.952834129333496} +02/24/2022 01:29:03 - INFO - codeparrot_training - Step 79: {'lr': 5e-07, 'samples': 1264, 'steps': 2, 'loss/train': 10.932134628295898} +02/24/2022 01:29:03 - INFO - codeparrot_training - Step 80: {'lr': 5e-07, 'samples': 1280, 'steps': 2, 'loss/train': 10.967159271240234} +02/24/2022 01:29:03 - INFO - codeparrot_training - Step 81: {'lr': 5e-07, 'samples': 1296, 'steps': 2, 'loss/train': 10.990038871765137} +02/24/2022 01:29:04 - INFO - codeparrot_training - Step 82: {'lr': 5e-07, 'samples': 1312, 'steps': 2, 'loss/train': 10.931655883789062} +02/24/2022 01:29:04 - INFO - codeparrot_training - Step 83: {'lr': 5e-07, 'samples': 1328, 'steps': 2, 'loss/train': 11.00280475616455} +02/24/2022 01:29:04 - INFO - codeparrot_training - Step 84: {'lr': 5e-07, 'samples': 1344, 'steps': 2, 'loss/train': 10.966012954711914} +02/24/2022 01:29:04 - INFO - codeparrot_training - Step 85: {'lr': 5e-07, 'samples': 1360, 'steps': 2, 'loss/train': 10.966472625732422} +02/24/2022 01:29:04 - INFO - codeparrot_training - Step 86: {'lr': 5e-07, 'samples': 1376, 'steps': 2, 'loss/train': 11.024860382080078} +02/24/2022 01:29:04 - INFO - codeparrot_training - Step 87: {'lr': 5e-07, 'samples': 1392, 'steps': 2, 'loss/train': 10.982053756713867} +02/24/2022 01:29:04 - INFO - codeparrot_training - Step 88: {'lr': 5e-07, 'samples': 1408, 'steps': 2, 'loss/train': 10.985541343688965} +02/24/2022 01:29:05 - INFO - codeparrot_training - Step 89: {'lr': 5e-07, 'samples': 1424, 'steps': 2, 'loss/train': 10.979313850402832} +02/24/2022 01:29:05 - INFO - codeparrot_training - Step 90: {'lr': 5e-07, 'samples': 1440, 'steps': 2, 'loss/train': 10.98116683959961} +02/24/2022 01:29:05 - INFO - codeparrot_training - Step 91: {'lr': 5e-07, 'samples': 1456, 'steps': 2, 'loss/train': 11.013162612915039} +02/24/2022 01:29:05 - INFO - codeparrot_training - Step 92: {'lr': 5e-07, 'samples': 1472, 'steps': 2, 'loss/train': 10.944647789001465} +02/24/2022 01:29:05 - INFO - codeparrot_training - Step 93: {'lr': 5e-07, 'samples': 1488, 'steps': 2, 'loss/train': 10.926380157470703} +02/24/2022 01:29:05 - INFO - codeparrot_training - Step 94: {'lr': 5e-07, 'samples': 1504, 'steps': 2, 'loss/train': 10.939397811889648} +02/24/2022 01:29:06 - INFO - codeparrot_training - Step 95: {'lr': 5e-07, 'samples': 1520, 'steps': 2, 'loss/train': 10.954940795898438} +02/24/2022 01:29:06 - INFO - codeparrot_training - Step 96: {'lr': 5e-07, 'samples': 1536, 'steps': 2, 'loss/train': 10.925644874572754} +02/24/2022 01:29:06 - INFO - codeparrot_training - Step 97: {'lr': 7.5e-07, 'samples': 1552, 'steps': 3, 'loss/train': 10.895140647888184} +02/24/2022 01:29:06 - INFO - codeparrot_training - Step 98: {'lr': 7.5e-07, 'samples': 1568, 'steps': 3, 'loss/train': 10.933130264282227} +02/24/2022 01:29:06 - INFO - codeparrot_training - Step 99: {'lr': 7.5e-07, 'samples': 1584, 'steps': 3, 'loss/train': 10.823505401611328} +02/24/2022 01:29:06 - INFO - codeparrot_training - Step 100: {'lr': 7.5e-07, 'samples': 1600, 'steps': 3, 'loss/train': 10.901811599731445} +02/24/2022 01:29:07 - INFO - codeparrot_training - Step 101: {'lr': 7.5e-07, 'samples': 1616, 'steps': 3, 'loss/train': 10.928942680358887} +02/24/2022 
01:29:07 - INFO - codeparrot_training - Step 102: {'lr': 7.5e-07, 'samples': 1632, 'steps': 3, 'loss/train': 10.981282234191895} +02/24/2022 01:29:07 - INFO - codeparrot_training - Step 103: {'lr': 7.5e-07, 'samples': 1648, 'steps': 3, 'loss/train': 10.933391571044922} +02/24/2022 01:29:07 - INFO - codeparrot_training - Step 104: {'lr': 7.5e-07, 'samples': 1664, 'steps': 3, 'loss/train': 10.925240516662598} +02/24/2022 01:29:07 - INFO - codeparrot_training - Step 105: {'lr': 7.5e-07, 'samples': 1680, 'steps': 3, 'loss/train': 10.990243911743164} +02/24/2022 01:29:07 - INFO - codeparrot_training - Step 106: {'lr': 7.5e-07, 'samples': 1696, 'steps': 3, 'loss/train': 10.938335418701172} +02/24/2022 01:29:07 - INFO - codeparrot_training - Step 107: {'lr': 7.5e-07, 'samples': 1712, 'steps': 3, 'loss/train': 10.967667579650879} +02/24/2022 01:29:08 - INFO - codeparrot_training - Step 108: {'lr': 7.5e-07, 'samples': 1728, 'steps': 3, 'loss/train': 10.93986988067627} +02/24/2022 01:29:08 - INFO - codeparrot_training - Step 109: {'lr': 7.5e-07, 'samples': 1744, 'steps': 3, 'loss/train': 10.925867080688477} +02/24/2022 01:29:08 - INFO - codeparrot_training - Step 110: {'lr': 7.5e-07, 'samples': 1760, 'steps': 3, 'loss/train': 10.97072982788086} +02/24/2022 01:29:08 - INFO - codeparrot_training - Step 111: {'lr': 7.5e-07, 'samples': 1776, 'steps': 3, 'loss/train': 10.872678756713867} +02/24/2022 01:29:08 - INFO - codeparrot_training - Step 112: {'lr': 7.5e-07, 'samples': 1792, 'steps': 3, 'loss/train': 10.886422157287598} +02/24/2022 01:29:08 - INFO - codeparrot_training - Step 113: {'lr': 7.5e-07, 'samples': 1808, 'steps': 3, 'loss/train': 10.965313911437988} +02/24/2022 01:29:09 - INFO - codeparrot_training - Step 114: {'lr': 7.5e-07, 'samples': 1824, 'steps': 3, 'loss/train': 10.914895057678223} +02/24/2022 01:29:09 - INFO - codeparrot_training - Step 115: {'lr': 7.5e-07, 'samples': 1840, 'steps': 3, 'loss/train': 10.9727144241333} +02/24/2022 01:29:09 - INFO - codeparrot_training - Step 116: {'lr': 7.5e-07, 'samples': 1856, 'steps': 3, 'loss/train': 10.957098960876465} +02/24/2022 01:29:09 - INFO - codeparrot_training - Step 117: {'lr': 7.5e-07, 'samples': 1872, 'steps': 3, 'loss/train': 10.932665824890137} +02/24/2022 01:29:09 - INFO - codeparrot_training - Step 118: {'lr': 7.5e-07, 'samples': 1888, 'steps': 3, 'loss/train': 10.878507614135742} +02/24/2022 01:29:09 - INFO - codeparrot_training - Step 119: {'lr': 7.5e-07, 'samples': 1904, 'steps': 3, 'loss/train': 10.957688331604004} +02/24/2022 01:29:09 - INFO - codeparrot_training - Step 120: {'lr': 7.5e-07, 'samples': 1920, 'steps': 3, 'loss/train': 10.916014671325684} +02/24/2022 01:29:10 - INFO - codeparrot_training - Step 121: {'lr': 7.5e-07, 'samples': 1936, 'steps': 3, 'loss/train': 10.942499160766602} +02/24/2022 01:29:10 - INFO - codeparrot_training - Step 122: {'lr': 7.5e-07, 'samples': 1952, 'steps': 3, 'loss/train': 10.985833168029785} +02/24/2022 01:29:10 - INFO - codeparrot_training - Step 123: {'lr': 7.5e-07, 'samples': 1968, 'steps': 3, 'loss/train': 10.920166969299316} +02/24/2022 01:29:10 - INFO - codeparrot_training - Step 124: {'lr': 7.5e-07, 'samples': 1984, 'steps': 3, 'loss/train': 10.89915657043457} +02/24/2022 01:29:10 - INFO - codeparrot_training - Step 125: {'lr': 7.5e-07, 'samples': 2000, 'steps': 3, 'loss/train': 10.908203125} +02/24/2022 01:29:10 - INFO - codeparrot_training - Step 126: {'lr': 7.5e-07, 'samples': 2016, 'steps': 3, 'loss/train': 10.928319931030273} +02/24/2022 01:29:11 - INFO - codeparrot_training - 
Step 127: {'lr': 7.5e-07, 'samples': 2032, 'steps': 3, 'loss/train': 10.972715377807617} +02/24/2022 01:29:14 - INFO - codeparrot_training - Step 128: {'lr': 7.5e-07, 'samples': 2048, 'steps': 3, 'loss/train': 10.941082954406738} +02/24/2022 01:29:14 - INFO - codeparrot_training - Step 129: {'lr': 1e-06, 'samples': 2064, 'steps': 4, 'loss/train': 10.916863441467285} +02/24/2022 01:29:14 - INFO - codeparrot_training - Step 130: {'lr': 1e-06, 'samples': 2080, 'steps': 4, 'loss/train': 10.913991928100586} +02/24/2022 01:29:15 - INFO - codeparrot_training - Step 131: {'lr': 1e-06, 'samples': 2096, 'steps': 4, 'loss/train': 10.973291397094727} +02/24/2022 01:29:15 - INFO - codeparrot_training - Step 132: {'lr': 1e-06, 'samples': 2112, 'steps': 4, 'loss/train': 10.985342979431152} +02/24/2022 01:29:15 - INFO - codeparrot_training - Step 133: {'lr': 1e-06, 'samples': 2128, 'steps': 4, 'loss/train': 10.939002990722656} +02/24/2022 01:29:15 - INFO - codeparrot_training - Step 134: {'lr': 1e-06, 'samples': 2144, 'steps': 4, 'loss/train': 10.843615531921387} +02/24/2022 01:29:15 - INFO - codeparrot_training - Step 135: {'lr': 1e-06, 'samples': 2160, 'steps': 4, 'loss/train': 10.945176124572754} +02/24/2022 01:29:15 - INFO - codeparrot_training - Step 136: {'lr': 1e-06, 'samples': 2176, 'steps': 4, 'loss/train': 10.920622825622559} +02/24/2022 01:29:15 - INFO - codeparrot_training - Step 137: {'lr': 1e-06, 'samples': 2192, 'steps': 4, 'loss/train': 10.99799919128418} +02/24/2022 01:29:16 - INFO - codeparrot_training - Step 138: {'lr': 1e-06, 'samples': 2208, 'steps': 4, 'loss/train': 10.955973625183105} +02/24/2022 01:29:16 - INFO - codeparrot_training - Step 139: {'lr': 1e-06, 'samples': 2224, 'steps': 4, 'loss/train': 10.947653770446777} +02/24/2022 01:29:16 - INFO - codeparrot_training - Step 140: {'lr': 1e-06, 'samples': 2240, 'steps': 4, 'loss/train': 10.857400894165039} +02/24/2022 01:29:16 - INFO - codeparrot_training - Step 141: {'lr': 1e-06, 'samples': 2256, 'steps': 4, 'loss/train': 10.980046272277832} +02/24/2022 01:29:16 - INFO - codeparrot_training - Step 142: {'lr': 1e-06, 'samples': 2272, 'steps': 4, 'loss/train': 10.897544860839844} +02/24/2022 01:29:16 - INFO - codeparrot_training - Step 143: {'lr': 1e-06, 'samples': 2288, 'steps': 4, 'loss/train': 10.952252388000488} +02/24/2022 01:29:17 - INFO - codeparrot_training - Step 144: {'lr': 1e-06, 'samples': 2304, 'steps': 4, 'loss/train': 10.939106941223145} +02/24/2022 01:29:17 - INFO - codeparrot_training - Step 145: {'lr': 1e-06, 'samples': 2320, 'steps': 4, 'loss/train': 10.965592384338379} +02/24/2022 01:29:17 - INFO - codeparrot_training - Step 146: {'lr': 1e-06, 'samples': 2336, 'steps': 4, 'loss/train': 10.850253105163574} +02/24/2022 01:29:17 - INFO - codeparrot_training - Step 147: {'lr': 1e-06, 'samples': 2352, 'steps': 4, 'loss/train': 10.900897026062012} +02/24/2022 01:29:17 - INFO - codeparrot_training - Step 148: {'lr': 1e-06, 'samples': 2368, 'steps': 4, 'loss/train': 10.954386711120605} +02/24/2022 01:29:17 - INFO - codeparrot_training - Step 149: {'lr': 1e-06, 'samples': 2384, 'steps': 4, 'loss/train': 10.88646411895752} +02/24/2022 01:29:18 - INFO - codeparrot_training - Step 150: {'lr': 1e-06, 'samples': 2400, 'steps': 4, 'loss/train': 10.871075630187988} +02/24/2022 01:29:18 - INFO - codeparrot_training - Step 151: {'lr': 1e-06, 'samples': 2416, 'steps': 4, 'loss/train': 10.888216018676758} +02/24/2022 01:29:18 - INFO - codeparrot_training - Step 152: {'lr': 1e-06, 'samples': 2432, 'steps': 4, 'loss/train': 
10.9573392868042} +02/24/2022 01:29:18 - INFO - codeparrot_training - Step 153: {'lr': 1e-06, 'samples': 2448, 'steps': 4, 'loss/train': 10.916056632995605} +02/24/2022 01:29:18 - INFO - codeparrot_training - Step 154: {'lr': 1e-06, 'samples': 2464, 'steps': 4, 'loss/train': 10.897372245788574} +02/24/2022 01:29:18 - INFO - codeparrot_training - Step 155: {'lr': 1e-06, 'samples': 2480, 'steps': 4, 'loss/train': 10.932211875915527} +02/24/2022 01:29:18 - INFO - codeparrot_training - Step 156: {'lr': 1e-06, 'samples': 2496, 'steps': 4, 'loss/train': 10.901747703552246} +02/24/2022 01:29:19 - INFO - codeparrot_training - Step 157: {'lr': 1e-06, 'samples': 2512, 'steps': 4, 'loss/train': 10.91716194152832} +02/24/2022 01:29:19 - INFO - codeparrot_training - Step 158: {'lr': 1e-06, 'samples': 2528, 'steps': 4, 'loss/train': 10.892692565917969} +02/24/2022 01:29:19 - INFO - codeparrot_training - Step 159: {'lr': 1e-06, 'samples': 2544, 'steps': 4, 'loss/train': 10.95022201538086} +02/24/2022 01:29:19 - INFO - codeparrot_training - Step 160: {'lr': 1e-06, 'samples': 2560, 'steps': 4, 'loss/train': 11.07193374633789} +02/24/2022 01:29:19 - INFO - codeparrot_training - Step 161: {'lr': 1.25e-06, 'samples': 2576, 'steps': 5, 'loss/train': 11.085668563842773} +02/24/2022 01:29:19 - INFO - codeparrot_training - Step 162: {'lr': 1.25e-06, 'samples': 2592, 'steps': 5, 'loss/train': 10.96953010559082} +02/24/2022 01:29:20 - INFO - codeparrot_training - Step 163: {'lr': 1.25e-06, 'samples': 2608, 'steps': 5, 'loss/train': 10.864081382751465} +02/24/2022 01:29:20 - INFO - codeparrot_training - Step 164: {'lr': 1.25e-06, 'samples': 2624, 'steps': 5, 'loss/train': 10.895583152770996} +02/24/2022 01:29:20 - INFO - codeparrot_training - Step 165: {'lr': 1.25e-06, 'samples': 2640, 'steps': 5, 'loss/train': 10.876590728759766} +02/24/2022 01:29:20 - INFO - codeparrot_training - Step 166: {'lr': 1.25e-06, 'samples': 2656, 'steps': 5, 'loss/train': 10.900389671325684} +02/24/2022 01:29:20 - INFO - codeparrot_training - Step 167: {'lr': 1.25e-06, 'samples': 2672, 'steps': 5, 'loss/train': 10.90485668182373} +02/24/2022 01:29:20 - INFO - codeparrot_training - Step 168: {'lr': 1.25e-06, 'samples': 2688, 'steps': 5, 'loss/train': 10.92477035522461} +02/24/2022 01:29:21 - INFO - codeparrot_training - Step 169: {'lr': 1.25e-06, 'samples': 2704, 'steps': 5, 'loss/train': 10.900215148925781} +02/24/2022 01:29:21 - INFO - codeparrot_training - Step 170: {'lr': 1.25e-06, 'samples': 2720, 'steps': 5, 'loss/train': 10.924201011657715} +02/24/2022 01:29:21 - INFO - codeparrot_training - Step 171: {'lr': 1.25e-06, 'samples': 2736, 'steps': 5, 'loss/train': 10.936107635498047} +02/24/2022 01:29:21 - INFO - codeparrot_training - Step 172: {'lr': 1.25e-06, 'samples': 2752, 'steps': 5, 'loss/train': 10.946706771850586} +02/24/2022 01:29:21 - INFO - codeparrot_training - Step 173: {'lr': 1.25e-06, 'samples': 2768, 'steps': 5, 'loss/train': 10.901002883911133} +02/24/2022 01:29:21 - INFO - codeparrot_training - Step 174: {'lr': 1.25e-06, 'samples': 2784, 'steps': 5, 'loss/train': 10.934091567993164} +02/24/2022 01:29:21 - INFO - codeparrot_training - Step 175: {'lr': 1.25e-06, 'samples': 2800, 'steps': 5, 'loss/train': 10.858243942260742} +02/24/2022 01:29:22 - INFO - codeparrot_training - Step 176: {'lr': 1.25e-06, 'samples': 2816, 'steps': 5, 'loss/train': 10.875936508178711} +02/24/2022 01:29:22 - INFO - codeparrot_training - Step 177: {'lr': 1.25e-06, 'samples': 2832, 'steps': 5, 'loss/train': 10.9532470703125} +02/24/2022 
01:29:22 - INFO - codeparrot_training - Step 178: {'lr': 1.25e-06, 'samples': 2848, 'steps': 5, 'loss/train': 10.994404792785645} +02/24/2022 01:29:22 - INFO - codeparrot_training - Step 179: {'lr': 1.25e-06, 'samples': 2864, 'steps': 5, 'loss/train': 10.915818214416504} +02/24/2022 01:29:22 - INFO - codeparrot_training - Step 180: {'lr': 1.25e-06, 'samples': 2880, 'steps': 5, 'loss/train': 10.936393737792969} +02/24/2022 01:29:22 - INFO - codeparrot_training - Step 181: {'lr': 1.25e-06, 'samples': 2896, 'steps': 5, 'loss/train': 10.88879680633545} +02/24/2022 01:29:23 - INFO - codeparrot_training - Step 182: {'lr': 1.25e-06, 'samples': 2912, 'steps': 5, 'loss/train': 10.824249267578125} +02/24/2022 01:29:23 - INFO - codeparrot_training - Step 183: {'lr': 1.25e-06, 'samples': 2928, 'steps': 5, 'loss/train': 10.954201698303223} +02/24/2022 01:29:23 - INFO - codeparrot_training - Step 184: {'lr': 1.25e-06, 'samples': 2944, 'steps': 5, 'loss/train': 10.881426811218262} +02/24/2022 01:29:23 - INFO - codeparrot_training - Step 185: {'lr': 1.25e-06, 'samples': 2960, 'steps': 5, 'loss/train': 10.915998458862305} +02/24/2022 01:29:23 - INFO - codeparrot_training - Step 186: {'lr': 1.25e-06, 'samples': 2976, 'steps': 5, 'loss/train': 10.899885177612305} +02/24/2022 01:29:23 - INFO - codeparrot_training - Step 187: {'lr': 1.25e-06, 'samples': 2992, 'steps': 5, 'loss/train': 10.919334411621094} +02/24/2022 01:29:23 - INFO - codeparrot_training - Step 188: {'lr': 1.25e-06, 'samples': 3008, 'steps': 5, 'loss/train': 10.756229400634766} +02/24/2022 01:29:24 - INFO - codeparrot_training - Step 189: {'lr': 1.25e-06, 'samples': 3024, 'steps': 5, 'loss/train': 10.891862869262695} +02/24/2022 01:29:24 - INFO - codeparrot_training - Step 190: {'lr': 1.25e-06, 'samples': 3040, 'steps': 5, 'loss/train': 10.897555351257324} +02/24/2022 01:29:24 - INFO - codeparrot_training - Step 191: {'lr': 1.25e-06, 'samples': 3056, 'steps': 5, 'loss/train': 10.805274963378906} +02/24/2022 01:29:26 - INFO - codeparrot_training - Step 192: {'lr': 1.25e-06, 'samples': 3072, 'steps': 5, 'loss/train': 10.89697551727295} +02/24/2022 01:29:27 - INFO - codeparrot_training - Step 193: {'lr': 1.5e-06, 'samples': 3088, 'steps': 6, 'loss/train': 10.931663513183594} +02/24/2022 01:29:27 - INFO - codeparrot_training - Step 194: {'lr': 1.5e-06, 'samples': 3104, 'steps': 6, 'loss/train': 10.827430725097656} +02/24/2022 01:29:27 - INFO - codeparrot_training - Step 195: {'lr': 1.5e-06, 'samples': 3120, 'steps': 6, 'loss/train': 10.844098091125488} +02/24/2022 01:29:27 - INFO - codeparrot_training - Step 196: {'lr': 1.5e-06, 'samples': 3136, 'steps': 6, 'loss/train': 10.851478576660156} +02/24/2022 01:29:27 - INFO - codeparrot_training - Step 197: {'lr': 1.5e-06, 'samples': 3152, 'steps': 6, 'loss/train': 11.007500648498535} +02/24/2022 01:29:27 - INFO - codeparrot_training - Step 198: {'lr': 1.5e-06, 'samples': 3168, 'steps': 6, 'loss/train': 10.888507843017578} +02/24/2022 01:29:27 - INFO - codeparrot_training - Step 199: {'lr': 1.5e-06, 'samples': 3184, 'steps': 6, 'loss/train': 10.888191223144531} +02/24/2022 01:29:28 - INFO - codeparrot_training - Step 200: {'lr': 1.5e-06, 'samples': 3200, 'steps': 6, 'loss/train': 10.859925270080566} +02/24/2022 01:29:28 - INFO - codeparrot_training - Step 201: {'lr': 1.5e-06, 'samples': 3216, 'steps': 6, 'loss/train': 10.911829948425293} +02/24/2022 01:29:28 - INFO - codeparrot_training - Step 202: {'lr': 1.5e-06, 'samples': 3232, 'steps': 6, 'loss/train': 10.920969009399414} +02/24/2022 01:29:28 - INFO 
- codeparrot_training - Step 203: {'lr': 1.5e-06, 'samples': 3248, 'steps': 6, 'loss/train': 11.01217269897461} +02/24/2022 01:29:28 - INFO - codeparrot_training - Step 204: {'lr': 1.5e-06, 'samples': 3264, 'steps': 6, 'loss/train': 10.69102954864502} +02/24/2022 01:29:28 - INFO - codeparrot_training - Step 205: {'lr': 1.5e-06, 'samples': 3280, 'steps': 6, 'loss/train': 10.810357093811035} +02/24/2022 01:29:29 - INFO - codeparrot_training - Step 206: {'lr': 1.5e-06, 'samples': 3296, 'steps': 6, 'loss/train': 10.828718185424805} +02/24/2022 01:29:29 - INFO - codeparrot_training - Step 207: {'lr': 1.5e-06, 'samples': 3312, 'steps': 6, 'loss/train': 10.842058181762695} +02/24/2022 01:29:29 - INFO - codeparrot_training - Step 208: {'lr': 1.5e-06, 'samples': 3328, 'steps': 6, 'loss/train': 10.93617057800293} +02/24/2022 01:29:29 - INFO - codeparrot_training - Step 209: {'lr': 1.5e-06, 'samples': 3344, 'steps': 6, 'loss/train': 10.911834716796875} +02/24/2022 01:29:29 - INFO - codeparrot_training - Step 210: {'lr': 1.5e-06, 'samples': 3360, 'steps': 6, 'loss/train': 10.907511711120605} +02/24/2022 01:29:29 - INFO - codeparrot_training - Step 211: {'lr': 1.5e-06, 'samples': 3376, 'steps': 6, 'loss/train': 10.88161849975586} +02/24/2022 01:29:29 - INFO - codeparrot_training - Step 212: {'lr': 1.5e-06, 'samples': 3392, 'steps': 6, 'loss/train': 10.922626495361328} +02/24/2022 01:29:30 - INFO - codeparrot_training - Step 213: {'lr': 1.5e-06, 'samples': 3408, 'steps': 6, 'loss/train': 10.967133522033691} +02/24/2022 01:29:30 - INFO - codeparrot_training - Step 214: {'lr': 1.5e-06, 'samples': 3424, 'steps': 6, 'loss/train': 10.916214942932129} +02/24/2022 01:29:30 - INFO - codeparrot_training - Step 215: {'lr': 1.5e-06, 'samples': 3440, 'steps': 6, 'loss/train': 10.900676727294922} +02/24/2022 01:29:30 - INFO - codeparrot_training - Step 216: {'lr': 1.5e-06, 'samples': 3456, 'steps': 6, 'loss/train': 10.85409164428711} +02/24/2022 01:29:30 - INFO - codeparrot_training - Step 217: {'lr': 1.5e-06, 'samples': 3472, 'steps': 6, 'loss/train': 10.870858192443848} +02/24/2022 01:29:30 - INFO - codeparrot_training - Step 218: {'lr': 1.5e-06, 'samples': 3488, 'steps': 6, 'loss/train': 10.858830451965332} +02/24/2022 01:29:31 - INFO - codeparrot_training - Step 219: {'lr': 1.5e-06, 'samples': 3504, 'steps': 6, 'loss/train': 10.821305274963379} +02/24/2022 01:29:31 - INFO - codeparrot_training - Step 220: {'lr': 1.5e-06, 'samples': 3520, 'steps': 6, 'loss/train': 10.941291809082031} +02/24/2022 01:29:31 - INFO - codeparrot_training - Step 221: {'lr': 1.5e-06, 'samples': 3536, 'steps': 6, 'loss/train': 10.885320663452148} +02/24/2022 01:29:31 - INFO - codeparrot_training - Step 222: {'lr': 1.5e-06, 'samples': 3552, 'steps': 6, 'loss/train': 10.896737098693848} +02/24/2022 01:29:31 - INFO - codeparrot_training - Step 223: {'lr': 1.5e-06, 'samples': 3568, 'steps': 6, 'loss/train': 10.932000160217285} +02/24/2022 01:29:31 - INFO - codeparrot_training - Step 224: {'lr': 1.5e-06, 'samples': 3584, 'steps': 6, 'loss/train': 10.873857498168945} +02/24/2022 01:29:32 - INFO - codeparrot_training - Step 225: {'lr': 1.75e-06, 'samples': 3600, 'steps': 7, 'loss/train': 10.845444679260254} +02/24/2022 01:29:32 - INFO - codeparrot_training - Step 226: {'lr': 1.75e-06, 'samples': 3616, 'steps': 7, 'loss/train': 10.83541488647461} +02/24/2022 01:29:32 - INFO - codeparrot_training - Step 227: {'lr': 1.75e-06, 'samples': 3632, 'steps': 7, 'loss/train': 10.851716995239258} +02/24/2022 01:31:34 - INFO - codeparrot_training - 
Distributed environment: TPU +Num processes: 8 +Process index: 0 +Local process index: 0 +Device: xla:1 +Use FP16 precision: False + +02/24/2022 01:31:34 - WARNING - huggingface_hub.repository - Revision `floral-grass-11` does not exist. Created and checked out branch `floral-grass-11`. +02/24/2022 01:31:34 - WARNING - huggingface_hub.repository - +02/24/2022 01:31:47 - WARNING - datasets.builder - Using custom data configuration lvwerra--codeparrot-clean-train-a1efdd1059bd841d +02/24/2022 01:31:48 - WARNING - datasets.builder - Using custom data configuration lvwerra--codeparrot-clean-valid-a800eb55c299abc0 +02/24/2022 01:32:22 - INFO - codeparrot_training - Step 0: {'lr': 0.0, 'samples': 512, 'steps': 0, 'loss/train': 11.01601791381836} +02/24/2022 01:33:30 - INFO - codeparrot_training - Step 1: {'lr': 2.5e-07, 'samples': 1024, 'steps': 1, 'loss/train': 10.936944961547852} +02/24/2022 01:34:46 - INFO - codeparrot_training - Step 2: {'lr': 5e-07, 'samples': 1536, 'steps': 2, 'loss/train': 10.925644874572754} +02/24/2022 01:34:52 - INFO - codeparrot_training - Step 3: {'lr': 7.5e-07, 'samples': 2048, 'steps': 3, 'loss/train': 10.941082954406738} +02/24/2022 01:34:56 - INFO - codeparrot_training - Step 4: {'lr': 1e-06, 'samples': 2560, 'steps': 4, 'loss/train': 11.07193374633789} +02/24/2022 01:35:01 - INFO - codeparrot_training - Step 5: {'lr': 1.25e-06, 'samples': 3072, 'steps': 5, 'loss/train': 10.89697551727295} +02/24/2022 01:35:05 - INFO - codeparrot_training - Step 6: {'lr': 1.5e-06, 'samples': 3584, 'steps': 6, 'loss/train': 10.873857498168945} +02/24/2022 01:35:10 - INFO - codeparrot_training - Step 7: {'lr': 1.75e-06, 'samples': 4096, 'steps': 7, 'loss/train': 10.825765609741211} +02/24/2022 01:35:13 - INFO - codeparrot_training - Step 8: {'lr': 2e-06, 'samples': 4608, 'steps': 8, 'loss/train': 10.752714157104492} +02/24/2022 01:35:19 - INFO - codeparrot_training - Step 9: {'lr': 2.25e-06, 'samples': 5120, 'steps': 9, 'loss/train': 10.731087684631348} +02/24/2022 01:35:22 - INFO - codeparrot_training - Step 10: {'lr': 2.5e-06, 'samples': 5632, 'steps': 10, 'loss/train': 10.78388786315918} +02/24/2022 01:35:28 - INFO - codeparrot_training - Step 11: {'lr': 2.75e-06, 'samples': 6144, 'steps': 11, 'loss/train': 10.686656951904297} +02/24/2022 01:35:31 - INFO - codeparrot_training - Step 12: {'lr': 3e-06, 'samples': 6656, 'steps': 12, 'loss/train': 10.692927360534668} +02/24/2022 01:35:38 - INFO - codeparrot_training - Step 13: {'lr': 3.25e-06, 'samples': 7168, 'steps': 13, 'loss/train': 10.630372047424316} +02/24/2022 01:35:41 - INFO - codeparrot_training - Step 14: {'lr': 3.5e-06, 'samples': 7680, 'steps': 14, 'loss/train': 10.618318557739258} +02/24/2022 01:35:46 - INFO - codeparrot_training - Step 15: {'lr': 3.75e-06, 'samples': 8192, 'steps': 15, 'loss/train': 10.532057762145996} +02/24/2022 01:35:50 - INFO - codeparrot_training - Step 16: {'lr': 4e-06, 'samples': 8704, 'steps': 16, 'loss/train': 10.441239356994629} +02/24/2022 01:35:55 - INFO - codeparrot_training - Step 17: {'lr': 4.250000000000001e-06, 'samples': 9216, 'steps': 17, 'loss/train': 10.438984870910645} +02/24/2022 01:35:59 - INFO - codeparrot_training - Step 18: {'lr': 4.5e-06, 'samples': 9728, 'steps': 18, 'loss/train': 10.231606483459473} +02/24/2022 01:36:04 - INFO - codeparrot_training - Step 19: {'lr': 4.75e-06, 'samples': 10240, 'steps': 19, 'loss/train': 10.179640769958496} +02/24/2022 01:36:07 - INFO - codeparrot_training - Step 20: {'lr': 5e-06, 'samples': 10752, 'steps': 20, 'loss/train': 
9.973851203918457} +02/24/2022 01:36:13 - INFO - codeparrot_training - Step 21: {'lr': 5.2500000000000006e-06, 'samples': 11264, 'steps': 21, 'loss/train': 9.950591087341309} +02/24/2022 01:36:16 - INFO - codeparrot_training - Step 22: {'lr': 5.5e-06, 'samples': 11776, 'steps': 22, 'loss/train': 10.476153373718262} +02/24/2022 01:36:23 - INFO - codeparrot_training - Step 23: {'lr': 5.75e-06, 'samples': 12288, 'steps': 23, 'loss/train': 9.552414894104004} +02/24/2022 01:36:26 - INFO - codeparrot_training - Step 24: {'lr': 6e-06, 'samples': 12800, 'steps': 24, 'loss/train': 10.253140449523926} +02/24/2022 01:36:32 - INFO - codeparrot_training - Step 25: {'lr': 6.25e-06, 'samples': 13312, 'steps': 25, 'loss/train': 9.579047203063965} +02/24/2022 01:36:35 - INFO - codeparrot_training - Step 26: {'lr': 6.5e-06, 'samples': 13824, 'steps': 26, 'loss/train': 10.285812377929688} +02/24/2022 01:36:41 - INFO - codeparrot_training - Step 27: {'lr': 6.75e-06, 'samples': 14336, 'steps': 27, 'loss/train': 10.226362228393555} +02/24/2022 01:36:44 - INFO - codeparrot_training - Step 28: {'lr': 7e-06, 'samples': 14848, 'steps': 28, 'loss/train': 9.725104331970215} +02/24/2022 01:36:50 - INFO - codeparrot_training - Step 29: {'lr': 7.250000000000001e-06, 'samples': 15360, 'steps': 29, 'loss/train': 9.937402725219727} +02/24/2022 01:36:53 - INFO - codeparrot_training - Step 30: {'lr': 7.5e-06, 'samples': 15872, 'steps': 30, 'loss/train': 9.978718757629395} +02/24/2022 01:36:59 - INFO - codeparrot_training - Step 31: {'lr': 7.75e-06, 'samples': 16384, 'steps': 31, 'loss/train': 9.851637840270996} +02/24/2022 01:37:02 - INFO - codeparrot_training - Step 32: {'lr': 8e-06, 'samples': 16896, 'steps': 32, 'loss/train': 9.715484619140625} +02/24/2022 01:37:08 - INFO - codeparrot_training - Step 33: {'lr': 8.25e-06, 'samples': 17408, 'steps': 33, 'loss/train': 9.392934799194336} +02/24/2022 01:37:11 - INFO - codeparrot_training - Step 34: {'lr': 8.500000000000002e-06, 'samples': 17920, 'steps': 34, 'loss/train': 9.88122844696045} +02/24/2022 01:37:18 - INFO - codeparrot_training - Step 35: {'lr': 8.750000000000001e-06, 'samples': 18432, 'steps': 35, 'loss/train': 9.911639213562012} +02/24/2022 01:37:21 - INFO - codeparrot_training - Step 36: {'lr': 9e-06, 'samples': 18944, 'steps': 36, 'loss/train': 10.15811538696289} +02/24/2022 01:37:26 - INFO - codeparrot_training - Step 37: {'lr': 9.25e-06, 'samples': 19456, 'steps': 37, 'loss/train': 9.33040714263916} +02/24/2022 01:37:30 - INFO - codeparrot_training - Step 38: {'lr': 9.5e-06, 'samples': 19968, 'steps': 38, 'loss/train': 9.846328735351562} +02/24/2022 01:37:35 - INFO - codeparrot_training - Step 39: {'lr': 9.75e-06, 'samples': 20480, 'steps': 39, 'loss/train': 9.611072540283203} +02/24/2022 01:37:39 - INFO - codeparrot_training - Step 40: {'lr': 1e-05, 'samples': 20992, 'steps': 40, 'loss/train': 9.038996696472168} +02/24/2022 01:37:44 - INFO - codeparrot_training - Step 41: {'lr': 1.025e-05, 'samples': 21504, 'steps': 41, 'loss/train': 9.312322616577148} +02/24/2022 01:37:48 - INFO - codeparrot_training - Step 42: {'lr': 1.0500000000000001e-05, 'samples': 22016, 'steps': 42, 'loss/train': 9.412076950073242} +02/24/2022 01:37:53 - INFO - codeparrot_training - Step 43: {'lr': 1.0749999999999999e-05, 'samples': 22528, 'steps': 43, 'loss/train': 9.228338241577148} +02/24/2022 01:37:57 - INFO - codeparrot_training - Step 44: {'lr': 1.1e-05, 'samples': 23040, 'steps': 44, 'loss/train': 9.625134468078613} +02/24/2022 01:38:03 - INFO - codeparrot_training - Step 45: 
{'lr': 1.1249999999999999e-05, 'samples': 23552, 'steps': 45, 'loss/train': 9.419281005859375} +02/24/2022 01:38:06 - INFO - codeparrot_training - Step 46: {'lr': 1.15e-05, 'samples': 24064, 'steps': 46, 'loss/train': 9.415852546691895} +02/24/2022 01:38:11 - INFO - codeparrot_training - Step 47: {'lr': 1.1750000000000001e-05, 'samples': 24576, 'steps': 47, 'loss/train': 9.942625045776367} +02/24/2022 01:38:15 - INFO - codeparrot_training - Step 48: {'lr': 1.2e-05, 'samples': 25088, 'steps': 48, 'loss/train': 9.21979808807373} +02/24/2022 01:38:20 - INFO - codeparrot_training - Step 49: {'lr': 1.2250000000000001e-05, 'samples': 25600, 'steps': 49, 'loss/train': 9.481938362121582} +02/24/2022 01:38:26 - INFO - codeparrot_training - Step 50: {'lr': 1.25e-05, 'samples': 26112, 'steps': 50, 'loss/train': 9.65326976776123} +02/24/2022 01:38:29 - INFO - codeparrot_training - Step 51: {'lr': 1.275e-05, 'samples': 26624, 'steps': 51, 'loss/train': 9.695452690124512} +02/24/2022 01:38:35 - INFO - codeparrot_training - Step 52: {'lr': 1.3e-05, 'samples': 27136, 'steps': 52, 'loss/train': 9.77243709564209} +02/24/2022 01:38:38 - INFO - codeparrot_training - Step 53: {'lr': 1.325e-05, 'samples': 27648, 'steps': 53, 'loss/train': 9.193220138549805} +02/24/2022 01:38:44 - INFO - codeparrot_training - Step 54: {'lr': 1.35e-05, 'samples': 28160, 'steps': 54, 'loss/train': 9.582751274108887} +02/24/2022 01:38:47 - INFO - codeparrot_training - Step 55: {'lr': 1.375e-05, 'samples': 28672, 'steps': 55, 'loss/train': 10.538885116577148} +02/24/2022 01:38:53 - INFO - codeparrot_training - Step 56: {'lr': 1.4e-05, 'samples': 29184, 'steps': 56, 'loss/train': 9.603676795959473} +02/24/2022 01:38:56 - INFO - codeparrot_training - Step 57: {'lr': 1.425e-05, 'samples': 29696, 'steps': 57, 'loss/train': 9.933730125427246} +02/24/2022 01:39:02 - INFO - codeparrot_training - Step 58: {'lr': 1.4500000000000002e-05, 'samples': 30208, 'steps': 58, 'loss/train': 10.022768020629883} +02/24/2022 01:39:06 - INFO - codeparrot_training - Step 59: {'lr': 1.475e-05, 'samples': 30720, 'steps': 59, 'loss/train': 9.456661224365234} +02/24/2022 01:39:11 - INFO - codeparrot_training - Step 60: {'lr': 1.5e-05, 'samples': 31232, 'steps': 60, 'loss/train': 9.61857795715332} +02/24/2022 01:39:15 - INFO - codeparrot_training - Step 61: {'lr': 1.525e-05, 'samples': 31744, 'steps': 61, 'loss/train': 9.595654487609863} +02/24/2022 01:39:20 - INFO - codeparrot_training - Step 62: {'lr': 1.55e-05, 'samples': 32256, 'steps': 62, 'loss/train': 9.803630828857422} +02/24/2022 01:39:23 - INFO - codeparrot_training - Step 63: {'lr': 1.575e-05, 'samples': 32768, 'steps': 63, 'loss/train': 9.261384010314941} +02/24/2022 01:39:29 - INFO - codeparrot_training - Step 64: {'lr': 1.6e-05, 'samples': 33280, 'steps': 64, 'loss/train': 9.71921443939209} +02/24/2022 01:39:33 - INFO - codeparrot_training - Step 65: {'lr': 1.6250000000000002e-05, 'samples': 33792, 'steps': 65, 'loss/train': 9.623526573181152} +02/24/2022 01:39:38 - INFO - codeparrot_training - Step 66: {'lr': 1.65e-05, 'samples': 34304, 'steps': 66, 'loss/train': 9.844499588012695} +02/24/2022 01:39:42 - INFO - codeparrot_training - Step 67: {'lr': 1.675e-05, 'samples': 34816, 'steps': 67, 'loss/train': 9.011263847351074} +02/24/2022 01:39:47 - INFO - codeparrot_training - Step 68: {'lr': 1.7000000000000003e-05, 'samples': 35328, 'steps': 68, 'loss/train': 9.395312309265137} +02/24/2022 01:39:50 - INFO - codeparrot_training - Step 69: {'lr': 1.7250000000000003e-05, 'samples': 35840, 'steps': 69, 
'loss/train': 9.74931812286377} +02/24/2022 01:39:57 - INFO - codeparrot_training - Step 70: {'lr': 1.7500000000000002e-05, 'samples': 36352, 'steps': 70, 'loss/train': 9.2435884475708} +02/24/2022 01:40:00 - INFO - codeparrot_training - Step 71: {'lr': 1.7749999999999998e-05, 'samples': 36864, 'steps': 71, 'loss/train': 9.436172485351562} +02/24/2022 01:40:06 - INFO - codeparrot_training - Step 72: {'lr': 1.8e-05, 'samples': 37376, 'steps': 72, 'loss/train': 9.279879570007324} +02/24/2022 01:40:09 - INFO - codeparrot_training - Step 73: {'lr': 1.825e-05, 'samples': 37888, 'steps': 73, 'loss/train': 9.521175384521484} +02/24/2022 01:40:15 - INFO - codeparrot_training - Step 74: {'lr': 1.85e-05, 'samples': 38400, 'steps': 74, 'loss/train': 9.995550155639648} +02/24/2022 01:40:18 - INFO - codeparrot_training - Step 75: {'lr': 1.875e-05, 'samples': 38912, 'steps': 75, 'loss/train': 9.35321044921875} +02/24/2022 01:40:23 - INFO - codeparrot_training - Step 76: {'lr': 1.9e-05, 'samples': 39424, 'steps': 76, 'loss/train': 9.540994644165039} +02/24/2022 01:40:27 - INFO - codeparrot_training - Step 77: {'lr': 1.925e-05, 'samples': 39936, 'steps': 77, 'loss/train': 9.684625625610352} +02/24/2022 01:40:32 - INFO - codeparrot_training - Step 78: {'lr': 1.95e-05, 'samples': 40448, 'steps': 78, 'loss/train': 9.534080505371094} +02/24/2022 01:40:36 - INFO - codeparrot_training - Step 79: {'lr': 1.975e-05, 'samples': 40960, 'steps': 79, 'loss/train': 9.40109920501709} +02/24/2022 01:40:42 - INFO - codeparrot_training - Step 80: {'lr': 2e-05, 'samples': 41472, 'steps': 80, 'loss/train': 9.779088020324707} +02/24/2022 01:40:46 - INFO - codeparrot_training - Step 81: {'lr': 2.025e-05, 'samples': 41984, 'steps': 81, 'loss/train': 9.618664741516113} +02/24/2022 01:40:51 - INFO - codeparrot_training - Step 82: {'lr': 2.05e-05, 'samples': 42496, 'steps': 82, 'loss/train': 9.514649391174316} +02/24/2022 01:40:55 - INFO - codeparrot_training - Step 83: {'lr': 2.0750000000000003e-05, 'samples': 43008, 'steps': 83, 'loss/train': 9.943718910217285} +02/24/2022 01:41:00 - INFO - codeparrot_training - Step 84: {'lr': 2.1000000000000002e-05, 'samples': 43520, 'steps': 84, 'loss/train': 9.961823463439941} +02/24/2022 01:41:04 - INFO - codeparrot_training - Step 85: {'lr': 2.125e-05, 'samples': 44032, 'steps': 85, 'loss/train': 9.23841667175293} +02/24/2022 01:41:09 - INFO - codeparrot_training - Step 86: {'lr': 2.1499999999999997e-05, 'samples': 44544, 'steps': 86, 'loss/train': 9.898496627807617} +02/24/2022 01:41:12 - INFO - codeparrot_training - Step 87: {'lr': 2.175e-05, 'samples': 45056, 'steps': 87, 'loss/train': 10.449203491210938} +02/24/2022 01:41:18 - INFO - codeparrot_training - Step 88: {'lr': 2.2e-05, 'samples': 45568, 'steps': 88, 'loss/train': 9.512351036071777} +02/24/2022 01:41:21 - INFO - codeparrot_training - Step 89: {'lr': 2.225e-05, 'samples': 46080, 'steps': 89, 'loss/train': 9.259896278381348} +02/24/2022 01:41:27 - INFO - codeparrot_training - Step 90: {'lr': 2.2499999999999998e-05, 'samples': 46592, 'steps': 90, 'loss/train': 9.768840789794922} +02/24/2022 01:41:31 - INFO - codeparrot_training - Step 91: {'lr': 2.275e-05, 'samples': 47104, 'steps': 91, 'loss/train': 9.244466781616211} +02/24/2022 01:41:36 - INFO - codeparrot_training - Step 92: {'lr': 2.3e-05, 'samples': 47616, 'steps': 92, 'loss/train': 9.151692390441895} +02/24/2022 01:41:40 - INFO - codeparrot_training - Step 93: {'lr': 2.325e-05, 'samples': 48128, 'steps': 93, 'loss/train': 9.288126945495605} +02/24/2022 01:41:45 - INFO - 
codeparrot_training - Step 94: {'lr': 2.3500000000000002e-05, 'samples': 48640, 'steps': 94, 'loss/train': 9.82015609741211} +02/24/2022 01:41:49 - INFO - codeparrot_training - Step 95: {'lr': 2.375e-05, 'samples': 49152, 'steps': 95, 'loss/train': 10.049545288085938} +02/24/2022 01:41:54 - INFO - codeparrot_training - Step 96: {'lr': 2.4e-05, 'samples': 49664, 'steps': 96, 'loss/train': 9.537553787231445} +02/24/2022 01:41:58 - INFO - codeparrot_training - Step 97: {'lr': 2.425e-05, 'samples': 50176, 'steps': 97, 'loss/train': 9.354862213134766} +02/24/2022 01:42:03 - INFO - codeparrot_training - Step 98: {'lr': 2.4500000000000003e-05, 'samples': 50688, 'steps': 98, 'loss/train': 9.080880165100098} +02/24/2022 01:42:07 - INFO - codeparrot_training - Step 99: {'lr': 2.4750000000000002e-05, 'samples': 51200, 'steps': 99, 'loss/train': 8.6892671585083} +02/24/2022 01:42:13 - INFO - codeparrot_training - Step 100: {'lr': 2.5e-05, 'samples': 51712, 'steps': 100, 'loss/train': 9.663463592529297} +02/24/2022 01:42:18 - INFO - codeparrot_training - Step 101: {'lr': 2.525e-05, 'samples': 52224, 'steps': 101, 'loss/train': 9.291194915771484} +02/24/2022 01:42:22 - INFO - codeparrot_training - Step 102: {'lr': 2.55e-05, 'samples': 52736, 'steps': 102, 'loss/train': 9.83191967010498} +02/24/2022 01:42:27 - INFO - codeparrot_training - Step 103: {'lr': 2.575e-05, 'samples': 53248, 'steps': 103, 'loss/train': 9.29751205444336} +02/24/2022 01:42:30 - INFO - codeparrot_training - Step 104: {'lr': 2.6e-05, 'samples': 53760, 'steps': 104, 'loss/train': 9.098075866699219} +02/24/2022 01:42:36 - INFO - codeparrot_training - Step 105: {'lr': 2.625e-05, 'samples': 54272, 'steps': 105, 'loss/train': 8.863370895385742} +02/24/2022 01:42:39 - INFO - codeparrot_training - Step 106: {'lr': 2.65e-05, 'samples': 54784, 'steps': 106, 'loss/train': 9.174436569213867} +02/24/2022 01:42:45 - INFO - codeparrot_training - Step 107: {'lr': 2.675e-05, 'samples': 55296, 'steps': 107, 'loss/train': 9.370267868041992} +02/24/2022 01:42:48 - INFO - codeparrot_training - Step 108: {'lr': 2.7e-05, 'samples': 55808, 'steps': 108, 'loss/train': 8.78602123260498} +02/24/2022 01:42:54 - INFO - codeparrot_training - Step 109: {'lr': 2.725e-05, 'samples': 56320, 'steps': 109, 'loss/train': 9.10840129852295} +02/24/2022 01:42:57 - INFO - codeparrot_training - Step 110: {'lr': 2.75e-05, 'samples': 56832, 'steps': 110, 'loss/train': 9.094649314880371} +02/24/2022 01:43:02 - INFO - codeparrot_training - Step 111: {'lr': 2.775e-05, 'samples': 57344, 'steps': 111, 'loss/train': 9.771917343139648} +02/24/2022 01:43:06 - INFO - codeparrot_training - Step 112: {'lr': 2.8e-05, 'samples': 57856, 'steps': 112, 'loss/train': 9.095439910888672} +02/24/2022 01:43:12 - INFO - codeparrot_training - Step 113: {'lr': 2.8250000000000002e-05, 'samples': 58368, 'steps': 113, 'loss/train': 9.394959449768066} +02/24/2022 01:43:15 - INFO - codeparrot_training - Step 114: {'lr': 2.85e-05, 'samples': 58880, 'steps': 114, 'loss/train': 9.721589088439941} +02/24/2022 01:43:21 - INFO - codeparrot_training - Step 115: {'lr': 2.875e-05, 'samples': 59392, 'steps': 115, 'loss/train': 7.896186828613281} +02/24/2022 01:43:24 - INFO - codeparrot_training - Step 116: {'lr': 2.9000000000000004e-05, 'samples': 59904, 'steps': 116, 'loss/train': 7.90242862701416} +02/24/2022 01:43:30 - INFO - codeparrot_training - Step 117: {'lr': 2.9250000000000003e-05, 'samples': 60416, 'steps': 117, 'loss/train': 9.375008583068848} +02/24/2022 01:43:33 - INFO - codeparrot_training - Step 
118: {'lr': 2.95e-05, 'samples': 60928, 'steps': 118, 'loss/train': 8.5529203414917} +02/24/2022 01:43:39 - INFO - codeparrot_training - Step 119: {'lr': 2.9749999999999998e-05, 'samples': 61440, 'steps': 119, 'loss/train': 8.504928588867188} +02/24/2022 01:43:42 - INFO - codeparrot_training - Step 120: {'lr': 3e-05, 'samples': 61952, 'steps': 120, 'loss/train': 9.61538028717041} +02/24/2022 01:43:48 - INFO - codeparrot_training - Step 121: {'lr': 3.025e-05, 'samples': 62464, 'steps': 121, 'loss/train': 9.16252613067627} +02/24/2022 01:43:51 - INFO - codeparrot_training - Step 122: {'lr': 3.05e-05, 'samples': 62976, 'steps': 122, 'loss/train': 9.315605163574219} +02/24/2022 01:43:57 - INFO - codeparrot_training - Step 123: {'lr': 3.075e-05, 'samples': 63488, 'steps': 123, 'loss/train': 9.257500648498535} +02/24/2022 01:44:00 - INFO - codeparrot_training - Step 124: {'lr': 3.1e-05, 'samples': 64000, 'steps': 124, 'loss/train': 8.4464693069458} +02/24/2022 01:44:06 - INFO - codeparrot_training - Step 125: {'lr': 3.125e-05, 'samples': 64512, 'steps': 125, 'loss/train': 9.038679122924805} +02/24/2022 01:44:10 - INFO - codeparrot_training - Step 126: {'lr': 3.15e-05, 'samples': 65024, 'steps': 126, 'loss/train': 8.971589088439941} +02/24/2022 01:44:15 - INFO - codeparrot_training - Step 127: {'lr': 3.175e-05, 'samples': 65536, 'steps': 127, 'loss/train': 9.091878890991211} +02/24/2022 01:44:19 - INFO - codeparrot_training - Step 128: {'lr': 3.2e-05, 'samples': 66048, 'steps': 128, 'loss/train': 8.642971992492676} +02/24/2022 01:44:24 - INFO - codeparrot_training - Step 129: {'lr': 3.2250000000000005e-05, 'samples': 66560, 'steps': 129, 'loss/train': 9.095447540283203} +02/24/2022 01:44:27 - INFO - codeparrot_training - Step 130: {'lr': 3.2500000000000004e-05, 'samples': 67072, 'steps': 130, 'loss/train': 8.557703018188477} +02/24/2022 01:44:33 - INFO - codeparrot_training - Step 131: {'lr': 3.275e-05, 'samples': 67584, 'steps': 131, 'loss/train': 8.567814826965332} +02/24/2022 01:44:38 - INFO - codeparrot_training - Step 132: {'lr': 3.3e-05, 'samples': 68096, 'steps': 132, 'loss/train': 8.998651504516602} +02/24/2022 01:44:42 - INFO - codeparrot_training - Step 133: {'lr': 3.325e-05, 'samples': 68608, 'steps': 133, 'loss/train': 8.84678840637207} +02/24/2022 01:44:49 - INFO - codeparrot_training - Step 134: {'lr': 3.35e-05, 'samples': 69120, 'steps': 134, 'loss/train': 8.768152236938477} +02/24/2022 01:44:52 - INFO - codeparrot_training - Step 135: {'lr': 3.375e-05, 'samples': 69632, 'steps': 135, 'loss/train': 8.87886905670166} +02/24/2022 01:44:58 - INFO - codeparrot_training - Step 136: {'lr': 3.4000000000000007e-05, 'samples': 70144, 'steps': 136, 'loss/train': 9.644051551818848} +02/24/2022 01:45:01 - INFO - codeparrot_training - Step 137: {'lr': 3.4250000000000006e-05, 'samples': 70656, 'steps': 137, 'loss/train': 9.087210655212402} +02/24/2022 01:45:06 - INFO - codeparrot_training - Step 138: {'lr': 3.4500000000000005e-05, 'samples': 71168, 'steps': 138, 'loss/train': 9.06340503692627} +02/24/2022 01:45:10 - INFO - codeparrot_training - Step 139: {'lr': 3.4750000000000004e-05, 'samples': 71680, 'steps': 139, 'loss/train': 8.886314392089844} +02/24/2022 01:45:15 - INFO - codeparrot_training - Step 140: {'lr': 3.5000000000000004e-05, 'samples': 72192, 'steps': 140, 'loss/train': 9.091540336608887} +02/24/2022 01:45:19 - INFO - codeparrot_training - Step 141: {'lr': 3.5249999999999996e-05, 'samples': 72704, 'steps': 141, 'loss/train': 9.188919067382812} +02/24/2022 01:45:24 - INFO - 
codeparrot_training - Step 142: {'lr': 3.5499999999999996e-05, 'samples': 73216, 'steps': 142, 'loss/train': 9.140847206115723} +02/24/2022 01:45:28 - INFO - codeparrot_training - Step 143: {'lr': 3.5749999999999995e-05, 'samples': 73728, 'steps': 143, 'loss/train': 8.462065696716309} +02/24/2022 01:45:33 - INFO - codeparrot_training - Step 144: {'lr': 3.6e-05, 'samples': 74240, 'steps': 144, 'loss/train': 8.03787899017334} +02/24/2022 01:45:37 - INFO - codeparrot_training - Step 145: {'lr': 3.625e-05, 'samples': 74752, 'steps': 145, 'loss/train': 8.804536819458008} +02/24/2022 01:45:43 - INFO - codeparrot_training - Step 146: {'lr': 3.65e-05, 'samples': 75264, 'steps': 146, 'loss/train': 9.822867393493652} +02/24/2022 01:45:46 - INFO - codeparrot_training - Step 147: {'lr': 3.675e-05, 'samples': 75776, 'steps': 147, 'loss/train': 9.119757652282715} +02/24/2022 01:45:52 - INFO - codeparrot_training - Step 148: {'lr': 3.7e-05, 'samples': 76288, 'steps': 148, 'loss/train': 8.35252571105957} +02/24/2022 01:45:55 - INFO - codeparrot_training - Step 149: {'lr': 3.725e-05, 'samples': 76800, 'steps': 149, 'loss/train': 8.668418884277344} +02/24/2022 01:46:01 - INFO - codeparrot_training - Step 150: {'lr': 3.75e-05, 'samples': 77312, 'steps': 150, 'loss/train': 8.233305931091309} +02/24/2022 01:46:04 - INFO - codeparrot_training - Step 151: {'lr': 3.775e-05, 'samples': 77824, 'steps': 151, 'loss/train': 9.180137634277344} +02/24/2022 01:46:10 - INFO - codeparrot_training - Step 152: {'lr': 3.8e-05, 'samples': 78336, 'steps': 152, 'loss/train': 8.461322784423828} +02/24/2022 01:46:13 - INFO - codeparrot_training - Step 153: {'lr': 3.825e-05, 'samples': 78848, 'steps': 153, 'loss/train': 8.905062675476074} +02/24/2022 01:46:19 - INFO - codeparrot_training - Step 154: {'lr': 3.85e-05, 'samples': 79360, 'steps': 154, 'loss/train': 8.642675399780273} +02/24/2022 01:46:23 - INFO - codeparrot_training - Step 155: {'lr': 3.875e-05, 'samples': 79872, 'steps': 155, 'loss/train': 8.070643424987793} +02/24/2022 01:46:28 - INFO - codeparrot_training - Step 156: {'lr': 3.9e-05, 'samples': 80384, 'steps': 156, 'loss/train': 5.581755638122559} +02/24/2022 01:46:32 - INFO - codeparrot_training - Step 157: {'lr': 3.925e-05, 'samples': 80896, 'steps': 157, 'loss/train': 8.737156867980957} +02/24/2022 01:46:37 - INFO - codeparrot_training - Step 158: {'lr': 3.95e-05, 'samples': 81408, 'steps': 158, 'loss/train': 8.90294075012207} +02/24/2022 01:46:41 - INFO - codeparrot_training - Step 159: {'lr': 3.9750000000000004e-05, 'samples': 81920, 'steps': 159, 'loss/train': 8.663994789123535} +02/24/2022 01:46:46 - INFO - codeparrot_training - Step 160: {'lr': 4e-05, 'samples': 82432, 'steps': 160, 'loss/train': 8.823864936828613} +02/24/2022 01:46:50 - INFO - codeparrot_training - Step 161: {'lr': 4.025e-05, 'samples': 82944, 'steps': 161, 'loss/train': 8.195619583129883} +02/24/2022 01:46:55 - INFO - codeparrot_training - Step 162: {'lr': 4.05e-05, 'samples': 83456, 'steps': 162, 'loss/train': 8.817365646362305} +02/24/2022 01:46:58 - INFO - codeparrot_training - Step 163: {'lr': 4.075e-05, 'samples': 83968, 'steps': 163, 'loss/train': 9.630975723266602} +02/24/2022 01:47:04 - INFO - codeparrot_training - Step 164: {'lr': 4.1e-05, 'samples': 84480, 'steps': 164, 'loss/train': 8.188645362854004} +02/24/2022 01:47:07 - INFO - codeparrot_training - Step 165: {'lr': 4.125e-05, 'samples': 84992, 'steps': 165, 'loss/train': 10.159860610961914} +02/24/2022 01:47:13 - INFO - codeparrot_training - Step 166: {'lr': 
4.1500000000000006e-05, 'samples': 85504, 'steps': 166, 'loss/train': 8.092951774597168} +02/24/2022 01:47:16 - INFO - codeparrot_training - Step 167: {'lr': 4.1750000000000005e-05, 'samples': 86016, 'steps': 167, 'loss/train': 9.026200294494629} +02/24/2022 01:47:22 - INFO - codeparrot_training - Step 168: {'lr': 4.2000000000000004e-05, 'samples': 86528, 'steps': 168, 'loss/train': 8.288607597351074} +02/24/2022 01:47:25 - INFO - codeparrot_training - Step 169: {'lr': 4.2250000000000004e-05, 'samples': 87040, 'steps': 169, 'loss/train': 7.285762786865234} +02/24/2022 01:47:31 - INFO - codeparrot_training - Step 170: {'lr': 4.25e-05, 'samples': 87552, 'steps': 170, 'loss/train': 8.039697647094727} +02/24/2022 01:47:35 - INFO - codeparrot_training - Step 171: {'lr': 4.275e-05, 'samples': 88064, 'steps': 171, 'loss/train': 8.41202163696289} +02/24/2022 01:47:40 - INFO - codeparrot_training - Step 172: {'lr': 4.2999999999999995e-05, 'samples': 88576, 'steps': 172, 'loss/train': 8.135007858276367} +02/24/2022 01:47:44 - INFO - codeparrot_training - Step 173: {'lr': 4.325e-05, 'samples': 89088, 'steps': 173, 'loss/train': 8.630109786987305} +02/24/2022 01:47:49 - INFO - codeparrot_training - Step 174: {'lr': 4.35e-05, 'samples': 89600, 'steps': 174, 'loss/train': 8.232574462890625} +02/24/2022 01:47:52 - INFO - codeparrot_training - Step 175: {'lr': 4.375e-05, 'samples': 90112, 'steps': 175, 'loss/train': 7.799593925476074} +02/24/2022 01:47:58 - INFO - codeparrot_training - Step 176: {'lr': 4.4e-05, 'samples': 90624, 'steps': 176, 'loss/train': 8.399349212646484} +02/24/2022 01:48:01 - INFO - codeparrot_training - Step 177: {'lr': 4.425e-05, 'samples': 91136, 'steps': 177, 'loss/train': 8.36412239074707} +02/24/2022 01:48:07 - INFO - codeparrot_training - Step 178: {'lr': 4.45e-05, 'samples': 91648, 'steps': 178, 'loss/train': 9.112398147583008} +02/24/2022 01:48:10 - INFO - codeparrot_training - Step 179: {'lr': 4.475e-05, 'samples': 92160, 'steps': 179, 'loss/train': 7.641449928283691} +02/24/2022 01:48:16 - INFO - codeparrot_training - Step 180: {'lr': 4.4999999999999996e-05, 'samples': 92672, 'steps': 180, 'loss/train': 9.119322776794434} +02/24/2022 01:48:19 - INFO - codeparrot_training - Step 181: {'lr': 4.525e-05, 'samples': 93184, 'steps': 181, 'loss/train': 8.35004711151123} +02/24/2022 01:48:25 - INFO - codeparrot_training - Step 182: {'lr': 4.55e-05, 'samples': 93696, 'steps': 182, 'loss/train': 8.301406860351562} +02/24/2022 01:48:29 - INFO - codeparrot_training - Step 183: {'lr': 4.575e-05, 'samples': 94208, 'steps': 183, 'loss/train': 8.395172119140625} +02/24/2022 01:48:34 - INFO - codeparrot_training - Step 184: {'lr': 4.6e-05, 'samples': 94720, 'steps': 184, 'loss/train': 10.358185768127441} +02/24/2022 01:48:38 - INFO - codeparrot_training - Step 185: {'lr': 4.625e-05, 'samples': 95232, 'steps': 185, 'loss/train': 7.754116535186768} +02/24/2022 01:48:43 - INFO - codeparrot_training - Step 186: {'lr': 4.65e-05, 'samples': 95744, 'steps': 186, 'loss/train': 8.634472846984863} +02/24/2022 01:48:46 - INFO - codeparrot_training - Step 187: {'lr': 4.675e-05, 'samples': 96256, 'steps': 187, 'loss/train': 9.413156509399414} +02/24/2022 01:48:52 - INFO - codeparrot_training - Step 188: {'lr': 4.7000000000000004e-05, 'samples': 96768, 'steps': 188, 'loss/train': 8.477133750915527} +02/24/2022 01:48:55 - INFO - codeparrot_training - Step 189: {'lr': 4.725e-05, 'samples': 97280, 'steps': 189, 'loss/train': 8.565733909606934} +02/24/2022 01:49:01 - INFO - codeparrot_training - Step 190: 
{'lr': 4.75e-05, 'samples': 97792, 'steps': 190, 'loss/train': 8.980598449707031} +02/24/2022 01:49:05 - INFO - codeparrot_training - Step 191: {'lr': 4.775e-05, 'samples': 98304, 'steps': 191, 'loss/train': 8.175108909606934} +02/24/2022 01:49:11 - INFO - codeparrot_training - Step 192: {'lr': 4.8e-05, 'samples': 98816, 'steps': 192, 'loss/train': 7.164796829223633} +02/24/2022 01:49:14 - INFO - codeparrot_training - Step 193: {'lr': 4.825e-05, 'samples': 99328, 'steps': 193, 'loss/train': 7.408548831939697} +02/24/2022 01:49:20 - INFO - codeparrot_training - Step 194: {'lr': 4.85e-05, 'samples': 99840, 'steps': 194, 'loss/train': 8.422624588012695} +02/24/2022 01:49:23 - INFO - codeparrot_training - Step 195: {'lr': 4.8750000000000006e-05, 'samples': 100352, 'steps': 195, 'loss/train': 7.857550621032715} +02/24/2022 01:49:29 - INFO - codeparrot_training - Step 196: {'lr': 4.9000000000000005e-05, 'samples': 100864, 'steps': 196, 'loss/train': 7.618260860443115} +02/24/2022 01:49:32 - INFO - codeparrot_training - Step 197: {'lr': 4.9250000000000004e-05, 'samples': 101376, 'steps': 197, 'loss/train': 8.605622291564941} +02/24/2022 01:49:37 - INFO - codeparrot_training - Step 198: {'lr': 4.9500000000000004e-05, 'samples': 101888, 'steps': 198, 'loss/train': 7.873624801635742} +02/24/2022 01:49:41 - INFO - codeparrot_training - Step 199: {'lr': 4.975e-05, 'samples': 102400, 'steps': 199, 'loss/train': 9.396079063415527} +02/24/2022 01:49:46 - INFO - codeparrot_training - Step 200: {'lr': 5e-05, 'samples': 102912, 'steps': 200, 'loss/train': 9.744832992553711} +02/24/2022 01:49:50 - INFO - codeparrot_training - Step 201: {'lr': 5.025e-05, 'samples': 103424, 'steps': 201, 'loss/train': 8.990636825561523} +02/24/2022 01:49:56 - INFO - codeparrot_training - Step 202: {'lr': 5.05e-05, 'samples': 103936, 'steps': 202, 'loss/train': 8.19791030883789} +02/24/2022 01:49:59 - INFO - codeparrot_training - Step 203: {'lr': 5.075000000000001e-05, 'samples': 104448, 'steps': 203, 'loss/train': 8.748708724975586} +02/24/2022 01:50:05 - INFO - codeparrot_training - Step 204: {'lr': 5.1e-05, 'samples': 104960, 'steps': 204, 'loss/train': 10.17846393585205} +02/24/2022 01:50:08 - INFO - codeparrot_training - Step 205: {'lr': 5.125e-05, 'samples': 105472, 'steps': 205, 'loss/train': 8.256689071655273} +02/24/2022 01:50:14 - INFO - codeparrot_training - Step 206: {'lr': 5.15e-05, 'samples': 105984, 'steps': 206, 'loss/train': 8.524040222167969} +02/24/2022 01:50:17 - INFO - codeparrot_training - Step 207: {'lr': 5.175e-05, 'samples': 106496, 'steps': 207, 'loss/train': 8.013733863830566} +02/24/2022 01:50:23 - INFO - codeparrot_training - Step 208: {'lr': 5.2e-05, 'samples': 107008, 'steps': 208, 'loss/train': 8.974383354187012} +02/24/2022 01:50:26 - INFO - codeparrot_training - Step 209: {'lr': 5.2249999999999996e-05, 'samples': 107520, 'steps': 209, 'loss/train': 7.138608932495117} +02/24/2022 01:50:32 - INFO - codeparrot_training - Step 210: {'lr': 5.25e-05, 'samples': 108032, 'steps': 210, 'loss/train': 8.761432647705078} +02/24/2022 01:50:35 - INFO - codeparrot_training - Step 211: {'lr': 5.275e-05, 'samples': 108544, 'steps': 211, 'loss/train': 8.35822868347168} +02/24/2022 01:50:41 - INFO - codeparrot_training - Step 212: {'lr': 5.3e-05, 'samples': 109056, 'steps': 212, 'loss/train': 7.769471168518066} +02/24/2022 01:50:44 - INFO - codeparrot_training - Step 213: {'lr': 5.325e-05, 'samples': 109568, 'steps': 213, 'loss/train': 7.3015055656433105} +02/24/2022 01:50:50 - INFO - codeparrot_training - Step 
214: {'lr': 5.35e-05, 'samples': 110080, 'steps': 214, 'loss/train': 8.458576202392578} +02/24/2022 01:50:55 - INFO - codeparrot_training - Step 215: {'lr': 5.375e-05, 'samples': 110592, 'steps': 215, 'loss/train': 7.801426410675049} +02/24/2022 01:50:59 - INFO - codeparrot_training - Step 216: {'lr': 5.4e-05, 'samples': 111104, 'steps': 216, 'loss/train': 8.24003791809082} +02/24/2022 01:51:04 - INFO - codeparrot_training - Step 217: {'lr': 5.4250000000000004e-05, 'samples': 111616, 'steps': 217, 'loss/train': 8.818034172058105} +02/24/2022 01:51:08 - INFO - codeparrot_training - Step 218: {'lr': 5.45e-05, 'samples': 112128, 'steps': 218, 'loss/train': 7.830726146697998} +02/24/2022 01:51:13 - INFO - codeparrot_training - Step 219: {'lr': 5.475e-05, 'samples': 112640, 'steps': 219, 'loss/train': 8.32111644744873} +02/24/2022 01:51:17 - INFO - codeparrot_training - Step 220: {'lr': 5.5e-05, 'samples': 113152, 'steps': 220, 'loss/train': 7.4934258460998535} +02/24/2022 01:51:23 - INFO - codeparrot_training - Step 221: {'lr': 5.525e-05, 'samples': 113664, 'steps': 221, 'loss/train': 7.56221866607666} +02/24/2022 01:51:26 - INFO - codeparrot_training - Step 222: {'lr': 5.55e-05, 'samples': 114176, 'steps': 222, 'loss/train': 7.5190558433532715} +02/24/2022 01:51:31 - INFO - codeparrot_training - Step 223: {'lr': 5.575e-05, 'samples': 114688, 'steps': 223, 'loss/train': 7.6769304275512695} +02/24/2022 01:51:35 - INFO - codeparrot_training - Step 224: {'lr': 5.6e-05, 'samples': 115200, 'steps': 224, 'loss/train': 8.480721473693848} +02/24/2022 01:51:40 - INFO - codeparrot_training - Step 225: {'lr': 5.6250000000000005e-05, 'samples': 115712, 'steps': 225, 'loss/train': 7.005849838256836} +02/24/2022 01:51:44 - INFO - codeparrot_training - Step 226: {'lr': 5.6500000000000005e-05, 'samples': 116224, 'steps': 226, 'loss/train': 7.340949058532715} +02/24/2022 01:51:49 - INFO - codeparrot_training - Step 227: {'lr': 5.6750000000000004e-05, 'samples': 116736, 'steps': 227, 'loss/train': 8.233819961547852} +02/24/2022 01:51:53 - INFO - codeparrot_training - Step 228: {'lr': 5.7e-05, 'samples': 117248, 'steps': 228, 'loss/train': 9.251286506652832} +02/24/2022 01:51:58 - INFO - codeparrot_training - Step 229: {'lr': 5.725e-05, 'samples': 117760, 'steps': 229, 'loss/train': 7.45113468170166} +02/24/2022 01:52:01 - INFO - codeparrot_training - Step 230: {'lr': 5.75e-05, 'samples': 118272, 'steps': 230, 'loss/train': 7.624856472015381} +02/24/2022 01:52:07 - INFO - codeparrot_training - Step 231: {'lr': 5.775e-05, 'samples': 118784, 'steps': 231, 'loss/train': 8.095080375671387} +02/24/2022 01:52:11 - INFO - codeparrot_training - Step 232: {'lr': 5.800000000000001e-05, 'samples': 119296, 'steps': 232, 'loss/train': 6.90823221206665} +02/24/2022 01:52:16 - INFO - codeparrot_training - Step 233: {'lr': 5.8250000000000006e-05, 'samples': 119808, 'steps': 233, 'loss/train': 8.450687408447266} +02/24/2022 01:52:20 - INFO - codeparrot_training - Step 234: {'lr': 5.8500000000000006e-05, 'samples': 120320, 'steps': 234, 'loss/train': 7.578708648681641} +02/24/2022 01:52:25 - INFO - codeparrot_training - Step 235: {'lr': 5.875e-05, 'samples': 120832, 'steps': 235, 'loss/train': 8.322768211364746} +02/24/2022 01:52:29 - INFO - codeparrot_training - Step 236: {'lr': 5.9e-05, 'samples': 121344, 'steps': 236, 'loss/train': 7.880550861358643} +02/24/2022 01:52:35 - INFO - codeparrot_training - Step 237: {'lr': 5.925e-05, 'samples': 121856, 'steps': 237, 'loss/train': 8.012900352478027} +02/24/2022 01:52:38 - INFO - 
codeparrot_training - Step 238: {'lr': 5.9499999999999996e-05, 'samples': 122368, 'steps': 238, 'loss/train': 8.338845252990723} +02/24/2022 01:52:44 - INFO - codeparrot_training - Step 239: {'lr': 5.9749999999999995e-05, 'samples': 122880, 'steps': 239, 'loss/train': 8.256518363952637} +02/24/2022 01:52:47 - INFO - codeparrot_training - Step 240: {'lr': 6e-05, 'samples': 123392, 'steps': 240, 'loss/train': 8.110925674438477} +02/24/2022 01:52:53 - INFO - codeparrot_training - Step 241: {'lr': 6.025e-05, 'samples': 123904, 'steps': 241, 'loss/train': 8.951800346374512} +02/24/2022 01:52:56 - INFO - codeparrot_training - Step 242: {'lr': 6.05e-05, 'samples': 124416, 'steps': 242, 'loss/train': 8.23705768585205} +02/24/2022 01:53:02 - INFO - codeparrot_training - Step 243: {'lr': 6.075e-05, 'samples': 124928, 'steps': 243, 'loss/train': 7.9062042236328125} +02/24/2022 01:53:05 - INFO - codeparrot_training - Step 244: {'lr': 6.1e-05, 'samples': 125440, 'steps': 244, 'loss/train': 10.34733772277832} +02/24/2022 01:53:10 - INFO - codeparrot_training - Step 245: {'lr': 6.125e-05, 'samples': 125952, 'steps': 245, 'loss/train': 8.160472869873047} +02/24/2022 01:53:14 - INFO - codeparrot_training - Step 246: {'lr': 6.15e-05, 'samples': 126464, 'steps': 246, 'loss/train': 8.468631744384766} +02/24/2022 01:53:20 - INFO - codeparrot_training - Step 247: {'lr': 6.175e-05, 'samples': 126976, 'steps': 247, 'loss/train': 8.497027397155762} +02/24/2022 01:53:23 - INFO - codeparrot_training - Step 248: {'lr': 6.2e-05, 'samples': 127488, 'steps': 248, 'loss/train': 7.5156755447387695} +02/24/2022 01:53:29 - INFO - codeparrot_training - Step 249: {'lr': 6.225e-05, 'samples': 128000, 'steps': 249, 'loss/train': 8.726844787597656} +02/24/2022 01:53:32 - INFO - codeparrot_training - Step 250: {'lr': 6.25e-05, 'samples': 128512, 'steps': 250, 'loss/train': 8.230809211730957} +02/24/2022 01:53:38 - INFO - codeparrot_training - Step 251: {'lr': 6.275000000000001e-05, 'samples': 129024, 'steps': 251, 'loss/train': 8.839653015136719} +02/24/2022 01:53:41 - INFO - codeparrot_training - Step 252: {'lr': 6.3e-05, 'samples': 129536, 'steps': 252, 'loss/train': 7.990950584411621} +02/24/2022 01:53:47 - INFO - codeparrot_training - Step 253: {'lr': 6.325e-05, 'samples': 130048, 'steps': 253, 'loss/train': 7.565497398376465} +02/24/2022 01:53:50 - INFO - codeparrot_training - Step 254: {'lr': 6.35e-05, 'samples': 130560, 'steps': 254, 'loss/train': 7.676276206970215} +02/24/2022 01:53:55 - INFO - codeparrot_training - Step 255: {'lr': 6.375e-05, 'samples': 131072, 'steps': 255, 'loss/train': 8.033483505249023} +02/24/2022 01:53:59 - INFO - codeparrot_training - Step 256: {'lr': 6.4e-05, 'samples': 131584, 'steps': 256, 'loss/train': 8.045367240905762} +02/24/2022 01:54:05 - INFO - codeparrot_training - Step 257: {'lr': 6.425e-05, 'samples': 132096, 'steps': 257, 'loss/train': 8.210289001464844} +02/24/2022 01:54:08 - INFO - codeparrot_training - Step 258: {'lr': 6.450000000000001e-05, 'samples': 132608, 'steps': 258, 'loss/train': 7.078313827514648} +02/24/2022 01:54:14 - INFO - codeparrot_training - Step 259: {'lr': 6.475e-05, 'samples': 133120, 'steps': 259, 'loss/train': 9.826390266418457} +02/24/2022 01:54:18 - INFO - codeparrot_training - Step 260: {'lr': 6.500000000000001e-05, 'samples': 133632, 'steps': 260, 'loss/train': 7.573724746704102} +02/24/2022 01:54:23 - INFO - codeparrot_training - Step 261: {'lr': 6.525e-05, 'samples': 134144, 'steps': 261, 'loss/train': 8.298879623413086} +02/24/2022 01:54:26 - INFO - 
codeparrot_training - Step 262: {'lr': 6.55e-05, 'samples': 134656, 'steps': 262, 'loss/train': 8.041802406311035} +02/24/2022 01:54:32 - INFO - codeparrot_training - Step 263: {'lr': 6.575e-05, 'samples': 135168, 'steps': 263, 'loss/train': 8.012332916259766} +02/24/2022 01:54:35 - INFO - codeparrot_training - Step 264: {'lr': 6.6e-05, 'samples': 135680, 'steps': 264, 'loss/train': 7.702607154846191} +02/24/2022 01:54:41 - INFO - codeparrot_training - Step 265: {'lr': 6.625000000000001e-05, 'samples': 136192, 'steps': 265, 'loss/train': 8.194743156433105} +02/24/2022 01:54:44 - INFO - codeparrot_training - Step 266: {'lr': 6.65e-05, 'samples': 136704, 'steps': 266, 'loss/train': 8.723750114440918} +02/24/2022 01:54:50 - INFO - codeparrot_training - Step 267: {'lr': 6.675000000000001e-05, 'samples': 137216, 'steps': 267, 'loss/train': 7.491954326629639} +02/24/2022 01:54:54 - INFO - codeparrot_training - Step 268: {'lr': 6.7e-05, 'samples': 137728, 'steps': 268, 'loss/train': 8.32316780090332} +02/24/2022 01:54:59 - INFO - codeparrot_training - Step 269: {'lr': 6.725000000000001e-05, 'samples': 138240, 'steps': 269, 'loss/train': 7.601045608520508} +02/24/2022 01:55:03 - INFO - codeparrot_training - Step 270: {'lr': 6.75e-05, 'samples': 138752, 'steps': 270, 'loss/train': 8.582389831542969} +02/24/2022 01:55:08 - INFO - codeparrot_training - Step 271: {'lr': 6.775000000000001e-05, 'samples': 139264, 'steps': 271, 'loss/train': 7.930277347564697} +02/24/2022 01:55:12 - INFO - codeparrot_training - Step 272: {'lr': 6.800000000000001e-05, 'samples': 139776, 'steps': 272, 'loss/train': 7.568014621734619} +02/24/2022 01:55:18 - INFO - codeparrot_training - Step 273: {'lr': 6.825e-05, 'samples': 140288, 'steps': 273, 'loss/train': 7.254758358001709} +02/24/2022 01:55:21 - INFO - codeparrot_training - Step 274: {'lr': 6.850000000000001e-05, 'samples': 140800, 'steps': 274, 'loss/train': 7.01399564743042} +02/24/2022 01:55:26 - INFO - codeparrot_training - Step 275: {'lr': 6.875e-05, 'samples': 141312, 'steps': 275, 'loss/train': 7.368566989898682} +02/24/2022 01:55:30 - INFO - codeparrot_training - Step 276: {'lr': 6.900000000000001e-05, 'samples': 141824, 'steps': 276, 'loss/train': 7.3699870109558105} +02/24/2022 01:55:35 - INFO - codeparrot_training - Step 277: {'lr': 6.925e-05, 'samples': 142336, 'steps': 277, 'loss/train': 7.630679130554199} +02/24/2022 01:55:39 - INFO - codeparrot_training - Step 278: {'lr': 6.950000000000001e-05, 'samples': 142848, 'steps': 278, 'loss/train': 8.27759838104248} +02/24/2022 01:55:44 - INFO - codeparrot_training - Step 279: {'lr': 6.975e-05, 'samples': 143360, 'steps': 279, 'loss/train': 8.460628509521484} +02/24/2022 01:55:48 - INFO - codeparrot_training - Step 280: {'lr': 7.000000000000001e-05, 'samples': 143872, 'steps': 280, 'loss/train': 8.191814422607422} +02/24/2022 01:55:53 - INFO - codeparrot_training - Step 281: {'lr': 7.025000000000001e-05, 'samples': 144384, 'steps': 281, 'loss/train': 7.863226413726807} +02/24/2022 01:55:56 - INFO - codeparrot_training - Step 282: {'lr': 7.049999999999999e-05, 'samples': 144896, 'steps': 282, 'loss/train': 6.898287296295166} +02/24/2022 01:56:03 - INFO - codeparrot_training - Step 283: {'lr': 7.075e-05, 'samples': 145408, 'steps': 283, 'loss/train': 7.9318132400512695} +02/24/2022 01:56:06 - INFO - codeparrot_training - Step 284: {'lr': 7.099999999999999e-05, 'samples': 145920, 'steps': 284, 'loss/train': 8.058138847351074} +02/24/2022 01:56:12 - INFO - codeparrot_training - Step 285: {'lr': 7.125e-05, 'samples': 
146432, 'steps': 285, 'loss/train': 7.8580546379089355} +02/24/2022 01:56:15 - INFO - codeparrot_training - Step 286: {'lr': 7.149999999999999e-05, 'samples': 146944, 'steps': 286, 'loss/train': 7.834128379821777} +02/24/2022 01:56:20 - INFO - codeparrot_training - Step 287: {'lr': 7.175e-05, 'samples': 147456, 'steps': 287, 'loss/train': 7.871674060821533} +02/24/2022 01:56:24 - INFO - codeparrot_training - Step 288: {'lr': 7.2e-05, 'samples': 147968, 'steps': 288, 'loss/train': 8.25328254699707} +02/24/2022 01:56:29 - INFO - codeparrot_training - Step 289: {'lr': 7.225e-05, 'samples': 148480, 'steps': 289, 'loss/train': 7.208605766296387} +02/24/2022 01:56:33 - INFO - codeparrot_training - Step 290: {'lr': 7.25e-05, 'samples': 148992, 'steps': 290, 'loss/train': 5.396283149719238} +02/24/2022 01:56:38 - INFO - codeparrot_training - Step 291: {'lr': 7.274999999999999e-05, 'samples': 149504, 'steps': 291, 'loss/train': 7.4003190994262695} +02/24/2022 01:56:42 - INFO - codeparrot_training - Step 292: {'lr': 7.3e-05, 'samples': 150016, 'steps': 292, 'loss/train': 8.524874687194824} +02/24/2022 01:56:48 - INFO - codeparrot_training - Step 293: {'lr': 7.324999999999999e-05, 'samples': 150528, 'steps': 293, 'loss/train': 5.260894775390625} +02/24/2022 01:56:52 - INFO - codeparrot_training - Step 294: {'lr': 7.35e-05, 'samples': 151040, 'steps': 294, 'loss/train': 7.498438835144043} +02/24/2022 01:56:57 - INFO - codeparrot_training - Step 295: {'lr': 7.375e-05, 'samples': 151552, 'steps': 295, 'loss/train': 7.932528972625732} +02/24/2022 01:57:00 - INFO - codeparrot_training - Step 296: {'lr': 7.4e-05, 'samples': 152064, 'steps': 296, 'loss/train': 7.992652416229248} +02/24/2022 01:57:06 - INFO - codeparrot_training - Step 297: {'lr': 7.425e-05, 'samples': 152576, 'steps': 297, 'loss/train': 7.58806037902832} +02/24/2022 01:57:09 - INFO - codeparrot_training - Step 298: {'lr': 7.45e-05, 'samples': 153088, 'steps': 298, 'loss/train': 8.329551696777344} +02/24/2022 01:57:15 - INFO - codeparrot_training - Step 299: {'lr': 7.475e-05, 'samples': 153600, 'steps': 299, 'loss/train': 7.876159191131592} +02/24/2022 01:57:19 - INFO - codeparrot_training - Step 300: {'lr': 7.5e-05, 'samples': 154112, 'steps': 300, 'loss/train': 8.205613136291504} +02/24/2022 01:57:24 - INFO - codeparrot_training - Step 301: {'lr': 7.525e-05, 'samples': 154624, 'steps': 301, 'loss/train': 8.59995174407959} +02/24/2022 01:57:28 - INFO - codeparrot_training - Step 302: {'lr': 7.55e-05, 'samples': 155136, 'steps': 302, 'loss/train': 8.156671524047852} +02/24/2022 01:57:34 - INFO - codeparrot_training - Step 303: {'lr': 7.575e-05, 'samples': 155648, 'steps': 303, 'loss/train': 7.888469696044922} +02/24/2022 01:57:37 - INFO - codeparrot_training - Step 304: {'lr': 7.6e-05, 'samples': 156160, 'steps': 304, 'loss/train': 7.206390857696533} +02/24/2022 01:57:42 - INFO - codeparrot_training - Step 305: {'lr': 7.625e-05, 'samples': 156672, 'steps': 305, 'loss/train': 8.198074340820312} +02/24/2022 01:57:46 - INFO - codeparrot_training - Step 306: {'lr': 7.65e-05, 'samples': 157184, 'steps': 306, 'loss/train': 7.702149868011475} +02/24/2022 01:57:51 - INFO - codeparrot_training - Step 307: {'lr': 7.675e-05, 'samples': 157696, 'steps': 307, 'loss/train': 8.235662460327148} +02/24/2022 01:57:55 - INFO - codeparrot_training - Step 308: {'lr': 7.7e-05, 'samples': 158208, 'steps': 308, 'loss/train': 8.55713939666748} +02/24/2022 01:58:00 - INFO - codeparrot_training - Step 309: {'lr': 7.725000000000001e-05, 'samples': 158720, 'steps': 309, 
'loss/train': 7.453092098236084} +02/24/2022 01:58:04 - INFO - codeparrot_training - Step 310: {'lr': 7.75e-05, 'samples': 159232, 'steps': 310, 'loss/train': 7.088987350463867} +02/24/2022 01:58:09 - INFO - codeparrot_training - Step 311: {'lr': 7.775e-05, 'samples': 159744, 'steps': 311, 'loss/train': 5.286121368408203} +02/24/2022 01:58:13 - INFO - codeparrot_training - Step 312: {'lr': 7.8e-05, 'samples': 160256, 'steps': 312, 'loss/train': 7.3425517082214355} +02/24/2022 01:58:19 - INFO - codeparrot_training - Step 313: {'lr': 7.825e-05, 'samples': 160768, 'steps': 313, 'loss/train': 8.444873809814453} +02/24/2022 01:58:25 - INFO - codeparrot_training - Step 314: {'lr': 7.85e-05, 'samples': 161280, 'steps': 314, 'loss/train': 9.051511764526367} +02/24/2022 01:58:28 - INFO - codeparrot_training - Step 315: {'lr': 7.875e-05, 'samples': 161792, 'steps': 315, 'loss/train': 8.298068046569824} +02/24/2022 01:58:32 - INFO - codeparrot_training - Step 316: {'lr': 7.9e-05, 'samples': 162304, 'steps': 316, 'loss/train': 7.379024028778076} +02/24/2022 01:58:37 - INFO - codeparrot_training - Step 317: {'lr': 7.925e-05, 'samples': 162816, 'steps': 317, 'loss/train': 8.077301979064941} +02/24/2022 01:58:41 - INFO - codeparrot_training - Step 318: {'lr': 7.950000000000001e-05, 'samples': 163328, 'steps': 318, 'loss/train': 8.417896270751953} +02/24/2022 01:58:46 - INFO - codeparrot_training - Step 319: {'lr': 7.975e-05, 'samples': 163840, 'steps': 319, 'loss/train': 7.594058513641357} +02/24/2022 01:58:49 - INFO - codeparrot_training - Step 320: {'lr': 8e-05, 'samples': 164352, 'steps': 320, 'loss/train': 8.477936744689941} +02/24/2022 01:58:55 - INFO - codeparrot_training - Step 321: {'lr': 8.025e-05, 'samples': 164864, 'steps': 321, 'loss/train': 8.0873384475708} +02/24/2022 01:58:58 - INFO - codeparrot_training - Step 322: {'lr': 8.05e-05, 'samples': 165376, 'steps': 322, 'loss/train': 7.719216823577881} +02/24/2022 01:59:04 - INFO - codeparrot_training - Step 323: {'lr': 8.075e-05, 'samples': 165888, 'steps': 323, 'loss/train': 7.951709270477295} +02/24/2022 01:59:07 - INFO - codeparrot_training - Step 324: {'lr': 8.1e-05, 'samples': 166400, 'steps': 324, 'loss/train': 7.829857349395752} +02/24/2022 01:59:14 - INFO - codeparrot_training - Step 325: {'lr': 8.125000000000001e-05, 'samples': 166912, 'steps': 325, 'loss/train': 8.28011703491211} +02/24/2022 01:59:17 - INFO - codeparrot_training - Step 326: {'lr': 8.15e-05, 'samples': 167424, 'steps': 326, 'loss/train': 8.2847261428833} +02/24/2022 01:59:23 - INFO - codeparrot_training - Step 327: {'lr': 8.175000000000001e-05, 'samples': 167936, 'steps': 327, 'loss/train': 7.977908611297607} +02/24/2022 01:59:26 - INFO - codeparrot_training - Step 328: {'lr': 8.2e-05, 'samples': 168448, 'steps': 328, 'loss/train': 6.0809855461120605} +02/24/2022 01:59:31 - INFO - codeparrot_training - Step 329: {'lr': 8.225000000000001e-05, 'samples': 168960, 'steps': 329, 'loss/train': 7.6018290519714355} +02/24/2022 01:59:35 - INFO - codeparrot_training - Step 330: {'lr': 8.25e-05, 'samples': 169472, 'steps': 330, 'loss/train': 7.403947353363037} +02/24/2022 01:59:40 - INFO - codeparrot_training - Step 331: {'lr': 8.275e-05, 'samples': 169984, 'steps': 331, 'loss/train': 8.224590301513672} +02/24/2022 01:59:44 - INFO - codeparrot_training - Step 332: {'lr': 8.300000000000001e-05, 'samples': 170496, 'steps': 332, 'loss/train': 7.179081916809082} +02/24/2022 01:59:49 - INFO - codeparrot_training - Step 333: {'lr': 8.325e-05, 'samples': 171008, 'steps': 333, 
'loss/train': 7.664137363433838} +02/24/2022 01:59:55 - INFO - codeparrot_training - Step 334: {'lr': 8.350000000000001e-05, 'samples': 171520, 'steps': 334, 'loss/train': 6.750515937805176} +02/24/2022 01:59:58 - INFO - codeparrot_training - Step 335: {'lr': 8.375e-05, 'samples': 172032, 'steps': 335, 'loss/train': 7.544463634490967} +02/24/2022 02:00:04 - INFO - codeparrot_training - Step 336: {'lr': 8.400000000000001e-05, 'samples': 172544, 'steps': 336, 'loss/train': 6.866092205047607} +02/24/2022 02:00:07 - INFO - codeparrot_training - Step 337: {'lr': 8.425e-05, 'samples': 173056, 'steps': 337, 'loss/train': 7.727400302886963} +02/24/2022 02:00:13 - INFO - codeparrot_training - Step 338: {'lr': 8.450000000000001e-05, 'samples': 173568, 'steps': 338, 'loss/train': 7.797516345977783} +02/24/2022 02:00:16 - INFO - codeparrot_training - Step 339: {'lr': 8.475000000000001e-05, 'samples': 174080, 'steps': 339, 'loss/train': 7.595932960510254} +02/24/2022 02:00:22 - INFO - codeparrot_training - Step 340: {'lr': 8.5e-05, 'samples': 174592, 'steps': 340, 'loss/train': 7.032388687133789} +02/24/2022 02:00:25 - INFO - codeparrot_training - Step 341: {'lr': 8.525000000000001e-05, 'samples': 175104, 'steps': 341, 'loss/train': 7.881145477294922} +02/24/2022 02:00:31 - INFO - codeparrot_training - Step 342: {'lr': 8.55e-05, 'samples': 175616, 'steps': 342, 'loss/train': 7.786242961883545} +02/24/2022 02:00:34 - INFO - codeparrot_training - Step 343: {'lr': 8.575000000000001e-05, 'samples': 176128, 'steps': 343, 'loss/train': 7.198973178863525} +02/24/2022 02:00:40 - INFO - codeparrot_training - Step 344: {'lr': 8.599999999999999e-05, 'samples': 176640, 'steps': 344, 'loss/train': 8.599320411682129} +02/24/2022 02:00:43 - INFO - codeparrot_training - Step 345: {'lr': 8.625e-05, 'samples': 177152, 'steps': 345, 'loss/train': 7.57554817199707} +02/24/2022 02:00:49 - INFO - codeparrot_training - Step 346: {'lr': 8.65e-05, 'samples': 177664, 'steps': 346, 'loss/train': 8.224437713623047} +02/24/2022 02:00:52 - INFO - codeparrot_training - Step 347: {'lr': 8.675e-05, 'samples': 178176, 'steps': 347, 'loss/train': 7.841677188873291} +02/24/2022 02:00:58 - INFO - codeparrot_training - Step 348: {'lr': 8.7e-05, 'samples': 178688, 'steps': 348, 'loss/train': 7.63005256652832} +02/24/2022 02:01:01 - INFO - codeparrot_training - Step 349: {'lr': 8.724999999999999e-05, 'samples': 179200, 'steps': 349, 'loss/train': 8.130577087402344} +02/24/2022 02:01:07 - INFO - codeparrot_training - Step 350: {'lr': 8.75e-05, 'samples': 179712, 'steps': 350, 'loss/train': 7.222663402557373} +02/24/2022 02:01:10 - INFO - codeparrot_training - Step 351: {'lr': 8.774999999999999e-05, 'samples': 180224, 'steps': 351, 'loss/train': 7.223997116088867} +02/24/2022 02:01:16 - INFO - codeparrot_training - Step 352: {'lr': 8.8e-05, 'samples': 180736, 'steps': 352, 'loss/train': 7.704534530639648} +02/24/2022 02:01:19 - INFO - codeparrot_training - Step 353: {'lr': 8.824999999999999e-05, 'samples': 181248, 'steps': 353, 'loss/train': 8.424806594848633} +02/24/2022 02:01:25 - INFO - codeparrot_training - Step 354: {'lr': 8.85e-05, 'samples': 181760, 'steps': 354, 'loss/train': 8.343356132507324} +02/24/2022 02:01:28 - INFO - codeparrot_training - Step 355: {'lr': 8.875e-05, 'samples': 182272, 'steps': 355, 'loss/train': 7.833737850189209} +02/24/2022 02:01:34 - INFO - codeparrot_training - Step 356: {'lr': 8.9e-05, 'samples': 182784, 'steps': 356, 'loss/train': 4.860916614532471} +02/24/2022 02:01:37 - INFO - codeparrot_training - Step 
357: {'lr': 8.925e-05, 'samples': 183296, 'steps': 357, 'loss/train': 7.812404632568359} +02/24/2022 02:01:42 - INFO - codeparrot_training - Step 358: {'lr': 8.95e-05, 'samples': 183808, 'steps': 358, 'loss/train': 8.174966812133789} +02/24/2022 02:01:46 - INFO - codeparrot_training - Step 359: {'lr': 8.975e-05, 'samples': 184320, 'steps': 359, 'loss/train': 7.480123043060303} +02/24/2022 02:01:52 - INFO - codeparrot_training - Step 360: {'lr': 8.999999999999999e-05, 'samples': 184832, 'steps': 360, 'loss/train': 8.104669570922852} +02/24/2022 02:01:55 - INFO - codeparrot_training - Step 361: {'lr': 9.025e-05, 'samples': 185344, 'steps': 361, 'loss/train': 7.596373558044434} +02/24/2022 02:02:01 - INFO - codeparrot_training - Step 362: {'lr': 9.05e-05, 'samples': 185856, 'steps': 362, 'loss/train': 7.922679901123047} +02/24/2022 02:02:04 - INFO - codeparrot_training - Step 363: {'lr': 9.075e-05, 'samples': 186368, 'steps': 363, 'loss/train': 7.51069450378418} +02/24/2022 02:02:10 - INFO - codeparrot_training - Step 364: {'lr': 9.1e-05, 'samples': 186880, 'steps': 364, 'loss/train': 7.531533241271973} +02/24/2022 02:02:14 - INFO - codeparrot_training - Step 365: {'lr': 9.125e-05, 'samples': 187392, 'steps': 365, 'loss/train': 8.281204223632812} +02/24/2022 02:02:19 - INFO - codeparrot_training - Step 366: {'lr': 9.15e-05, 'samples': 187904, 'steps': 366, 'loss/train': 7.3093342781066895} +02/24/2022 02:02:23 - INFO - codeparrot_training - Step 367: {'lr': 9.175e-05, 'samples': 188416, 'steps': 367, 'loss/train': 8.077080726623535} +02/24/2022 02:02:28 - INFO - codeparrot_training - Step 368: {'lr': 9.2e-05, 'samples': 188928, 'steps': 368, 'loss/train': 7.250057697296143} +02/24/2022 02:02:31 - INFO - codeparrot_training - Step 369: {'lr': 9.225e-05, 'samples': 189440, 'steps': 369, 'loss/train': 7.866724967956543} +02/24/2022 02:02:37 - INFO - codeparrot_training - Step 370: {'lr': 9.25e-05, 'samples': 189952, 'steps': 370, 'loss/train': 6.862495422363281} +02/24/2022 02:02:41 - INFO - codeparrot_training - Step 371: {'lr': 9.275e-05, 'samples': 190464, 'steps': 371, 'loss/train': 8.284356117248535} +02/24/2022 02:02:46 - INFO - codeparrot_training - Step 372: {'lr': 9.3e-05, 'samples': 190976, 'steps': 372, 'loss/train': 7.869273662567139} +02/24/2022 02:02:50 - INFO - codeparrot_training - Step 373: {'lr': 9.325e-05, 'samples': 191488, 'steps': 373, 'loss/train': 8.418437957763672} +02/24/2022 02:02:55 - INFO - codeparrot_training - Step 374: {'lr': 9.35e-05, 'samples': 192000, 'steps': 374, 'loss/train': 10.001588821411133} +02/24/2022 02:02:59 - INFO - codeparrot_training - Step 375: {'lr': 9.375e-05, 'samples': 192512, 'steps': 375, 'loss/train': 7.9266862869262695} +02/24/2022 02:03:04 - INFO - codeparrot_training - Step 376: {'lr': 9.400000000000001e-05, 'samples': 193024, 'steps': 376, 'loss/train': 8.20534896850586} +02/24/2022 02:03:08 - INFO - codeparrot_training - Step 377: {'lr': 9.425e-05, 'samples': 193536, 'steps': 377, 'loss/train': 7.760704040527344} +02/24/2022 02:03:13 - INFO - codeparrot_training - Step 378: {'lr': 9.45e-05, 'samples': 194048, 'steps': 378, 'loss/train': 8.39869213104248} +02/24/2022 02:03:16 - INFO - codeparrot_training - Step 379: {'lr': 9.475e-05, 'samples': 194560, 'steps': 379, 'loss/train': 7.1428704261779785} +02/24/2022 02:03:23 - INFO - codeparrot_training - Step 380: {'lr': 9.5e-05, 'samples': 195072, 'steps': 380, 'loss/train': 7.34848690032959} +02/24/2022 02:03:26 - INFO - codeparrot_training - Step 381: {'lr': 9.525e-05, 'samples': 
195584, 'steps': 381, 'loss/train': 7.505448341369629} +02/24/2022 02:03:32 - INFO - codeparrot_training - Step 382: {'lr': 9.55e-05, 'samples': 196096, 'steps': 382, 'loss/train': 7.698422908782959} +02/24/2022 02:03:35 - INFO - codeparrot_training - Step 383: {'lr': 9.575000000000001e-05, 'samples': 196608, 'steps': 383, 'loss/train': 8.264890670776367} +02/24/2022 02:03:40 - INFO - codeparrot_training - Step 384: {'lr': 9.6e-05, 'samples': 197120, 'steps': 384, 'loss/train': 7.688831329345703} +02/24/2022 02:03:44 - INFO - codeparrot_training - Step 385: {'lr': 9.625000000000001e-05, 'samples': 197632, 'steps': 385, 'loss/train': 7.142866134643555} +02/24/2022 02:03:50 - INFO - codeparrot_training - Step 386: {'lr': 9.65e-05, 'samples': 198144, 'steps': 386, 'loss/train': 7.939741611480713} +02/24/2022 02:03:53 - INFO - codeparrot_training - Step 387: {'lr': 9.675000000000001e-05, 'samples': 198656, 'steps': 387, 'loss/train': 7.401408672332764} +02/24/2022 02:03:59 - INFO - codeparrot_training - Step 388: {'lr': 9.7e-05, 'samples': 199168, 'steps': 388, 'loss/train': 7.98380708694458} +02/24/2022 02:04:02 - INFO - codeparrot_training - Step 389: {'lr': 9.725e-05, 'samples': 199680, 'steps': 389, 'loss/train': 5.516870498657227} +02/24/2022 02:04:08 - INFO - codeparrot_training - Step 390: {'lr': 9.750000000000001e-05, 'samples': 200192, 'steps': 390, 'loss/train': 8.51853084564209} +02/24/2022 02:04:13 - INFO - codeparrot_training - Step 391: {'lr': 9.775e-05, 'samples': 200704, 'steps': 391, 'loss/train': 7.015237331390381} +02/24/2022 02:04:17 - INFO - codeparrot_training - Step 392: {'lr': 9.800000000000001e-05, 'samples': 201216, 'steps': 392, 'loss/train': 9.577959060668945} +02/24/2022 02:04:22 - INFO - codeparrot_training - Step 393: {'lr': 9.825e-05, 'samples': 201728, 'steps': 393, 'loss/train': 7.536276817321777} +02/24/2022 02:04:25 - INFO - codeparrot_training - Step 394: {'lr': 9.850000000000001e-05, 'samples': 202240, 'steps': 394, 'loss/train': 7.226565837860107} +02/24/2022 02:04:32 - INFO - codeparrot_training - Step 395: {'lr': 9.875e-05, 'samples': 202752, 'steps': 395, 'loss/train': 8.628961563110352} +02/24/2022 02:04:35 - INFO - codeparrot_training - Step 396: {'lr': 9.900000000000001e-05, 'samples': 203264, 'steps': 396, 'loss/train': 7.343240737915039} +02/24/2022 02:04:40 - INFO - codeparrot_training - Step 397: {'lr': 9.925000000000001e-05, 'samples': 203776, 'steps': 397, 'loss/train': 7.60067081451416} +02/24/2022 02:04:44 - INFO - codeparrot_training - Step 398: {'lr': 9.95e-05, 'samples': 204288, 'steps': 398, 'loss/train': 7.258427143096924} +02/24/2022 02:04:49 - INFO - codeparrot_training - Step 399: {'lr': 9.975000000000001e-05, 'samples': 204800, 'steps': 399, 'loss/train': 6.988162517547607} +02/24/2022 02:04:53 - INFO - codeparrot_training - Step 400: {'lr': 0.0001, 'samples': 205312, 'steps': 400, 'loss/train': 7.182863235473633} +02/24/2022 02:04:58 - INFO - codeparrot_training - Step 401: {'lr': 0.00010025000000000001, 'samples': 205824, 'steps': 401, 'loss/train': 7.117374420166016} +02/24/2022 02:05:02 - INFO - codeparrot_training - Step 402: {'lr': 0.0001005, 'samples': 206336, 'steps': 402, 'loss/train': 7.725515842437744} +02/24/2022 02:05:07 - INFO - codeparrot_training - Step 403: {'lr': 0.00010075000000000001, 'samples': 206848, 'steps': 403, 'loss/train': 7.501499176025391} +02/24/2022 02:05:10 - INFO - codeparrot_training - Step 404: {'lr': 0.000101, 'samples': 207360, 'steps': 404, 'loss/train': 8.18635082244873} +02/24/2022 02:05:17 - 
INFO - codeparrot_training - Step 405: {'lr': 0.00010125000000000001, 'samples': 207872, 'steps': 405, 'loss/train': 8.187431335449219} +02/24/2022 02:05:20 - INFO - codeparrot_training - Step 406: {'lr': 0.00010150000000000001, 'samples': 208384, 'steps': 406, 'loss/train': 7.888332366943359} +02/24/2022 02:05:26 - INFO - codeparrot_training - Step 407: {'lr': 0.00010174999999999999, 'samples': 208896, 'steps': 407, 'loss/train': 7.384456157684326} +02/24/2022 02:05:29 - INFO - codeparrot_training - Step 408: {'lr': 0.000102, 'samples': 209408, 'steps': 408, 'loss/train': 7.803598403930664} +02/24/2022 02:05:35 - INFO - codeparrot_training - Step 409: {'lr': 0.00010224999999999999, 'samples': 209920, 'steps': 409, 'loss/train': 7.0473527908325195} +02/24/2022 02:05:38 - INFO - codeparrot_training - Step 410: {'lr': 0.0001025, 'samples': 210432, 'steps': 410, 'loss/train': 9.248546600341797} +02/24/2022 02:05:43 - INFO - codeparrot_training - Step 411: {'lr': 0.00010274999999999999, 'samples': 210944, 'steps': 411, 'loss/train': 7.117498397827148} +02/24/2022 02:05:47 - INFO - codeparrot_training - Step 412: {'lr': 0.000103, 'samples': 211456, 'steps': 412, 'loss/train': 7.365967273712158} +02/24/2022 02:05:52 - INFO - codeparrot_training - Step 413: {'lr': 0.00010325, 'samples': 211968, 'steps': 413, 'loss/train': 8.108454704284668} +02/24/2022 02:05:56 - INFO - codeparrot_training - Step 414: {'lr': 0.0001035, 'samples': 212480, 'steps': 414, 'loss/train': 7.5737385749816895} +02/24/2022 02:06:03 - INFO - codeparrot_training - Step 415: {'lr': 0.00010375, 'samples': 212992, 'steps': 415, 'loss/train': 8.08419132232666} +02/24/2022 02:06:06 - INFO - codeparrot_training - Step 416: {'lr': 0.000104, 'samples': 213504, 'steps': 416, 'loss/train': 7.4956183433532715} +02/24/2022 02:06:11 - INFO - codeparrot_training - Step 417: {'lr': 0.00010425, 'samples': 214016, 'steps': 417, 'loss/train': 7.642014503479004} +02/24/2022 02:06:15 - INFO - codeparrot_training - Step 418: {'lr': 0.00010449999999999999, 'samples': 214528, 'steps': 418, 'loss/train': 7.403052806854248} +02/24/2022 02:06:20 - INFO - codeparrot_training - Step 419: {'lr': 0.00010475, 'samples': 215040, 'steps': 419, 'loss/train': 7.374420166015625} +02/24/2022 02:06:24 - INFO - codeparrot_training - Step 420: {'lr': 0.000105, 'samples': 215552, 'steps': 420, 'loss/train': 7.816165924072266} +02/24/2022 02:06:29 - INFO - codeparrot_training - Step 421: {'lr': 0.00010525, 'samples': 216064, 'steps': 421, 'loss/train': 6.225070953369141} +02/24/2022 02:06:33 - INFO - codeparrot_training - Step 422: {'lr': 0.0001055, 'samples': 216576, 'steps': 422, 'loss/train': 7.872852325439453} +02/24/2022 02:06:38 - INFO - codeparrot_training - Step 423: {'lr': 0.00010575, 'samples': 217088, 'steps': 423, 'loss/train': 7.829193115234375} +02/24/2022 02:06:42 - INFO - codeparrot_training - Step 424: {'lr': 0.000106, 'samples': 217600, 'steps': 424, 'loss/train': 8.13473892211914} +02/24/2022 02:06:48 - INFO - codeparrot_training - Step 425: {'lr': 0.00010625, 'samples': 218112, 'steps': 425, 'loss/train': 3.2847564220428467} +02/24/2022 02:06:51 - INFO - codeparrot_training - Step 426: {'lr': 0.0001065, 'samples': 218624, 'steps': 426, 'loss/train': 6.639157295227051} +02/24/2022 02:06:57 - INFO - codeparrot_training - Step 427: {'lr': 0.00010675, 'samples': 219136, 'steps': 427, 'loss/train': 8.147637367248535} +02/24/2022 02:07:00 - INFO - codeparrot_training - Step 428: {'lr': 0.000107, 'samples': 219648, 'steps': 428, 'loss/train': 
7.7012457847595215} +02/24/2022 02:07:06 - INFO - codeparrot_training - Step 429: {'lr': 0.00010725, 'samples': 220160, 'steps': 429, 'loss/train': 7.161632061004639} +02/24/2022 02:07:09 - INFO - codeparrot_training - Step 430: {'lr': 0.0001075, 'samples': 220672, 'steps': 430, 'loss/train': 7.4439873695373535} +02/24/2022 02:07:14 - INFO - codeparrot_training - Step 431: {'lr': 0.00010775, 'samples': 221184, 'steps': 431, 'loss/train': 8.414506912231445} +02/24/2022 02:07:18 - INFO - codeparrot_training - Step 432: {'lr': 0.000108, 'samples': 221696, 'steps': 432, 'loss/train': 7.436773777008057} +02/24/2022 02:07:23 - INFO - codeparrot_training - Step 433: {'lr': 0.00010825, 'samples': 222208, 'steps': 433, 'loss/train': 7.460289478302002} +02/24/2022 02:07:27 - INFO - codeparrot_training - Step 434: {'lr': 0.00010850000000000001, 'samples': 222720, 'steps': 434, 'loss/train': 8.266501426696777} +02/24/2022 02:07:32 - INFO - codeparrot_training - Step 435: {'lr': 0.00010875, 'samples': 223232, 'steps': 435, 'loss/train': 8.083974838256836} +02/24/2022 02:07:36 - INFO - codeparrot_training - Step 436: {'lr': 0.000109, 'samples': 223744, 'steps': 436, 'loss/train': 8.220951080322266} +02/24/2022 02:07:41 - INFO - codeparrot_training - Step 437: {'lr': 0.00010925, 'samples': 224256, 'steps': 437, 'loss/train': 6.722242832183838} +02/24/2022 02:07:45 - INFO - codeparrot_training - Step 438: {'lr': 0.0001095, 'samples': 224768, 'steps': 438, 'loss/train': 6.867252826690674} +02/24/2022 02:07:50 - INFO - codeparrot_training - Step 439: {'lr': 0.00010975, 'samples': 225280, 'steps': 439, 'loss/train': 6.986952304840088} +02/24/2022 02:07:54 - INFO - codeparrot_training - Step 440: {'lr': 0.00011, 'samples': 225792, 'steps': 440, 'loss/train': 7.2183146476745605} +02/24/2022 02:08:00 - INFO - codeparrot_training - Step 441: {'lr': 0.00011025, 'samples': 226304, 'steps': 441, 'loss/train': 7.320436954498291} +02/24/2022 02:08:03 - INFO - codeparrot_training - Step 442: {'lr': 0.0001105, 'samples': 226816, 'steps': 442, 'loss/train': 6.413392543792725} +02/24/2022 02:08:09 - INFO - codeparrot_training - Step 443: {'lr': 0.00011075000000000001, 'samples': 227328, 'steps': 443, 'loss/train': 7.322403430938721} +02/24/2022 02:08:12 - INFO - codeparrot_training - Step 444: {'lr': 0.000111, 'samples': 227840, 'steps': 444, 'loss/train': 7.285653114318848} +02/24/2022 02:08:18 - INFO - codeparrot_training - Step 445: {'lr': 0.00011125000000000001, 'samples': 228352, 'steps': 445, 'loss/train': 7.396255016326904} +02/24/2022 02:08:21 - INFO - codeparrot_training - Step 446: {'lr': 0.0001115, 'samples': 228864, 'steps': 446, 'loss/train': 7.2955827713012695} +02/24/2022 02:08:27 - INFO - codeparrot_training - Step 447: {'lr': 0.00011175, 'samples': 229376, 'steps': 447, 'loss/train': 4.9974751472473145} +02/24/2022 02:08:30 - INFO - codeparrot_training - Step 448: {'lr': 0.000112, 'samples': 229888, 'steps': 448, 'loss/train': 7.790682792663574} +02/24/2022 02:08:36 - INFO - codeparrot_training - Step 449: {'lr': 0.00011225, 'samples': 230400, 'steps': 449, 'loss/train': 7.6659979820251465} +02/24/2022 02:08:39 - INFO - codeparrot_training - Step 450: {'lr': 0.00011250000000000001, 'samples': 230912, 'steps': 450, 'loss/train': 7.553841590881348} +02/24/2022 02:08:45 - INFO - codeparrot_training - Step 451: {'lr': 0.00011275, 'samples': 231424, 'steps': 451, 'loss/train': 7.629961013793945} +02/24/2022 02:08:50 - INFO - codeparrot_training - Step 452: {'lr': 0.00011300000000000001, 'samples': 231936, 
'steps': 452, 'loss/train': 4.840542316436768} +02/24/2022 02:08:54 - INFO - codeparrot_training - Step 453: {'lr': 0.00011325, 'samples': 232448, 'steps': 453, 'loss/train': 7.533297538757324} +02/24/2022 02:08:59 - INFO - codeparrot_training - Step 454: {'lr': 0.00011350000000000001, 'samples': 232960, 'steps': 454, 'loss/train': 7.914617538452148} +02/24/2022 02:09:03 - INFO - codeparrot_training - Step 455: {'lr': 0.00011375, 'samples': 233472, 'steps': 455, 'loss/train': 6.7937188148498535} +02/24/2022 02:09:08 - INFO - codeparrot_training - Step 456: {'lr': 0.000114, 'samples': 233984, 'steps': 456, 'loss/train': 6.527731418609619} +02/24/2022 02:09:12 - INFO - codeparrot_training - Step 457: {'lr': 0.00011425000000000001, 'samples': 234496, 'steps': 457, 'loss/train': 7.973141670227051} +02/24/2022 02:09:17 - INFO - codeparrot_training - Step 458: {'lr': 0.0001145, 'samples': 235008, 'steps': 458, 'loss/train': 7.240002155303955} +02/24/2022 02:09:21 - INFO - codeparrot_training - Step 459: {'lr': 0.00011475000000000001, 'samples': 235520, 'steps': 459, 'loss/train': 7.077408790588379} +02/24/2022 02:09:27 - INFO - codeparrot_training - Step 460: {'lr': 0.000115, 'samples': 236032, 'steps': 460, 'loss/train': 7.475757598876953} +02/24/2022 02:09:30 - INFO - codeparrot_training - Step 461: {'lr': 0.00011525000000000001, 'samples': 236544, 'steps': 461, 'loss/train': 7.6415300369262695} +02/24/2022 02:09:36 - INFO - codeparrot_training - Step 462: {'lr': 0.0001155, 'samples': 237056, 'steps': 462, 'loss/train': 8.935208320617676} +02/24/2022 02:09:39 - INFO - codeparrot_training - Step 463: {'lr': 0.00011575000000000001, 'samples': 237568, 'steps': 463, 'loss/train': 6.987576961517334} +02/24/2022 02:09:45 - INFO - codeparrot_training - Step 464: {'lr': 0.00011600000000000001, 'samples': 238080, 'steps': 464, 'loss/train': 7.757630825042725} +02/24/2022 02:09:48 - INFO - codeparrot_training - Step 465: {'lr': 0.00011625, 'samples': 238592, 'steps': 465, 'loss/train': 7.162644386291504} +02/24/2022 02:09:54 - INFO - codeparrot_training - Step 466: {'lr': 0.00011650000000000001, 'samples': 239104, 'steps': 466, 'loss/train': 8.270011901855469} +02/24/2022 02:09:57 - INFO - codeparrot_training - Step 467: {'lr': 0.00011675, 'samples': 239616, 'steps': 467, 'loss/train': 6.218019008636475} +02/24/2022 02:10:03 - INFO - codeparrot_training - Step 468: {'lr': 0.00011700000000000001, 'samples': 240128, 'steps': 468, 'loss/train': 6.125336647033691} +02/24/2022 02:10:06 - INFO - codeparrot_training - Step 469: {'lr': 0.00011724999999999999, 'samples': 240640, 'steps': 469, 'loss/train': 7.7407145500183105} +02/24/2022 02:10:12 - INFO - codeparrot_training - Step 470: {'lr': 0.0001175, 'samples': 241152, 'steps': 470, 'loss/train': 4.7091450691223145} +02/24/2022 02:10:15 - INFO - codeparrot_training - Step 471: {'lr': 0.00011775, 'samples': 241664, 'steps': 471, 'loss/train': 7.565977096557617} +02/24/2022 02:10:21 - INFO - codeparrot_training - Step 472: {'lr': 0.000118, 'samples': 242176, 'steps': 472, 'loss/train': 8.618671417236328} +02/24/2022 02:10:25 - INFO - codeparrot_training - Step 473: {'lr': 0.00011825, 'samples': 242688, 'steps': 473, 'loss/train': 7.5694475173950195} +02/24/2022 02:10:30 - INFO - codeparrot_training - Step 474: {'lr': 0.0001185, 'samples': 243200, 'steps': 474, 'loss/train': 6.634157657623291} +02/24/2022 02:10:34 - INFO - codeparrot_training - Step 475: {'lr': 0.00011875, 'samples': 243712, 'steps': 475, 'loss/train': 7.126220226287842} +02/24/2022 02:10:39 - 
INFO - codeparrot_training - Step 476: {'lr': 0.00011899999999999999, 'samples': 244224, 'steps': 476, 'loss/train': 7.417023181915283} +02/24/2022 02:10:43 - INFO - codeparrot_training - Step 477: {'lr': 0.00011925, 'samples': 244736, 'steps': 477, 'loss/train': 8.853533744812012} +02/24/2022 02:10:48 - INFO - codeparrot_training - Step 478: {'lr': 0.00011949999999999999, 'samples': 245248, 'steps': 478, 'loss/train': 6.4358439445495605} +02/24/2022 02:10:52 - INFO - codeparrot_training - Step 479: {'lr': 0.00011975, 'samples': 245760, 'steps': 479, 'loss/train': 8.242326736450195} +02/24/2022 02:10:57 - INFO - codeparrot_training - Step 480: {'lr': 0.00012, 'samples': 246272, 'steps': 480, 'loss/train': 7.858012676239014} +02/24/2022 02:11:01 - INFO - codeparrot_training - Step 481: {'lr': 0.00012025, 'samples': 246784, 'steps': 481, 'loss/train': 7.545612335205078} +02/24/2022 02:11:06 - INFO - codeparrot_training - Step 482: {'lr': 0.0001205, 'samples': 247296, 'steps': 482, 'loss/train': 7.130396842956543} +02/24/2022 02:11:10 - INFO - codeparrot_training - Step 483: {'lr': 0.00012075, 'samples': 247808, 'steps': 483, 'loss/train': 6.627236366271973} +02/24/2022 02:11:15 - INFO - codeparrot_training - Step 484: {'lr': 0.000121, 'samples': 248320, 'steps': 484, 'loss/train': 7.135700225830078} +02/24/2022 02:11:19 - INFO - codeparrot_training - Step 485: {'lr': 0.00012124999999999999, 'samples': 248832, 'steps': 485, 'loss/train': 7.578171253204346} +02/24/2022 02:11:24 - INFO - codeparrot_training - Step 486: {'lr': 0.0001215, 'samples': 249344, 'steps': 486, 'loss/train': 7.24437141418457} +02/24/2022 02:11:28 - INFO - codeparrot_training - Step 487: {'lr': 0.00012175, 'samples': 249856, 'steps': 487, 'loss/train': 4.430206775665283} +02/24/2022 02:11:34 - INFO - codeparrot_training - Step 488: {'lr': 0.000122, 'samples': 250368, 'steps': 488, 'loss/train': 7.954636096954346} +02/24/2022 02:11:37 - INFO - codeparrot_training - Step 489: {'lr': 0.00012225, 'samples': 250880, 'steps': 489, 'loss/train': 6.426181793212891} +02/24/2022 02:11:43 - INFO - codeparrot_training - Step 490: {'lr': 0.0001225, 'samples': 251392, 'steps': 490, 'loss/train': 7.321078300476074} +02/24/2022 02:11:46 - INFO - codeparrot_training - Step 491: {'lr': 0.00012275, 'samples': 251904, 'steps': 491, 'loss/train': 7.457513332366943} +02/24/2022 02:11:52 - INFO - codeparrot_training - Step 492: {'lr': 0.000123, 'samples': 252416, 'steps': 492, 'loss/train': 7.4673991203308105} +02/24/2022 02:11:55 - INFO - codeparrot_training - Step 493: {'lr': 0.00012325000000000001, 'samples': 252928, 'steps': 493, 'loss/train': 7.5666327476501465} +02/24/2022 02:12:01 - INFO - codeparrot_training - Step 494: {'lr': 0.0001235, 'samples': 253440, 'steps': 494, 'loss/train': 6.872106552124023} +02/24/2022 02:12:04 - INFO - codeparrot_training - Step 495: {'lr': 0.00012375, 'samples': 253952, 'steps': 495, 'loss/train': 7.914139270782471} +02/24/2022 02:12:10 - INFO - codeparrot_training - Step 496: {'lr': 0.000124, 'samples': 254464, 'steps': 496, 'loss/train': 7.269467830657959} +02/24/2022 02:12:13 - INFO - codeparrot_training - Step 497: {'lr': 0.00012425, 'samples': 254976, 'steps': 497, 'loss/train': 7.180480003356934} +02/24/2022 02:12:19 - INFO - codeparrot_training - Step 498: {'lr': 0.0001245, 'samples': 255488, 'steps': 498, 'loss/train': 7.44936466217041} +02/24/2022 02:12:22 - INFO - codeparrot_training - Step 499: {'lr': 0.00012475, 'samples': 256000, 'steps': 499, 'loss/train': 7.441803455352783} +02/24/2022 
02:12:28 - INFO - codeparrot_training - Step 500: {'lr': 0.000125, 'samples': 256512, 'steps': 500, 'loss/train': 6.37514066696167} +02/24/2022 02:12:31 - INFO - codeparrot_training - Step 501: {'lr': 0.00012525, 'samples': 257024, 'steps': 501, 'loss/train': 7.1933794021606445} +02/24/2022 02:12:37 - INFO - codeparrot_training - Step 502: {'lr': 0.00012550000000000001, 'samples': 257536, 'steps': 502, 'loss/train': 7.345328330993652} +02/24/2022 02:12:40 - INFO - codeparrot_training - Step 503: {'lr': 0.00012575, 'samples': 258048, 'steps': 503, 'loss/train': 7.6105265617370605} +02/24/2022 02:12:46 - INFO - codeparrot_training - Step 504: {'lr': 0.000126, 'samples': 258560, 'steps': 504, 'loss/train': 8.217442512512207} +02/24/2022 02:12:49 - INFO - codeparrot_training - Step 505: {'lr': 0.00012625, 'samples': 259072, 'steps': 505, 'loss/train': 7.326420307159424} +02/24/2022 02:12:55 - INFO - codeparrot_training - Step 506: {'lr': 0.0001265, 'samples': 259584, 'steps': 506, 'loss/train': 7.832907199859619} +02/24/2022 02:12:58 - INFO - codeparrot_training - Step 507: {'lr': 0.00012675, 'samples': 260096, 'steps': 507, 'loss/train': 6.8345465660095215} +02/24/2022 02:13:05 - INFO - codeparrot_training - Step 508: {'lr': 0.000127, 'samples': 260608, 'steps': 508, 'loss/train': 7.008861064910889} +02/24/2022 02:13:08 - INFO - codeparrot_training - Step 509: {'lr': 0.00012725, 'samples': 261120, 'steps': 509, 'loss/train': 7.020104885101318} +02/24/2022 02:13:14 - INFO - codeparrot_training - Step 510: {'lr': 0.0001275, 'samples': 261632, 'steps': 510, 'loss/train': 7.388762474060059} +02/24/2022 02:13:17 - INFO - codeparrot_training - Step 511: {'lr': 0.00012775000000000002, 'samples': 262144, 'steps': 511, 'loss/train': 5.314887046813965} +02/24/2022 02:13:23 - INFO - codeparrot_training - Step 512: {'lr': 0.000128, 'samples': 262656, 'steps': 512, 'loss/train': 6.833120822906494} +02/24/2022 02:13:26 - INFO - codeparrot_training - Step 513: {'lr': 0.00012825, 'samples': 263168, 'steps': 513, 'loss/train': 4.8469953536987305} +02/24/2022 02:13:32 - INFO - codeparrot_training - Step 514: {'lr': 0.0001285, 'samples': 263680, 'steps': 514, 'loss/train': 7.254924774169922} +02/24/2022 02:13:35 - INFO - codeparrot_training - Step 515: {'lr': 0.00012875, 'samples': 264192, 'steps': 515, 'loss/train': 7.093168258666992} +02/24/2022 02:13:40 - INFO - codeparrot_training - Step 516: {'lr': 0.00012900000000000002, 'samples': 264704, 'steps': 516, 'loss/train': 6.9729108810424805} +02/24/2022 02:13:44 - INFO - codeparrot_training - Step 517: {'lr': 0.00012925, 'samples': 265216, 'steps': 517, 'loss/train': 7.3428215980529785} +02/24/2022 02:13:49 - INFO - codeparrot_training - Step 518: {'lr': 0.0001295, 'samples': 265728, 'steps': 518, 'loss/train': 7.924755096435547} +02/24/2022 02:13:53 - INFO - codeparrot_training - Step 519: {'lr': 0.00012975, 'samples': 266240, 'steps': 519, 'loss/train': 9.013562202453613} +02/24/2022 02:14:01 - INFO - codeparrot_training - Step 520: {'lr': 0.00013000000000000002, 'samples': 266752, 'steps': 520, 'loss/train': 6.795487403869629} +02/24/2022 02:14:04 - INFO - codeparrot_training - Step 521: {'lr': 0.00013025, 'samples': 267264, 'steps': 521, 'loss/train': 7.241908073425293} +02/24/2022 02:14:10 - INFO - codeparrot_training - Step 522: {'lr': 0.0001305, 'samples': 267776, 'steps': 522, 'loss/train': 6.705616474151611} +02/24/2022 02:14:13 - INFO - codeparrot_training - Step 523: {'lr': 0.00013075, 'samples': 268288, 'steps': 523, 'loss/train': 
6.343685150146484} +02/24/2022 02:14:18 - INFO - codeparrot_training - Step 524: {'lr': 0.000131, 'samples': 268800, 'steps': 524, 'loss/train': 7.4530768394470215} +02/24/2022 02:14:22 - INFO - codeparrot_training - Step 525: {'lr': 0.00013125000000000002, 'samples': 269312, 'steps': 525, 'loss/train': 7.97170877456665} +02/24/2022 02:14:27 - INFO - codeparrot_training - Step 526: {'lr': 0.0001315, 'samples': 269824, 'steps': 526, 'loss/train': 6.723884582519531} +02/24/2022 02:14:31 - INFO - codeparrot_training - Step 527: {'lr': 0.00013175, 'samples': 270336, 'steps': 527, 'loss/train': 7.471153259277344} +02/24/2022 02:14:36 - INFO - codeparrot_training - Step 528: {'lr': 0.000132, 'samples': 270848, 'steps': 528, 'loss/train': 6.499663352966309} +02/24/2022 02:14:40 - INFO - codeparrot_training - Step 529: {'lr': 0.00013225000000000002, 'samples': 271360, 'steps': 529, 'loss/train': 7.3266730308532715} +02/24/2022 02:14:47 - INFO - codeparrot_training - Step 530: {'lr': 0.00013250000000000002, 'samples': 271872, 'steps': 530, 'loss/train': 6.079280853271484} +02/24/2022 02:14:50 - INFO - codeparrot_training - Step 531: {'lr': 0.00013275, 'samples': 272384, 'steps': 531, 'loss/train': 7.223377704620361} +02/24/2022 02:14:56 - INFO - codeparrot_training - Step 532: {'lr': 0.000133, 'samples': 272896, 'steps': 532, 'loss/train': 6.303267478942871} +02/24/2022 02:15:01 - INFO - codeparrot_training - Step 533: {'lr': 0.00013325, 'samples': 273408, 'steps': 533, 'loss/train': 6.488459587097168} +02/24/2022 02:15:05 - INFO - codeparrot_training - Step 534: {'lr': 0.00013350000000000002, 'samples': 273920, 'steps': 534, 'loss/train': 7.408422946929932} +02/24/2022 02:15:10 - INFO - codeparrot_training - Step 535: {'lr': 0.00013375, 'samples': 274432, 'steps': 535, 'loss/train': 7.3007588386535645} +02/24/2022 02:15:14 - INFO - codeparrot_training - Step 536: {'lr': 0.000134, 'samples': 274944, 'steps': 536, 'loss/train': 7.12288236618042} +02/24/2022 02:15:19 - INFO - codeparrot_training - Step 537: {'lr': 0.00013425, 'samples': 275456, 'steps': 537, 'loss/train': 6.852250099182129} +02/24/2022 02:15:23 - INFO - codeparrot_training - Step 538: {'lr': 0.00013450000000000002, 'samples': 275968, 'steps': 538, 'loss/train': 6.62864875793457} +02/24/2022 02:15:30 - INFO - codeparrot_training - Step 539: {'lr': 0.00013475000000000002, 'samples': 276480, 'steps': 539, 'loss/train': 7.144618034362793} +02/24/2022 02:15:33 - INFO - codeparrot_training - Step 540: {'lr': 0.000135, 'samples': 276992, 'steps': 540, 'loss/train': 6.56927490234375} +02/24/2022 02:15:39 - INFO - codeparrot_training - Step 541: {'lr': 0.00013525, 'samples': 277504, 'steps': 541, 'loss/train': 7.681875705718994} +02/24/2022 02:15:42 - INFO - codeparrot_training - Step 542: {'lr': 0.00013550000000000001, 'samples': 278016, 'steps': 542, 'loss/train': 6.560507297515869} +02/24/2022 02:15:48 - INFO - codeparrot_training - Step 543: {'lr': 0.00013575000000000002, 'samples': 278528, 'steps': 543, 'loss/train': 6.655404567718506} +02/24/2022 02:15:51 - INFO - codeparrot_training - Step 544: {'lr': 0.00013600000000000003, 'samples': 279040, 'steps': 544, 'loss/train': 6.519329071044922} +02/24/2022 02:15:57 - INFO - codeparrot_training - Step 545: {'lr': 0.00013625, 'samples': 279552, 'steps': 545, 'loss/train': 5.666187763214111} +02/24/2022 02:16:00 - INFO - codeparrot_training - Step 546: {'lr': 0.0001365, 'samples': 280064, 'steps': 546, 'loss/train': 7.2396979331970215} +02/24/2022 02:16:06 - INFO - codeparrot_training - Step 
547: {'lr': 0.00013675000000000002, 'samples': 280576, 'steps': 547, 'loss/train': 6.303050994873047} +02/24/2022 02:16:09 - INFO - codeparrot_training - Step 548: {'lr': 0.00013700000000000002, 'samples': 281088, 'steps': 548, 'loss/train': 8.919416427612305} +02/24/2022 02:16:17 - INFO - codeparrot_training - Step 549: {'lr': 0.00013725, 'samples': 281600, 'steps': 549, 'loss/train': 4.016124248504639} +02/24/2022 02:16:20 - INFO - codeparrot_training - Step 550: {'lr': 0.0001375, 'samples': 282112, 'steps': 550, 'loss/train': 6.259299278259277} +02/24/2022 02:16:26 - INFO - codeparrot_training - Step 551: {'lr': 0.00013775000000000001, 'samples': 282624, 'steps': 551, 'loss/train': 6.790286064147949} +02/24/2022 02:16:29 - INFO - codeparrot_training - Step 552: {'lr': 0.00013800000000000002, 'samples': 283136, 'steps': 552, 'loss/train': 6.966577529907227} +02/24/2022 02:16:35 - INFO - codeparrot_training - Step 553: {'lr': 0.00013825000000000003, 'samples': 283648, 'steps': 553, 'loss/train': 7.551130771636963} +02/24/2022 02:16:38 - INFO - codeparrot_training - Step 554: {'lr': 0.0001385, 'samples': 284160, 'steps': 554, 'loss/train': 7.652850151062012} +02/24/2022 02:16:44 - INFO - codeparrot_training - Step 555: {'lr': 0.00013875, 'samples': 284672, 'steps': 555, 'loss/train': 7.365139007568359} +02/24/2022 02:16:47 - INFO - codeparrot_training - Step 556: {'lr': 0.00013900000000000002, 'samples': 285184, 'steps': 556, 'loss/train': 7.868185520172119} +02/24/2022 02:16:53 - INFO - codeparrot_training - Step 557: {'lr': 0.00013925000000000002, 'samples': 285696, 'steps': 557, 'loss/train': 7.059309959411621} +02/24/2022 02:16:56 - INFO - codeparrot_training - Step 558: {'lr': 0.0001395, 'samples': 286208, 'steps': 558, 'loss/train': 6.935769557952881} +02/24/2022 02:17:02 - INFO - codeparrot_training - Step 559: {'lr': 0.00013975, 'samples': 286720, 'steps': 559, 'loss/train': 6.963362693786621} +02/24/2022 02:17:05 - INFO - codeparrot_training - Step 560: {'lr': 0.00014000000000000001, 'samples': 287232, 'steps': 560, 'loss/train': 7.0291948318481445} +02/24/2022 02:17:11 - INFO - codeparrot_training - Step 561: {'lr': 0.00014025000000000002, 'samples': 287744, 'steps': 561, 'loss/train': 7.17509651184082} +02/24/2022 02:17:14 - INFO - codeparrot_training - Step 562: {'lr': 0.00014050000000000003, 'samples': 288256, 'steps': 562, 'loss/train': 6.840673446655273} +02/24/2022 02:17:20 - INFO - codeparrot_training - Step 563: {'lr': 0.00014074999999999998, 'samples': 288768, 'steps': 563, 'loss/train': 7.216966152191162} +02/24/2022 02:17:23 - INFO - codeparrot_training - Step 564: {'lr': 0.00014099999999999998, 'samples': 289280, 'steps': 564, 'loss/train': 7.15809440612793} +02/24/2022 02:17:29 - INFO - codeparrot_training - Step 565: {'lr': 0.00014125, 'samples': 289792, 'steps': 565, 'loss/train': 8.598647117614746} +02/24/2022 02:17:32 - INFO - codeparrot_training - Step 566: {'lr': 0.0001415, 'samples': 290304, 'steps': 566, 'loss/train': 6.900800704956055} +02/24/2022 02:17:39 - INFO - codeparrot_training - Step 567: {'lr': 0.00014175, 'samples': 290816, 'steps': 567, 'loss/train': 7.053728103637695} +02/24/2022 02:17:43 - INFO - codeparrot_training - Step 568: {'lr': 0.00014199999999999998, 'samples': 291328, 'steps': 568, 'loss/train': 7.268818378448486} +02/24/2022 02:17:48 - INFO - codeparrot_training - Step 569: {'lr': 0.00014225, 'samples': 291840, 'steps': 569, 'loss/train': 7.903963565826416} +02/24/2022 02:17:52 - INFO - codeparrot_training - Step 570: {'lr': 0.0001425, 
'samples': 292352, 'steps': 570, 'loss/train': 7.795132160186768} +02/24/2022 02:17:57 - INFO - codeparrot_training - Step 571: {'lr': 0.00014275, 'samples': 292864, 'steps': 571, 'loss/train': 6.7889018058776855} +02/24/2022 02:18:03 - INFO - codeparrot_training - Step 572: {'lr': 0.00014299999999999998, 'samples': 293376, 'steps': 572, 'loss/train': 8.538932800292969} +02/24/2022 02:18:06 - INFO - codeparrot_training - Step 573: {'lr': 0.00014324999999999999, 'samples': 293888, 'steps': 573, 'loss/train': 6.632084369659424} +02/24/2022 02:18:11 - INFO - codeparrot_training - Step 574: {'lr': 0.0001435, 'samples': 294400, 'steps': 574, 'loss/train': 6.36543083190918} +02/24/2022 02:18:15 - INFO - codeparrot_training - Step 575: {'lr': 0.00014375, 'samples': 294912, 'steps': 575, 'loss/train': 6.891174793243408} +02/24/2022 02:18:22 - INFO - codeparrot_training - Step 576: {'lr': 0.000144, 'samples': 295424, 'steps': 576, 'loss/train': 4.885776519775391} +02/24/2022 02:18:26 - INFO - codeparrot_training - Step 577: {'lr': 0.00014424999999999998, 'samples': 295936, 'steps': 577, 'loss/train': 6.663343906402588} +02/24/2022 02:18:31 - INFO - codeparrot_training - Step 578: {'lr': 0.0001445, 'samples': 296448, 'steps': 578, 'loss/train': 5.8480987548828125} +02/24/2022 02:18:35 - INFO - codeparrot_training - Step 579: {'lr': 0.00014475, 'samples': 296960, 'steps': 579, 'loss/train': 7.730355739593506} +02/24/2022 02:18:40 - INFO - codeparrot_training - Step 580: {'lr': 0.000145, 'samples': 297472, 'steps': 580, 'loss/train': 7.83709716796875} +02/24/2022 02:18:44 - INFO - codeparrot_training - Step 581: {'lr': 0.00014524999999999998, 'samples': 297984, 'steps': 581, 'loss/train': 6.554624557495117} +02/24/2022 02:18:49 - INFO - codeparrot_training - Step 582: {'lr': 0.00014549999999999999, 'samples': 298496, 'steps': 582, 'loss/train': 7.251741886138916} +02/24/2022 02:18:52 - INFO - codeparrot_training - Step 583: {'lr': 0.00014575, 'samples': 299008, 'steps': 583, 'loss/train': 7.171562194824219} +02/24/2022 02:18:58 - INFO - codeparrot_training - Step 584: {'lr': 0.000146, 'samples': 299520, 'steps': 584, 'loss/train': 5.9258341789245605} +02/24/2022 02:19:01 - INFO - codeparrot_training - Step 585: {'lr': 0.00014625, 'samples': 300032, 'steps': 585, 'loss/train': 6.827935218811035} +02/24/2022 02:19:09 - INFO - codeparrot_training - Step 586: {'lr': 0.00014649999999999998, 'samples': 300544, 'steps': 586, 'loss/train': 6.947076797485352} +02/24/2022 02:19:12 - INFO - codeparrot_training - Step 587: {'lr': 0.00014675, 'samples': 301056, 'steps': 587, 'loss/train': 7.862649917602539} +02/24/2022 02:19:18 - INFO - codeparrot_training - Step 588: {'lr': 0.000147, 'samples': 301568, 'steps': 588, 'loss/train': 7.695960521697998} +02/24/2022 02:19:21 - INFO - codeparrot_training - Step 589: {'lr': 0.00014725, 'samples': 302080, 'steps': 589, 'loss/train': 3.70426344871521} +02/24/2022 02:19:26 - INFO - codeparrot_training - Step 590: {'lr': 0.0001475, 'samples': 302592, 'steps': 590, 'loss/train': 7.610629558563232} +02/24/2022 02:19:30 - INFO - codeparrot_training - Step 591: {'lr': 0.00014774999999999999, 'samples': 303104, 'steps': 591, 'loss/train': 6.706225872039795} +02/24/2022 02:19:35 - INFO - codeparrot_training - Step 592: {'lr': 0.000148, 'samples': 303616, 'steps': 592, 'loss/train': 7.33603572845459} +02/24/2022 02:19:39 - INFO - codeparrot_training - Step 593: {'lr': 0.00014825, 'samples': 304128, 'steps': 593, 'loss/train': 6.951836585998535} +02/24/2022 02:19:44 - INFO - 
codeparrot_training - Step 594: {'lr': 0.0001485, 'samples': 304640, 'steps': 594, 'loss/train': 6.484375953674316} +02/24/2022 02:19:48 - INFO - codeparrot_training - Step 595: {'lr': 0.00014874999999999998, 'samples': 305152, 'steps': 595, 'loss/train': 6.624990463256836} +02/24/2022 02:19:55 - INFO - codeparrot_training - Step 596: {'lr': 0.000149, 'samples': 305664, 'steps': 596, 'loss/train': 6.708791255950928} +02/24/2022 02:19:58 - INFO - codeparrot_training - Step 597: {'lr': 0.00014925, 'samples': 306176, 'steps': 597, 'loss/train': 5.992984294891357} +02/24/2022 02:20:04 - INFO - codeparrot_training - Step 598: {'lr': 0.0001495, 'samples': 306688, 'steps': 598, 'loss/train': 7.315062522888184} +02/24/2022 02:20:07 - INFO - codeparrot_training - Step 599: {'lr': 0.00014975, 'samples': 307200, 'steps': 599, 'loss/train': 7.080319881439209} +02/24/2022 02:20:13 - INFO - codeparrot_training - Step 600: {'lr': 0.00015, 'samples': 307712, 'steps': 600, 'loss/train': 6.57517671585083} +02/24/2022 02:20:16 - INFO - codeparrot_training - Step 601: {'lr': 0.00015025, 'samples': 308224, 'steps': 601, 'loss/train': 3.876352548599243} +02/24/2022 02:20:22 - INFO - codeparrot_training - Step 602: {'lr': 0.0001505, 'samples': 308736, 'steps': 602, 'loss/train': 6.989983558654785} +02/24/2022 02:20:25 - INFO - codeparrot_training - Step 603: {'lr': 0.00015075, 'samples': 309248, 'steps': 603, 'loss/train': 6.711729526519775} +02/24/2022 02:20:31 - INFO - codeparrot_training - Step 604: {'lr': 0.000151, 'samples': 309760, 'steps': 604, 'loss/train': 6.691339492797852} +02/24/2022 02:20:34 - INFO - codeparrot_training - Step 605: {'lr': 0.00015125, 'samples': 310272, 'steps': 605, 'loss/train': 7.045860767364502} +02/24/2022 02:20:39 - INFO - codeparrot_training - Step 606: {'lr': 0.0001515, 'samples': 310784, 'steps': 606, 'loss/train': 6.741722106933594} +02/24/2022 02:20:43 - INFO - codeparrot_training - Step 607: {'lr': 0.00015175, 'samples': 311296, 'steps': 607, 'loss/train': 7.056661128997803} +02/24/2022 02:20:49 - INFO - codeparrot_training - Step 608: {'lr': 0.000152, 'samples': 311808, 'steps': 608, 'loss/train': 6.854211807250977} +02/24/2022 02:20:52 - INFO - codeparrot_training - Step 609: {'lr': 0.00015225, 'samples': 312320, 'steps': 609, 'loss/train': 6.906655788421631} +02/24/2022 02:20:57 - INFO - codeparrot_training - Step 610: {'lr': 0.0001525, 'samples': 312832, 'steps': 610, 'loss/train': 5.857024192810059} +02/24/2022 02:21:01 - INFO - codeparrot_training - Step 611: {'lr': 0.00015275, 'samples': 313344, 'steps': 611, 'loss/train': 6.103697776794434} +02/24/2022 02:21:08 - INFO - codeparrot_training - Step 612: {'lr': 0.000153, 'samples': 313856, 'steps': 612, 'loss/train': 6.716464042663574} +02/24/2022 02:21:12 - INFO - codeparrot_training - Step 613: {'lr': 0.00015325, 'samples': 314368, 'steps': 613, 'loss/train': 6.938880920410156} +02/24/2022 02:21:17 - INFO - codeparrot_training - Step 614: {'lr': 0.0001535, 'samples': 314880, 'steps': 614, 'loss/train': 6.522587776184082} +02/24/2022 02:21:23 - INFO - codeparrot_training - Step 615: {'lr': 0.00015375, 'samples': 315392, 'steps': 615, 'loss/train': 6.33483362197876} +02/24/2022 02:21:26 - INFO - codeparrot_training - Step 616: {'lr': 0.000154, 'samples': 315904, 'steps': 616, 'loss/train': 6.6959333419799805} +02/24/2022 02:21:32 - INFO - codeparrot_training - Step 617: {'lr': 0.00015425, 'samples': 316416, 'steps': 617, 'loss/train': 6.425475120544434} +02/24/2022 02:21:35 - INFO - codeparrot_training - Step 618: 
{'lr': 0.00015450000000000001, 'samples': 316928, 'steps': 618, 'loss/train': 8.284980773925781} +02/24/2022 02:21:40 - INFO - codeparrot_training - Step 619: {'lr': 0.00015475, 'samples': 317440, 'steps': 619, 'loss/train': 4.899602890014648} +02/24/2022 02:21:44 - INFO - codeparrot_training - Step 620: {'lr': 0.000155, 'samples': 317952, 'steps': 620, 'loss/train': 7.085748195648193} +02/24/2022 02:21:51 - INFO - codeparrot_training - Step 621: {'lr': 0.00015525, 'samples': 318464, 'steps': 621, 'loss/train': 7.098223686218262} +02/24/2022 02:21:55 - INFO - codeparrot_training - Step 622: {'lr': 0.0001555, 'samples': 318976, 'steps': 622, 'loss/train': 6.562531471252441} +02/24/2022 02:22:00 - INFO - codeparrot_training - Step 623: {'lr': 0.00015575, 'samples': 319488, 'steps': 623, 'loss/train': 6.3596882820129395} +02/24/2022 02:22:03 - INFO - codeparrot_training - Step 624: {'lr': 0.000156, 'samples': 320000, 'steps': 624, 'loss/train': 7.403925895690918} +02/24/2022 02:22:09 - INFO - codeparrot_training - Step 625: {'lr': 0.00015625, 'samples': 320512, 'steps': 625, 'loss/train': 7.254275798797607} +02/24/2022 02:22:12 - INFO - codeparrot_training - Step 626: {'lr': 0.0001565, 'samples': 321024, 'steps': 626, 'loss/train': 7.162297248840332} +02/24/2022 02:22:18 - INFO - codeparrot_training - Step 627: {'lr': 0.00015675000000000002, 'samples': 321536, 'steps': 627, 'loss/train': 7.67238187789917} +02/24/2022 02:22:21 - INFO - codeparrot_training - Step 628: {'lr': 0.000157, 'samples': 322048, 'steps': 628, 'loss/train': 6.712908744812012} +02/24/2022 02:22:27 - INFO - codeparrot_training - Step 629: {'lr': 0.00015725, 'samples': 322560, 'steps': 629, 'loss/train': 6.164926052093506} +02/24/2022 02:22:30 - INFO - codeparrot_training - Step 630: {'lr': 0.0001575, 'samples': 323072, 'steps': 630, 'loss/train': 7.04307746887207} +02/24/2022 02:22:37 - INFO - codeparrot_training - Step 631: {'lr': 0.00015775, 'samples': 323584, 'steps': 631, 'loss/train': 7.326963901519775} +02/24/2022 02:22:41 - INFO - codeparrot_training - Step 632: {'lr': 0.000158, 'samples': 324096, 'steps': 632, 'loss/train': 7.378395080566406} +02/24/2022 02:22:46 - INFO - codeparrot_training - Step 633: {'lr': 0.00015825, 'samples': 324608, 'steps': 633, 'loss/train': 6.594248294830322} +02/24/2022 02:22:50 - INFO - codeparrot_training - Step 634: {'lr': 0.0001585, 'samples': 325120, 'steps': 634, 'loss/train': 6.691349983215332} +02/24/2022 02:22:55 - INFO - codeparrot_training - Step 635: {'lr': 0.00015875, 'samples': 325632, 'steps': 635, 'loss/train': 6.84854793548584} +02/24/2022 02:22:59 - INFO - codeparrot_training - Step 636: {'lr': 0.00015900000000000002, 'samples': 326144, 'steps': 636, 'loss/train': 7.24489164352417} +02/24/2022 02:23:04 - INFO - codeparrot_training - Step 637: {'lr': 0.00015925, 'samples': 326656, 'steps': 637, 'loss/train': 5.990461826324463} +02/24/2022 02:23:08 - INFO - codeparrot_training - Step 638: {'lr': 0.0001595, 'samples': 327168, 'steps': 638, 'loss/train': 6.8416428565979} +02/24/2022 02:23:13 - INFO - codeparrot_training - Step 639: {'lr': 0.00015975, 'samples': 327680, 'steps': 639, 'loss/train': 7.200850009918213} +02/24/2022 02:23:16 - INFO - codeparrot_training - Step 640: {'lr': 0.00016, 'samples': 328192, 'steps': 640, 'loss/train': 7.323842525482178} +02/24/2022 02:23:22 - INFO - codeparrot_training - Step 641: {'lr': 0.00016025000000000002, 'samples': 328704, 'steps': 641, 'loss/train': 6.524571895599365} +02/24/2022 02:23:25 - INFO - codeparrot_training - Step 642: 
{'lr': 0.0001605, 'samples': 329216, 'steps': 642, 'loss/train': 6.505560398101807} +02/24/2022 02:23:33 - INFO - codeparrot_training - Step 643: {'lr': 0.00016075, 'samples': 329728, 'steps': 643, 'loss/train': 7.442710876464844} +02/24/2022 02:23:37 - INFO - codeparrot_training - Step 644: {'lr': 0.000161, 'samples': 330240, 'steps': 644, 'loss/train': 7.752229690551758} +02/24/2022 02:23:42 - INFO - codeparrot_training - Step 645: {'lr': 0.00016125000000000002, 'samples': 330752, 'steps': 645, 'loss/train': 6.916399002075195} +02/24/2022 02:23:46 - INFO - codeparrot_training - Step 646: {'lr': 0.0001615, 'samples': 331264, 'steps': 646, 'loss/train': 6.4619011878967285} +02/24/2022 02:23:51 - INFO - codeparrot_training - Step 647: {'lr': 0.00016175, 'samples': 331776, 'steps': 647, 'loss/train': 6.152773380279541} +02/24/2022 02:23:54 - INFO - codeparrot_training - Step 648: {'lr': 0.000162, 'samples': 332288, 'steps': 648, 'loss/train': 6.222464084625244} +02/24/2022 02:24:00 - INFO - codeparrot_training - Step 649: {'lr': 0.00016225000000000001, 'samples': 332800, 'steps': 649, 'loss/train': 7.1147780418396} +02/24/2022 02:24:04 - INFO - codeparrot_training - Step 650: {'lr': 0.00016250000000000002, 'samples': 333312, 'steps': 650, 'loss/train': 4.591808795928955} +02/24/2022 02:24:09 - INFO - codeparrot_training - Step 651: {'lr': 0.00016275, 'samples': 333824, 'steps': 651, 'loss/train': 7.736910343170166} +02/24/2022 02:24:13 - INFO - codeparrot_training - Step 652: {'lr': 0.000163, 'samples': 334336, 'steps': 652, 'loss/train': 6.243583679199219} +02/24/2022 02:24:20 - INFO - codeparrot_training - Step 653: {'lr': 0.00016325, 'samples': 334848, 'steps': 653, 'loss/train': 6.474334239959717} +02/24/2022 02:24:23 - INFO - codeparrot_training - Step 654: {'lr': 0.00016350000000000002, 'samples': 335360, 'steps': 654, 'loss/train': 7.204182147979736} +02/24/2022 02:24:29 - INFO - codeparrot_training - Step 655: {'lr': 0.00016375000000000002, 'samples': 335872, 'steps': 655, 'loss/train': 6.291009902954102} +02/24/2022 02:24:32 - INFO - codeparrot_training - Step 656: {'lr': 0.000164, 'samples': 336384, 'steps': 656, 'loss/train': 5.735008239746094} +02/24/2022 02:24:38 - INFO - codeparrot_training - Step 657: {'lr': 0.00016425, 'samples': 336896, 'steps': 657, 'loss/train': 5.332834243774414} +02/24/2022 02:24:41 - INFO - codeparrot_training - Step 658: {'lr': 0.00016450000000000001, 'samples': 337408, 'steps': 658, 'loss/train': 6.378315448760986} +02/24/2022 02:24:47 - INFO - codeparrot_training - Step 659: {'lr': 0.00016475000000000002, 'samples': 337920, 'steps': 659, 'loss/train': 6.292074203491211} +02/24/2022 02:24:50 - INFO - codeparrot_training - Step 660: {'lr': 0.000165, 'samples': 338432, 'steps': 660, 'loss/train': 9.866281509399414} +02/24/2022 02:24:56 - INFO - codeparrot_training - Step 661: {'lr': 0.00016525, 'samples': 338944, 'steps': 661, 'loss/train': 6.443175792694092} +02/24/2022 02:24:59 - INFO - codeparrot_training - Step 662: {'lr': 0.0001655, 'samples': 339456, 'steps': 662, 'loss/train': 6.011322975158691} +02/24/2022 02:25:05 - INFO - codeparrot_training - Step 663: {'lr': 0.00016575000000000002, 'samples': 339968, 'steps': 663, 'loss/train': 7.997130393981934} +02/24/2022 02:25:08 - INFO - codeparrot_training - Step 664: {'lr': 0.00016600000000000002, 'samples': 340480, 'steps': 664, 'loss/train': 6.515625} +02/24/2022 02:25:14 - INFO - codeparrot_training - Step 665: {'lr': 0.00016625, 'samples': 340992, 'steps': 665, 'loss/train': 6.358397960662842} 
+02/24/2022 02:25:17 - INFO - codeparrot_training - Step 666: {'lr': 0.0001665, 'samples': 341504, 'steps': 666, 'loss/train': 7.1998515129089355} +02/24/2022 02:25:23 - INFO - codeparrot_training - Step 667: {'lr': 0.00016675000000000001, 'samples': 342016, 'steps': 667, 'loss/train': 6.34958028793335} +02/24/2022 02:25:26 - INFO - codeparrot_training - Step 668: {'lr': 0.00016700000000000002, 'samples': 342528, 'steps': 668, 'loss/train': 6.854135036468506} +02/24/2022 02:25:32 - INFO - codeparrot_training - Step 669: {'lr': 0.00016725000000000003, 'samples': 343040, 'steps': 669, 'loss/train': 5.932799339294434} +02/24/2022 02:25:36 - INFO - codeparrot_training - Step 670: {'lr': 0.0001675, 'samples': 343552, 'steps': 670, 'loss/train': 6.308033466339111} +02/24/2022 02:25:41 - INFO - codeparrot_training - Step 671: {'lr': 0.00016775, 'samples': 344064, 'steps': 671, 'loss/train': 7.460987567901611} +02/24/2022 02:25:45 - INFO - codeparrot_training - Step 672: {'lr': 0.00016800000000000002, 'samples': 344576, 'steps': 672, 'loss/train': 6.332848072052002} +02/24/2022 02:25:50 - INFO - codeparrot_training - Step 673: {'lr': 0.00016825000000000002, 'samples': 345088, 'steps': 673, 'loss/train': 6.770963668823242} +02/24/2022 02:25:54 - INFO - codeparrot_training - Step 674: {'lr': 0.0001685, 'samples': 345600, 'steps': 674, 'loss/train': 6.474719524383545} +02/24/2022 02:25:59 - INFO - codeparrot_training - Step 675: {'lr': 0.00016875, 'samples': 346112, 'steps': 675, 'loss/train': 6.697624206542969} +02/24/2022 02:26:03 - INFO - codeparrot_training - Step 676: {'lr': 0.00016900000000000002, 'samples': 346624, 'steps': 676, 'loss/train': 6.2510480880737305} +02/24/2022 02:26:09 - INFO - codeparrot_training - Step 677: {'lr': 0.00016925000000000002, 'samples': 347136, 'steps': 677, 'loss/train': 6.77142333984375} +02/24/2022 02:26:12 - INFO - codeparrot_training - Step 678: {'lr': 0.00016950000000000003, 'samples': 347648, 'steps': 678, 'loss/train': 6.4684247970581055} +02/24/2022 02:26:18 - INFO - codeparrot_training - Step 679: {'lr': 0.00016975, 'samples': 348160, 'steps': 679, 'loss/train': 6.551549434661865} +02/24/2022 02:26:21 - INFO - codeparrot_training - Step 680: {'lr': 0.00017, 'samples': 348672, 'steps': 680, 'loss/train': 6.417777061462402} +02/24/2022 02:26:27 - INFO - codeparrot_training - Step 681: {'lr': 0.00017025000000000002, 'samples': 349184, 'steps': 681, 'loss/train': 6.795881748199463} +02/24/2022 02:26:30 - INFO - codeparrot_training - Step 682: {'lr': 0.00017050000000000002, 'samples': 349696, 'steps': 682, 'loss/train': 6.6553568840026855} +02/24/2022 02:26:36 - INFO - codeparrot_training - Step 683: {'lr': 0.00017075, 'samples': 350208, 'steps': 683, 'loss/train': 6.663280010223389} +02/24/2022 02:26:39 - INFO - codeparrot_training - Step 684: {'lr': 0.000171, 'samples': 350720, 'steps': 684, 'loss/train': 6.603376865386963} +02/24/2022 02:26:45 - INFO - codeparrot_training - Step 685: {'lr': 0.00017125000000000002, 'samples': 351232, 'steps': 685, 'loss/train': 6.150605201721191} +02/24/2022 02:26:48 - INFO - codeparrot_training - Step 686: {'lr': 0.00017150000000000002, 'samples': 351744, 'steps': 686, 'loss/train': 6.475432395935059} +02/24/2022 02:26:54 - INFO - codeparrot_training - Step 687: {'lr': 0.00017175000000000003, 'samples': 352256, 'steps': 687, 'loss/train': 7.101222038269043} +02/24/2022 02:26:57 - INFO - codeparrot_training - Step 688: {'lr': 0.00017199999999999998, 'samples': 352768, 'steps': 688, 'loss/train': 6.253207206726074} +02/24/2022 
02:27:03 - INFO - codeparrot_training - Step 689: {'lr': 0.00017224999999999999, 'samples': 353280, 'steps': 689, 'loss/train': 6.175331115722656} +02/24/2022 02:27:07 - INFO - codeparrot_training - Step 690: {'lr': 0.0001725, 'samples': 353792, 'steps': 690, 'loss/train': 5.864757061004639} +02/24/2022 02:27:12 - INFO - codeparrot_training - Step 691: {'lr': 0.00017275, 'samples': 354304, 'steps': 691, 'loss/train': 7.658307075500488} +02/24/2022 02:27:15 - INFO - codeparrot_training - Step 692: {'lr': 0.000173, 'samples': 354816, 'steps': 692, 'loss/train': 6.467141628265381} +02/24/2022 02:27:21 - INFO - codeparrot_training - Step 693: {'lr': 0.00017324999999999998, 'samples': 355328, 'steps': 693, 'loss/train': 6.138640403747559} +02/24/2022 02:27:26 - INFO - codeparrot_training - Step 694: {'lr': 0.0001735, 'samples': 355840, 'steps': 694, 'loss/train': 6.835862159729004} +02/24/2022 02:27:30 - INFO - codeparrot_training - Step 695: {'lr': 0.00017375, 'samples': 356352, 'steps': 695, 'loss/train': 6.1372199058532715} +02/24/2022 02:27:35 - INFO - codeparrot_training - Step 696: {'lr': 0.000174, 'samples': 356864, 'steps': 696, 'loss/train': 6.7231221199035645} +02/24/2022 02:27:39 - INFO - codeparrot_training - Step 697: {'lr': 0.00017424999999999998, 'samples': 357376, 'steps': 697, 'loss/train': 6.801931381225586} +02/24/2022 02:27:45 - INFO - codeparrot_training - Step 698: {'lr': 0.00017449999999999999, 'samples': 357888, 'steps': 698, 'loss/train': 6.4526753425598145} +02/24/2022 02:27:48 - INFO - codeparrot_training - Step 699: {'lr': 0.00017475, 'samples': 358400, 'steps': 699, 'loss/train': 5.960089206695557} +02/24/2022 02:27:54 - INFO - codeparrot_training - Step 700: {'lr': 0.000175, 'samples': 358912, 'steps': 700, 'loss/train': 6.215817928314209} +02/24/2022 02:27:57 - INFO - codeparrot_training - Step 701: {'lr': 0.00017525, 'samples': 359424, 'steps': 701, 'loss/train': 6.193975448608398} +02/24/2022 02:28:03 - INFO - codeparrot_training - Step 702: {'lr': 0.00017549999999999998, 'samples': 359936, 'steps': 702, 'loss/train': 6.0116167068481445} +02/24/2022 02:28:06 - INFO - codeparrot_training - Step 703: {'lr': 0.00017575, 'samples': 360448, 'steps': 703, 'loss/train': 6.346678256988525} +02/24/2022 02:28:12 - INFO - codeparrot_training - Step 704: {'lr': 0.000176, 'samples': 360960, 'steps': 704, 'loss/train': 6.734238147735596} +02/24/2022 02:28:15 - INFO - codeparrot_training - Step 705: {'lr': 0.00017625, 'samples': 361472, 'steps': 705, 'loss/train': 7.386608123779297} +02/24/2022 02:28:21 - INFO - codeparrot_training - Step 706: {'lr': 0.00017649999999999998, 'samples': 361984, 'steps': 706, 'loss/train': 7.438027381896973} +02/24/2022 02:28:24 - INFO - codeparrot_training - Step 707: {'lr': 0.00017675, 'samples': 362496, 'steps': 707, 'loss/train': 7.111026763916016} +02/24/2022 02:28:30 - INFO - codeparrot_training - Step 708: {'lr': 0.000177, 'samples': 363008, 'steps': 708, 'loss/train': 5.961710453033447} +02/24/2022 02:28:33 - INFO - codeparrot_training - Step 709: {'lr': 0.00017725, 'samples': 363520, 'steps': 709, 'loss/train': 6.055693626403809} +02/24/2022 02:28:39 - INFO - codeparrot_training - Step 710: {'lr': 0.0001775, 'samples': 364032, 'steps': 710, 'loss/train': 6.290581703186035} +02/24/2022 02:28:42 - INFO - codeparrot_training - Step 711: {'lr': 0.00017774999999999998, 'samples': 364544, 'steps': 711, 'loss/train': 6.256446838378906} +02/24/2022 02:28:47 - INFO - codeparrot_training - Step 712: {'lr': 0.000178, 'samples': 365056, 'steps': 
712, 'loss/train': 5.427511692047119} +02/24/2022 02:28:51 - INFO - codeparrot_training - Step 713: {'lr': 0.00017825, 'samples': 365568, 'steps': 713, 'loss/train': 6.285609245300293} +02/24/2022 02:28:57 - INFO - codeparrot_training - Step 714: {'lr': 0.0001785, 'samples': 366080, 'steps': 714, 'loss/train': 6.4496331214904785} +02/24/2022 02:29:01 - INFO - codeparrot_training - Step 715: {'lr': 0.00017875, 'samples': 366592, 'steps': 715, 'loss/train': 4.660919666290283} +02/24/2022 02:29:06 - INFO - codeparrot_training - Step 716: {'lr': 0.000179, 'samples': 367104, 'steps': 716, 'loss/train': 5.834559440612793} +02/24/2022 02:29:10 - INFO - codeparrot_training - Step 717: {'lr': 0.00017925, 'samples': 367616, 'steps': 717, 'loss/train': 6.639416694641113} +02/24/2022 02:29:15 - INFO - codeparrot_training - Step 718: {'lr': 0.0001795, 'samples': 368128, 'steps': 718, 'loss/train': 6.017248153686523} +02/24/2022 02:29:19 - INFO - codeparrot_training - Step 719: {'lr': 0.00017975, 'samples': 368640, 'steps': 719, 'loss/train': 5.847497940063477} +02/24/2022 02:29:24 - INFO - codeparrot_training - Step 720: {'lr': 0.00017999999999999998, 'samples': 369152, 'steps': 720, 'loss/train': 7.500033378601074} +02/24/2022 02:29:28 - INFO - codeparrot_training - Step 721: {'lr': 0.00018025, 'samples': 369664, 'steps': 721, 'loss/train': 6.163767337799072} +02/24/2022 02:29:33 - INFO - codeparrot_training - Step 722: {'lr': 0.0001805, 'samples': 370176, 'steps': 722, 'loss/train': 5.948116779327393} +02/24/2022 02:29:37 - INFO - codeparrot_training - Step 723: {'lr': 0.00018075, 'samples': 370688, 'steps': 723, 'loss/train': 6.244548320770264} +02/24/2022 02:29:43 - INFO - codeparrot_training - Step 724: {'lr': 0.000181, 'samples': 371200, 'steps': 724, 'loss/train': 3.4994406700134277} +02/24/2022 02:29:46 - INFO - codeparrot_training - Step 725: {'lr': 0.00018125, 'samples': 371712, 'steps': 725, 'loss/train': 6.339785099029541} +02/24/2022 02:29:52 - INFO - codeparrot_training - Step 726: {'lr': 0.0001815, 'samples': 372224, 'steps': 726, 'loss/train': 6.456050395965576} +02/24/2022 02:29:55 - INFO - codeparrot_training - Step 727: {'lr': 0.00018175, 'samples': 372736, 'steps': 727, 'loss/train': 6.083957195281982} +02/24/2022 02:30:01 - INFO - codeparrot_training - Step 728: {'lr': 0.000182, 'samples': 373248, 'steps': 728, 'loss/train': 6.422770023345947} +02/24/2022 02:30:04 - INFO - codeparrot_training - Step 729: {'lr': 0.00018225, 'samples': 373760, 'steps': 729, 'loss/train': 6.4083757400512695} +02/24/2022 02:30:10 - INFO - codeparrot_training - Step 730: {'lr': 0.0001825, 'samples': 374272, 'steps': 730, 'loss/train': 6.041470527648926} +02/24/2022 02:30:13 - INFO - codeparrot_training - Step 731: {'lr': 0.00018275, 'samples': 374784, 'steps': 731, 'loss/train': 5.690192699432373} +02/24/2022 02:30:19 - INFO - codeparrot_training - Step 732: {'lr': 0.000183, 'samples': 375296, 'steps': 732, 'loss/train': 6.055638790130615} +02/24/2022 02:30:22 - INFO - codeparrot_training - Step 733: {'lr': 0.00018325, 'samples': 375808, 'steps': 733, 'loss/train': 6.33910608291626} +02/24/2022 02:30:28 - INFO - codeparrot_training - Step 734: {'lr': 0.0001835, 'samples': 376320, 'steps': 734, 'loss/train': 5.1370086669921875} +02/24/2022 02:30:32 - INFO - codeparrot_training - Step 735: {'lr': 0.00018375, 'samples': 376832, 'steps': 735, 'loss/train': 6.075075626373291} +02/24/2022 02:30:37 - INFO - codeparrot_training - Step 736: {'lr': 0.000184, 'samples': 377344, 'steps': 736, 'loss/train': 
6.304084300994873} +02/24/2022 02:30:41 - INFO - codeparrot_training - Step 737: {'lr': 0.00018425, 'samples': 377856, 'steps': 737, 'loss/train': 6.102715969085693} +02/24/2022 02:30:46 - INFO - codeparrot_training - Step 738: {'lr': 0.0001845, 'samples': 378368, 'steps': 738, 'loss/train': 6.641720294952393} +02/24/2022 02:30:49 - INFO - codeparrot_training - Step 739: {'lr': 0.00018475, 'samples': 378880, 'steps': 739, 'loss/train': 6.921244144439697} +02/24/2022 02:30:55 - INFO - codeparrot_training - Step 740: {'lr': 0.000185, 'samples': 379392, 'steps': 740, 'loss/train': 6.271901607513428} +02/24/2022 02:30:58 - INFO - codeparrot_training - Step 741: {'lr': 0.00018525, 'samples': 379904, 'steps': 741, 'loss/train': 5.727572917938232} +02/24/2022 02:31:04 - INFO - codeparrot_training - Step 742: {'lr': 0.0001855, 'samples': 380416, 'steps': 742, 'loss/train': 6.31984281539917} +02/24/2022 02:31:07 - INFO - codeparrot_training - Step 743: {'lr': 0.00018575000000000002, 'samples': 380928, 'steps': 743, 'loss/train': 6.738889694213867} +02/24/2022 02:31:13 - INFO - codeparrot_training - Step 744: {'lr': 0.000186, 'samples': 381440, 'steps': 744, 'loss/train': 6.099252700805664} +02/24/2022 02:31:17 - INFO - codeparrot_training - Step 745: {'lr': 0.00018625, 'samples': 381952, 'steps': 745, 'loss/train': 6.482193470001221} +02/24/2022 02:31:22 - INFO - codeparrot_training - Step 746: {'lr': 0.0001865, 'samples': 382464, 'steps': 746, 'loss/train': 6.471707344055176} +02/24/2022 02:31:26 - INFO - codeparrot_training - Step 747: {'lr': 0.00018675, 'samples': 382976, 'steps': 747, 'loss/train': 7.385077476501465} +02/24/2022 02:31:31 - INFO - codeparrot_training - Step 748: {'lr': 0.000187, 'samples': 383488, 'steps': 748, 'loss/train': 10.252251625061035} +02/24/2022 02:31:35 - INFO - codeparrot_training - Step 749: {'lr': 0.00018725, 'samples': 384000, 'steps': 749, 'loss/train': 7.1071271896362305} +02/24/2022 02:31:40 - INFO - codeparrot_training - Step 750: {'lr': 0.0001875, 'samples': 384512, 'steps': 750, 'loss/train': 6.437521457672119} +02/24/2022 02:31:44 - INFO - codeparrot_training - Step 751: {'lr': 0.00018775, 'samples': 385024, 'steps': 751, 'loss/train': 6.625316143035889} +02/24/2022 02:31:49 - INFO - codeparrot_training - Step 752: {'lr': 0.00018800000000000002, 'samples': 385536, 'steps': 752, 'loss/train': 6.356978416442871} +02/24/2022 02:31:53 - INFO - codeparrot_training - Step 753: {'lr': 0.00018825, 'samples': 386048, 'steps': 753, 'loss/train': 6.442242622375488} +02/24/2022 02:31:58 - INFO - codeparrot_training - Step 754: {'lr': 0.0001885, 'samples': 386560, 'steps': 754, 'loss/train': 6.0052490234375} +02/24/2022 02:32:02 - INFO - codeparrot_training - Step 755: {'lr': 0.00018875, 'samples': 387072, 'steps': 755, 'loss/train': 6.370572566986084} +02/24/2022 02:32:07 - INFO - codeparrot_training - Step 756: {'lr': 0.000189, 'samples': 387584, 'steps': 756, 'loss/train': 6.61212682723999} +02/24/2022 02:32:10 - INFO - codeparrot_training - Step 757: {'lr': 0.00018925, 'samples': 388096, 'steps': 757, 'loss/train': 7.189175128936768} +02/24/2022 02:32:16 - INFO - codeparrot_training - Step 758: {'lr': 0.0001895, 'samples': 388608, 'steps': 758, 'loss/train': 6.407525539398193} +02/24/2022 02:32:19 - INFO - codeparrot_training - Step 759: {'lr': 0.00018975, 'samples': 389120, 'steps': 759, 'loss/train': 6.6135945320129395} +02/24/2022 02:32:26 - INFO - codeparrot_training - Step 760: {'lr': 0.00019, 'samples': 389632, 'steps': 760, 'loss/train': 5.9673566818237305} 
+02/24/2022 02:32:29 - INFO - codeparrot_training - Step 761: {'lr': 0.00019025000000000002, 'samples': 390144, 'steps': 761, 'loss/train': 5.967684268951416} +02/24/2022 02:32:34 - INFO - codeparrot_training - Step 762: {'lr': 0.0001905, 'samples': 390656, 'steps': 762, 'loss/train': 6.6024322509765625} +02/24/2022 02:32:38 - INFO - codeparrot_training - Step 763: {'lr': 0.00019075, 'samples': 391168, 'steps': 763, 'loss/train': 6.407318592071533} +02/24/2022 02:32:43 - INFO - codeparrot_training - Step 764: {'lr': 0.000191, 'samples': 391680, 'steps': 764, 'loss/train': 5.6461181640625} +02/24/2022 02:32:47 - INFO - codeparrot_training - Step 765: {'lr': 0.00019125000000000001, 'samples': 392192, 'steps': 765, 'loss/train': 6.097337245941162} +02/24/2022 02:32:52 - INFO - codeparrot_training - Step 766: {'lr': 0.00019150000000000002, 'samples': 392704, 'steps': 766, 'loss/train': 5.59611701965332} +02/24/2022 02:32:56 - INFO - codeparrot_training - Step 767: {'lr': 0.00019175, 'samples': 393216, 'steps': 767, 'loss/train': 5.815179347991943} +02/24/2022 02:33:01 - INFO - codeparrot_training - Step 768: {'lr': 0.000192, 'samples': 393728, 'steps': 768, 'loss/train': 5.541304111480713} +02/24/2022 02:33:05 - INFO - codeparrot_training - Step 769: {'lr': 0.00019225, 'samples': 394240, 'steps': 769, 'loss/train': 5.901695728302002} +02/24/2022 02:33:11 - INFO - codeparrot_training - Step 770: {'lr': 0.00019250000000000002, 'samples': 394752, 'steps': 770, 'loss/train': 6.1426472663879395} +02/24/2022 02:33:14 - INFO - codeparrot_training - Step 771: {'lr': 0.00019275, 'samples': 395264, 'steps': 771, 'loss/train': 6.149904727935791} +02/24/2022 02:33:20 - INFO - codeparrot_training - Step 772: {'lr': 0.000193, 'samples': 395776, 'steps': 772, 'loss/train': 5.976991176605225} +02/24/2022 02:33:25 - INFO - codeparrot_training - Step 773: {'lr': 0.00019325, 'samples': 396288, 'steps': 773, 'loss/train': 6.669251441955566} +02/24/2022 02:33:29 - INFO - codeparrot_training - Step 774: {'lr': 0.00019350000000000001, 'samples': 396800, 'steps': 774, 'loss/train': 6.111331462860107} +02/24/2022 02:33:34 - INFO - codeparrot_training - Step 775: {'lr': 0.00019375000000000002, 'samples': 397312, 'steps': 775, 'loss/train': 6.806671142578125} +02/24/2022 02:33:38 - INFO - codeparrot_training - Step 776: {'lr': 0.000194, 'samples': 397824, 'steps': 776, 'loss/train': 6.131837368011475} +02/24/2022 02:33:43 - INFO - codeparrot_training - Step 777: {'lr': 0.00019425, 'samples': 398336, 'steps': 777, 'loss/train': 5.915597915649414} +02/24/2022 02:33:47 - INFO - codeparrot_training - Step 778: {'lr': 0.0001945, 'samples': 398848, 'steps': 778, 'loss/train': 6.392073154449463} +02/24/2022 02:33:53 - INFO - codeparrot_training - Step 779: {'lr': 0.00019475000000000002, 'samples': 399360, 'steps': 779, 'loss/train': 7.551222324371338} +02/24/2022 02:33:56 - INFO - codeparrot_training - Step 780: {'lr': 0.00019500000000000002, 'samples': 399872, 'steps': 780, 'loss/train': 6.118439674377441} +02/24/2022 02:34:02 - INFO - codeparrot_training - Step 781: {'lr': 0.00019525, 'samples': 400384, 'steps': 781, 'loss/train': 6.616265296936035} +02/24/2022 02:34:05 - INFO - codeparrot_training - Step 782: {'lr': 0.0001955, 'samples': 400896, 'steps': 782, 'loss/train': 5.312283515930176} +02/24/2022 02:34:11 - INFO - codeparrot_training - Step 783: {'lr': 0.00019575000000000001, 'samples': 401408, 'steps': 783, 'loss/train': 3.7072436809539795} +02/24/2022 02:34:14 - INFO - codeparrot_training - Step 784: {'lr': 
0.00019600000000000002, 'samples': 401920, 'steps': 784, 'loss/train': 6.311213493347168} +02/24/2022 02:34:20 - INFO - codeparrot_training - Step 785: {'lr': 0.00019625, 'samples': 402432, 'steps': 785, 'loss/train': 6.558773994445801} +02/24/2022 02:34:23 - INFO - codeparrot_training - Step 786: {'lr': 0.0001965, 'samples': 402944, 'steps': 786, 'loss/train': 5.938821792602539} +02/24/2022 02:34:29 - INFO - codeparrot_training - Step 787: {'lr': 0.00019675, 'samples': 403456, 'steps': 787, 'loss/train': 5.922603130340576} +02/24/2022 02:34:32 - INFO - codeparrot_training - Step 788: {'lr': 0.00019700000000000002, 'samples': 403968, 'steps': 788, 'loss/train': 8.191448211669922} +02/24/2022 02:34:38 - INFO - codeparrot_training - Step 789: {'lr': 0.00019725000000000002, 'samples': 404480, 'steps': 789, 'loss/train': 5.338706016540527} +02/24/2022 02:34:42 - INFO - codeparrot_training - Step 790: {'lr': 0.0001975, 'samples': 404992, 'steps': 790, 'loss/train': 7.5210161209106445} +02/24/2022 02:34:47 - INFO - codeparrot_training - Step 791: {'lr': 0.00019775, 'samples': 405504, 'steps': 791, 'loss/train': 7.093896865844727} +02/24/2022 02:34:51 - INFO - codeparrot_training - Step 792: {'lr': 0.00019800000000000002, 'samples': 406016, 'steps': 792, 'loss/train': 3.117628574371338} +02/24/2022 02:34:56 - INFO - codeparrot_training - Step 793: {'lr': 0.00019825000000000002, 'samples': 406528, 'steps': 793, 'loss/train': 5.884690284729004} +02/24/2022 02:34:59 - INFO - codeparrot_training - Step 794: {'lr': 0.00019850000000000003, 'samples': 407040, 'steps': 794, 'loss/train': 5.920525074005127} +02/24/2022 02:35:05 - INFO - codeparrot_training - Step 795: {'lr': 0.00019875, 'samples': 407552, 'steps': 795, 'loss/train': 6.231931209564209} +02/24/2022 02:35:08 - INFO - codeparrot_training - Step 796: {'lr': 0.000199, 'samples': 408064, 'steps': 796, 'loss/train': 5.558359146118164} +02/24/2022 02:35:14 - INFO - codeparrot_training - Step 797: {'lr': 0.00019925000000000002, 'samples': 408576, 'steps': 797, 'loss/train': 6.473911285400391} +02/24/2022 02:35:17 - INFO - codeparrot_training - Step 798: {'lr': 0.00019950000000000002, 'samples': 409088, 'steps': 798, 'loss/train': 6.409696578979492} +02/24/2022 02:35:23 - INFO - codeparrot_training - Step 799: {'lr': 0.00019975, 'samples': 409600, 'steps': 799, 'loss/train': 6.1711106300354} +02/24/2022 02:35:26 - INFO - codeparrot_training - Step 800: {'lr': 0.0002, 'samples': 410112, 'steps': 800, 'loss/train': 6.187984943389893} +02/24/2022 02:35:32 - INFO - codeparrot_training - Step 801: {'lr': 0.00020025000000000002, 'samples': 410624, 'steps': 801, 'loss/train': 7.001424312591553} +02/24/2022 02:35:35 - INFO - codeparrot_training - Step 802: {'lr': 0.00020050000000000002, 'samples': 411136, 'steps': 802, 'loss/train': 2.725290060043335} +02/24/2022 02:35:41 - INFO - codeparrot_training - Step 803: {'lr': 0.00020075000000000003, 'samples': 411648, 'steps': 803, 'loss/train': 6.731625556945801} +02/24/2022 02:35:44 - INFO - codeparrot_training - Step 804: {'lr': 0.000201, 'samples': 412160, 'steps': 804, 'loss/train': 6.009284019470215} +02/24/2022 02:35:50 - INFO - codeparrot_training - Step 805: {'lr': 0.00020125, 'samples': 412672, 'steps': 805, 'loss/train': 6.596358776092529} +02/24/2022 02:35:54 - INFO - codeparrot_training - Step 806: {'lr': 0.00020150000000000002, 'samples': 413184, 'steps': 806, 'loss/train': 6.380502700805664} +02/24/2022 02:35:59 - INFO - codeparrot_training - Step 807: {'lr': 0.00020175000000000003, 'samples': 
413696, 'steps': 807, 'loss/train': 7.420418739318848} +02/24/2022 02:36:02 - INFO - codeparrot_training - Step 808: {'lr': 0.000202, 'samples': 414208, 'steps': 808, 'loss/train': 6.865699291229248} +02/24/2022 02:36:08 - INFO - codeparrot_training - Step 809: {'lr': 0.00020225, 'samples': 414720, 'steps': 809, 'loss/train': 6.789573669433594} +02/24/2022 02:36:11 - INFO - codeparrot_training - Step 810: {'lr': 0.00020250000000000002, 'samples': 415232, 'steps': 810, 'loss/train': 5.445998668670654} +02/24/2022 02:36:17 - INFO - codeparrot_training - Step 811: {'lr': 0.00020275000000000002, 'samples': 415744, 'steps': 811, 'loss/train': 9.829699516296387} +02/24/2022 02:36:21 - INFO - codeparrot_training - Step 812: {'lr': 0.00020300000000000003, 'samples': 416256, 'steps': 812, 'loss/train': 6.86912202835083} +02/24/2022 02:36:26 - INFO - codeparrot_training - Step 813: {'lr': 0.00020324999999999998, 'samples': 416768, 'steps': 813, 'loss/train': 6.221214771270752} +02/24/2022 02:36:29 - INFO - codeparrot_training - Step 814: {'lr': 0.00020349999999999999, 'samples': 417280, 'steps': 814, 'loss/train': 8.142473220825195} +02/24/2022 02:36:35 - INFO - codeparrot_training - Step 815: {'lr': 0.00020375, 'samples': 417792, 'steps': 815, 'loss/train': 5.884995937347412} +02/24/2022 02:36:38 - INFO - codeparrot_training - Step 816: {'lr': 0.000204, 'samples': 418304, 'steps': 816, 'loss/train': 5.674224853515625} +02/24/2022 02:36:45 - INFO - codeparrot_training - Step 817: {'lr': 0.00020425, 'samples': 418816, 'steps': 817, 'loss/train': 5.738276958465576} +02/24/2022 02:36:48 - INFO - codeparrot_training - Step 818: {'lr': 0.00020449999999999998, 'samples': 419328, 'steps': 818, 'loss/train': 2.4321036338806152} +02/24/2022 02:36:53 - INFO - codeparrot_training - Step 819: {'lr': 0.00020475, 'samples': 419840, 'steps': 819, 'loss/train': 6.171673774719238} +02/24/2022 02:36:57 - INFO - codeparrot_training - Step 820: {'lr': 0.000205, 'samples': 420352, 'steps': 820, 'loss/train': 6.443540096282959} +02/24/2022 02:37:02 - INFO - codeparrot_training - Step 821: {'lr': 0.00020525, 'samples': 420864, 'steps': 821, 'loss/train': 5.149608135223389} +02/24/2022 02:37:08 - INFO - codeparrot_training - Step 822: {'lr': 0.00020549999999999998, 'samples': 421376, 'steps': 822, 'loss/train': 5.614904880523682} +02/24/2022 02:37:11 - INFO - codeparrot_training - Step 823: {'lr': 0.00020575, 'samples': 421888, 'steps': 823, 'loss/train': 8.65012264251709} +02/24/2022 02:37:17 - INFO - codeparrot_training - Step 824: {'lr': 0.000206, 'samples': 422400, 'steps': 824, 'loss/train': 6.405791282653809} +02/24/2022 02:37:20 - INFO - codeparrot_training - Step 825: {'lr': 0.00020625, 'samples': 422912, 'steps': 825, 'loss/train': 5.550499439239502} +02/24/2022 02:37:27 - INFO - codeparrot_training - Step 826: {'lr': 0.0002065, 'samples': 423424, 'steps': 826, 'loss/train': 6.045300483703613} +02/24/2022 02:37:30 - INFO - codeparrot_training - Step 827: {'lr': 0.00020674999999999998, 'samples': 423936, 'steps': 827, 'loss/train': 6.330199241638184} +02/24/2022 02:37:35 - INFO - codeparrot_training - Step 828: {'lr': 0.000207, 'samples': 424448, 'steps': 828, 'loss/train': 4.9262375831604} +02/24/2022 02:37:39 - INFO - codeparrot_training - Step 829: {'lr': 0.00020725, 'samples': 424960, 'steps': 829, 'loss/train': 5.7279486656188965} +02/24/2022 02:37:44 - INFO - codeparrot_training - Step 830: {'lr': 0.0002075, 'samples': 425472, 'steps': 830, 'loss/train': 6.108678817749023} +02/24/2022 02:37:48 - INFO - 
codeparrot_training - Step 831: {'lr': 0.00020774999999999998, 'samples': 425984, 'steps': 831, 'loss/train': 5.691193580627441} +02/24/2022 02:37:53 - INFO - codeparrot_training - Step 832: {'lr': 0.000208, 'samples': 426496, 'steps': 832, 'loss/train': 6.178577899932861} +02/24/2022 02:37:57 - INFO - codeparrot_training - Step 833: {'lr': 0.00020825, 'samples': 427008, 'steps': 833, 'loss/train': 6.508179187774658} +02/24/2022 02:38:02 - INFO - codeparrot_training - Step 834: {'lr': 0.0002085, 'samples': 427520, 'steps': 834, 'loss/train': 5.967754364013672} +02/24/2022 02:38:06 - INFO - codeparrot_training - Step 835: {'lr': 0.00020875, 'samples': 428032, 'steps': 835, 'loss/train': 4.665596961975098} +02/24/2022 02:38:12 - INFO - codeparrot_training - Step 836: {'lr': 0.00020899999999999998, 'samples': 428544, 'steps': 836, 'loss/train': 6.464814186096191} +02/24/2022 02:38:15 - INFO - codeparrot_training - Step 837: {'lr': 0.00020925, 'samples': 429056, 'steps': 837, 'loss/train': 6.114542007446289} +02/24/2022 02:38:21 - INFO - codeparrot_training - Step 838: {'lr': 0.0002095, 'samples': 429568, 'steps': 838, 'loss/train': 7.669986724853516} +02/24/2022 02:38:24 - INFO - codeparrot_training - Step 839: {'lr': 0.00020975, 'samples': 430080, 'steps': 839, 'loss/train': 6.398980140686035} +02/24/2022 02:38:28 - INFO - codeparrot_training - Step 840: {'lr': 0.00021, 'samples': 430592, 'steps': 840, 'loss/train': 7.4006195068359375} +02/24/2022 02:38:33 - INFO - codeparrot_training - Step 841: {'lr': 0.00021025, 'samples': 431104, 'steps': 841, 'loss/train': 6.521213531494141} +02/24/2022 02:38:37 - INFO - codeparrot_training - Step 842: {'lr': 0.0002105, 'samples': 431616, 'steps': 842, 'loss/train': 6.6276726722717285} +02/24/2022 02:38:42 - INFO - codeparrot_training - Step 843: {'lr': 0.00021075, 'samples': 432128, 'steps': 843, 'loss/train': 6.0089006423950195} +02/24/2022 02:38:46 - INFO - codeparrot_training - Step 844: {'lr': 0.000211, 'samples': 432640, 'steps': 844, 'loss/train': 6.256646633148193} +02/24/2022 02:38:51 - INFO - codeparrot_training - Step 845: {'lr': 0.00021124999999999998, 'samples': 433152, 'steps': 845, 'loss/train': 6.229872226715088} +02/24/2022 02:38:57 - INFO - codeparrot_training - Step 846: {'lr': 0.0002115, 'samples': 433664, 'steps': 846, 'loss/train': 6.033138275146484} +02/24/2022 02:39:01 - INFO - codeparrot_training - Step 847: {'lr': 0.00021175, 'samples': 434176, 'steps': 847, 'loss/train': 5.928333282470703} +02/24/2022 02:39:06 - INFO - codeparrot_training - Step 848: {'lr': 0.000212, 'samples': 434688, 'steps': 848, 'loss/train': 2.4343342781066895} +02/24/2022 02:39:10 - INFO - codeparrot_training - Step 849: {'lr': 0.00021225, 'samples': 435200, 'steps': 849, 'loss/train': 5.912009239196777} +02/24/2022 02:39:13 - INFO - codeparrot_training - Step 850: {'lr': 0.0002125, 'samples': 435712, 'steps': 850, 'loss/train': 6.8696136474609375} +02/24/2022 02:39:19 - INFO - codeparrot_training - Step 851: {'lr': 0.00021275, 'samples': 436224, 'steps': 851, 'loss/train': 9.76145076751709} +02/24/2022 02:39:22 - INFO - codeparrot_training - Step 852: {'lr': 0.000213, 'samples': 436736, 'steps': 852, 'loss/train': 5.8401713371276855} +02/24/2022 02:39:28 - INFO - codeparrot_training - Step 853: {'lr': 0.00021325, 'samples': 437248, 'steps': 853, 'loss/train': 5.45580530166626} +02/24/2022 02:39:31 - INFO - codeparrot_training - Step 854: {'lr': 0.0002135, 'samples': 437760, 'steps': 854, 'loss/train': 5.753581523895264} +02/24/2022 02:39:37 - INFO - 
codeparrot_training - Step 855: {'lr': 0.00021375, 'samples': 438272, 'steps': 855, 'loss/train': 7.0582475662231445} +02/24/2022 02:39:40 - INFO - codeparrot_training - Step 856: {'lr': 0.000214, 'samples': 438784, 'steps': 856, 'loss/train': 5.507960796356201} +02/24/2022 02:39:46 - INFO - codeparrot_training - Step 857: {'lr': 0.00021425, 'samples': 439296, 'steps': 857, 'loss/train': 5.333662509918213} +02/24/2022 02:39:49 - INFO - codeparrot_training - Step 858: {'lr': 0.0002145, 'samples': 439808, 'steps': 858, 'loss/train': 5.536602020263672} +02/24/2022 02:39:55 - INFO - codeparrot_training - Step 859: {'lr': 0.00021475, 'samples': 440320, 'steps': 859, 'loss/train': 5.66477632522583} +02/24/2022 02:39:58 - INFO - codeparrot_training - Step 860: {'lr': 0.000215, 'samples': 440832, 'steps': 860, 'loss/train': 5.88394021987915} +02/24/2022 02:40:04 - INFO - codeparrot_training - Step 861: {'lr': 0.00021525, 'samples': 441344, 'steps': 861, 'loss/train': 7.011894226074219} +02/24/2022 02:40:10 - INFO - codeparrot_training - Step 862: {'lr': 0.0002155, 'samples': 441856, 'steps': 862, 'loss/train': 6.413607120513916} +02/24/2022 02:40:13 - INFO - codeparrot_training - Step 863: {'lr': 0.00021575, 'samples': 442368, 'steps': 863, 'loss/train': 6.333065986633301} +02/24/2022 02:40:19 - INFO - codeparrot_training - Step 864: {'lr': 0.000216, 'samples': 442880, 'steps': 864, 'loss/train': 7.09116268157959} +02/24/2022 02:40:22 - INFO - codeparrot_training - Step 865: {'lr': 0.00021625, 'samples': 443392, 'steps': 865, 'loss/train': 6.186008930206299} +02/24/2022 02:40:28 - INFO - codeparrot_training - Step 866: {'lr': 0.0002165, 'samples': 443904, 'steps': 866, 'loss/train': 6.317468643188477} +02/24/2022 02:40:31 - INFO - codeparrot_training - Step 867: {'lr': 0.00021675, 'samples': 444416, 'steps': 867, 'loss/train': 5.776604175567627} +02/24/2022 02:40:36 - INFO - codeparrot_training - Step 868: {'lr': 0.00021700000000000002, 'samples': 444928, 'steps': 868, 'loss/train': 5.373128890991211} +02/24/2022 02:40:40 - INFO - codeparrot_training - Step 869: {'lr': 0.00021725, 'samples': 445440, 'steps': 869, 'loss/train': 6.335923194885254} +02/24/2022 02:40:45 - INFO - codeparrot_training - Step 870: {'lr': 0.0002175, 'samples': 445952, 'steps': 870, 'loss/train': 6.347319602966309} +02/24/2022 02:40:49 - INFO - codeparrot_training - Step 871: {'lr': 0.00021775, 'samples': 446464, 'steps': 871, 'loss/train': 3.7130682468414307} +02/24/2022 02:40:55 - INFO - codeparrot_training - Step 872: {'lr': 0.000218, 'samples': 446976, 'steps': 872, 'loss/train': 8.389607429504395} +02/24/2022 02:40:58 - INFO - codeparrot_training - Step 873: {'lr': 0.00021825, 'samples': 447488, 'steps': 873, 'loss/train': 6.073687553405762} +02/24/2022 02:41:04 - INFO - codeparrot_training - Step 874: {'lr': 0.0002185, 'samples': 448000, 'steps': 874, 'loss/train': 6.788509845733643} +02/24/2022 02:41:07 - INFO - codeparrot_training - Step 875: {'lr': 0.00021875, 'samples': 448512, 'steps': 875, 'loss/train': 6.623488426208496} +02/24/2022 02:41:13 - INFO - codeparrot_training - Step 876: {'lr': 0.000219, 'samples': 449024, 'steps': 876, 'loss/train': 3.808100938796997} +02/24/2022 02:41:16 - INFO - codeparrot_training - Step 877: {'lr': 0.00021925000000000002, 'samples': 449536, 'steps': 877, 'loss/train': 5.787219047546387} +02/24/2022 02:41:22 - INFO - codeparrot_training - Step 878: {'lr': 0.0002195, 'samples': 450048, 'steps': 878, 'loss/train': 5.961319446563721} +02/24/2022 02:41:25 - INFO - codeparrot_training 
- Step 879: {'lr': 0.00021975, 'samples': 450560, 'steps': 879, 'loss/train': 6.06191873550415} +02/24/2022 02:41:31 - INFO - codeparrot_training - Step 880: {'lr': 0.00022, 'samples': 451072, 'steps': 880, 'loss/train': 5.730699062347412} +02/24/2022 02:41:34 - INFO - codeparrot_training - Step 881: {'lr': 0.00022025000000000001, 'samples': 451584, 'steps': 881, 'loss/train': 6.804256916046143} +02/24/2022 02:41:40 - INFO - codeparrot_training - Step 882: {'lr': 0.0002205, 'samples': 452096, 'steps': 882, 'loss/train': 5.483248233795166} +02/24/2022 02:41:43 - INFO - codeparrot_training - Step 883: {'lr': 0.00022075, 'samples': 452608, 'steps': 883, 'loss/train': 5.957112789154053} +02/24/2022 02:41:49 - INFO - codeparrot_training - Step 884: {'lr': 0.000221, 'samples': 453120, 'steps': 884, 'loss/train': 5.911479473114014} +02/24/2022 02:41:52 - INFO - codeparrot_training - Step 885: {'lr': 0.00022125, 'samples': 453632, 'steps': 885, 'loss/train': 5.84247350692749} +02/24/2022 02:41:58 - INFO - codeparrot_training - Step 886: {'lr': 0.00022150000000000002, 'samples': 454144, 'steps': 886, 'loss/train': 6.338929176330566} +02/24/2022 02:42:01 - INFO - codeparrot_training - Step 887: {'lr': 0.00022175, 'samples': 454656, 'steps': 887, 'loss/train': 7.415934085845947} +02/24/2022 02:42:07 - INFO - codeparrot_training - Step 888: {'lr': 0.000222, 'samples': 455168, 'steps': 888, 'loss/train': 6.88756799697876} +02/24/2022 02:42:10 - INFO - codeparrot_training - Step 889: {'lr': 0.00022225, 'samples': 455680, 'steps': 889, 'loss/train': 5.849971294403076} +02/24/2022 02:42:16 - INFO - codeparrot_training - Step 890: {'lr': 0.00022250000000000001, 'samples': 456192, 'steps': 890, 'loss/train': 6.605902671813965} +02/24/2022 02:42:19 - INFO - codeparrot_training - Step 891: {'lr': 0.00022275000000000002, 'samples': 456704, 'steps': 891, 'loss/train': 5.789719581604004} +02/24/2022 02:42:25 - INFO - codeparrot_training - Step 892: {'lr': 0.000223, 'samples': 457216, 'steps': 892, 'loss/train': 7.157408237457275} +02/24/2022 02:42:29 - INFO - codeparrot_training - Step 893: {'lr': 0.00022325, 'samples': 457728, 'steps': 893, 'loss/train': 5.905867099761963} +02/24/2022 02:42:34 - INFO - codeparrot_training - Step 894: {'lr': 0.0002235, 'samples': 458240, 'steps': 894, 'loss/train': 5.780603408813477} +02/24/2022 02:42:37 - INFO - codeparrot_training - Step 895: {'lr': 0.00022375000000000002, 'samples': 458752, 'steps': 895, 'loss/train': 5.804656028747559} +02/24/2022 02:42:43 - INFO - codeparrot_training - Step 896: {'lr': 0.000224, 'samples': 459264, 'steps': 896, 'loss/train': 5.820722579956055} +02/24/2022 02:42:48 - INFO - codeparrot_training - Step 897: {'lr': 0.00022425, 'samples': 459776, 'steps': 897, 'loss/train': 6.14693021774292} +02/24/2022 02:42:52 - INFO - codeparrot_training - Step 898: {'lr': 0.0002245, 'samples': 460288, 'steps': 898, 'loss/train': 6.593510627746582} +02/24/2022 02:42:57 - INFO - codeparrot_training - Step 899: {'lr': 0.00022475000000000001, 'samples': 460800, 'steps': 899, 'loss/train': 5.965980052947998} +02/24/2022 02:43:01 - INFO - codeparrot_training - Step 900: {'lr': 0.00022500000000000002, 'samples': 461312, 'steps': 900, 'loss/train': 5.33210563659668} +02/24/2022 02:43:06 - INFO - codeparrot_training - Step 901: {'lr': 0.00022525, 'samples': 461824, 'steps': 901, 'loss/train': 5.6558685302734375} +02/24/2022 02:43:10 - INFO - codeparrot_training - Step 902: {'lr': 0.0002255, 'samples': 462336, 'steps': 902, 'loss/train': 5.435738563537598} +02/24/2022 
02:43:15 - INFO - codeparrot_training - Step 903: {'lr': 0.00022575, 'samples': 462848, 'steps': 903, 'loss/train': 6.804921627044678} +02/24/2022 02:43:19 - INFO - codeparrot_training - Step 904: {'lr': 0.00022600000000000002, 'samples': 463360, 'steps': 904, 'loss/train': 8.13878345489502} +02/24/2022 02:43:24 - INFO - codeparrot_training - Step 905: {'lr': 0.00022625000000000002, 'samples': 463872, 'steps': 905, 'loss/train': 6.534748077392578} +02/24/2022 02:43:28 - INFO - codeparrot_training - Step 906: {'lr': 0.0002265, 'samples': 464384, 'steps': 906, 'loss/train': 5.649563312530518} +02/24/2022 02:43:34 - INFO - codeparrot_training - Step 907: {'lr': 0.00022675, 'samples': 464896, 'steps': 907, 'loss/train': 6.934625625610352} +02/24/2022 02:43:37 - INFO - codeparrot_training - Step 908: {'lr': 0.00022700000000000002, 'samples': 465408, 'steps': 908, 'loss/train': 5.989643573760986} +02/24/2022 02:43:43 - INFO - codeparrot_training - Step 909: {'lr': 0.00022725000000000002, 'samples': 465920, 'steps': 909, 'loss/train': 6.277463436126709} +02/24/2022 02:43:46 - INFO - codeparrot_training - Step 910: {'lr': 0.0002275, 'samples': 466432, 'steps': 910, 'loss/train': 6.045421600341797} +02/24/2022 02:43:51 - INFO - codeparrot_training - Step 911: {'lr': 0.00022775, 'samples': 466944, 'steps': 911, 'loss/train': 5.83571720123291} +02/24/2022 02:43:55 - INFO - codeparrot_training - Step 912: {'lr': 0.000228, 'samples': 467456, 'steps': 912, 'loss/train': 4.9673309326171875} +02/24/2022 02:44:00 - INFO - codeparrot_training - Step 913: {'lr': 0.00022825000000000002, 'samples': 467968, 'steps': 913, 'loss/train': 5.016026020050049} +02/24/2022 02:44:04 - INFO - codeparrot_training - Step 914: {'lr': 0.00022850000000000002, 'samples': 468480, 'steps': 914, 'loss/train': 5.969403266906738} +02/24/2022 02:44:09 - INFO - codeparrot_training - Step 915: {'lr': 0.00022875, 'samples': 468992, 'steps': 915, 'loss/train': 5.889241695404053} +02/24/2022 02:44:13 - INFO - codeparrot_training - Step 916: {'lr': 0.000229, 'samples': 469504, 'steps': 916, 'loss/train': 5.977917671203613} +02/24/2022 02:44:19 - INFO - codeparrot_training - Step 917: {'lr': 0.00022925000000000002, 'samples': 470016, 'steps': 917, 'loss/train': 6.2948713302612305} +02/24/2022 02:44:22 - INFO - codeparrot_training - Step 918: {'lr': 0.00022950000000000002, 'samples': 470528, 'steps': 918, 'loss/train': 6.78879451751709} +02/24/2022 02:44:28 - INFO - codeparrot_training - Step 919: {'lr': 0.00022975000000000003, 'samples': 471040, 'steps': 919, 'loss/train': 6.277073860168457} +02/24/2022 02:44:31 - INFO - codeparrot_training - Step 920: {'lr': 0.00023, 'samples': 471552, 'steps': 920, 'loss/train': 6.629997730255127} +02/24/2022 02:44:37 - INFO - codeparrot_training - Step 921: {'lr': 0.00023025, 'samples': 472064, 'steps': 921, 'loss/train': 5.085743427276611} +02/24/2022 02:44:40 - INFO - codeparrot_training - Step 922: {'lr': 0.00023050000000000002, 'samples': 472576, 'steps': 922, 'loss/train': 5.860753536224365} +02/24/2022 02:44:46 - INFO - codeparrot_training - Step 923: {'lr': 0.00023075000000000003, 'samples': 473088, 'steps': 923, 'loss/train': 6.716079235076904} +02/24/2022 02:44:49 - INFO - codeparrot_training - Step 924: {'lr': 0.000231, 'samples': 473600, 'steps': 924, 'loss/train': 6.65992546081543} +02/24/2022 02:44:54 - INFO - codeparrot_training - Step 925: {'lr': 0.00023125, 'samples': 474112, 'steps': 925, 'loss/train': 6.191816329956055} +02/24/2022 02:44:58 - INFO - codeparrot_training - Step 926: 
{'lr': 0.00023150000000000002, 'samples': 474624, 'steps': 926, 'loss/train': 5.812952518463135} +02/24/2022 02:45:04 - INFO - codeparrot_training - Step 927: {'lr': 0.00023175000000000002, 'samples': 475136, 'steps': 927, 'loss/train': 4.18288516998291} +02/24/2022 02:45:08 - INFO - codeparrot_training - Step 928: {'lr': 0.00023200000000000003, 'samples': 475648, 'steps': 928, 'loss/train': 5.7861175537109375} +02/24/2022 02:45:13 - INFO - codeparrot_training - Step 929: {'lr': 0.00023225, 'samples': 476160, 'steps': 929, 'loss/train': 7.031640529632568} +02/24/2022 02:45:16 - INFO - codeparrot_training - Step 930: {'lr': 0.0002325, 'samples': 476672, 'steps': 930, 'loss/train': 6.2350239753723145} +02/24/2022 02:45:22 - INFO - codeparrot_training - Step 931: {'lr': 0.00023275000000000002, 'samples': 477184, 'steps': 931, 'loss/train': 5.944024562835693} +02/24/2022 02:45:25 - INFO - codeparrot_training - Step 932: {'lr': 0.00023300000000000003, 'samples': 477696, 'steps': 932, 'loss/train': 4.8677167892456055} +02/24/2022 02:45:31 - INFO - codeparrot_training - Step 933: {'lr': 0.00023325, 'samples': 478208, 'steps': 933, 'loss/train': 6.226642608642578} +02/24/2022 02:45:35 - INFO - codeparrot_training - Step 934: {'lr': 0.0002335, 'samples': 478720, 'steps': 934, 'loss/train': 5.019423484802246} +02/24/2022 02:45:40 - INFO - codeparrot_training - Step 935: {'lr': 0.00023375000000000002, 'samples': 479232, 'steps': 935, 'loss/train': 5.15052604675293} +02/24/2022 02:45:43 - INFO - codeparrot_training - Step 936: {'lr': 0.00023400000000000002, 'samples': 479744, 'steps': 936, 'loss/train': 6.223147392272949} +02/24/2022 02:45:49 - INFO - codeparrot_training - Step 937: {'lr': 0.00023425000000000003, 'samples': 480256, 'steps': 937, 'loss/train': 5.998800754547119} +02/24/2022 02:45:52 - INFO - codeparrot_training - Step 938: {'lr': 0.00023449999999999998, 'samples': 480768, 'steps': 938, 'loss/train': 5.864666938781738} +02/24/2022 02:45:58 - INFO - codeparrot_training - Step 939: {'lr': 0.00023475, 'samples': 481280, 'steps': 939, 'loss/train': 5.786550998687744} +02/24/2022 02:46:02 - INFO - codeparrot_training - Step 940: {'lr': 0.000235, 'samples': 481792, 'steps': 940, 'loss/train': 5.671061038970947} +02/24/2022 02:46:07 - INFO - codeparrot_training - Step 941: {'lr': 0.00023525, 'samples': 482304, 'steps': 941, 'loss/train': 5.6021623611450195} +02/24/2022 02:46:11 - INFO - codeparrot_training - Step 942: {'lr': 0.0002355, 'samples': 482816, 'steps': 942, 'loss/train': 5.248650550842285} +02/24/2022 02:46:16 - INFO - codeparrot_training - Step 943: {'lr': 0.00023574999999999998, 'samples': 483328, 'steps': 943, 'loss/train': 5.5844831466674805} +02/24/2022 02:46:20 - INFO - codeparrot_training - Step 944: {'lr': 0.000236, 'samples': 483840, 'steps': 944, 'loss/train': 6.027729034423828} +02/24/2022 02:46:25 - INFO - codeparrot_training - Step 945: {'lr': 0.00023625, 'samples': 484352, 'steps': 945, 'loss/train': 6.658106327056885} +02/24/2022 02:46:29 - INFO - codeparrot_training - Step 946: {'lr': 0.0002365, 'samples': 484864, 'steps': 946, 'loss/train': 6.287262916564941} +02/24/2022 02:46:34 - INFO - codeparrot_training - Step 947: {'lr': 0.00023674999999999998, 'samples': 485376, 'steps': 947, 'loss/train': 4.877590656280518} +02/24/2022 02:46:40 - INFO - codeparrot_training - Step 948: {'lr': 0.000237, 'samples': 485888, 'steps': 948, 'loss/train': 6.26812744140625} +02/24/2022 02:46:44 - INFO - codeparrot_training - Step 949: {'lr': 0.00023725, 'samples': 486400, 'steps': 
949, 'loss/train': 6.118881702423096} +02/24/2022 02:46:49 - INFO - codeparrot_training - Step 950: {'lr': 0.0002375, 'samples': 486912, 'steps': 950, 'loss/train': 5.752974510192871} +02/24/2022 02:46:53 - INFO - codeparrot_training - Step 951: {'lr': 0.00023775, 'samples': 487424, 'steps': 951, 'loss/train': 6.376278400421143} +02/24/2022 02:46:58 - INFO - codeparrot_training - Step 952: {'lr': 0.00023799999999999998, 'samples': 487936, 'steps': 952, 'loss/train': 5.967774391174316} +02/24/2022 02:47:01 - INFO - codeparrot_training - Step 953: {'lr': 0.00023825, 'samples': 488448, 'steps': 953, 'loss/train': 6.946618556976318} +02/24/2022 02:47:07 - INFO - codeparrot_training - Step 954: {'lr': 0.0002385, 'samples': 488960, 'steps': 954, 'loss/train': 5.575984477996826} +02/24/2022 02:47:10 - INFO - codeparrot_training - Step 955: {'lr': 0.00023875, 'samples': 489472, 'steps': 955, 'loss/train': 5.178292274475098} +02/24/2022 02:47:16 - INFO - codeparrot_training - Step 956: {'lr': 0.00023899999999999998, 'samples': 489984, 'steps': 956, 'loss/train': 5.378644943237305} +02/24/2022 02:47:19 - INFO - codeparrot_training - Step 957: {'lr': 0.00023925, 'samples': 490496, 'steps': 957, 'loss/train': 5.6738996505737305} +02/24/2022 02:47:25 - INFO - codeparrot_training - Step 958: {'lr': 0.0002395, 'samples': 491008, 'steps': 958, 'loss/train': 5.746028423309326} +02/24/2022 02:47:28 - INFO - codeparrot_training - Step 959: {'lr': 0.00023975, 'samples': 491520, 'steps': 959, 'loss/train': 6.612490653991699} +02/24/2022 02:47:34 - INFO - codeparrot_training - Step 960: {'lr': 0.00024, 'samples': 492032, 'steps': 960, 'loss/train': 6.502299785614014} +02/24/2022 02:47:37 - INFO - codeparrot_training - Step 961: {'lr': 0.00024024999999999999, 'samples': 492544, 'steps': 961, 'loss/train': 9.734945297241211} +02/24/2022 02:47:43 - INFO - codeparrot_training - Step 962: {'lr': 0.0002405, 'samples': 493056, 'steps': 962, 'loss/train': 5.183950901031494} +02/24/2022 02:47:46 - INFO - codeparrot_training - Step 963: {'lr': 0.00024075, 'samples': 493568, 'steps': 963, 'loss/train': 5.328657150268555} +02/24/2022 02:47:52 - INFO - codeparrot_training - Step 964: {'lr': 0.000241, 'samples': 494080, 'steps': 964, 'loss/train': 6.440547466278076} +02/24/2022 02:47:56 - INFO - codeparrot_training - Step 965: {'lr': 0.00024125, 'samples': 494592, 'steps': 965, 'loss/train': 4.988674640655518} +02/24/2022 02:48:01 - INFO - codeparrot_training - Step 966: {'lr': 0.0002415, 'samples': 495104, 'steps': 966, 'loss/train': 5.711447715759277} +02/24/2022 02:48:05 - INFO - codeparrot_training - Step 967: {'lr': 0.00024175, 'samples': 495616, 'steps': 967, 'loss/train': 5.55291223526001} +02/24/2022 02:48:10 - INFO - codeparrot_training - Step 968: {'lr': 0.000242, 'samples': 496128, 'steps': 968, 'loss/train': 6.4952921867370605} +02/24/2022 02:48:13 - INFO - codeparrot_training - Step 969: {'lr': 0.00024225, 'samples': 496640, 'steps': 969, 'loss/train': 7.697079658508301} +02/24/2022 02:48:19 - INFO - codeparrot_training - Step 970: {'lr': 0.00024249999999999999, 'samples': 497152, 'steps': 970, 'loss/train': 6.5585784912109375} +02/24/2022 02:48:22 - INFO - codeparrot_training - Step 971: {'lr': 0.00024275, 'samples': 497664, 'steps': 971, 'loss/train': 5.085152626037598} +02/24/2022 02:48:28 - INFO - codeparrot_training - Step 972: {'lr': 0.000243, 'samples': 498176, 'steps': 972, 'loss/train': 5.955501556396484} +02/24/2022 02:48:31 - INFO - codeparrot_training - Step 973: {'lr': 0.00024325, 'samples': 498688, 
'steps': 973, 'loss/train': 5.7349467277526855} +02/24/2022 02:48:37 - INFO - codeparrot_training - Step 974: {'lr': 0.0002435, 'samples': 499200, 'steps': 974, 'loss/train': 5.855245113372803} +02/24/2022 02:48:41 - INFO - codeparrot_training - Step 975: {'lr': 0.00024375, 'samples': 499712, 'steps': 975, 'loss/train': 6.428284645080566} +02/24/2022 02:48:46 - INFO - codeparrot_training - Step 976: {'lr': 0.000244, 'samples': 500224, 'steps': 976, 'loss/train': 4.7485737800598145} +02/24/2022 02:48:50 - INFO - codeparrot_training - Step 977: {'lr': 0.00024425, 'samples': 500736, 'steps': 977, 'loss/train': 5.669659614562988} +02/24/2022 02:48:55 - INFO - codeparrot_training - Step 978: {'lr': 0.0002445, 'samples': 501248, 'steps': 978, 'loss/train': 3.911973476409912} +02/24/2022 02:48:59 - INFO - codeparrot_training - Step 979: {'lr': 0.00024475, 'samples': 501760, 'steps': 979, 'loss/train': 6.481731414794922} +02/24/2022 02:49:04 - INFO - codeparrot_training - Step 980: {'lr': 0.000245, 'samples': 502272, 'steps': 980, 'loss/train': 6.454158782958984} +02/24/2022 02:49:08 - INFO - codeparrot_training - Step 981: {'lr': 0.00024525, 'samples': 502784, 'steps': 981, 'loss/train': 6.572815895080566} +02/24/2022 02:49:13 - INFO - codeparrot_training - Step 982: {'lr': 0.0002455, 'samples': 503296, 'steps': 982, 'loss/train': 5.37235164642334} +02/24/2022 02:49:16 - INFO - codeparrot_training - Step 983: {'lr': 0.00024575, 'samples': 503808, 'steps': 983, 'loss/train': 5.381284236907959} +02/24/2022 02:49:23 - INFO - codeparrot_training - Step 984: {'lr': 0.000246, 'samples': 504320, 'steps': 984, 'loss/train': 6.182966232299805} +02/24/2022 02:49:26 - INFO - codeparrot_training - Step 985: {'lr': 0.00024625, 'samples': 504832, 'steps': 985, 'loss/train': 4.443139553070068} +02/24/2022 02:49:32 - INFO - codeparrot_training - Step 986: {'lr': 0.00024650000000000003, 'samples': 505344, 'steps': 986, 'loss/train': 5.667211055755615} +02/24/2022 02:49:36 - INFO - codeparrot_training - Step 987: {'lr': 0.00024675, 'samples': 505856, 'steps': 987, 'loss/train': 4.547412872314453} +02/24/2022 02:49:41 - INFO - codeparrot_training - Step 988: {'lr': 0.000247, 'samples': 506368, 'steps': 988, 'loss/train': 5.689558982849121} +02/24/2022 02:49:45 - INFO - codeparrot_training - Step 989: {'lr': 0.00024725, 'samples': 506880, 'steps': 989, 'loss/train': 5.532820224761963} +02/24/2022 02:49:50 - INFO - codeparrot_training - Step 990: {'lr': 0.0002475, 'samples': 507392, 'steps': 990, 'loss/train': 6.047245979309082} +02/24/2022 02:49:54 - INFO - codeparrot_training - Step 991: {'lr': 0.00024775, 'samples': 507904, 'steps': 991, 'loss/train': 5.9628520011901855} +02/24/2022 02:49:59 - INFO - codeparrot_training - Step 992: {'lr': 0.000248, 'samples': 508416, 'steps': 992, 'loss/train': 5.37191104888916} +02/24/2022 02:50:03 - INFO - codeparrot_training - Step 993: {'lr': 0.00024825, 'samples': 508928, 'steps': 993, 'loss/train': 6.091874599456787} +02/24/2022 02:50:09 - INFO - codeparrot_training - Step 994: {'lr': 0.0002485, 'samples': 509440, 'steps': 994, 'loss/train': 5.176259994506836} +02/24/2022 02:50:12 - INFO - codeparrot_training - Step 995: {'lr': 0.00024875, 'samples': 509952, 'steps': 995, 'loss/train': 6.878044605255127} +02/24/2022 02:50:18 - INFO - codeparrot_training - Step 996: {'lr': 0.000249, 'samples': 510464, 'steps': 996, 'loss/train': 5.136017799377441} +02/24/2022 02:50:21 - INFO - codeparrot_training - Step 997: {'lr': 0.00024925, 'samples': 510976, 'steps': 997, 'loss/train': 
6.837099075317383} +02/24/2022 02:50:27 - INFO - codeparrot_training - Step 998: {'lr': 0.0002495, 'samples': 511488, 'steps': 998, 'loss/train': 5.783529758453369} +02/24/2022 02:50:30 - INFO - codeparrot_training - Step 999: {'lr': 0.00024975, 'samples': 512000, 'steps': 999, 'loss/train': 6.577051639556885} +02/24/2022 02:50:30 - INFO - codeparrot_training - Evaluating and saving model checkpoint