diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -34703,3 +34703,1344 @@ Time to load utils op: 0.00040650367736816406 seconds [2022-12-20 13:50:34,343] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-1000/global_step1012/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-20 13:50:34,343] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1012 is ready now! [INFO|feature_extraction_utils.py:368] 2022-12-20 13:50:36,874 >> Feature extractor saved in ./preprocessor_config.json +[2022-12-20 13:52:34,361] [INFO] [timer.py:197:stop] 0/2026, RunningAvgSamplesPerSec=5.8634860661683135, CurrSamplesPerSec=5.058270704673658, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1001/5000 [3:30:09<485:03:32, 436.66s/it][2022-12-20 13:52:45,337] [INFO] [timer.py:197:stop] 0/2028, RunningAvgSamplesPerSec=5.863520996769987, CurrSamplesPerSec=5.3502200435821905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1002/5000 [3:30:20<343:06:46, 308.96s/it][2022-12-20 13:52:56,312] [INFO] [timer.py:197:stop] 0/2030, RunningAvgSamplesPerSec=5.8635547211988195, CurrSamplesPerSec=5.317756293587114, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1003/5000 [3:30:31<243:46:28, 219.56s/it][2022-12-20 13:53:07,250] [INFO] [timer.py:197:stop] 0/2032, RunningAvgSamplesPerSec=5.863606636750225, CurrSamplesPerSec=5.362122717423734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1004/5000 [3:30:42<174:14:31, 156.97s/it][2022-12-20 13:53:18,340] [INFO] [timer.py:197:stop] 0/2034, RunningAvgSamplesPerSec=5.86358959190176, CurrSamplesPerSec=5.306349746791664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1005/5000 [3:30:53<125:37:51, 113.21s/it][2022-12-20 13:53:29,346] [INFO] [timer.py:197:stop] 0/2036, RunningAvgSamplesPerSec=5.863607921293516, CurrSamplesPerSec=5.311056749218157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1006/5000 [3:31:04<91:34:58, 82.55s/it] [2022-12-20 13:53:40,440] [INFO] [timer.py:197:stop] 0/2038, RunningAvgSamplesPerSec=5.863578509413674, CurrSamplesPerSec=5.243325682683028, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1007/5000 [3:31:16<67:47:00, 61.11s/it][2022-12-20 13:53:51,433] [INFO] [logging.py:68:log_dist] [Rank 0] step=1020, skipped=3, lr=[8.853333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 13:53:51,435] [INFO] [timer.py:197:stop] 0/2040, RunningAvgSamplesPerSec=5.863603336779432, CurrSamplesPerSec=5.34758721999845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1008/5000 [3:31:27<51:05:38, 46.08s/it][2022-12-20 13:54:02,491] [INFO] [timer.py:197:stop] 0/2042, RunningAvgSamplesPerSec=5.863593984023428, CurrSamplesPerSec=5.295950596806758, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1009/5000 [3:31:38<39:26:01, 35.57s/it][2022-12-20 13:54:13,532] [INFO] [timer.py:197:stop] 0/2044, RunningAvgSamplesPerSec=5.86359209027383, CurrSamplesPerSec=5.287658827953365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1010/5000 [3:31:49<31:16:05, 28.21s/it][2022-12-20 13:54:24,560] [INFO] [timer.py:197:stop] 0/2046, RunningAvgSamplesPerSec=5.863597384976187, CurrSamplesPerSec=5.348200695684302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1011/5000 [3:32:00<25:32:52, 23.06s/it][2022-12-20 13:54:35,601] [INFO] [timer.py:197:stop] 0/2048, RunningAvgSamplesPerSec=5.863603788022735, CurrSamplesPerSec=5.295188185541611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1012/5000 [3:32:11<21:32:54, 19.45s/it][2022-12-20 13:54:46,847] [INFO] [timer.py:197:stop] 0/2050, RunningAvgSamplesPerSec=5.863493825304255, CurrSamplesPerSec=5.365005978273556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1013/5000 [3:32:22<18:49:00, 16.99s/it][2022-12-20 13:54:57,865] [INFO] [timer.py:197:stop] 0/2052, RunningAvgSamplesPerSec=5.863507377947548, CurrSamplesPerSec=5.347278298037127, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1014/5000 [3:32:33<16:49:40, 15.20s/it][2022-12-20 13:55:09,105] [INFO] [timer.py:197:stop] 0/2054, RunningAvgSamplesPerSec=5.863401559240622, CurrSamplesPerSec=5.156595057476339, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1015/5000 [3:32:44<15:30:33, 14.01s/it][2022-12-20 13:55:20,086] [INFO] [timer.py:197:stop] 0/2056, RunningAvgSamplesPerSec=5.8634313586809395, CurrSamplesPerSec=5.3284888337248315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1016/5000 [3:32:55<14:29:58, 13.10s/it][2022-12-20 13:55:31,107] [INFO] [timer.py:197:stop] 0/2058, RunningAvgSamplesPerSec=5.863441457895085, CurrSamplesPerSec=5.310022328424466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1017/5000 [3:33:06<13:48:18, 12.48s/it][2022-12-20 13:55:42,249] [INFO] [logging.py:68:log_dist] [Rank 0] step=1030, skipped=3, lr=[8.831111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-20 13:55:42,251] [INFO] [timer.py:197:stop] 0/2060, RunningAvgSamplesPerSec=5.863389686432189, CurrSamplesPerSec=5.313772575948109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1018/5000 [3:33:17<13:21:32, 12.08s/it][2022-12-20 13:55:53,267] [INFO] [timer.py:197:stop] 0/2062, RunningAvgSamplesPerSec=5.863401051275269, CurrSamplesPerSec=5.31035070233564, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1019/5000 [3:33:28<13:00:13, 11.76s/it][2022-12-20 13:56:04,405] [INFO] [timer.py:197:stop] 0/2064, RunningAvgSamplesPerSec=5.863349304991636, CurrSamplesPerSec=5.2426029202440985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1020/5000 [3:33:40<12:47:39, 11.57s/it][2022-12-20 13:56:15,416] [INFO] [timer.py:197:stop] 0/2066, RunningAvgSamplesPerSec=5.863363634420066, CurrSamplesPerSec=5.31510353697576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1021/5000 [3:33:51<12:36:17, 11.40s/it][2022-12-20 13:56:26,417] [INFO] [timer.py:197:stop] 0/2068, RunningAvgSamplesPerSec=5.863382961010374, CurrSamplesPerSec=5.351581917232091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1022/5000 [3:34:02<12:28:04, 11.28s/it][2022-12-20 13:56:37,441] [INFO] [timer.py:197:stop] 0/2070, RunningAvgSamplesPerSec=5.863399234920631, CurrSamplesPerSec=5.350668165134254, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1023/5000 [3:34:13<12:22:44, 11.21s/it][2022-12-20 13:56:48,532] [INFO] [timer.py:197:stop] 0/2072, RunningAvgSamplesPerSec=5.863372749033641, CurrSamplesPerSec=5.263555079867624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1024/5000 [3:34:24<12:20:16, 11.17s/it][2022-12-20 13:56:59,576] [INFO] [timer.py:197:stop] 0/2074, RunningAvgSamplesPerSec=5.86337001283808, CurrSamplesPerSec=5.323514610363855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 20%|██ | 1025/5000 [3:34:35<12:17:35, 11.13s/it] {'loss': 0.003, 'learning_rate': 8.815555555555557e-06, 'epoch': 24.99} + 20%|██ | 1025/5000 [3:34:35<12:17:35, 11.13s/it][2022-12-20 13:57:09,695] [INFO] [timer.py:197:stop] 0/2076, RunningAvgSamplesPerSec=5.863850987293994, CurrSamplesPerSec=5.35015435667869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1026/5000 [3:34:50<13:35:09, 12.31s/it][2022-12-20 13:57:20,677] [INFO] [timer.py:197:stop] 0/2078, RunningAvgSamplesPerSec=5.863879770832041, CurrSamplesPerSec=5.3430524234289845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1027/5000 [3:35:01<13:08:33, 11.91s/it][2022-12-20 13:57:31,743] [INFO] [logging.py:68:log_dist] [Rank 0] step=1040, skipped=3, lr=[8.80888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 13:57:31,745] [INFO] [timer.py:197:stop] 0/2080, RunningAvgSamplesPerSec=5.863862710020031, CurrSamplesPerSec=5.26310946021707, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1028/5000 [3:35:12<12:51:43, 11.66s/it][2022-12-20 13:57:42,728] [INFO] [timer.py:197:stop] 0/2082, RunningAvgSamplesPerSec=5.863890630642203, CurrSamplesPerSec=5.341398336648782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1029/5000 [3:35:23<12:38:04, 11.45s/it][2022-12-20 13:57:53,734] [INFO] [timer.py:197:stop] 0/2084, RunningAvgSamplesPerSec=5.863905922714832, CurrSamplesPerSec=5.3176779175110465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1030/5000 [3:35:34<12:31:24, 11.36s/it][2022-12-20 13:58:04,901] [INFO] [timer.py:197:stop] 0/2086, RunningAvgSamplesPerSec=5.863839121847008, CurrSamplesPerSec=5.284583621939008, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1031/5000 [3:35:45<12:24:15, 11.25s/it][2022-12-20 13:58:15,859] [INFO] [timer.py:197:stop] 0/2088, RunningAvgSamplesPerSec=5.863879072806387, CurrSamplesPerSec=5.3251217206869645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1032/5000 [3:35:56<12:18:55, 11.17s/it][2022-12-20 13:58:26,836] [INFO] [timer.py:197:stop] 0/2090, RunningAvgSamplesPerSec=5.8639157452497646, CurrSamplesPerSec=5.338732988467036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1033/5000 [3:36:07<12:14:49, 11.11s/it][2022-12-20 13:58:37,823] [INFO] [timer.py:197:stop] 0/2092, RunningAvgSamplesPerSec=5.863947036883069, CurrSamplesPerSec=5.3289015856867294, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1034/5000 [3:36:18<12:11:44, 11.07s/it][2022-12-20 13:58:48,828] [INFO] [timer.py:197:stop] 0/2094, RunningAvgSamplesPerSec=5.863962377593371, CurrSamplesPerSec=5.295487566144904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1035/5000 [3:36:29<12:11:05, 11.06s/it][2022-12-20 13:58:59,831] [INFO] [timer.py:197:stop] 0/2096, RunningAvgSamplesPerSec=5.863983407890965, CurrSamplesPerSec=5.341273136407529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1036/5000 [3:36:40<12:10:13, 11.05s/it][2022-12-20 13:59:10,881] [INFO] [timer.py:197:stop] 0/2098, RunningAvgSamplesPerSec=5.86397699376101, CurrSamplesPerSec=5.316498131113512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1037/5000 [3:36:51<12:10:17, 11.06s/it][2022-12-20 13:59:21,953] [INFO] [logging.py:68:log_dist] [Rank 0] step=1050, skipped=3, lr=[8.786666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-20 13:59:21,955] [INFO] [timer.py:197:stop] 0/2100, RunningAvgSamplesPerSec=5.8639581315535265, CurrSamplesPerSec=5.3083583934251495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1038/5000 [3:37:02<12:10:17, 11.06s/it][2022-12-20 13:59:32,979] [INFO] [timer.py:197:stop] 0/2102, RunningAvgSamplesPerSec=5.863964950532257, CurrSamplesPerSec=5.346437784905685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1039/5000 [3:37:13<12:08:54, 11.04s/it][2022-12-20 13:59:43,980] [INFO] [timer.py:197:stop] 0/2104, RunningAvgSamplesPerSec=5.863983120647728, CurrSamplesPerSec=5.343611459441752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1040/5000 [3:37:24<12:08:23, 11.04s/it][2022-12-20 13:59:55,002] [INFO] [timer.py:197:stop] 0/2106, RunningAvgSamplesPerSec=5.863990451893773, CurrSamplesPerSec=5.345059796031264, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1041/5000 [3:37:35<12:07:24, 11.02s/it][2022-12-20 14:00:06,022] [INFO] [timer.py:197:stop] 0/2108, RunningAvgSamplesPerSec=5.863998780648715, CurrSamplesPerSec=5.324498108366836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1042/5000 [3:37:46<12:07:28, 11.03s/it][2022-12-20 14:00:17,072] [INFO] [timer.py:197:stop] 0/2110, RunningAvgSamplesPerSec=5.863999195673888, CurrSamplesPerSec=5.312305608950852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1043/5000 [3:37:57<12:07:37, 11.03s/it][2022-12-20 14:00:28,106] [INFO] [timer.py:197:stop] 0/2112, RunningAvgSamplesPerSec=5.864001645544187, CurrSamplesPerSec=5.323496662842479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1044/5000 [3:38:08<12:07:26, 11.03s/it][2022-12-20 14:00:39,139] [INFO] [timer.py:197:stop] 0/2114, RunningAvgSamplesPerSec=5.864003600212431, CurrSamplesPerSec=5.323034925284297, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1045/5000 [3:38:19<12:07:06, 11.03s/it][2022-12-20 14:00:50,130] [INFO] [timer.py:197:stop] 0/2116, RunningAvgSamplesPerSec=5.864035291846092, CurrSamplesPerSec=5.353817797026638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1046/5000 [3:38:30<12:06:05, 11.02s/it][2022-12-20 14:01:01,168] [INFO] [timer.py:197:stop] 0/2118, RunningAvgSamplesPerSec=5.864033742378036, CurrSamplesPerSec=5.308203456734344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1047/5000 [3:38:41<12:06:21, 11.02s/it][2022-12-20 14:01:12,200] [INFO] [logging.py:68:log_dist] [Rank 0] step=1060, skipped=3, lr=[8.764444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:01:12,201] [INFO] [timer.py:197:stop] 0/2120, RunningAvgSamplesPerSec=5.864035287320744, CurrSamplesPerSec=5.315322236073764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1048/5000 [3:38:52<12:05:25, 11.01s/it][2022-12-20 14:01:23,199] [INFO] [timer.py:197:stop] 0/2122, RunningAvgSamplesPerSec=5.86405479210543, CurrSamplesPerSec=5.30661325434298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1049/5000 [3:39:03<12:05:25, 11.02s/it][2022-12-20 14:01:34,236] [INFO] [timer.py:197:stop] 0/2124, RunningAvgSamplesPerSec=5.864053769168871, CurrSamplesPerSec=5.293740647201711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1050/5000 [3:39:14<12:06:17, 11.03s/it] {'loss': 0.0027, 'learning_rate': 8.76e-06, 'epoch': 25.6} + 21%|██ | 1050/5000 [3:39:14<12:06:17, 11.03s/it][2022-12-20 14:01:45,307] [INFO] [timer.py:197:stop] 0/2126, RunningAvgSamplesPerSec=5.8640368067427655, CurrSamplesPerSec=5.293431651952121, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1051/5000 [3:39:25<12:06:19, 11.04s/it][2022-12-20 14:01:56,339] [INFO] [timer.py:197:stop] 0/2128, RunningAvgSamplesPerSec=5.864039055321194, CurrSamplesPerSec=5.3028689117070025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1052/5000 [3:39:36<12:06:32, 11.04s/it][2022-12-20 14:02:07,354] [INFO] [timer.py:197:stop] 0/2130, RunningAvgSamplesPerSec=5.864049667097402, CurrSamplesPerSec=5.338703258656914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1053/5000 [3:39:47<12:06:02, 11.04s/it][2022-12-20 14:02:18,399] [INFO] [timer.py:197:stop] 0/2132, RunningAvgSamplesPerSec=5.86404652139603, CurrSamplesPerSec=5.321801484483407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1054/5000 [3:39:58<12:05:51, 11.04s/it][2022-12-20 14:02:29,475] [INFO] [timer.py:197:stop] 0/2134, RunningAvgSamplesPerSec=5.864026025930713, CurrSamplesPerSec=5.286453177309822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1055/5000 [3:40:10<12:06:35, 11.05s/it][2022-12-20 14:02:40,507] [INFO] [timer.py:197:stop] 0/2136, RunningAvgSamplesPerSec=5.864028488316095, CurrSamplesPerSec=5.332904214420549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1056/5000 [3:40:21<12:06:04, 11.05s/it][2022-12-20 14:02:51,536] [INFO] [timer.py:197:stop] 0/2138, RunningAvgSamplesPerSec=5.864031985893871, CurrSamplesPerSec=5.337082211265904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1057/5000 [3:40:32<12:05:24, 11.04s/it][2022-12-20 14:03:02,586] [INFO] [logging.py:68:log_dist] [Rank 0] step=1070, skipped=3, lr=[8.742222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:03:02,587] [INFO] [timer.py:197:stop] 0/2140, RunningAvgSamplesPerSec=5.8640244493423435, CurrSamplesPerSec=5.310837560404776, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1058/5000 [3:40:43<12:05:24, 11.04s/it][2022-12-20 14:03:13,604] [INFO] [timer.py:197:stop] 0/2142, RunningAvgSamplesPerSec=5.864034272343851, CurrSamplesPerSec=5.3383492870029, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1059/5000 [3:40:54<12:03:53, 11.02s/it][2022-12-20 14:03:24,612] [INFO] [timer.py:197:stop] 0/2144, RunningAvgSamplesPerSec=5.864048094117084, CurrSamplesPerSec=5.307198897070904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1060/5000 [3:41:05<12:03:30, 11.02s/it][2022-12-20 14:03:35,591] [INFO] [timer.py:197:stop] 0/2146, RunningAvgSamplesPerSec=5.864076728453261, CurrSamplesPerSec=5.335307109721164, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1061/5000 [3:41:16<12:02:47, 11.01s/it][2022-12-20 14:03:46,585] [INFO] [timer.py:197:stop] 0/2148, RunningAvgSamplesPerSec=5.864098346108194, CurrSamplesPerSec=5.333846247510869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██ | 1062/5000 [3:41:27<12:02:51, 11.01s/it][2022-12-20 14:03:57,651] [INFO] [timer.py:197:stop] 0/2150, RunningAvgSamplesPerSec=5.864083366518799, CurrSamplesPerSec=5.294184577836855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1063/5000 [3:41:38<12:03:33, 11.03s/it][2022-12-20 14:04:08,630] [INFO] [timer.py:197:stop] 0/2152, RunningAvgSamplesPerSec=5.864111529783501, CurrSamplesPerSec=5.364261074716845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1064/5000 [3:41:49<12:02:03, 11.01s/it][2022-12-20 14:04:19,619] [INFO] [timer.py:197:stop] 0/2154, RunningAvgSamplesPerSec=5.8641352790665415, CurrSamplesPerSec=5.339265845281512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1065/5000 [3:42:00<12:02:10, 11.01s/it][2022-12-20 14:04:30,722] [INFO] [timer.py:197:stop] 0/2156, RunningAvgSamplesPerSec=5.864102400751884, CurrSamplesPerSec=5.26852702491582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1066/5000 [3:42:11<12:03:32, 11.04s/it][2022-12-20 14:04:40,849] [INFO] [timer.py:197:stop] 0/2158, RunningAvgSamplesPerSec=5.864553343976343, CurrSamplesPerSec=6.2492822306413425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 14:04:51,835] [INFO] [logging.py:68:log_dist] [Rank 0] step=1080, skipped=3, lr=[8.720000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:04:51,836] [INFO] [timer.py:197:stop] 0/2160, RunningAvgSamplesPerSec=5.864581100801598, CurrSamplesPerSec=5.324717792519189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1067/5000 [3:42:27<13:44:09, 12.57s/it][2022-12-20 14:05:02,847] [INFO] [timer.py:197:stop] 0/2162, RunningAvgSamplesPerSec=5.8645934464406455, CurrSamplesPerSec=5.324314348048252, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1068/5000 [3:42:38<13:13:14, 12.10s/it][2022-12-20 14:05:13,903] [INFO] [timer.py:197:stop] 0/2164, RunningAvgSamplesPerSec=5.864582772100132, CurrSamplesPerSec=5.298026662843538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1069/5000 [3:42:49<12:52:26, 11.79s/it][2022-12-20 14:05:24,931] [INFO] [timer.py:197:stop] 0/2166, RunningAvgSamplesPerSec=5.864592894167747, CurrSamplesPerSec=5.314301092839466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1070/5000 [3:43:00<12:37:15, 11.56s/it][2022-12-20 14:05:35,942] [INFO] [timer.py:197:stop] 0/2168, RunningAvgSamplesPerSec=5.86460474887039, CurrSamplesPerSec=5.347219500383081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1071/5000 [3:43:11<12:26:15, 11.40s/it][2022-12-20 14:05:46,910] [INFO] [timer.py:197:stop] 0/2170, RunningAvgSamplesPerSec=5.864638793598877, CurrSamplesPerSec=5.353111438016366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1072/5000 [3:43:22<12:17:40, 11.27s/it][2022-12-20 14:05:57,981] [INFO] [timer.py:197:stop] 0/2172, RunningAvgSamplesPerSec=5.864621245502965, CurrSamplesPerSec=5.26190260007154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1073/5000 [3:43:33<12:13:36, 11.21s/it][2022-12-20 14:06:09,027] [INFO] [timer.py:197:stop] 0/2174, RunningAvgSamplesPerSec=5.864618818421614, CurrSamplesPerSec=5.308806669068894, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 21%|██▏ | 1074/5000 [3:43:44<12:10:13, 11.16s/it][2022-12-20 14:06:20,007] [INFO] [timer.py:197:stop] 0/2176, RunningAvgSamplesPerSec=5.864646519507423, CurrSamplesPerSec=5.366300724234963, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1075/5000 [3:43:55<12:06:30, 11.11s/it] {'loss': 0.0025, 'learning_rate': 8.702222222222222e-06, 'epoch': 26.22} + 22%|██▏ | 1075/5000 [3:43:55<12:06:30, 11.11s/it][2022-12-20 14:06:31,031] [INFO] [timer.py:197:stop] 0/2178, RunningAvgSamplesPerSec=5.864653092648156, CurrSamplesPerSec=5.3329116307042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1076/5000 [3:44:06<12:04:43, 11.08s/it][2022-12-20 14:06:42,043] [INFO] [logging.py:68:log_dist] [Rank 0] step=1090, skipped=3, lr=[8.697777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:06:42,045] [INFO] [timer.py:197:stop] 0/2180, RunningAvgSamplesPerSec=5.8646634641746385, CurrSamplesPerSec=5.34102742939782, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1077/5000 [3:44:17<12:03:13, 11.06s/it][2022-12-20 14:06:52,978] [INFO] [timer.py:197:stop] 0/2182, RunningAvgSamplesPerSec=5.864714493048281, CurrSamplesPerSec=5.341413429094641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1078/5000 [3:44:28<12:00:31, 11.02s/it][2022-12-20 14:07:03,954] [INFO] [timer.py:197:stop] 0/2184, RunningAvgSamplesPerSec=5.864743401605117, CurrSamplesPerSec=5.343175367281848, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1079/5000 [3:44:39<11:59:25, 11.01s/it][2022-12-20 14:07:15,044] [INFO] [timer.py:197:stop] 0/2186, RunningAvgSamplesPerSec=5.864716731782743, CurrSamplesPerSec=5.299642901789582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1080/5000 [3:44:50<12:00:49, 11.03s/it][2022-12-20 14:07:26,099] [INFO] [timer.py:197:stop] 0/2188, RunningAvgSamplesPerSec=5.864706815875364, CurrSamplesPerSec=5.3000855207086515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1081/5000 [3:45:01<12:01:04, 11.04s/it][2022-12-20 14:07:37,147] [INFO] [timer.py:197:stop] 0/2190, RunningAvgSamplesPerSec=5.864708923970429, CurrSamplesPerSec=5.299555642386944, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1082/5000 [3:45:12<12:01:03, 11.04s/it][2022-12-20 14:07:48,140] [INFO] [timer.py:197:stop] 0/2192, RunningAvgSamplesPerSec=5.864729893569766, CurrSamplesPerSec=5.347439998253347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1083/5000 [3:45:23<11:59:54, 11.03s/it][2022-12-20 14:07:59,135] [INFO] [timer.py:197:stop] 0/2194, RunningAvgSamplesPerSec=5.864750009143748, CurrSamplesPerSec=5.315612950608003, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1084/5000 [3:45:34<11:59:05, 11.02s/it][2022-12-20 14:08:10,108] [INFO] [timer.py:197:stop] 0/2196, RunningAvgSamplesPerSec=5.864780426585327, CurrSamplesPerSec=5.364609056590229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1085/5000 [3:45:45<11:58:02, 11.00s/it][2022-12-20 14:08:21,117] [INFO] [timer.py:197:stop] 0/2198, RunningAvgSamplesPerSec=5.864795321272736, CurrSamplesPerSec=5.3543314550037655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1086/5000 [3:45:56<11:57:56, 11.01s/it][2022-12-20 14:08:32,168] [INFO] [logging.py:68:log_dist] [Rank 0] step=1100, skipped=3, lr=[8.675555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:08:32,170] [INFO] [timer.py:197:stop] 0/2200, RunningAvgSamplesPerSec=5.864794535738305, CurrSamplesPerSec=5.298841771703382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1087/5000 [3:46:07<11:58:40, 11.02s/it][2022-12-20 14:08:43,221] [INFO] [timer.py:197:stop] 0/2202, RunningAvgSamplesPerSec=5.8647877379434545, CurrSamplesPerSec=5.320138806364519, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1088/5000 [3:46:18<11:59:06, 11.03s/it][2022-12-20 14:08:54,229] [INFO] [timer.py:197:stop] 0/2204, RunningAvgSamplesPerSec=5.8648068275083585, CurrSamplesPerSec=5.35050648292483, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1089/5000 [3:46:29<11:58:30, 11.02s/it][2022-12-20 14:09:05,311] [INFO] [timer.py:197:stop] 0/2206, RunningAvgSamplesPerSec=5.86478458568544, CurrSamplesPerSec=5.29473990915237, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1090/5000 [3:46:40<11:59:28, 11.04s/it][2022-12-20 14:09:16,335] [INFO] [timer.py:197:stop] 0/2208, RunningAvgSamplesPerSec=5.864790337118027, CurrSamplesPerSec=5.3328898056998515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1091/5000 [3:46:51<11:58:58, 11.04s/it][2022-12-20 14:09:27,393] [INFO] [timer.py:197:stop] 0/2210, RunningAvgSamplesPerSec=5.864779369409788, CurrSamplesPerSec=5.32268535012933, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1092/5000 [3:47:02<11:59:13, 11.04s/it][2022-12-20 14:09:38,548] [INFO] [timer.py:197:stop] 0/2212, RunningAvgSamplesPerSec=5.864721783778854, CurrSamplesPerSec=5.230506379030451, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1093/5000 [3:47:14<12:01:14, 11.08s/it][2022-12-20 14:09:49,614] [INFO] [timer.py:197:stop] 0/2214, RunningAvgSamplesPerSec=5.864709157834904, CurrSamplesPerSec=5.301934436927559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1094/5000 [3:47:25<12:00:50, 11.07s/it][2022-12-20 14:10:00,570] [INFO] [timer.py:197:stop] 0/2216, RunningAvgSamplesPerSec=5.864747393800292, CurrSamplesPerSec=5.346892941296639, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1095/5000 [3:47:36<11:58:22, 11.04s/it][2022-12-20 14:10:11,628] [INFO] [timer.py:197:stop] 0/2218, RunningAvgSamplesPerSec=5.8647376400973705, CurrSamplesPerSec=5.303320451628331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1096/5000 [3:47:47<11:58:35, 11.04s/it][2022-12-20 14:10:22,671] [INFO] [logging.py:68:log_dist] [Rank 0] step=1110, skipped=3, lr=[8.653333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:10:22,673] [INFO] [timer.py:197:stop] 0/2220, RunningAvgSamplesPerSec=5.864739129465555, CurrSamplesPerSec=5.321221264343892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1097/5000 [3:47:58<11:58:25, 11.04s/it][2022-12-20 14:10:33,716] [INFO] [timer.py:197:stop] 0/2222, RunningAvgSamplesPerSec=5.864735069539521, CurrSamplesPerSec=5.3211984800985315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1098/5000 [3:48:09<11:58:13, 11.04s/it][2022-12-20 14:10:44,741] [INFO] [timer.py:197:stop] 0/2224, RunningAvgSamplesPerSec=5.8647404409024055, CurrSamplesPerSec=5.306076406342588, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1099/5000 [3:48:20<11:57:40, 11.04s/it][2022-12-20 14:10:55,794] [INFO] [timer.py:197:stop] 0/2226, RunningAvgSamplesPerSec=5.8647349478035204, CurrSamplesPerSec=5.307838614455876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1100/5000 [3:48:31<11:57:46, 11.04s/it] {'loss': 0.0019, 'learning_rate': 8.646666666666668e-06, 'epoch': 26.82} + 22%|██▏ | 1100/5000 [3:48:31<11:57:46, 11.04s/it][2022-12-20 14:11:06,859] [INFO] [timer.py:197:stop] 0/2228, RunningAvgSamplesPerSec=5.864721885493288, CurrSamplesPerSec=5.291202735734523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1101/5000 [3:48:42<11:58:00, 11.05s/it][2022-12-20 14:11:17,914] [INFO] [timer.py:197:stop] 0/2230, RunningAvgSamplesPerSec=5.864716545123169, CurrSamplesPerSec=5.302519885280004, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1102/5000 [3:48:53<11:57:57, 11.05s/it][2022-12-20 14:11:28,939] [INFO] [timer.py:197:stop] 0/2232, RunningAvgSamplesPerSec=5.864721719243963, CurrSamplesPerSec=5.308745564747245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1103/5000 [3:49:04<11:57:15, 11.04s/it][2022-12-20 14:11:39,973] [INFO] [timer.py:197:stop] 0/2234, RunningAvgSamplesPerSec=5.864722708580744, CurrSamplesPerSec=5.330234633577298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1104/5000 [3:49:15<11:56:53, 11.04s/it][2022-12-20 14:11:51,035] [INFO] [timer.py:197:stop] 0/2236, RunningAvgSamplesPerSec=5.8647136474348835, CurrSamplesPerSec=5.312328737628104, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1105/5000 [3:49:26<11:57:07, 11.05s/it][2022-12-20 14:12:02,034] [INFO] [timer.py:197:stop] 0/2238, RunningAvgSamplesPerSec=5.864733430074749, CurrSamplesPerSec=5.332309707474836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1106/5000 [3:49:37<11:56:00, 11.03s/it][2022-12-20 14:12:13,051] [INFO] [logging.py:68:log_dist] [Rank 0] step=1120, skipped=3, lr=[8.631111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:12:13,052] [INFO] [timer.py:197:stop] 0/2240, RunningAvgSamplesPerSec=5.864743512827763, CurrSamplesPerSec=5.31612983157818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1107/5000 [3:49:48<11:55:32, 11.03s/it][2022-12-20 14:12:23,219] [INFO] [timer.py:197:stop] 0/2242, RunningAvgSamplesPerSec=5.86515615693459, CurrSamplesPerSec=5.304526890794296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1108/5000 [3:50:03<13:14:29, 12.25s/it][2022-12-20 14:12:34,224] [INFO] [timer.py:197:stop] 0/2244, RunningAvgSamplesPerSec=5.865170598574616, CurrSamplesPerSec=5.321723622140125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1109/5000 [3:50:14<12:50:31, 11.88s/it][2022-12-20 14:12:45,264] [INFO] [timer.py:197:stop] 0/2246, RunningAvgSamplesPerSec=5.865168219554229, CurrSamplesPerSec=5.3102681323286465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1110/5000 [3:50:25<12:34:13, 11.63s/it][2022-12-20 14:12:56,290] [INFO] [timer.py:197:stop] 0/2248, RunningAvgSamplesPerSec=5.865173781563321, CurrSamplesPerSec=5.336660764378463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1111/5000 [3:50:36<12:21:27, 11.44s/it][2022-12-20 14:13:07,316] [INFO] [timer.py:197:stop] 0/2250, RunningAvgSamplesPerSec=5.865180007392161, CurrSamplesPerSec=5.30172772809063, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1112/5000 [3:50:47<12:13:36, 11.32s/it][2022-12-20 14:13:18,365] [INFO] [timer.py:197:stop] 0/2252, RunningAvgSamplesPerSec=5.865172791576657, CurrSamplesPerSec=5.295400234600223, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1113/5000 [3:50:58<12:07:25, 11.23s/it][2022-12-20 14:13:29,488] [INFO] [timer.py:197:stop] 0/2254, RunningAvgSamplesPerSec=5.865130687223691, CurrSamplesPerSec=5.2008883121909495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1114/5000 [3:51:10<12:06:29, 11.22s/it][2022-12-20 14:13:40,522] [INFO] [timer.py:197:stop] 0/2256, RunningAvgSamplesPerSec=5.8651382310160844, CurrSamplesPerSec=5.336662674108486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1115/5000 [3:51:21<12:01:37, 11.14s/it][2022-12-20 14:13:51,582] [INFO] [timer.py:197:stop] 0/2258, RunningAvgSamplesPerSec=5.865126760247001, CurrSamplesPerSec=5.263164565181997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1116/5000 [3:51:32<11:59:57, 11.12s/it][2022-12-20 14:14:02,552] [INFO] [logging.py:68:log_dist] [Rank 0] step=1130, skipped=3, lr=[8.60888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:14:02,554] [INFO] [timer.py:197:stop] 0/2260, RunningAvgSamplesPerSec=5.865157653267203, CurrSamplesPerSec=5.349653653685572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1117/5000 [3:51:43<11:57:41, 11.09s/it][2022-12-20 14:14:13,565] [INFO] [timer.py:197:stop] 0/2262, RunningAvgSamplesPerSec=5.865168645626259, CurrSamplesPerSec=5.350817911199397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1118/5000 [3:51:54<11:54:42, 11.05s/it][2022-12-20 14:14:24,525] [INFO] [timer.py:197:stop] 0/2264, RunningAvgSamplesPerSec=5.865204938025142, CurrSamplesPerSec=5.339099116989073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1119/5000 [3:52:05<11:54:21, 11.04s/it][2022-12-20 14:14:35,609] [INFO] [timer.py:197:stop] 0/2266, RunningAvgSamplesPerSec=5.865184046889684, CurrSamplesPerSec=5.297849116581928, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1120/5000 [3:52:16<11:53:31, 11.03s/it][2022-12-20 14:14:46,596] [INFO] [timer.py:197:stop] 0/2268, RunningAvgSamplesPerSec=5.865206825467563, CurrSamplesPerSec=5.318219644525164, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1121/5000 [3:52:27<11:51:57, 11.01s/it][2022-12-20 14:14:57,525] [INFO] [timer.py:197:stop] 0/2270, RunningAvgSamplesPerSec=5.8652567208296915, CurrSamplesPerSec=5.347790701890485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1122/5000 [3:52:38<11:50:39, 11.00s/it][2022-12-20 14:15:08,487] [INFO] [timer.py:197:stop] 0/2272, RunningAvgSamplesPerSec=5.86529128143706, CurrSamplesPerSec=5.3418541236015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1123/5000 [3:52:49<11:50:13, 10.99s/it][2022-12-20 14:15:19,511] [INFO] [timer.py:197:stop] 0/2274, RunningAvgSamplesPerSec=5.865295920018038, CurrSamplesPerSec=5.304192319310985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▏ | 1124/5000 [3:53:00<11:51:11, 11.01s/it][2022-12-20 14:15:30,544] [INFO] [timer.py:197:stop] 0/2276, RunningAvgSamplesPerSec=5.865296439071898, CurrSamplesPerSec=5.3206972760597475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 22%|██▎ | 1125/5000 [3:53:11<11:51:39, 11.02s/it] {'loss': 0.0021, 'learning_rate': 8.591111111111112e-06, 'epoch': 27.43} + 22%|██▎ | 1125/5000 [3:53:11<11:51:39, 11.02s/it][2022-12-20 14:15:41,579] [INFO] [timer.py:197:stop] 0/2278, RunningAvgSamplesPerSec=5.865298541262584, CurrSamplesPerSec=5.327499843212781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1126/5000 [3:53:22<11:51:17, 11.02s/it][2022-12-20 14:15:52,588] [INFO] [logging.py:68:log_dist] [Rank 0] step=1140, skipped=3, lr=[8.586666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:15:52,590] [INFO] [timer.py:197:stop] 0/2280, RunningAvgSamplesPerSec=5.865309157514421, CurrSamplesPerSec=5.325291591630112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1127/5000 [3:53:33<11:51:22, 11.02s/it][2022-12-20 14:16:03,581] [INFO] [timer.py:197:stop] 0/2282, RunningAvgSamplesPerSec=5.86532916902633, CurrSamplesPerSec=5.361328714891081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1128/5000 [3:53:44<11:49:32, 11.00s/it][2022-12-20 14:16:14,516] [INFO] [timer.py:197:stop] 0/2284, RunningAvgSamplesPerSec=5.865375576271883, CurrSamplesPerSec=5.361487410916991, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1129/5000 [3:53:55<11:48:27, 10.98s/it][2022-12-20 14:16:25,488] [INFO] [timer.py:197:stop] 0/2286, RunningAvgSamplesPerSec=5.865404946836134, CurrSamplesPerSec=5.341338392846767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1130/5000 [3:54:06<11:48:17, 10.98s/it][2022-12-20 14:16:36,485] [INFO] [timer.py:197:stop] 0/2288, RunningAvgSamplesPerSec=5.86542233096877, CurrSamplesPerSec=5.327405531815613, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1131/5000 [3:54:17<11:49:01, 11.00s/it][2022-12-20 14:16:47,556] [INFO] [timer.py:197:stop] 0/2290, RunningAvgSamplesPerSec=5.865404769565402, CurrSamplesPerSec=5.288957355158159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1132/5000 [3:54:28<11:50:04, 11.01s/it][2022-12-20 14:16:58,583] [INFO] [timer.py:197:stop] 0/2292, RunningAvgSamplesPerSec=5.86540775171255, CurrSamplesPerSec=5.317277645627119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1133/5000 [3:54:39<11:49:47, 11.01s/it][2022-12-20 14:17:09,595] [INFO] [timer.py:197:stop] 0/2294, RunningAvgSamplesPerSec=5.865417214120428, CurrSamplesPerSec=5.314231024643213, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1134/5000 [3:54:50<11:49:50, 11.02s/it][2022-12-20 14:17:20,620] [INFO] [timer.py:197:stop] 0/2296, RunningAvgSamplesPerSec=5.865421892145623, CurrSamplesPerSec=5.316322503357713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1135/5000 [3:55:01<11:49:14, 11.01s/it][2022-12-20 14:17:31,627] [INFO] [timer.py:197:stop] 0/2298, RunningAvgSamplesPerSec=5.865434501497301, CurrSamplesPerSec=5.3049684388164575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1136/5000 [3:55:12<11:48:50, 11.01s/it][2022-12-20 14:17:42,609] [INFO] [logging.py:68:log_dist] [Rank 0] step=1150, skipped=3, lr=[8.564444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:17:42,611] [INFO] [timer.py:197:stop] 0/2300, RunningAvgSamplesPerSec=5.865457691582508, CurrSamplesPerSec=5.318497820298302, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1137/5000 [3:55:23<11:48:56, 11.01s/it][2022-12-20 14:17:53,605] [INFO] [timer.py:197:stop] 0/2302, RunningAvgSamplesPerSec=5.865475697674752, CurrSamplesPerSec=5.34173039016067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1138/5000 [3:55:34<11:47:50, 11.00s/it][2022-12-20 14:18:04,601] [INFO] [timer.py:197:stop] 0/2304, RunningAvgSamplesPerSec=5.8655005303686965, CurrSamplesPerSec=5.326890684296372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1139/5000 [3:55:45<11:48:30, 11.01s/it][2022-12-20 14:18:15,643] [INFO] [timer.py:197:stop] 0/2306, RunningAvgSamplesPerSec=5.865496280394534, CurrSamplesPerSec=5.313038675712788, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1140/5000 [3:55:56<11:48:59, 11.02s/it][2022-12-20 14:18:26,673] [INFO] [timer.py:197:stop] 0/2308, RunningAvgSamplesPerSec=5.865497587354025, CurrSamplesPerSec=5.325734067573845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1141/5000 [3:56:07<11:49:07, 11.03s/it][2022-12-20 14:18:37,709] [INFO] [timer.py:197:stop] 0/2310, RunningAvgSamplesPerSec=5.865497024436438, CurrSamplesPerSec=5.327805426617686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1142/5000 [3:56:18<11:48:38, 11.02s/it][2022-12-20 14:18:48,735] [INFO] [timer.py:197:stop] 0/2312, RunningAvgSamplesPerSec=5.865500428844639, CurrSamplesPerSec=5.31282205719024, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1143/5000 [3:56:29<11:48:14, 11.02s/it][2022-12-20 14:18:59,761] [INFO] [timer.py:197:stop] 0/2314, RunningAvgSamplesPerSec=5.8655046402636675, CurrSamplesPerSec=5.299324010860995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1144/5000 [3:56:40<11:48:13, 11.02s/it][2022-12-20 14:19:10,784] [INFO] [timer.py:197:stop] 0/2316, RunningAvgSamplesPerSec=5.865509465851486, CurrSamplesPerSec=5.301385552535815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1145/5000 [3:56:51<11:47:47, 11.02s/it][2022-12-20 14:19:21,767] [INFO] [timer.py:197:stop] 0/2318, RunningAvgSamplesPerSec=5.865533465552191, CurrSamplesPerSec=5.3235148215118855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1146/5000 [3:57:02<11:47:26, 11.01s/it][2022-12-20 14:19:32,804] [INFO] [logging.py:68:log_dist] [Rank 0] step=1160, skipped=3, lr=[8.542222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:19:32,805] [INFO] [timer.py:197:stop] 0/2320, RunningAvgSamplesPerSec=5.865532716517903, CurrSamplesPerSec=5.295595167583055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1147/5000 [3:57:13<11:48:10, 11.03s/it][2022-12-20 14:19:43,844] [INFO] [timer.py:197:stop] 0/2322, RunningAvgSamplesPerSec=5.865530540159725, CurrSamplesPerSec=5.315744319380331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1148/5000 [3:57:24<11:47:35, 11.02s/it][2022-12-20 14:19:53,950] [INFO] [timer.py:197:stop] 0/2324, RunningAvgSamplesPerSec=5.865956033698763, CurrSamplesPerSec=6.24039639415963, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 14:20:05,028] [INFO] [timer.py:197:stop] 0/2326, RunningAvgSamplesPerSec=5.865934394399958, CurrSamplesPerSec=5.289685451108795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1149/5000 [3:57:40<13:28:16, 12.59s/it][2022-12-20 14:20:16,051] [INFO] [timer.py:197:stop] 0/2328, RunningAvgSamplesPerSec=5.865939758476308, CurrSamplesPerSec=5.307168678016946, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1150/5000 [3:57:51<12:57:50, 12.12s/it] {'loss': 0.0018, 'learning_rate': 8.533333333333335e-06, 'epoch': 28.05} + 23%|██▎ | 1150/5000 [3:57:51<12:57:50, 12.12s/it][2022-12-20 14:20:27,062] [INFO] [timer.py:197:stop] 0/2330, RunningAvgSamplesPerSec=5.865950835960259, CurrSamplesPerSec=5.3302888245755655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1151/5000 [3:58:02<12:36:15, 11.79s/it][2022-12-20 14:20:38,107] [INFO] [timer.py:197:stop] 0/2332, RunningAvgSamplesPerSec=5.865945061077047, CurrSamplesPerSec=5.312614288253192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1152/5000 [3:58:13<12:21:44, 11.57s/it][2022-12-20 14:20:49,050] [INFO] [timer.py:197:stop] 0/2334, RunningAvgSamplesPerSec=5.865987180419062, CurrSamplesPerSec=5.361222280309993, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1153/5000 [3:58:24<12:09:34, 11.38s/it][2022-12-20 14:21:00,040] [INFO] [timer.py:197:stop] 0/2336, RunningAvgSamplesPerSec=5.86600722984544, CurrSamplesPerSec=5.317433955159826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1154/5000 [3:58:35<12:01:54, 11.26s/it][2022-12-20 14:21:11,047] [INFO] [timer.py:197:stop] 0/2338, RunningAvgSamplesPerSec=5.866019490246221, CurrSamplesPerSec=5.319441938212023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1155/5000 [3:58:46<11:56:49, 11.19s/it][2022-12-20 14:21:22,088] [INFO] [logging.py:68:log_dist] [Rank 0] step=1170, skipped=3, lr=[8.52e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:21:22,089] [INFO] [timer.py:197:stop] 0/2340, RunningAvgSamplesPerSec=5.866015352677619, CurrSamplesPerSec=5.314046709923048, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1156/5000 [3:58:57<11:53:52, 11.14s/it][2022-12-20 14:21:33,177] [INFO] [timer.py:197:stop] 0/2342, RunningAvgSamplesPerSec=5.865990808005341, CurrSamplesPerSec=5.281120053946874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1157/5000 [3:59:08<11:52:37, 11.13s/it][2022-12-20 14:21:44,180] [INFO] [timer.py:197:stop] 0/2344, RunningAvgSamplesPerSec=5.866004904138269, CurrSamplesPerSec=5.350195730709113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1158/5000 [3:59:19<11:50:04, 11.09s/it][2022-12-20 14:21:55,133] [INFO] [timer.py:197:stop] 0/2346, RunningAvgSamplesPerSec=5.866042003931394, CurrSamplesPerSec=5.366760127392136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1159/5000 [3:59:30<11:47:16, 11.05s/it][2022-12-20 14:22:06,032] [INFO] [timer.py:197:stop] 0/2348, RunningAvgSamplesPerSec=5.86610316435128, CurrSamplesPerSec=5.400620739877447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1160/5000 [3:59:41<11:44:14, 11.00s/it][2022-12-20 14:22:17,047] [INFO] [timer.py:197:stop] 0/2350, RunningAvgSamplesPerSec=5.866111642478457, CurrSamplesPerSec=5.337300388167726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1161/5000 [3:59:52<11:44:16, 11.01s/it][2022-12-20 14:22:28,038] [INFO] [timer.py:197:stop] 0/2352, RunningAvgSamplesPerSec=5.866130688903831, CurrSamplesPerSec=5.355272527018808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1162/5000 [4:00:03<11:43:46, 11.00s/it][2022-12-20 14:22:39,079] [INFO] [timer.py:197:stop] 0/2354, RunningAvgSamplesPerSec=5.86612719306718, CurrSamplesPerSec=5.3206898937092735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1163/5000 [4:00:14<11:44:19, 11.01s/it][2022-12-20 14:22:50,039] [INFO] [timer.py:197:stop] 0/2356, RunningAvgSamplesPerSec=5.866161114379853, CurrSamplesPerSec=5.359317024101602, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1164/5000 [4:00:25<11:43:07, 11.00s/it][2022-12-20 14:23:01,028] [INFO] [timer.py:197:stop] 0/2358, RunningAvgSamplesPerSec=5.866181281292867, CurrSamplesPerSec=5.348691748106546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1165/5000 [4:00:36<11:42:45, 10.99s/it][2022-12-20 14:23:12,042] [INFO] [logging.py:68:log_dist] [Rank 0] step=1180, skipped=3, lr=[8.497777777777777e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:23:12,043] [INFO] [timer.py:197:stop] 0/2360, RunningAvgSamplesPerSec=5.8661896824405035, CurrSamplesPerSec=5.327635818212209, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1166/5000 [4:00:47<11:42:58, 11.00s/it][2022-12-20 14:23:23,018] [INFO] [timer.py:197:stop] 0/2362, RunningAvgSamplesPerSec=5.866217316938825, CurrSamplesPerSec=5.349279467119528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1167/5000 [4:00:58<11:42:16, 10.99s/it][2022-12-20 14:23:34,023] [INFO] [timer.py:197:stop] 0/2364, RunningAvgSamplesPerSec=5.866230198120765, CurrSamplesPerSec=5.331378382058555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1168/5000 [4:01:09<11:42:19, 11.00s/it][2022-12-20 14:23:45,074] [INFO] [timer.py:197:stop] 0/2366, RunningAvgSamplesPerSec=5.866221819988924, CurrSamplesPerSec=5.306074308673665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1169/5000 [4:01:20<11:43:11, 11.01s/it][2022-12-20 14:23:56,091] [INFO] [timer.py:197:stop] 0/2368, RunningAvgSamplesPerSec=5.866228810395212, CurrSamplesPerSec=5.322969693102323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1170/5000 [4:01:31<11:43:04, 11.01s/it][2022-12-20 14:24:07,126] [INFO] [timer.py:197:stop] 0/2370, RunningAvgSamplesPerSec=5.866234434708928, CurrSamplesPerSec=5.301017878224371, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1171/5000 [4:01:42<11:43:18, 11.02s/it][2022-12-20 14:24:18,150] [INFO] [timer.py:197:stop] 0/2372, RunningAvgSamplesPerSec=5.866248521951817, CurrSamplesPerSec=5.341322450572729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1172/5000 [4:01:53<11:43:10, 11.02s/it][2022-12-20 14:24:29,202] [INFO] [timer.py:197:stop] 0/2374, RunningAvgSamplesPerSec=5.866240058534632, CurrSamplesPerSec=5.296656163808751, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1173/5000 [4:02:04<11:43:34, 11.03s/it][2022-12-20 14:24:40,175] [INFO] [timer.py:197:stop] 0/2376, RunningAvgSamplesPerSec=5.8662677316419645, CurrSamplesPerSec=5.33105926032227, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 23%|██▎ | 1174/5000 [4:02:15<11:42:16, 11.01s/it][2022-12-20 14:24:51,196] [INFO] [timer.py:197:stop] 0/2378, RunningAvgSamplesPerSec=5.866273388393853, CurrSamplesPerSec=5.320795779667754, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1175/5000 [4:02:26<11:42:15, 11.02s/it] {'loss': 0.0015, 'learning_rate': 8.477777777777778e-06, 'epoch': 28.65} + 24%|██▎ | 1175/5000 [4:02:26<11:42:15, 11.02s/it][2022-12-20 14:25:02,235] [INFO] [logging.py:68:log_dist] [Rank 0] step=1190, skipped=3, lr=[8.475555555555555e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:25:02,236] [INFO] [timer.py:197:stop] 0/2380, RunningAvgSamplesPerSec=5.866270932574624, CurrSamplesPerSec=5.328444410108352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1176/5000 [4:02:37<11:42:31, 11.02s/it][2022-12-20 14:25:13,250] [INFO] [timer.py:197:stop] 0/2382, RunningAvgSamplesPerSec=5.866279706920595, CurrSamplesPerSec=5.3410852407781055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1177/5000 [4:02:48<11:42:09, 11.02s/it][2022-12-20 14:25:24,316] [INFO] [timer.py:197:stop] 0/2384, RunningAvgSamplesPerSec=5.866264919998554, CurrSamplesPerSec=5.294317813314271, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1178/5000 [4:02:59<11:42:52, 11.03s/it][2022-12-20 14:25:35,379] [INFO] [timer.py:197:stop] 0/2386, RunningAvgSamplesPerSec=5.866259420389441, CurrSamplesPerSec=5.277050551398603, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1179/5000 [4:03:10<11:43:14, 11.04s/it][2022-12-20 14:25:46,448] [INFO] [timer.py:197:stop] 0/2388, RunningAvgSamplesPerSec=5.866258168720059, CurrSamplesPerSec=5.319883653419607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1180/5000 [4:03:22<11:43:34, 11.05s/it][2022-12-20 14:25:57,559] [INFO] [timer.py:197:stop] 0/2390, RunningAvgSamplesPerSec=5.866238173802574, CurrSamplesPerSec=5.3043575034651615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1181/5000 [4:03:33<11:44:32, 11.07s/it][2022-12-20 14:26:08,599] [INFO] [timer.py:197:stop] 0/2392, RunningAvgSamplesPerSec=5.866243090994037, CurrSamplesPerSec=5.324683571295812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1182/5000 [4:03:44<11:43:46, 11.06s/it][2022-12-20 14:26:19,559] [INFO] [timer.py:197:stop] 0/2394, RunningAvgSamplesPerSec=5.866277685655989, CurrSamplesPerSec=5.340624909849068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1183/5000 [4:03:55<11:41:42, 11.03s/it][2022-12-20 14:26:30,558] [INFO] [timer.py:197:stop] 0/2396, RunningAvgSamplesPerSec=5.866301360689517, CurrSamplesPerSec=5.354256909749205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1184/5000 [4:04:06<11:40:53, 11.02s/it][2022-12-20 14:26:41,562] [INFO] [timer.py:197:stop] 0/2398, RunningAvgSamplesPerSec=5.866314006200123, CurrSamplesPerSec=5.344340211278222, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1185/5000 [4:04:17<11:40:24, 11.02s/it][2022-12-20 14:26:52,577] [INFO] [logging.py:68:log_dist] [Rank 0] step=1200, skipped=3, lr=[8.453333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:26:52,578] [INFO] [timer.py:197:stop] 0/2400, RunningAvgSamplesPerSec=5.866321830021514, CurrSamplesPerSec=5.339912254788865, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1186/5000 [4:04:28<11:40:14, 11.02s/it][2022-12-20 14:27:03,578] [INFO] [timer.py:197:stop] 0/2402, RunningAvgSamplesPerSec=5.866342924130218, CurrSamplesPerSec=5.363667915799298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▎ | 1187/5000 [4:04:39<11:39:45, 11.01s/it][2022-12-20 14:27:14,670] [INFO] [timer.py:197:stop] 0/2404, RunningAvgSamplesPerSec=5.866323472040068, CurrSamplesPerSec=5.322167404705935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1188/5000 [4:04:50<11:41:06, 11.04s/it][2022-12-20 14:27:25,680] [INFO] [timer.py:197:stop] 0/2406, RunningAvgSamplesPerSec=5.866333335281994, CurrSamplesPerSec=5.3121636873863425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1189/5000 [4:05:01<11:40:26, 11.03s/it][2022-12-20 14:27:35,805] [INFO] [timer.py:197:stop] 0/2408, RunningAvgSamplesPerSec=5.866735126668492, CurrSamplesPerSec=5.319866362948497, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1190/5000 [4:05:16<12:57:50, 12.25s/it][2022-12-20 14:27:46,938] [INFO] [timer.py:197:stop] 0/2410, RunningAvgSamplesPerSec=5.866690003407645, CurrSamplesPerSec=5.250663287908185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1191/5000 [4:05:27<12:35:58, 11.91s/it][2022-12-20 14:27:57,975] [INFO] [timer.py:197:stop] 0/2412, RunningAvgSamplesPerSec=5.86668728388693, CurrSamplesPerSec=5.315811269425496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1192/5000 [4:05:38<12:18:28, 11.64s/it][2022-12-20 14:28:08,984] [INFO] [timer.py:197:stop] 0/2414, RunningAvgSamplesPerSec=5.866697793970734, CurrSamplesPerSec=5.308342857359818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1193/5000 [4:05:49<12:07:30, 11.47s/it][2022-12-20 14:28:20,114] [INFO] [timer.py:197:stop] 0/2416, RunningAvgSamplesPerSec=5.866654404791631, CurrSamplesPerSec=5.256519641806495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1194/5000 [4:06:00<11:59:38, 11.34s/it][2022-12-20 14:28:31,105] [INFO] [timer.py:197:stop] 0/2418, RunningAvgSamplesPerSec=5.866673234860263, CurrSamplesPerSec=5.319040557558386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1195/5000 [4:06:11<11:52:38, 11.24s/it][2022-12-20 14:28:42,060] [INFO] [logging.py:68:log_dist] [Rank 0] step=1210, skipped=3, lr=[8.431111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:28:42,062] [INFO] [timer.py:197:stop] 0/2420, RunningAvgSamplesPerSec=5.866706810744663, CurrSamplesPerSec=5.345323543310957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1196/5000 [4:06:22<11:47:59, 11.17s/it][2022-12-20 14:28:53,431] [INFO] [timer.py:197:stop] 0/2422, RunningAvgSamplesPerSec=5.866557098315779, CurrSamplesPerSec=5.036977225099463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1197/5000 [4:06:34<11:52:04, 11.23s/it][2022-12-20 14:29:04,476] [INFO] [timer.py:197:stop] 0/2424, RunningAvgSamplesPerSec=5.866551769893157, CurrSamplesPerSec=5.3283517575204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1198/5000 [4:06:45<11:47:47, 11.17s/it][2022-12-20 14:29:15,513] [INFO] [timer.py:197:stop] 0/2426, RunningAvgSamplesPerSec=5.866549631747129, CurrSamplesPerSec=5.311750791324563, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1199/5000 [4:06:56<11:45:03, 11.13s/it][2022-12-20 14:29:26,854] [INFO] [timer.py:197:stop] 0/2428, RunningAvgSamplesPerSec=5.866413203666896, CurrSamplesPerSec=5.056082632148807, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1200/5000 [4:07:07<11:49:20, 11.20s/it] {'loss': 0.0012, 'learning_rate': 8.422222222222223e-06, 'epoch': 29.27} + 24%|██▍ | 1200/5000 [4:07:07<11:49:20, 11.20s/it][2022-12-20 14:29:37,931] [INFO] [timer.py:197:stop] 0/2430, RunningAvgSamplesPerSec=5.866393851256666, CurrSamplesPerSec=5.296994802408528, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1201/5000 [4:07:18<11:46:32, 11.16s/it][2022-12-20 14:29:48,985] [INFO] [timer.py:197:stop] 0/2432, RunningAvgSamplesPerSec=5.86638361714413, CurrSamplesPerSec=5.302136763312669, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1202/5000 [4:07:29<11:43:39, 11.12s/it][2022-12-20 14:30:00,129] [INFO] [timer.py:197:stop] 0/2434, RunningAvgSamplesPerSec=5.866334355478099, CurrSamplesPerSec=5.194048236660044, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1203/5000 [4:07:40<11:44:08, 11.13s/it][2022-12-20 14:30:11,139] [INFO] [timer.py:197:stop] 0/2436, RunningAvgSamplesPerSec=5.866343779590564, CurrSamplesPerSec=5.315572109716659, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1204/5000 [4:07:51<11:42:06, 11.10s/it][2022-12-20 14:30:22,192] [INFO] [timer.py:197:stop] 0/2438, RunningAvgSamplesPerSec=5.866340029215908, CurrSamplesPerSec=5.295177531307941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1205/5000 [4:08:02<11:41:15, 11.09s/it][2022-12-20 14:30:33,268] [INFO] [logging.py:68:log_dist] [Rank 0] step=1220, skipped=3, lr=[8.40888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:30:33,270] [INFO] [timer.py:197:stop] 0/2440, RunningAvgSamplesPerSec=5.866319653101191, CurrSamplesPerSec=5.281954496344795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1206/5000 [4:08:13<11:40:44, 11.08s/it][2022-12-20 14:30:44,300] [INFO] [timer.py:197:stop] 0/2442, RunningAvgSamplesPerSec=5.866320287619543, CurrSamplesPerSec=5.3164258991089115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1207/5000 [4:08:24<11:39:48, 11.07s/it][2022-12-20 14:30:55,313] [INFO] [timer.py:197:stop] 0/2444, RunningAvgSamplesPerSec=5.8663287960797925, CurrSamplesPerSec=5.342777840893666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1208/5000 [4:08:35<11:37:38, 11.04s/it][2022-12-20 14:31:06,448] [INFO] [timer.py:197:stop] 0/2446, RunningAvgSamplesPerSec=5.866283189551714, CurrSamplesPerSec=5.194655537071338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1209/5000 [4:08:46<11:40:02, 11.08s/it][2022-12-20 14:31:17,498] [INFO] [timer.py:197:stop] 0/2448, RunningAvgSamplesPerSec=5.8662757146520965, CurrSamplesPerSec=5.303761379649366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1210/5000 [4:08:58<11:39:26, 11.07s/it][2022-12-20 14:31:28,490] [INFO] [timer.py:197:stop] 0/2450, RunningAvgSamplesPerSec=5.866293484110395, CurrSamplesPerSec=5.3618690908382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1211/5000 [4:09:09<11:37:08, 11.04s/it][2022-12-20 14:31:39,616] [INFO] [timer.py:197:stop] 0/2452, RunningAvgSamplesPerSec=5.866252045631158, CurrSamplesPerSec=5.2163481454814455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1212/5000 [4:09:20<11:38:55, 11.07s/it][2022-12-20 14:31:50,605] [INFO] [timer.py:197:stop] 0/2454, RunningAvgSamplesPerSec=5.866271718732502, CurrSamplesPerSec=5.353360606871251, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1213/5000 [4:09:31<11:37:24, 11.05s/it][2022-12-20 14:32:01,654] [INFO] [timer.py:197:stop] 0/2456, RunningAvgSamplesPerSec=5.8662642275321, CurrSamplesPerSec=5.308947151348811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1214/5000 [4:09:42<11:36:29, 11.04s/it][2022-12-20 14:32:12,653] [INFO] [timer.py:197:stop] 0/2458, RunningAvgSamplesPerSec=5.866279437545395, CurrSamplesPerSec=5.32057051340014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1215/5000 [4:09:53<11:35:22, 11.02s/it][2022-12-20 14:32:23,644] [INFO] [logging.py:68:log_dist] [Rank 0] step=1230, skipped=3, lr=[8.386666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:32:23,645] [INFO] [timer.py:197:stop] 0/2460, RunningAvgSamplesPerSec=5.866296952599812, CurrSamplesPerSec=5.316530141259214, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1216/5000 [4:10:04<11:34:38, 11.01s/it][2022-12-20 14:32:34,607] [INFO] [timer.py:197:stop] 0/2462, RunningAvgSamplesPerSec=5.866327738215617, CurrSamplesPerSec=5.344379154561608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1217/5000 [4:10:15<11:33:44, 11.00s/it][2022-12-20 14:32:45,643] [INFO] [timer.py:197:stop] 0/2464, RunningAvgSamplesPerSec=5.866326308963281, CurrSamplesPerSec=5.291882626406358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1218/5000 [4:10:26<11:34:53, 11.02s/it][2022-12-20 14:32:56,692] [INFO] [timer.py:197:stop] 0/2466, RunningAvgSamplesPerSec=5.8663199379621895, CurrSamplesPerSec=5.314945680668286, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1219/5000 [4:10:37<11:34:01, 11.01s/it][2022-12-20 14:33:07,685] [INFO] [timer.py:197:stop] 0/2468, RunningAvgSamplesPerSec=5.8663375598948795, CurrSamplesPerSec=5.310083462141238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1220/5000 [4:10:48<11:34:14, 11.02s/it][2022-12-20 14:33:18,828] [INFO] [timer.py:197:stop] 0/2470, RunningAvgSamplesPerSec=5.8662889443349036, CurrSamplesPerSec=5.215359809380851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1221/5000 [4:10:59<11:36:04, 11.05s/it][2022-12-20 14:33:29,812] [INFO] [timer.py:197:stop] 0/2472, RunningAvgSamplesPerSec=5.866310152878439, CurrSamplesPerSec=5.3391466918986925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1222/5000 [4:11:10<11:34:11, 11.02s/it][2022-12-20 14:33:40,755] [INFO] [timer.py:197:stop] 0/2474, RunningAvgSamplesPerSec=5.866349132494918, CurrSamplesPerSec=5.3568706501380206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1223/5000 [4:11:21<11:33:22, 11.01s/it][2022-12-20 14:33:51,773] [INFO] [timer.py:197:stop] 0/2476, RunningAvgSamplesPerSec=5.866355595508482, CurrSamplesPerSec=5.333332273695414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1224/5000 [4:11:32<11:32:46, 11.01s/it][2022-12-20 14:34:02,780] [INFO] [timer.py:197:stop] 0/2478, RunningAvgSamplesPerSec=5.866366271959182, CurrSamplesPerSec=5.319785394432974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 24%|██▍ | 1225/5000 [4:11:43<11:33:01, 11.01s/it] {'loss': 0.0011, 'learning_rate': 8.366666666666667e-06, 'epoch': 29.87} + 24%|██▍ | 1225/5000 [4:11:43<11:33:01, 11.01s/it][2022-12-20 14:34:13,832] [INFO] [logging.py:68:log_dist] [Rank 0] step=1240, skipped=3, lr=[8.364444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:34:13,833] [INFO] [timer.py:197:stop] 0/2480, RunningAvgSamplesPerSec=5.8663624963996766, CurrSamplesPerSec=5.3107463596516356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1226/5000 [4:11:54<11:33:38, 11.03s/it][2022-12-20 14:34:24,879] [INFO] [timer.py:197:stop] 0/2482, RunningAvgSamplesPerSec=5.866356473048373, CurrSamplesPerSec=5.309833684110582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1227/5000 [4:12:05<11:34:38, 11.05s/it][2022-12-20 14:34:35,965] [INFO] [timer.py:197:stop] 0/2484, RunningAvgSamplesPerSec=5.866338980521272, CurrSamplesPerSec=5.313235540486574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1228/5000 [4:12:16<11:34:07, 11.04s/it][2022-12-20 14:34:47,008] [INFO] [timer.py:197:stop] 0/2486, RunningAvgSamplesPerSec=5.866342291838639, CurrSamplesPerSec=5.31729133814346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1229/5000 [4:12:27<11:34:16, 11.05s/it][2022-12-20 14:34:58,150] [INFO] [timer.py:197:stop] 0/2488, RunningAvgSamplesPerSec=5.86629436505756, CurrSamplesPerSec=5.228989271702679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1230/5000 [4:12:38<11:36:06, 11.08s/it][2022-12-20 14:35:08,289] [INFO] [timer.py:197:stop] 0/2490, RunningAvgSamplesPerSec=5.866683051449626, CurrSamplesPerSec=6.255712617522649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 14:35:19,365] [INFO] [timer.py:197:stop] 0/2492, RunningAvgSamplesPerSec=5.866662220221625, CurrSamplesPerSec=5.290438771318746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1231/5000 [4:12:54<13:13:15, 12.63s/it][2022-12-20 14:35:30,406] [INFO] [timer.py:197:stop] 0/2494, RunningAvgSamplesPerSec=5.866658069700753, CurrSamplesPerSec=5.317211711881955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1232/5000 [4:13:06<12:43:09, 12.15s/it][2022-12-20 14:35:41,401] [INFO] [timer.py:197:stop] 0/2496, RunningAvgSamplesPerSec=5.866674264185871, CurrSamplesPerSec=5.3262005007554505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1233/5000 [4:13:16<12:21:09, 11.80s/it][2022-12-20 14:35:52,400] [INFO] [timer.py:197:stop] 0/2498, RunningAvgSamplesPerSec=5.866688874988825, CurrSamplesPerSec=5.356402891484871, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1234/5000 [4:13:27<12:05:46, 11.56s/it][2022-12-20 14:36:03,580] [INFO] [logging.py:68:log_dist] [Rank 0] step=1250, skipped=3, lr=[8.342222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:36:03,582] [INFO] [timer.py:197:stop] 0/2500, RunningAvgSamplesPerSec=5.866624530729723, CurrSamplesPerSec=5.182571068299581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1235/5000 [4:13:39<11:58:24, 11.45s/it][2022-12-20 14:36:14,516] [INFO] [timer.py:197:stop] 0/2502, RunningAvgSamplesPerSec=5.866667110443997, CurrSamplesPerSec=5.365425908578023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1236/5000 [4:13:50<11:48:32, 11.29s/it][2022-12-20 14:36:25,557] [INFO] [timer.py:197:stop] 0/2504, RunningAvgSamplesPerSec=5.866662660511366, CurrSamplesPerSec=5.325509439508219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1237/5000 [4:14:01<11:43:35, 11.22s/it][2022-12-20 14:36:36,824] [INFO] [timer.py:197:stop] 0/2506, RunningAvgSamplesPerSec=5.866565210964884, CurrSamplesPerSec=5.142926618483068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1238/5000 [4:14:12<11:44:27, 11.24s/it][2022-12-20 14:36:47,913] [INFO] [timer.py:197:stop] 0/2508, RunningAvgSamplesPerSec=5.866572477495271, CurrSamplesPerSec=5.318034210175345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1239/5000 [4:14:23<11:41:22, 11.19s/it][2022-12-20 14:36:58,900] [INFO] [timer.py:197:stop] 0/2510, RunningAvgSamplesPerSec=5.866600153879609, CurrSamplesPerSec=5.338725555983464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1240/5000 [4:14:34<11:37:22, 11.13s/it][2022-12-20 14:37:10,314] [INFO] [timer.py:197:stop] 0/2512, RunningAvgSamplesPerSec=5.86643679765559, CurrSamplesPerSec=4.98516155769956, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1241/5000 [4:14:45<11:42:33, 11.21s/it][2022-12-20 14:37:21,316] [INFO] [timer.py:197:stop] 0/2514, RunningAvgSamplesPerSec=5.866449943668701, CurrSamplesPerSec=5.3272116329982016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1242/5000 [4:14:56<11:38:23, 11.15s/it][2022-12-20 14:37:32,338] [INFO] [timer.py:197:stop] 0/2516, RunningAvgSamplesPerSec=5.866461011796087, CurrSamplesPerSec=5.310245021590694, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1243/5000 [4:15:07<11:35:47, 11.11s/it][2022-12-20 14:37:43,454] [INFO] [timer.py:197:stop] 0/2518, RunningAvgSamplesPerSec=5.866425892772194, CurrSamplesPerSec=5.212907613883006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1244/5000 [4:15:19<11:35:43, 11.11s/it][2022-12-20 14:37:54,473] [INFO] [logging.py:68:log_dist] [Rank 0] step=1260, skipped=3, lr=[8.32e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:37:54,474] [INFO] [timer.py:197:stop] 0/2520, RunningAvgSamplesPerSec=5.86644255249078, CurrSamplesPerSec=5.332693176822377, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1245/5000 [4:15:30<11:33:44, 11.09s/it][2022-12-20 14:38:05,517] [INFO] [timer.py:197:stop] 0/2522, RunningAvgSamplesPerSec=5.866444891204957, CurrSamplesPerSec=5.3370699022057435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1246/5000 [4:15:41<11:32:47, 11.07s/it][2022-12-20 14:38:16,534] [INFO] [timer.py:197:stop] 0/2524, RunningAvgSamplesPerSec=5.866458659194212, CurrSamplesPerSec=5.328032998001378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1247/5000 [4:15:52<11:31:31, 11.06s/it][2022-12-20 14:38:27,613] [INFO] [timer.py:197:stop] 0/2526, RunningAvgSamplesPerSec=5.866439026504298, CurrSamplesPerSec=5.311446626378874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1248/5000 [4:16:03<11:31:46, 11.06s/it][2022-12-20 14:38:38,668] [INFO] [timer.py:197:stop] 0/2528, RunningAvgSamplesPerSec=5.866428974068871, CurrSamplesPerSec=5.300253170155539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▍ | 1249/5000 [4:16:14<11:31:27, 11.06s/it][2022-12-20 14:38:49,702] [INFO] [timer.py:197:stop] 0/2530, RunningAvgSamplesPerSec=5.866428517493221, CurrSamplesPerSec=5.314769103190194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1250/5000 [4:16:25<11:30:46, 11.05s/it] {'loss': 0.0009, 'learning_rate': 8.30888888888889e-06, 'epoch': 30.48} + 25%|██▌ | 1250/5000 [4:16:25<11:30:46, 11.05s/it][2022-12-20 14:39:01,184] [INFO] [timer.py:197:stop] 0/2532, RunningAvgSamplesPerSec=5.866420303426205, CurrSamplesPerSec=5.316974111182338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1251/5000 [4:16:36<11:38:38, 11.18s/it][2022-12-20 14:39:12,230] [INFO] [timer.py:197:stop] 0/2534, RunningAvgSamplesPerSec=5.866414357189164, CurrSamplesPerSec=5.304399430014414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1252/5000 [4:16:47<11:35:55, 11.14s/it][2022-12-20 14:39:23,285] [INFO] [timer.py:197:stop] 0/2536, RunningAvgSamplesPerSec=5.866405074679397, CurrSamplesPerSec=5.309512095365863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1253/5000 [4:16:58<11:34:07, 11.11s/it][2022-12-20 14:39:34,592] [INFO] [timer.py:197:stop] 0/2538, RunningAvgSamplesPerSec=5.866398287009394, CurrSamplesPerSec=5.310256366836915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1254/5000 [4:17:10<11:37:32, 11.17s/it][2022-12-20 14:39:45,600] [INFO] [logging.py:68:log_dist] [Rank 0] step=1270, skipped=3, lr=[8.297777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:39:45,602] [INFO] [timer.py:197:stop] 0/2540, RunningAvgSamplesPerSec=5.866407689182847, CurrSamplesPerSec=5.316591003717835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1255/5000 [4:17:21<11:34:18, 11.12s/it][2022-12-20 14:39:56,650] [INFO] [timer.py:197:stop] 0/2542, RunningAvgSamplesPerSec=5.866400554596771, CurrSamplesPerSec=5.317751658378555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1256/5000 [4:17:32<11:32:42, 11.10s/it][2022-12-20 14:40:07,743] [INFO] [timer.py:197:stop] 0/2544, RunningAvgSamplesPerSec=5.866374632998121, CurrSamplesPerSec=5.325266025782945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1257/5000 [4:17:43<11:32:22, 11.10s/it][2022-12-20 14:40:18,749] [INFO] [timer.py:197:stop] 0/2546, RunningAvgSamplesPerSec=5.866385946310674, CurrSamplesPerSec=5.3002286813726345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1258/5000 [4:17:54<11:30:26, 11.07s/it][2022-12-20 14:40:29,709] [INFO] [timer.py:197:stop] 0/2548, RunningAvgSamplesPerSec=5.866416185594421, CurrSamplesPerSec=5.34435063865912, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1259/5000 [4:18:05<11:28:11, 11.04s/it][2022-12-20 14:40:40,656] [INFO] [timer.py:197:stop] 0/2550, RunningAvgSamplesPerSec=5.866452290321615, CurrSamplesPerSec=5.360355970084697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1260/5000 [4:18:16<11:26:18, 11.01s/it][2022-12-20 14:40:51,640] [INFO] [timer.py:197:stop] 0/2552, RunningAvgSamplesPerSec=5.86647274191724, CurrSamplesPerSec=5.34167554108762, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1261/5000 [4:18:27<11:25:38, 11.00s/it][2022-12-20 14:41:02,695] [INFO] [timer.py:197:stop] 0/2554, RunningAvgSamplesPerSec=5.866463119436804, CurrSamplesPerSec=5.319463442456576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1262/5000 [4:18:38<11:26:27, 11.02s/it][2022-12-20 14:41:13,733] [INFO] [timer.py:197:stop] 0/2556, RunningAvgSamplesPerSec=5.866464812760032, CurrSamplesPerSec=5.3356748896233634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1263/5000 [4:18:49<11:26:37, 11.02s/it][2022-12-20 14:41:24,798] [INFO] [timer.py:197:stop] 0/2558, RunningAvgSamplesPerSec=5.86645108537351, CurrSamplesPerSec=5.304089608463672, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1264/5000 [4:19:00<11:27:12, 11.04s/it][2022-12-20 14:41:35,884] [INFO] [logging.py:68:log_dist] [Rank 0] step=1280, skipped=3, lr=[8.275555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:41:35,885] [INFO] [timer.py:197:stop] 0/2560, RunningAvgSamplesPerSec=5.866435162823652, CurrSamplesPerSec=5.304401735993852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1265/5000 [4:19:11<11:27:57, 11.05s/it][2022-12-20 14:41:46,916] [INFO] [timer.py:197:stop] 0/2562, RunningAvgSamplesPerSec=5.866438772156116, CurrSamplesPerSec=5.352324370543916, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1266/5000 [4:19:22<11:27:22, 11.05s/it][2022-12-20 14:41:57,907] [INFO] [timer.py:197:stop] 0/2564, RunningAvgSamplesPerSec=5.866456043934021, CurrSamplesPerSec=5.3387862905984536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1267/5000 [4:19:33<11:26:11, 11.03s/it][2022-12-20 14:42:08,976] [INFO] [timer.py:197:stop] 0/2566, RunningAvgSamplesPerSec=5.866445757930909, CurrSamplesPerSec=5.311216055988494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1268/5000 [4:19:44<11:26:45, 11.04s/it][2022-12-20 14:42:20,010] [INFO] [timer.py:197:stop] 0/2568, RunningAvgSamplesPerSec=5.86644502153176, CurrSamplesPerSec=5.296935223960517, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1269/5000 [4:19:55<11:26:26, 11.04s/it][2022-12-20 14:42:30,995] [INFO] [timer.py:197:stop] 0/2570, RunningAvgSamplesPerSec=5.8664646152671525, CurrSamplesPerSec=5.337238201688508, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1270/5000 [4:20:06<11:25:15, 11.02s/it][2022-12-20 14:42:42,041] [INFO] [timer.py:197:stop] 0/2572, RunningAvgSamplesPerSec=5.866465979708828, CurrSamplesPerSec=5.329003355478731, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1271/5000 [4:20:17<11:25:29, 11.03s/it][2022-12-20 14:42:52,214] [INFO] [timer.py:197:stop] 0/2574, RunningAvgSamplesPerSec=5.866821180582034, CurrSamplesPerSec=5.272568266615608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1272/5000 [4:20:32<12:40:56, 12.25s/it][2022-12-20 14:43:03,210] [INFO] [timer.py:197:stop] 0/2576, RunningAvgSamplesPerSec=5.866835855180823, CurrSamplesPerSec=5.317548138551296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1273/5000 [4:20:43<12:18:13, 11.88s/it][2022-12-20 14:43:14,291] [INFO] [timer.py:197:stop] 0/2578, RunningAvgSamplesPerSec=5.866815882840631, CurrSamplesPerSec=5.282412252696115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 25%|██▌ | 1274/5000 [4:20:54<12:03:05, 11.64s/it][2022-12-20 14:43:25,457] [INFO] [logging.py:68:log_dist] [Rank 0] step=1290, skipped=3, lr=[8.253333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:43:25,458] [INFO] [timer.py:197:stop] 0/2580, RunningAvgSamplesPerSec=5.866814189672369, CurrSamplesPerSec=5.321991613220825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1275/5000 [4:21:06<11:54:21, 11.51s/it] {'loss': 0.0011, 'learning_rate': 8.253333333333334e-06, 'epoch': 31.1} + 26%|██▌ | 1275/5000 [4:21:06<11:54:21, 11.51s/it][2022-12-20 14:43:36,493] [INFO] [timer.py:197:stop] 0/2582, RunningAvgSamplesPerSec=5.866813256736886, CurrSamplesPerSec=5.33998958799783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1276/5000 [4:21:17<11:44:33, 11.35s/it][2022-12-20 14:43:47,534] [INFO] [timer.py:197:stop] 0/2584, RunningAvgSamplesPerSec=5.866809048074333, CurrSamplesPerSec=5.293974714236095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1277/5000 [4:21:28<11:39:37, 11.28s/it][2022-12-20 14:43:58,615] [INFO] [timer.py:197:stop] 0/2586, RunningAvgSamplesPerSec=5.866788005198868, CurrSamplesPerSec=5.307037103230776, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1278/5000 [4:21:39<11:35:32, 11.21s/it][2022-12-20 14:44:09,685] [INFO] [timer.py:197:stop] 0/2588, RunningAvgSamplesPerSec=5.86677112614875, CurrSamplesPerSec=5.303040927825822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1279/5000 [4:21:50<11:32:35, 11.17s/it][2022-12-20 14:44:20,691] [INFO] [timer.py:197:stop] 0/2590, RunningAvgSamplesPerSec=5.866781356573359, CurrSamplesPerSec=5.354594836378892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1280/5000 [4:22:01<11:29:14, 11.12s/it][2022-12-20 14:44:31,852] [INFO] [timer.py:197:stop] 0/2592, RunningAvgSamplesPerSec=5.866727194956834, CurrSamplesPerSec=5.213148153521151, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1281/5000 [4:22:12<11:29:47, 11.13s/it][2022-12-20 14:44:42,871] [INFO] [timer.py:197:stop] 0/2594, RunningAvgSamplesPerSec=5.866731793977151, CurrSamplesPerSec=5.3322245466290905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1282/5000 [4:22:23<11:27:39, 11.10s/it][2022-12-20 14:44:53,909] [INFO] [timer.py:197:stop] 0/2596, RunningAvgSamplesPerSec=5.866732904322087, CurrSamplesPerSec=5.319831571487487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1283/5000 [4:22:34<11:26:25, 11.08s/it][2022-12-20 14:45:05,036] [INFO] [timer.py:197:stop] 0/2598, RunningAvgSamplesPerSec=5.866692714824776, CurrSamplesPerSec=5.244622402872517, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1284/5000 [4:22:45<11:26:30, 11.08s/it][2022-12-20 14:45:16,066] [INFO] [logging.py:68:log_dist] [Rank 0] step=1300, skipped=3, lr=[8.231111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:45:16,068] [INFO] [timer.py:197:stop] 0/2600, RunningAvgSamplesPerSec=5.8666919755742715, CurrSamplesPerSec=5.298228900231777, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1285/5000 [4:22:56<11:26:13, 11.08s/it][2022-12-20 14:45:27,151] [INFO] [timer.py:197:stop] 0/2602, RunningAvgSamplesPerSec=5.8666760184045845, CurrSamplesPerSec=5.296026452959583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1286/5000 [4:23:07<11:24:50, 11.06s/it][2022-12-20 14:45:38,236] [INFO] [timer.py:197:stop] 0/2604, RunningAvgSamplesPerSec=5.866653736164829, CurrSamplesPerSec=5.238799875315857, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1287/5000 [4:23:18<11:25:17, 11.07s/it][2022-12-20 14:45:49,220] [INFO] [timer.py:197:stop] 0/2606, RunningAvgSamplesPerSec=5.866673237070117, CurrSamplesPerSec=5.338110855054394, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1288/5000 [4:23:29<11:23:54, 11.05s/it][2022-12-20 14:46:00,247] [INFO] [timer.py:197:stop] 0/2608, RunningAvgSamplesPerSec=5.8666750357810615, CurrSamplesPerSec=5.32300663669837, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1289/5000 [4:23:40<11:23:56, 11.06s/it][2022-12-20 14:46:11,589] [INFO] [timer.py:197:stop] 0/2610, RunningAvgSamplesPerSec=5.86667991060729, CurrSamplesPerSec=5.361265538925755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1290/5000 [4:23:52<11:28:27, 11.13s/it][2022-12-20 14:46:22,556] [INFO] [timer.py:197:stop] 0/2612, RunningAvgSamplesPerSec=5.866706986025993, CurrSamplesPerSec=5.384632123344585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1291/5000 [4:24:03<11:25:10, 11.08s/it][2022-12-20 14:46:33,567] [INFO] [timer.py:197:stop] 0/2614, RunningAvgSamplesPerSec=5.866715708805527, CurrSamplesPerSec=5.344920801683196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1292/5000 [4:24:14<11:23:37, 11.06s/it][2022-12-20 14:46:44,580] [INFO] [timer.py:197:stop] 0/2616, RunningAvgSamplesPerSec=5.866723491105463, CurrSamplesPerSec=5.342253000998336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1293/5000 [4:24:25<11:22:49, 11.05s/it][2022-12-20 14:46:55,613] [INFO] [timer.py:197:stop] 0/2618, RunningAvgSamplesPerSec=5.8667232221646355, CurrSamplesPerSec=5.339256287327342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1294/5000 [4:24:36<11:20:58, 11.02s/it][2022-12-20 14:47:06,593] [INFO] [logging.py:68:log_dist] [Rank 0] step=1310, skipped=3, lr=[8.20888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:47:06,594] [INFO] [timer.py:197:stop] 0/2620, RunningAvgSamplesPerSec=5.866743855801862, CurrSamplesPerSec=5.321492580630581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1295/5000 [4:24:47<11:20:08, 11.01s/it][2022-12-20 14:47:17,630] [INFO] [timer.py:197:stop] 0/2622, RunningAvgSamplesPerSec=5.866743818081918, CurrSamplesPerSec=5.281109456249781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1296/5000 [4:24:58<11:20:04, 11.02s/it][2022-12-20 14:47:28,609] [INFO] [timer.py:197:stop] 0/2624, RunningAvgSamplesPerSec=5.866765203046419, CurrSamplesPerSec=5.317507899933124, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1297/5000 [4:25:09<11:20:18, 11.02s/it][2022-12-20 14:47:39,585] [INFO] [timer.py:197:stop] 0/2626, RunningAvgSamplesPerSec=5.8667884842650135, CurrSamplesPerSec=5.374452474892769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1298/5000 [4:25:20<11:19:04, 11.01s/it][2022-12-20 14:47:50,648] [INFO] [timer.py:197:stop] 0/2628, RunningAvgSamplesPerSec=5.866776093047751, CurrSamplesPerSec=5.2886072401084485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1299/5000 [4:25:31<11:20:20, 11.03s/it][2022-12-20 14:48:01,698] [INFO] [timer.py:197:stop] 0/2630, RunningAvgSamplesPerSec=5.866769019160517, CurrSamplesPerSec=5.318527536248883, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1300/5000 [4:25:42<11:20:00, 11.03s/it] {'loss': 0.0008, 'learning_rate': 8.197777777777779e-06, 'epoch': 31.7} + 26%|██▌ | 1300/5000 [4:25:42<11:20:00, 11.03s/it][2022-12-20 14:48:12,719] [INFO] [timer.py:197:stop] 0/2632, RunningAvgSamplesPerSec=5.866774325392706, CurrSamplesPerSec=5.320704869355892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1301/5000 [4:25:53<11:19:45, 11.03s/it][2022-12-20 14:48:23,742] [INFO] [timer.py:197:stop] 0/2634, RunningAvgSamplesPerSec=5.8667787109203395, CurrSamplesPerSec=5.3207086660120915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1302/5000 [4:26:04<11:19:50, 11.03s/it][2022-12-20 14:48:34,803] [INFO] [timer.py:197:stop] 0/2636, RunningAvgSamplesPerSec=5.8667663860237615, CurrSamplesPerSec=5.300755135405741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1303/5000 [4:26:15<11:19:04, 11.02s/it][2022-12-20 14:48:45,758] [INFO] [timer.py:197:stop] 0/2638, RunningAvgSamplesPerSec=5.8667983476471495, CurrSamplesPerSec=5.34139684866578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1304/5000 [4:26:26<11:18:31, 11.02s/it][2022-12-20 14:48:56,789] [INFO] [logging.py:68:log_dist] [Rank 0] step=1320, skipped=3, lr=[8.186666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:48:56,791] [INFO] [timer.py:197:stop] 0/2640, RunningAvgSamplesPerSec=5.866797300352509, CurrSamplesPerSec=5.3116742738501355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1305/5000 [4:26:37<11:17:58, 11.01s/it][2022-12-20 14:49:07,755] [INFO] [timer.py:197:stop] 0/2642, RunningAvgSamplesPerSec=5.8668251450220525, CurrSamplesPerSec=5.34044662200489, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1306/5000 [4:26:48<11:17:35, 11.01s/it][2022-12-20 14:49:18,769] [INFO] [timer.py:197:stop] 0/2644, RunningAvgSamplesPerSec=5.866831687384138, CurrSamplesPerSec=5.324439810616205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1307/5000 [4:26:59<11:17:50, 11.01s/it][2022-12-20 14:49:29,898] [INFO] [timer.py:197:stop] 0/2646, RunningAvgSamplesPerSec=5.8667925884831496, CurrSamplesPerSec=5.239993500455335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1308/5000 [4:27:10<11:20:52, 11.07s/it][2022-12-20 14:49:40,971] [INFO] [timer.py:197:stop] 0/2648, RunningAvgSamplesPerSec=5.866824998765912, CurrSamplesPerSec=5.338632333161435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1309/5000 [4:27:21<11:18:44, 11.03s/it][2022-12-20 14:49:51,932] [INFO] [timer.py:197:stop] 0/2650, RunningAvgSamplesPerSec=5.866852764414854, CurrSamplesPerSec=5.33621053062638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1310/5000 [4:27:32<11:17:20, 11.01s/it][2022-12-20 14:50:02,886] [INFO] [timer.py:197:stop] 0/2652, RunningAvgSamplesPerSec=5.866884156249551, CurrSamplesPerSec=5.348771041067318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1311/5000 [4:27:43<11:17:38, 11.02s/it][2022-12-20 14:50:13,923] [INFO] [timer.py:197:stop] 0/2654, RunningAvgSamplesPerSec=5.866881730060096, CurrSamplesPerSec=5.35244582042356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▌ | 1312/5000 [4:27:54<11:16:08, 11.00s/it][2022-12-20 14:50:23,994] [INFO] [timer.py:197:stop] 0/2656, RunningAvgSamplesPerSec=5.867266908406734, CurrSamplesPerSec=6.262873907064042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 14:50:34,977] [INFO] [timer.py:197:stop] 0/2658, RunningAvgSamplesPerSec=5.867285090561456, CurrSamplesPerSec=5.340587083781192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1313/5000 [4:28:10<12:50:49, 12.54s/it][2022-12-20 14:50:46,028] [INFO] [logging.py:68:log_dist] [Rank 0] step=1330, skipped=3, lr=[8.164444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:50:46,030] [INFO] [timer.py:197:stop] 0/2660, RunningAvgSamplesPerSec=5.867276005265758, CurrSamplesPerSec=5.328606031010614, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1314/5000 [4:28:21<12:23:08, 12.10s/it][2022-12-20 14:50:57,089] [INFO] [timer.py:197:stop] 0/2662, RunningAvgSamplesPerSec=5.867264634360738, CurrSamplesPerSec=5.288355727730571, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1315/5000 [4:28:32<12:03:48, 11.79s/it][2022-12-20 14:51:08,077] [INFO] [timer.py:197:stop] 0/2664, RunningAvgSamplesPerSec=5.8672822486468466, CurrSamplesPerSec=5.299272749167656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1316/5000 [4:28:43<11:48:56, 11.55s/it][2022-12-20 14:51:19,214] [INFO] [timer.py:197:stop] 0/2666, RunningAvgSamplesPerSec=5.867239660302382, CurrSamplesPerSec=5.3288338823649735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1317/5000 [4:28:54<11:41:12, 11.42s/it][2022-12-20 14:51:30,209] [INFO] [timer.py:197:stop] 0/2668, RunningAvgSamplesPerSec=5.867254224246213, CurrSamplesPerSec=5.3139932694613945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1318/5000 [4:29:05<11:33:08, 11.29s/it][2022-12-20 14:51:41,232] [INFO] [timer.py:197:stop] 0/2670, RunningAvgSamplesPerSec=5.867257901401531, CurrSamplesPerSec=5.331558818155095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1319/5000 [4:29:16<11:27:56, 11.21s/it][2022-12-20 14:51:52,267] [INFO] [timer.py:197:stop] 0/2672, RunningAvgSamplesPerSec=5.867263905428857, CurrSamplesPerSec=5.344410862900042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1320/5000 [4:29:27<11:24:28, 11.16s/it][2022-12-20 14:52:03,271] [INFO] [timer.py:197:stop] 0/2674, RunningAvgSamplesPerSec=5.867274408940137, CurrSamplesPerSec=5.341787366359527, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1321/5000 [4:29:38<11:21:25, 11.11s/it][2022-12-20 14:52:14,367] [INFO] [timer.py:197:stop] 0/2676, RunningAvgSamplesPerSec=5.867248295502644, CurrSamplesPerSec=5.293153795813945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1322/5000 [4:29:49<11:20:54, 11.11s/it][2022-12-20 14:52:25,380] [INFO] [timer.py:197:stop] 0/2678, RunningAvgSamplesPerSec=5.867256981804069, CurrSamplesPerSec=5.351379640283278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1323/5000 [4:30:00<11:18:59, 11.08s/it][2022-12-20 14:52:36,369] [INFO] [logging.py:68:log_dist] [Rank 0] step=1340, skipped=3, lr=[8.142222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:52:36,371] [INFO] [timer.py:197:stop] 0/2680, RunningAvgSamplesPerSec=5.8672728814667785, CurrSamplesPerSec=5.357554904990007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1324/5000 [4:30:11<11:17:10, 11.05s/it][2022-12-20 14:52:47,384] [INFO] [timer.py:197:stop] 0/2682, RunningAvgSamplesPerSec=5.867279948228556, CurrSamplesPerSec=5.329852787625319, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 26%|██▋ | 1325/5000 [4:30:22<11:16:15, 11.04s/it] {'loss': 0.0006, 'learning_rate': 8.14e-06, 'epoch': 32.31} + 26%|██▋ | 1325/5000 [4:30:22<11:16:15, 11.04s/it][2022-12-20 14:52:58,648] [INFO] [timer.py:197:stop] 0/2684, RunningAvgSamplesPerSec=5.8671873148079445, CurrSamplesPerSec=5.089257000264098, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1326/5000 [4:30:34<11:20:10, 11.11s/it][2022-12-20 14:53:09,716] [INFO] [timer.py:197:stop] 0/2686, RunningAvgSamplesPerSec=5.86717231435457, CurrSamplesPerSec=5.304238226057561, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1327/5000 [4:30:45<11:19:15, 11.10s/it][2022-12-20 14:53:20,715] [INFO] [timer.py:197:stop] 0/2688, RunningAvgSamplesPerSec=5.8671902036318535, CurrSamplesPerSec=5.3446607122320975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1328/5000 [4:30:56<11:17:17, 11.07s/it][2022-12-20 14:53:31,724] [INFO] [timer.py:197:stop] 0/2690, RunningAvgSamplesPerSec=5.867198853654752, CurrSamplesPerSec=5.319216997369674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1329/5000 [4:31:07<11:16:02, 11.05s/it][2022-12-20 14:53:42,774] [INFO] [timer.py:197:stop] 0/2692, RunningAvgSamplesPerSec=5.8671912004198425, CurrSamplesPerSec=5.315058915308872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1330/5000 [4:31:18<11:15:52, 11.05s/it][2022-12-20 14:53:53,796] [INFO] [timer.py:197:stop] 0/2694, RunningAvgSamplesPerSec=5.867194948034198, CurrSamplesPerSec=5.326780116065605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1331/5000 [4:31:29<11:15:10, 11.04s/it][2022-12-20 14:54:04,945] [INFO] [timer.py:197:stop] 0/2696, RunningAvgSamplesPerSec=5.867148181595067, CurrSamplesPerSec=5.229207053416619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1332/5000 [4:31:40<11:16:58, 11.07s/it][2022-12-20 14:54:15,953] [INFO] [timer.py:197:stop] 0/2698, RunningAvgSamplesPerSec=5.867157803444068, CurrSamplesPerSec=5.320486781644827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1333/5000 [4:31:51<11:15:36, 11.05s/it][2022-12-20 14:54:27,049] [INFO] [logging.py:68:log_dist] [Rank 0] step=1350, skipped=3, lr=[8.120000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:54:27,050] [INFO] [timer.py:197:stop] 0/2700, RunningAvgSamplesPerSec=5.867140936807669, CurrSamplesPerSec=5.2963597860505915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1334/5000 [4:32:02<11:16:10, 11.07s/it][2022-12-20 14:54:38,159] [INFO] [timer.py:197:stop] 0/2702, RunningAvgSamplesPerSec=5.867110763849676, CurrSamplesPerSec=5.302708638765092, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1335/5000 [4:32:13<11:16:45, 11.08s/it][2022-12-20 14:54:49,144] [INFO] [timer.py:197:stop] 0/2704, RunningAvgSamplesPerSec=5.867128856162109, CurrSamplesPerSec=5.34210862431608, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1336/5000 [4:32:24<11:14:52, 11.05s/it][2022-12-20 14:55:00,186] [INFO] [timer.py:197:stop] 0/2706, RunningAvgSamplesPerSec=5.86713148198925, CurrSamplesPerSec=5.332181543554038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1337/5000 [4:32:35<11:14:29, 11.05s/it][2022-12-20 14:55:11,280] [INFO] [timer.py:197:stop] 0/2708, RunningAvgSamplesPerSec=5.867106555650406, CurrSamplesPerSec=5.200583008320454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1338/5000 [4:32:46<11:15:08, 11.06s/it][2022-12-20 14:55:22,304] [INFO] [timer.py:197:stop] 0/2710, RunningAvgSamplesPerSec=5.867109746674261, CurrSamplesPerSec=5.346511473663643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1339/5000 [4:32:57<11:14:15, 11.05s/it][2022-12-20 14:55:33,336] [INFO] [timer.py:197:stop] 0/2712, RunningAvgSamplesPerSec=5.867109268480701, CurrSamplesPerSec=5.303738744676808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1340/5000 [4:33:08<11:13:45, 11.05s/it][2022-12-20 14:55:44,573] [INFO] [timer.py:197:stop] 0/2714, RunningAvgSamplesPerSec=5.867028065655926, CurrSamplesPerSec=5.140866532197013, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1341/5000 [4:33:20<11:17:04, 11.10s/it][2022-12-20 14:55:55,571] [INFO] [timer.py:197:stop] 0/2716, RunningAvgSamplesPerSec=5.867041013729103, CurrSamplesPerSec=5.32446325629723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1342/5000 [4:33:31<11:14:58, 11.07s/it][2022-12-20 14:56:06,567] [INFO] [timer.py:197:stop] 0/2718, RunningAvgSamplesPerSec=5.867055199909407, CurrSamplesPerSec=5.319817865885367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1343/5000 [4:33:42<11:13:25, 11.05s/it][2022-12-20 14:56:17,623] [INFO] [logging.py:68:log_dist] [Rank 0] step=1360, skipped=3, lr=[8.09777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:56:17,625] [INFO] [timer.py:197:stop] 0/2720, RunningAvgSamplesPerSec=5.867044344601528, CurrSamplesPerSec=5.28937692906708, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1344/5000 [4:33:53<11:13:23, 11.05s/it][2022-12-20 14:56:28,670] [INFO] [timer.py:197:stop] 0/2722, RunningAvgSamplesPerSec=5.867039050593108, CurrSamplesPerSec=5.329665694614278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1345/5000 [4:34:04<11:13:05, 11.05s/it][2022-12-20 14:56:39,681] [INFO] [timer.py:197:stop] 0/2724, RunningAvgSamplesPerSec=5.8670470465713445, CurrSamplesPerSec=5.314920003595611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1346/5000 [4:34:15<11:12:13, 11.04s/it][2022-12-20 14:56:50,801] [INFO] [timer.py:197:stop] 0/2726, RunningAvgSamplesPerSec=5.867012136388272, CurrSamplesPerSec=5.2200764224035066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1347/5000 [4:34:26<11:13:31, 11.06s/it][2022-12-20 14:57:01,822] [INFO] [timer.py:197:stop] 0/2728, RunningAvgSamplesPerSec=5.867016487729158, CurrSamplesPerSec=5.305950129628746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1348/5000 [4:34:37<11:12:35, 11.05s/it][2022-12-20 14:57:12,815] [INFO] [timer.py:197:stop] 0/2730, RunningAvgSamplesPerSec=5.867037933345068, CurrSamplesPerSec=5.365565542459984, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1349/5000 [4:34:48<11:11:21, 11.03s/it][2022-12-20 14:57:24,036] [INFO] [timer.py:197:stop] 0/2732, RunningAvgSamplesPerSec=5.866963522194716, CurrSamplesPerSec=5.153858568615388, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1350/5000 [4:34:59<11:14:36, 11.09s/it] {'loss': 0.0006, 'learning_rate': 8.084444444444444e-06, 'epoch': 32.92} + 27%|██▋ | 1350/5000 [4:34:59<11:14:36, 11.09s/it][2022-12-20 14:57:35,075] [INFO] [timer.py:197:stop] 0/2734, RunningAvgSamplesPerSec=5.866960909313108, CurrSamplesPerSec=5.326678219771939, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1351/5000 [4:35:10<11:13:29, 11.07s/it][2022-12-20 14:57:46,059] [INFO] [timer.py:197:stop] 0/2736, RunningAvgSamplesPerSec=5.866984988698939, CurrSamplesPerSec=5.358850763780918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1352/5000 [4:35:21<11:11:40, 11.05s/it][2022-12-20 14:57:57,176] [INFO] [timer.py:197:stop] 0/2738, RunningAvgSamplesPerSec=5.866951168860731, CurrSamplesPerSec=5.241161881620681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1353/5000 [4:35:32<11:12:45, 11.07s/it][2022-12-20 14:58:07,277] [INFO] [logging.py:68:log_dist] [Rank 0] step=1370, skipped=3, lr=[8.075555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:58:07,279] [INFO] [timer.py:197:stop] 0/2740, RunningAvgSamplesPerSec=5.867311835449159, CurrSamplesPerSec=5.319994568138901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1354/5000 [4:35:47<12:24:49, 12.26s/it][2022-12-20 14:58:18,278] [INFO] [timer.py:197:stop] 0/2742, RunningAvgSamplesPerSec=5.867323816245555, CurrSamplesPerSec=5.3269276823392335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1355/5000 [4:35:58<12:01:21, 11.87s/it][2022-12-20 14:58:29,399] [INFO] [timer.py:197:stop] 0/2744, RunningAvgSamplesPerSec=5.867288330204955, CurrSamplesPerSec=5.2052450615555275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1356/5000 [4:36:09<11:47:16, 11.65s/it][2022-12-20 14:58:40,344] [INFO] [timer.py:197:stop] 0/2746, RunningAvgSamplesPerSec=5.867321968544436, CurrSamplesPerSec=5.350604813552862, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1357/5000 [4:36:20<11:34:48, 11.44s/it][2022-12-20 14:58:51,315] [INFO] [timer.py:197:stop] 0/2748, RunningAvgSamplesPerSec=5.867345307267528, CurrSamplesPerSec=5.351716990418064, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1358/5000 [4:36:31<11:26:41, 11.31s/it][2022-12-20 14:59:02,492] [INFO] [timer.py:197:stop] 0/2750, RunningAvgSamplesPerSec=5.867287941720961, CurrSamplesPerSec=5.205512553479383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1359/5000 [4:36:43<11:23:47, 11.27s/it][2022-12-20 14:59:13,476] [INFO] [timer.py:197:stop] 0/2752, RunningAvgSamplesPerSec=5.867306221692765, CurrSamplesPerSec=5.362882455469781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1360/5000 [4:36:54<11:18:29, 11.18s/it][2022-12-20 14:59:24,499] [INFO] [timer.py:197:stop] 0/2754, RunningAvgSamplesPerSec=5.867309435728499, CurrSamplesPerSec=5.330072490530641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1361/5000 [4:37:05<11:15:30, 11.14s/it][2022-12-20 14:59:35,577] [INFO] [timer.py:197:stop] 0/2756, RunningAvgSamplesPerSec=5.867290636310909, CurrSamplesPerSec=5.288418655675564, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1362/5000 [4:37:16<11:14:10, 11.12s/it][2022-12-20 14:59:46,567] [INFO] [timer.py:197:stop] 0/2758, RunningAvgSamplesPerSec=5.86730639472436, CurrSamplesPerSec=5.362812386012836, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1363/5000 [4:37:27<11:11:08, 11.07s/it][2022-12-20 14:59:57,579] [INFO] [logging.py:68:log_dist] [Rank 0] step=1380, skipped=3, lr=[8.053333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-20 14:59:57,580] [INFO] [timer.py:197:stop] 0/2760, RunningAvgSamplesPerSec=5.867312972266146, CurrSamplesPerSec=5.31772321523023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1364/5000 [4:37:38<11:09:45, 11.05s/it][2022-12-20 15:00:08,673] [INFO] [timer.py:197:stop] 0/2762, RunningAvgSamplesPerSec=5.867288437756883, CurrSamplesPerSec=5.242430297776019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1365/5000 [4:37:49<11:11:06, 11.08s/it][2022-12-20 15:00:19,732] [INFO] [timer.py:197:stop] 0/2764, RunningAvgSamplesPerSec=5.867277296831097, CurrSamplesPerSec=5.309571326943358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1366/5000 [4:38:00<11:10:08, 11.06s/it][2022-12-20 15:00:30,740] [INFO] [timer.py:197:stop] 0/2766, RunningAvgSamplesPerSec=5.867286157355487, CurrSamplesPerSec=5.332981344778789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1367/5000 [4:38:11<11:08:55, 11.05s/it][2022-12-20 15:00:41,891] [INFO] [timer.py:197:stop] 0/2768, RunningAvgSamplesPerSec=5.867239115212334, CurrSamplesPerSec=5.2082219278702215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1368/5000 [4:38:22<11:10:39, 11.08s/it][2022-12-20 15:00:52,914] [INFO] [timer.py:197:stop] 0/2770, RunningAvgSamplesPerSec=5.867241686279702, CurrSamplesPerSec=5.320572622546393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1369/5000 [4:38:33<11:09:32, 11.06s/it][2022-12-20 15:01:03,909] [INFO] [timer.py:197:stop] 0/2772, RunningAvgSamplesPerSec=5.867255286219318, CurrSamplesPerSec=5.350025333528226, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1370/5000 [4:38:44<11:07:59, 11.04s/it][2022-12-20 15:01:15,033] [INFO] [timer.py:197:stop] 0/2774, RunningAvgSamplesPerSec=5.8672187708264465, CurrSamplesPerSec=5.23199255050194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1371/5000 [4:38:55<11:09:39, 11.07s/it][2022-12-20 15:01:26,029] [INFO] [timer.py:197:stop] 0/2776, RunningAvgSamplesPerSec=5.867232050620762, CurrSamplesPerSec=5.361408811306173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1372/5000 [4:39:06<11:08:02, 11.05s/it][2022-12-20 15:01:37,050] [INFO] [timer.py:197:stop] 0/2778, RunningAvgSamplesPerSec=5.867235770852498, CurrSamplesPerSec=5.336036355951057, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1373/5000 [4:39:17<11:07:17, 11.04s/it][2022-12-20 15:01:48,176] [INFO] [logging.py:68:log_dist] [Rank 0] step=1390, skipped=3, lr=[8.031111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:01:48,179] [INFO] [timer.py:197:stop] 0/2780, RunningAvgSamplesPerSec=5.867197504425831, CurrSamplesPerSec=5.2383582328035745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 27%|██▋ | 1374/5000 [4:39:28<11:08:35, 11.06s/it][2022-12-20 15:01:59,202] [INFO] [timer.py:197:stop] 0/2782, RunningAvgSamplesPerSec=5.867200010984818, CurrSamplesPerSec=5.322838600251385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1375/5000 [4:39:39<11:07:57, 11.06s/it] {'loss': 0.0006, 'learning_rate': 8.02888888888889e-06, 'epoch': 33.53} + 28%|██▊ | 1375/5000 [4:39:39<11:07:57, 11.06s/it][2022-12-20 15:02:10,247] [INFO] [timer.py:197:stop] 0/2784, RunningAvgSamplesPerSec=5.867201221496921, CurrSamplesPerSec=5.318055913754904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1376/5000 [4:39:50<11:09:46, 11.09s/it][2022-12-20 15:02:21,549] [INFO] [timer.py:197:stop] 0/2786, RunningAvgSamplesPerSec=5.867155904526517, CurrSamplesPerSec=5.314694813669712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1377/5000 [4:40:02<11:10:54, 11.11s/it][2022-12-20 15:02:32,588] [INFO] [timer.py:197:stop] 0/2788, RunningAvgSamplesPerSec=5.867152144509463, CurrSamplesPerSec=5.304910358263791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1378/5000 [4:40:13<11:09:54, 11.10s/it][2022-12-20 15:02:43,655] [INFO] [timer.py:197:stop] 0/2790, RunningAvgSamplesPerSec=5.867141676761146, CurrSamplesPerSec=5.304385803813207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1379/5000 [4:40:24<11:07:58, 11.07s/it][2022-12-20 15:02:54,752] [INFO] [timer.py:197:stop] 0/2792, RunningAvgSamplesPerSec=5.86711616600331, CurrSamplesPerSec=5.221499997160066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1380/5000 [4:40:35<11:09:38, 11.10s/it][2022-12-20 15:03:05,842] [INFO] [timer.py:197:stop] 0/2794, RunningAvgSamplesPerSec=5.86709356103946, CurrSamplesPerSec=5.291232773241094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1381/5000 [4:40:46<11:08:44, 11.09s/it][2022-12-20 15:03:16,873] [INFO] [timer.py:197:stop] 0/2796, RunningAvgSamplesPerSec=5.867099498805753, CurrSamplesPerSec=5.3161496245381015, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1382/5000 [4:40:57<11:06:48, 11.06s/it][2022-12-20 15:03:27,880] [INFO] [timer.py:197:stop] 0/2798, RunningAvgSamplesPerSec=5.867108097276354, CurrSamplesPerSec=5.301269130778188, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1383/5000 [4:41:08<11:06:03, 11.05s/it][2022-12-20 15:03:38,895] [INFO] [logging.py:68:log_dist] [Rank 0] step=1400, skipped=3, lr=[8.00888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:03:38,897] [INFO] [timer.py:197:stop] 0/2800, RunningAvgSamplesPerSec=5.867113614161458, CurrSamplesPerSec=5.310326750466165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1384/5000 [4:41:19<11:05:05, 11.04s/it][2022-12-20 15:03:49,905] [INFO] [timer.py:197:stop] 0/2802, RunningAvgSamplesPerSec=5.867122233358075, CurrSamplesPerSec=5.307900537545747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1385/5000 [4:41:30<11:04:34, 11.03s/it][2022-12-20 15:04:01,225] [INFO] [timer.py:197:stop] 0/2804, RunningAvgSamplesPerSec=5.867011169317707, CurrSamplesPerSec=5.053979424736549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1386/5000 [4:41:41<11:10:13, 11.13s/it][2022-12-20 15:04:12,234] [INFO] [timer.py:197:stop] 0/2806, RunningAvgSamplesPerSec=5.867019343469579, CurrSamplesPerSec=5.344345531365432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1387/5000 [4:41:52<11:06:56, 11.08s/it][2022-12-20 15:04:23,199] [INFO] [timer.py:197:stop] 0/2808, RunningAvgSamplesPerSec=5.8670449305129555, CurrSamplesPerSec=5.337076056728726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1388/5000 [4:42:03<11:05:42, 11.06s/it][2022-12-20 15:04:34,226] [INFO] [timer.py:197:stop] 0/2810, RunningAvgSamplesPerSec=5.867045932593934, CurrSamplesPerSec=5.32805562930396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1389/5000 [4:42:14<11:04:25, 11.04s/it][2022-12-20 15:04:45,236] [INFO] [timer.py:197:stop] 0/2812, RunningAvgSamplesPerSec=5.867059625862733, CurrSamplesPerSec=5.316675876531561, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1390/5000 [4:42:25<11:04:24, 11.04s/it][2022-12-20 15:04:56,265] [INFO] [timer.py:197:stop] 0/2814, RunningAvgSamplesPerSec=5.867059887804062, CurrSamplesPerSec=5.334593328744016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1391/5000 [4:42:36<11:03:33, 11.03s/it][2022-12-20 15:05:07,324] [INFO] [timer.py:197:stop] 0/2816, RunningAvgSamplesPerSec=5.8670490291784665, CurrSamplesPerSec=5.2889638160500185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1392/5000 [4:42:47<11:04:29, 11.05s/it][2022-12-20 15:05:18,400] [INFO] [timer.py:197:stop] 0/2818, RunningAvgSamplesPerSec=5.867034415730452, CurrSamplesPerSec=5.3041747114546505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1393/5000 [4:42:58<11:03:26, 11.04s/it][2022-12-20 15:05:29,401] [INFO] [logging.py:68:log_dist] [Rank 0] step=1410, skipped=3, lr=[7.986666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:05:29,403] [INFO] [timer.py:197:stop] 0/2820, RunningAvgSamplesPerSec=5.867045058085155, CurrSamplesPerSec=5.303396938163012, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1394/5000 [4:43:09<11:03:27, 11.04s/it][2022-12-20 15:05:39,642] [INFO] [timer.py:197:stop] 0/2822, RunningAvgSamplesPerSec=5.86734343328232, CurrSamplesPerSec=6.111975174997072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 15:05:50,699] [INFO] [timer.py:197:stop] 0/2824, RunningAvgSamplesPerSec=5.867338862693114, CurrSamplesPerSec=5.323293336434344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1395/5000 [4:43:26<12:38:56, 12.63s/it][2022-12-20 15:06:01,797] [INFO] [timer.py:197:stop] 0/2826, RunningAvgSamplesPerSec=5.867326226765326, CurrSamplesPerSec=5.294675786349785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1396/5000 [4:43:37<12:11:05, 12.17s/it][2022-12-20 15:06:13,180] [INFO] [timer.py:197:stop] 0/2828, RunningAvgSamplesPerSec=5.867192010740171, CurrSamplesPerSec=4.986640689031509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1397/5000 [4:43:48<11:56:41, 11.93s/it][2022-12-20 15:06:24,194] [INFO] [timer.py:197:stop] 0/2830, RunningAvgSamplesPerSec=5.867198645599095, CurrSamplesPerSec=5.347763641102168, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1398/5000 [4:43:59<11:39:54, 11.66s/it][2022-12-20 15:06:35,232] [INFO] [timer.py:197:stop] 0/2832, RunningAvgSamplesPerSec=5.867196186659145, CurrSamplesPerSec=5.319893985584041, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1399/5000 [4:44:10<11:28:32, 11.47s/it][2022-12-20 15:06:46,411] [INFO] [timer.py:197:stop] 0/2834, RunningAvgSamplesPerSec=5.867140617621161, CurrSamplesPerSec=5.204313394928186, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1400/5000 [4:44:22<11:23:04, 11.38s/it] {'loss': 0.001, 'learning_rate': 7.971111111111111e-06, 'epoch': 34.14} + 28%|██▊ | 1400/5000 [4:44:22<11:23:04, 11.38s/it][2022-12-20 15:06:57,438] [INFO] [timer.py:197:stop] 0/2836, RunningAvgSamplesPerSec=5.8671491837554175, CurrSamplesPerSec=5.335715191541583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1401/5000 [4:44:33<11:16:26, 11.28s/it][2022-12-20 15:07:08,436] [INFO] [timer.py:197:stop] 0/2838, RunningAvgSamplesPerSec=5.86716195502852, CurrSamplesPerSec=5.354130892406157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1402/5000 [4:44:44<11:11:13, 11.19s/it][2022-12-20 15:07:19,529] [INFO] [logging.py:68:log_dist] [Rank 0] step=1420, skipped=3, lr=[7.964444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:07:19,530] [INFO] [timer.py:197:stop] 0/2840, RunningAvgSamplesPerSec=5.8671376598872245, CurrSamplesPerSec=5.274742328287568, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1403/5000 [4:44:55<11:09:15, 11.16s/it][2022-12-20 15:07:30,626] [INFO] [timer.py:197:stop] 0/2842, RunningAvgSamplesPerSec=5.8671191455769565, CurrSamplesPerSec=5.304923777484285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1404/5000 [4:45:06<11:07:51, 11.14s/it][2022-12-20 15:07:41,682] [INFO] [timer.py:197:stop] 0/2844, RunningAvgSamplesPerSec=5.867115892199886, CurrSamplesPerSec=5.310166656676801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1405/5000 [4:45:17<11:06:06, 11.12s/it][2022-12-20 15:07:52,865] [INFO] [timer.py:197:stop] 0/2846, RunningAvgSamplesPerSec=5.867058747024386, CurrSamplesPerSec=5.191834732693543, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1406/5000 [4:45:28<11:07:05, 11.14s/it][2022-12-20 15:08:03,883] [INFO] [timer.py:197:stop] 0/2848, RunningAvgSamplesPerSec=5.8670640955115605, CurrSamplesPerSec=5.324719693711162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1407/5000 [4:45:39<11:04:46, 11.10s/it][2022-12-20 15:08:14,866] [INFO] [timer.py:197:stop] 0/2850, RunningAvgSamplesPerSec=5.867088915224344, CurrSamplesPerSec=5.345768289960463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1408/5000 [4:45:50<11:02:27, 11.07s/it][2022-12-20 15:08:26,189] [INFO] [timer.py:197:stop] 0/2852, RunningAvgSamplesPerSec=5.866979385388325, CurrSamplesPerSec=5.087858901602813, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1409/5000 [4:46:01<11:06:55, 11.14s/it][2022-12-20 15:08:37,162] [INFO] [timer.py:197:stop] 0/2854, RunningAvgSamplesPerSec=5.8670037442008995, CurrSamplesPerSec=5.337148214263844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1410/5000 [4:46:12<11:03:40, 11.09s/it][2022-12-20 15:08:48,142] [INFO] [timer.py:197:stop] 0/2856, RunningAvgSamplesPerSec=5.867023199587913, CurrSamplesPerSec=5.344922291630221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1411/5000 [4:46:23<11:01:28, 11.06s/it][2022-12-20 15:08:59,167] [INFO] [timer.py:197:stop] 0/2858, RunningAvgSamplesPerSec=5.867025529500357, CurrSamplesPerSec=5.28827196504118, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1412/5000 [4:46:34<11:00:42, 11.05s/it][2022-12-20 15:09:10,134] [INFO] [logging.py:68:log_dist] [Rank 0] step=1430, skipped=3, lr=[7.942222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:09:10,135] [INFO] [timer.py:197:stop] 0/2860, RunningAvgSamplesPerSec=5.867055039669358, CurrSamplesPerSec=5.364055265651521, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1413/5000 [4:46:45<10:59:03, 11.02s/it][2022-12-20 15:09:21,147] [INFO] [timer.py:197:stop] 0/2862, RunningAvgSamplesPerSec=5.867061760960472, CurrSamplesPerSec=5.3138590418611855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1414/5000 [4:46:56<10:58:39, 11.02s/it][2022-12-20 15:09:32,507] [INFO] [timer.py:197:stop] 0/2864, RunningAvgSamplesPerSec=5.866937931910629, CurrSamplesPerSec=5.042016311736068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1415/5000 [4:47:08<11:04:34, 11.12s/it][2022-12-20 15:09:43,451] [INFO] [timer.py:197:stop] 0/2866, RunningAvgSamplesPerSec=5.866971301400676, CurrSamplesPerSec=5.345289908230867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1416/5000 [4:47:19<11:01:10, 11.07s/it][2022-12-20 15:09:54,511] [INFO] [timer.py:197:stop] 0/2868, RunningAvgSamplesPerSec=5.866960266775052, CurrSamplesPerSec=5.311099202067241, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1417/5000 [4:47:30<11:00:50, 11.07s/it][2022-12-20 15:10:05,475] [INFO] [timer.py:197:stop] 0/2870, RunningAvgSamplesPerSec=5.866985461643725, CurrSamplesPerSec=5.36118351947005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1418/5000 [4:47:41<10:58:49, 11.04s/it][2022-12-20 15:10:16,514] [INFO] [timer.py:197:stop] 0/2872, RunningAvgSamplesPerSec=5.866982604336558, CurrSamplesPerSec=5.3334467170235955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1419/5000 [4:47:52<10:58:41, 11.04s/it][2022-12-20 15:10:27,548] [INFO] [timer.py:197:stop] 0/2874, RunningAvgSamplesPerSec=5.86698156694766, CurrSamplesPerSec=5.3236098398253295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1420/5000 [4:48:03<10:58:28, 11.04s/it][2022-12-20 15:10:38,915] [INFO] [timer.py:197:stop] 0/2876, RunningAvgSamplesPerSec=5.866856104918715, CurrSamplesPerSec=5.0569619781005954, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1421/5000 [4:48:14<11:04:12, 11.14s/it][2022-12-20 15:10:49,925] [INFO] [timer.py:197:stop] 0/2878, RunningAvgSamplesPerSec=5.86686484900348, CurrSamplesPerSec=5.306912039785037, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1422/5000 [4:48:25<11:01:46, 11.10s/it][2022-12-20 15:11:00,946] [INFO] [logging.py:68:log_dist] [Rank 0] step=1440, skipped=3, lr=[7.92e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:11:00,949] [INFO] [timer.py:197:stop] 0/2880, RunningAvgSamplesPerSec=5.866874588050978, CurrSamplesPerSec=5.351646785652113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1423/5000 [4:48:36<11:00:16, 11.08s/it][2022-12-20 15:11:12,008] [INFO] [timer.py:197:stop] 0/2882, RunningAvgSamplesPerSec=5.866864039138834, CurrSamplesPerSec=5.29073219265182, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1424/5000 [4:48:47<10:59:48, 11.07s/it][2022-12-20 15:11:22,989] [INFO] [timer.py:197:stop] 0/2884, RunningAvgSamplesPerSec=5.866890220820885, CurrSamplesPerSec=5.378103806853964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 28%|██▊ | 1425/5000 [4:48:58<10:58:00, 11.04s/it] {'loss': 0.0009, 'learning_rate': 7.915555555555557e-06, 'epoch': 34.75} + 28%|██▊ | 1425/5000 [4:48:58<10:58:00, 11.04s/it][2022-12-20 15:11:34,049] [INFO] [timer.py:197:stop] 0/2886, RunningAvgSamplesPerSec=5.866885264751231, CurrSamplesPerSec=5.279103309733641, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1426/5000 [4:49:09<10:58:08, 11.05s/it][2022-12-20 15:11:45,134] [INFO] [timer.py:197:stop] 0/2888, RunningAvgSamplesPerSec=5.866865850099601, CurrSamplesPerSec=5.279858188051182, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1427/5000 [4:49:20<10:58:35, 11.06s/it][2022-12-20 15:11:56,172] [INFO] [timer.py:197:stop] 0/2890, RunningAvgSamplesPerSec=5.866863865739063, CurrSamplesPerSec=5.298604345623919, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1428/5000 [4:49:31<10:58:01, 11.05s/it][2022-12-20 15:12:07,200] [INFO] [timer.py:197:stop] 0/2892, RunningAvgSamplesPerSec=5.866866299764048, CurrSamplesPerSec=5.294540653151145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1429/5000 [4:49:42<10:57:24, 11.05s/it][2022-12-20 15:12:18,281] [INFO] [timer.py:197:stop] 0/2894, RunningAvgSamplesPerSec=5.866848617162379, CurrSamplesPerSec=5.292986177705614, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1430/5000 [4:49:53<10:57:50, 11.06s/it][2022-12-20 15:12:29,248] [INFO] [timer.py:197:stop] 0/2896, RunningAvgSamplesPerSec=5.86687662329632, CurrSamplesPerSec=5.352104750222908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1431/5000 [4:50:04<10:56:04, 11.03s/it][2022-12-20 15:12:40,294] [INFO] [timer.py:197:stop] 0/2898, RunningAvgSamplesPerSec=5.8668784047420335, CurrSamplesPerSec=5.335385157964976, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1432/5000 [4:50:15<10:56:10, 11.03s/it][2022-12-20 15:12:51,273] [INFO] [logging.py:68:log_dist] [Rank 0] step=1450, skipped=3, lr=[7.897777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:12:51,280] [INFO] [timer.py:197:stop] 0/2900, RunningAvgSamplesPerSec=5.866895486059719, CurrSamplesPerSec=5.357797429754562, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1433/5000 [4:50:26<10:55:19, 11.02s/it][2022-12-20 15:13:02,369] [INFO] [timer.py:197:stop] 0/2902, RunningAvgSamplesPerSec=5.866903808135881, CurrSamplesPerSec=5.349864116651812, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1434/5000 [4:50:37<10:56:07, 11.04s/it][2022-12-20 15:13:13,428] [INFO] [timer.py:197:stop] 0/2904, RunningAvgSamplesPerSec=5.866893323105897, CurrSamplesPerSec=5.3015800887893425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1435/5000 [4:50:49<10:56:16, 11.05s/it][2022-12-20 15:13:23,529] [INFO] [timer.py:197:stop] 0/2906, RunningAvgSamplesPerSec=5.867233968061934, CurrSamplesPerSec=5.316965054129023, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1436/5000 [4:51:04<12:08:58, 12.27s/it][2022-12-20 15:13:34,646] [INFO] [timer.py:197:stop] 0/2908, RunningAvgSamplesPerSec=5.867201605389572, CurrSamplesPerSec=5.315080173651068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▊ | 1437/5000 [4:51:15<11:47:16, 11.91s/it][2022-12-20 15:13:45,694] [INFO] [timer.py:197:stop] 0/2910, RunningAvgSamplesPerSec=5.867195205222465, CurrSamplesPerSec=5.331888166698414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1438/5000 [4:51:26<11:30:19, 11.63s/it][2022-12-20 15:13:56,653] [INFO] [timer.py:197:stop] 0/2912, RunningAvgSamplesPerSec=5.8672217783593545, CurrSamplesPerSec=5.341562232422383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1439/5000 [4:51:37<11:19:11, 11.44s/it][2022-12-20 15:14:07,698] [INFO] [timer.py:197:stop] 0/2914, RunningAvgSamplesPerSec=5.867216504522773, CurrSamplesPerSec=5.313482693735088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1440/5000 [4:51:48<11:12:02, 11.33s/it][2022-12-20 15:14:18,760] [INFO] [timer.py:197:stop] 0/2916, RunningAvgSamplesPerSec=5.867204581431818, CurrSamplesPerSec=5.30529115501448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1441/5000 [4:51:59<11:07:34, 11.25s/it][2022-12-20 15:14:29,825] [INFO] [timer.py:197:stop] 0/2918, RunningAvgSamplesPerSec=5.867191804776315, CurrSamplesPerSec=5.3240522475203385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1442/5000 [4:52:10<11:02:55, 11.18s/it][2022-12-20 15:14:40,834] [INFO] [logging.py:68:log_dist] [Rank 0] step=1460, skipped=3, lr=[7.875555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:14:40,835] [INFO] [timer.py:197:stop] 0/2920, RunningAvgSamplesPerSec=5.867199079372331, CurrSamplesPerSec=5.317994385621187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1443/5000 [4:52:21<11:00:33, 11.14s/it][2022-12-20 15:14:51,963] [INFO] [timer.py:197:stop] 0/2922, RunningAvgSamplesPerSec=5.867163609735234, CurrSamplesPerSec=5.256612283581605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1444/5000 [4:52:32<10:59:39, 11.13s/it][2022-12-20 15:15:03,026] [INFO] [timer.py:197:stop] 0/2924, RunningAvgSamplesPerSec=5.867151792571252, CurrSamplesPerSec=5.290244634796273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1445/5000 [4:52:43<10:57:50, 11.10s/it][2022-12-20 15:15:14,034] [INFO] [timer.py:197:stop] 0/2926, RunningAvgSamplesPerSec=5.867160064009995, CurrSamplesPerSec=5.317455021837314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1446/5000 [4:52:54<10:56:44, 11.09s/it][2022-12-20 15:15:25,081] [INFO] [timer.py:197:stop] 0/2928, RunningAvgSamplesPerSec=5.867154230800252, CurrSamplesPerSec=5.320977187718031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1447/5000 [4:53:05<10:55:26, 11.07s/it][2022-12-20 15:15:36,091] [INFO] [timer.py:197:stop] 0/2930, RunningAvgSamplesPerSec=5.86716221353412, CurrSamplesPerSec=5.334001836847998, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1448/5000 [4:53:16<10:53:28, 11.04s/it][2022-12-20 15:15:47,122] [INFO] [timer.py:197:stop] 0/2932, RunningAvgSamplesPerSec=5.867167395489543, CurrSamplesPerSec=5.2794615125578925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1449/5000 [4:53:27<10:54:32, 11.06s/it][2022-12-20 15:15:58,239] [INFO] [timer.py:197:stop] 0/2934, RunningAvgSamplesPerSec=5.86714863172802, CurrSamplesPerSec=5.287083945794458, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1450/5000 [4:53:38<10:55:03, 11.07s/it] {'loss': 0.0007, 'learning_rate': 7.860000000000001e-06, 'epoch': 35.36} + 29%|██▉ | 1450/5000 [4:53:38<10:55:03, 11.07s/it][2022-12-20 15:16:09,321] [INFO] [timer.py:197:stop] 0/2936, RunningAvgSamplesPerSec=5.867140142287413, CurrSamplesPerSec=5.299458760613213, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1451/5000 [4:53:49<10:54:30, 11.07s/it][2022-12-20 15:16:20,278] [INFO] [timer.py:197:stop] 0/2938, RunningAvgSamplesPerSec=5.867171443541817, CurrSamplesPerSec=5.370010380915503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1452/5000 [4:54:00<10:52:02, 11.03s/it][2022-12-20 15:16:31,293] [INFO] [logging.py:68:log_dist] [Rank 0] step=1470, skipped=3, lr=[7.853333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:16:31,294] [INFO] [timer.py:197:stop] 0/2940, RunningAvgSamplesPerSec=5.867176768975583, CurrSamplesPerSec=5.298791983678718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1453/5000 [4:54:11<10:51:30, 11.02s/it][2022-12-20 15:16:42,288] [INFO] [timer.py:197:stop] 0/2942, RunningAvgSamplesPerSec=5.86719038388301, CurrSamplesPerSec=5.3109327571587315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1454/5000 [4:54:22<10:50:51, 11.01s/it][2022-12-20 15:16:53,330] [INFO] [timer.py:197:stop] 0/2944, RunningAvgSamplesPerSec=5.867186414520064, CurrSamplesPerSec=5.269410659324282, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1455/5000 [4:54:33<10:51:32, 11.03s/it][2022-12-20 15:17:04,313] [INFO] [timer.py:197:stop] 0/2946, RunningAvgSamplesPerSec=5.8672037531643735, CurrSamplesPerSec=5.33894004454204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1456/5000 [4:54:44<10:50:57, 11.02s/it][2022-12-20 15:17:15,345] [INFO] [timer.py:197:stop] 0/2948, RunningAvgSamplesPerSec=5.867203384647868, CurrSamplesPerSec=5.314893063982781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1457/5000 [4:54:55<10:51:28, 11.03s/it][2022-12-20 15:17:26,449] [INFO] [timer.py:197:stop] 0/2950, RunningAvgSamplesPerSec=5.867176306276565, CurrSamplesPerSec=5.275464441914845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1458/5000 [4:55:06<10:51:18, 11.03s/it][2022-12-20 15:17:37,388] [INFO] [timer.py:197:stop] 0/2952, RunningAvgSamplesPerSec=5.867209703867827, CurrSamplesPerSec=5.359682771051617, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1459/5000 [4:55:17<10:49:29, 11.01s/it][2022-12-20 15:17:48,393] [INFO] [timer.py:197:stop] 0/2954, RunningAvgSamplesPerSec=5.8672187644952185, CurrSamplesPerSec=5.302231857858908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1460/5000 [4:55:28<10:50:14, 11.02s/it][2022-12-20 15:17:59,768] [INFO] [timer.py:197:stop] 0/2956, RunningAvgSamplesPerSec=5.867093372170636, CurrSamplesPerSec=5.0381269772130315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1461/5000 [4:55:40<10:55:38, 11.12s/it][2022-12-20 15:18:10,739] [INFO] [timer.py:197:stop] 0/2958, RunningAvgSamplesPerSec=5.867114386874484, CurrSamplesPerSec=5.344570261522429, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1462/5000 [4:55:51<10:53:22, 11.08s/it][2022-12-20 15:18:21,779] [INFO] [logging.py:68:log_dist] [Rank 0] step=1480, skipped=3, lr=[7.831111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:18:21,781] [INFO] [timer.py:197:stop] 0/2960, RunningAvgSamplesPerSec=5.867115780039747, CurrSamplesPerSec=5.3189859626643825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1463/5000 [4:56:02<10:52:04, 11.06s/it][2022-12-20 15:18:32,865] [INFO] [timer.py:197:stop] 0/2962, RunningAvgSamplesPerSec=5.867096446134377, CurrSamplesPerSec=5.2485556158247215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1464/5000 [4:56:13<10:52:47, 11.08s/it][2022-12-20 15:18:43,883] [INFO] [timer.py:197:stop] 0/2964, RunningAvgSamplesPerSec=5.867101121467743, CurrSamplesPerSec=5.330315708800027, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1465/5000 [4:56:24<10:50:41, 11.04s/it][2022-12-20 15:18:54,886] [INFO] [timer.py:197:stop] 0/2966, RunningAvgSamplesPerSec=5.86711117318959, CurrSamplesPerSec=5.30045662417831, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1466/5000 [4:56:35<10:50:09, 11.04s/it][2022-12-20 15:19:06,042] [INFO] [timer.py:197:stop] 0/2968, RunningAvgSamplesPerSec=5.867066482677286, CurrSamplesPerSec=5.187811181657828, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1467/5000 [4:56:46<10:52:47, 11.09s/it][2022-12-20 15:19:17,051] [INFO] [timer.py:197:stop] 0/2970, RunningAvgSamplesPerSec=5.867074273623438, CurrSamplesPerSec=5.3516979986734965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1468/5000 [4:56:57<10:50:55, 11.06s/it][2022-12-20 15:19:28,111] [INFO] [timer.py:197:stop] 0/2972, RunningAvgSamplesPerSec=5.8670641933922445, CurrSamplesPerSec=5.291072368564475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1469/5000 [4:57:08<10:50:01, 11.05s/it][2022-12-20 15:19:39,112] [INFO] [timer.py:197:stop] 0/2974, RunningAvgSamplesPerSec=5.86707476244847, CurrSamplesPerSec=5.3039024334148825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1470/5000 [4:57:19<10:49:19, 11.04s/it][2022-12-20 15:19:50,076] [INFO] [timer.py:197:stop] 0/2976, RunningAvgSamplesPerSec=5.867105391228582, CurrSamplesPerSec=5.351362997924009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1471/5000 [4:57:30<10:48:22, 11.02s/it][2022-12-20 15:20:01,110] [INFO] [timer.py:197:stop] 0/2978, RunningAvgSamplesPerSec=5.867106639467362, CurrSamplesPerSec=5.318574323737692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1472/5000 [4:57:41<10:47:37, 11.01s/it][2022-12-20 15:20:12,106] [INFO] [logging.py:68:log_dist] [Rank 0] step=1490, skipped=3, lr=[7.808888888888888e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:20:12,107] [INFO] [timer.py:197:stop] 0/2980, RunningAvgSamplesPerSec=5.867118586350411, CurrSamplesPerSec=5.309225618020818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1473/5000 [4:57:52<10:48:10, 11.03s/it][2022-12-20 15:20:23,132] [INFO] [timer.py:197:stop] 0/2982, RunningAvgSamplesPerSec=5.867127296532327, CurrSamplesPerSec=5.3374761311717895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 29%|██▉ | 1474/5000 [4:58:03<10:47:57, 11.03s/it][2022-12-20 15:20:34,183] [INFO] [timer.py:197:stop] 0/2984, RunningAvgSamplesPerSec=5.867120041797028, CurrSamplesPerSec=5.313298641257833, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1475/5000 [4:58:14<10:48:07, 11.03s/it] {'loss': 0.0006, 'learning_rate': 7.804444444444445e-06, 'epoch': 35.96} + 30%|██▉ | 1475/5000 [4:58:14<10:48:07, 11.03s/it][2022-12-20 15:20:45,476] [INFO] [timer.py:197:stop] 0/2986, RunningAvgSamplesPerSec=5.867026159989341, CurrSamplesPerSec=5.104870620413164, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1476/5000 [4:58:26<10:52:18, 11.11s/it][2022-12-20 15:20:55,582] [INFO] [timer.py:197:stop] 0/2988, RunningAvgSamplesPerSec=5.867355876288415, CurrSamplesPerSec=6.267056000842719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 15:21:06,645] [INFO] [timer.py:197:stop] 0/2990, RunningAvgSamplesPerSec=5.867347890080102, CurrSamplesPerSec=5.301989101229689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1477/5000 [4:58:42<12:22:14, 12.64s/it][2022-12-20 15:21:17,964] [INFO] [timer.py:197:stop] 0/2992, RunningAvgSamplesPerSec=5.867244082505629, CurrSamplesPerSec=5.0847359718844425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1478/5000 [4:58:53<11:58:45, 12.24s/it][2022-12-20 15:21:29,028] [INFO] [timer.py:197:stop] 0/2994, RunningAvgSamplesPerSec=5.8672382996983155, CurrSamplesPerSec=5.303764942579312, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1479/5000 [4:59:04<11:37:46, 11.89s/it][2022-12-20 15:21:40,013] [INFO] [timer.py:197:stop] 0/2996, RunningAvgSamplesPerSec=5.867255114910844, CurrSamplesPerSec=5.3425043498640905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1480/5000 [4:59:15<11:21:38, 11.62s/it][2022-12-20 15:21:51,277] [INFO] [timer.py:197:stop] 0/2998, RunningAvgSamplesPerSec=5.867171447366345, CurrSamplesPerSec=5.101236981591604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1481/5000 [4:59:26<11:15:12, 11.51s/it][2022-12-20 15:22:02,321] [INFO] [logging.py:68:log_dist] [Rank 0] step=1500, skipped=3, lr=[7.786666666666666e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:22:02,323] [INFO] [timer.py:197:stop] 0/3000, RunningAvgSamplesPerSec=5.867169561864401, CurrSamplesPerSec=5.3275727993868145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1482/5000 [4:59:37<11:06:47, 11.37s/it][2022-12-20 15:22:13,434] [INFO] [timer.py:197:stop] 0/3002, RunningAvgSamplesPerSec=5.867147202143589, CurrSamplesPerSec=5.284803354107506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1483/5000 [4:59:49<11:02:01, 11.29s/it][2022-12-20 15:22:24,477] [INFO] [timer.py:197:stop] 0/3004, RunningAvgSamplesPerSec=5.867143373747431, CurrSamplesPerSec=5.289350456217155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1484/5000 [5:00:00<10:57:25, 11.22s/it][2022-12-20 15:22:35,515] [INFO] [timer.py:197:stop] 0/3006, RunningAvgSamplesPerSec=5.867147431483427, CurrSamplesPerSec=5.353116989083556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1485/5000 [5:00:11<10:54:02, 11.16s/it][2022-12-20 15:22:46,572] [INFO] [timer.py:197:stop] 0/3008, RunningAvgSamplesPerSec=5.867144781786277, CurrSamplesPerSec=5.310961758172568, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1486/5000 [5:00:22<10:51:58, 11.13s/it][2022-12-20 15:22:57,785] [INFO] [timer.py:197:stop] 0/3010, RunningAvgSamplesPerSec=5.867079697566178, CurrSamplesPerSec=5.172297534766197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1487/5000 [5:00:33<10:53:12, 11.16s/it][2022-12-20 15:23:08,780] [INFO] [timer.py:197:stop] 0/3012, RunningAvgSamplesPerSec=5.867096655425602, CurrSamplesPerSec=5.328094970156685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1488/5000 [5:00:44<10:50:11, 11.11s/it][2022-12-20 15:23:19,827] [INFO] [timer.py:197:stop] 0/3014, RunningAvgSamplesPerSec=5.867090955722156, CurrSamplesPerSec=5.312953498361217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1489/5000 [5:00:55<10:48:56, 11.09s/it][2022-12-20 15:23:30,908] [INFO] [timer.py:197:stop] 0/3016, RunningAvgSamplesPerSec=5.867073578279416, CurrSamplesPerSec=5.282343438659136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1490/5000 [5:01:06<10:48:35, 11.09s/it][2022-12-20 15:23:41,964] [INFO] [timer.py:197:stop] 0/3018, RunningAvgSamplesPerSec=5.867066200345507, CurrSamplesPerSec=5.31085647339549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1491/5000 [5:01:17<10:47:52, 11.08s/it][2022-12-20 15:23:53,019] [INFO] [logging.py:68:log_dist] [Rank 0] step=1510, skipped=3, lr=[7.764444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:23:53,020] [INFO] [timer.py:197:stop] 0/3020, RunningAvgSamplesPerSec=5.867057125320396, CurrSamplesPerSec=5.2829977642724435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1492/5000 [5:01:28<10:47:18, 11.07s/it][2022-12-20 15:24:04,062] [INFO] [timer.py:197:stop] 0/3022, RunningAvgSamplesPerSec=5.86705310962787, CurrSamplesPerSec=5.32072132157189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1493/5000 [5:01:39<10:46:35, 11.06s/it][2022-12-20 15:24:15,169] [INFO] [timer.py:197:stop] 0/3024, RunningAvgSamplesPerSec=5.867045179116094, CurrSamplesPerSec=5.322561447735841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1494/5000 [5:01:50<10:47:11, 11.08s/it][2022-12-20 15:24:26,117] [INFO] [timer.py:197:stop] 0/3026, RunningAvgSamplesPerSec=5.867074426057374, CurrSamplesPerSec=5.3719449647689785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1495/5000 [5:02:01<10:44:46, 11.04s/it][2022-12-20 15:24:37,192] [INFO] [timer.py:197:stop] 0/3028, RunningAvgSamplesPerSec=5.867058263240498, CurrSamplesPerSec=5.277408268189673, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1496/5000 [5:02:12<10:45:14, 11.05s/it][2022-12-20 15:24:48,178] [INFO] [timer.py:197:stop] 0/3030, RunningAvgSamplesPerSec=5.867074493580833, CurrSamplesPerSec=5.344055708443579, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1497/5000 [5:02:23<10:43:57, 11.03s/it][2022-12-20 15:24:59,168] [INFO] [timer.py:197:stop] 0/3032, RunningAvgSamplesPerSec=5.8670892522203175, CurrSamplesPerSec=5.362276533070928, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1498/5000 [5:02:34<10:43:05, 11.02s/it][2022-12-20 15:25:10,224] [INFO] [timer.py:197:stop] 0/3034, RunningAvgSamplesPerSec=5.8670807024635305, CurrSamplesPerSec=5.333892880964507, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|██▉ | 1499/5000 [5:02:45<10:43:34, 11.03s/it][2022-12-20 15:25:21,252] [INFO] [timer.py:197:stop] 0/3036, RunningAvgSamplesPerSec=5.86708176744793, CurrSamplesPerSec=5.327406800556645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1500/5000 [5:02:56<10:43:21, 11.03s/it] {'loss': 0.0005, 'learning_rate': 7.746666666666666e-06, 'epoch': 36.58} + 30%|███ | 1500/5000 [5:02:56<10:43:21, 11.03s/it][2022-12-20 15:25:32,239] [INFO] [timer.py:197:stop] 0/3038, RunningAvgSamplesPerSec=5.86709766334428, CurrSamplesPerSec=5.338563108297541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1501/5000 [5:03:07<10:42:26, 11.02s/it][2022-12-20 15:25:43,260] [INFO] [logging.py:68:log_dist] [Rank 0] step=1520, skipped=3, lr=[7.742222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:25:43,262] [INFO] [timer.py:197:stop] 0/3040, RunningAvgSamplesPerSec=5.867100782471541, CurrSamplesPerSec=5.32822166857146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1502/5000 [5:03:18<10:42:21, 11.02s/it][2022-12-20 15:25:54,379] [INFO] [timer.py:197:stop] 0/3042, RunningAvgSamplesPerSec=5.86707693277007, CurrSamplesPerSec=5.345282457479255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1503/5000 [5:03:29<10:43:54, 11.05s/it][2022-12-20 15:26:05,460] [INFO] [timer.py:197:stop] 0/3044, RunningAvgSamplesPerSec=5.867059272516083, CurrSamplesPerSec=5.2841802029652465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1504/5000 [5:03:41<10:44:18, 11.06s/it][2022-12-20 15:26:16,483] [INFO] [timer.py:197:stop] 0/3046, RunningAvgSamplesPerSec=5.867061892689114, CurrSamplesPerSec=5.3409679189900485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1505/5000 [5:03:52<10:43:31, 11.05s/it][2022-12-20 15:26:27,517] [INFO] [timer.py:197:stop] 0/3048, RunningAvgSamplesPerSec=5.867061267464411, CurrSamplesPerSec=5.299335309490053, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1506/5000 [5:04:03<10:43:06, 11.04s/it][2022-12-20 15:26:38,506] [INFO] [timer.py:197:stop] 0/3050, RunningAvgSamplesPerSec=5.8670764339901655, CurrSamplesPerSec=5.320373315612726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1507/5000 [5:04:14<10:41:57, 11.03s/it][2022-12-20 15:26:49,467] [INFO] [timer.py:197:stop] 0/3052, RunningAvgSamplesPerSec=5.867101363899046, CurrSamplesPerSec=5.347604691134345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1508/5000 [5:04:25<10:40:37, 11.01s/it][2022-12-20 15:27:00,560] [INFO] [timer.py:197:stop] 0/3054, RunningAvgSamplesPerSec=5.867085886600037, CurrSamplesPerSec=5.2962941610466086, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1509/5000 [5:04:36<10:41:55, 11.03s/it][2022-12-20 15:27:11,600] [INFO] [timer.py:197:stop] 0/3056, RunningAvgSamplesPerSec=5.867082602322803, CurrSamplesPerSec=5.326687098547047, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1510/5000 [5:04:47<10:41:52, 11.04s/it][2022-12-20 15:27:22,628] [INFO] [timer.py:197:stop] 0/3058, RunningAvgSamplesPerSec=5.867083961108254, CurrSamplesPerSec=5.328651938211841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1511/5000 [5:04:58<10:41:33, 11.03s/it][2022-12-20 15:27:33,693] [INFO] [logging.py:68:log_dist] [Rank 0] step=1530, skipped=3, lr=[7.72e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:27:33,695] [INFO] [timer.py:197:stop] 0/3060, RunningAvgSamplesPerSec=5.867077621120877, CurrSamplesPerSec=5.309095411265069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1512/5000 [5:05:09<10:41:58, 11.04s/it][2022-12-20 15:27:44,749] [INFO] [timer.py:197:stop] 0/3062, RunningAvgSamplesPerSec=5.867069595993426, CurrSamplesPerSec=5.297249436985049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1513/5000 [5:05:20<10:41:58, 11.05s/it][2022-12-20 15:27:55,721] [INFO] [timer.py:197:stop] 0/3064, RunningAvgSamplesPerSec=5.86709092813864, CurrSamplesPerSec=5.357057733711487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1514/5000 [5:05:31<10:40:29, 11.02s/it][2022-12-20 15:28:06,843] [INFO] [timer.py:197:stop] 0/3066, RunningAvgSamplesPerSec=5.86705909310525, CurrSamplesPerSec=5.324130600413189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1515/5000 [5:05:42<10:42:01, 11.05s/it][2022-12-20 15:28:17,862] [INFO] [timer.py:197:stop] 0/3068, RunningAvgSamplesPerSec=5.8670641199060265, CurrSamplesPerSec=5.31152608006618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1516/5000 [5:05:53<10:41:13, 11.04s/it][2022-12-20 15:28:28,829] [INFO] [timer.py:197:stop] 0/3070, RunningAvgSamplesPerSec=5.867086311420677, CurrSamplesPerSec=5.3414888926032935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1517/5000 [5:06:04<10:39:44, 11.02s/it][2022-12-20 15:28:39,013] [INFO] [timer.py:197:stop] 0/3072, RunningAvgSamplesPerSec=5.867379823670472, CurrSamplesPerSec=5.3178323542999415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1518/5000 [5:06:19<11:51:06, 12.25s/it][2022-12-20 15:28:50,007] [INFO] [timer.py:197:stop] 0/3074, RunningAvgSamplesPerSec=5.867392451654223, CurrSamplesPerSec=5.347825220773852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1519/5000 [5:06:30<11:29:22, 11.88s/it][2022-12-20 15:29:01,059] [INFO] [timer.py:197:stop] 0/3076, RunningAvgSamplesPerSec=5.8673846384449435, CurrSamplesPerSec=5.316255540434527, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1520/5000 [5:06:41<11:14:07, 11.62s/it][2022-12-20 15:29:12,152] [INFO] [timer.py:197:stop] 0/3078, RunningAvgSamplesPerSec=5.867362195003146, CurrSamplesPerSec=5.249550620378161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1521/5000 [5:06:52<11:04:32, 11.46s/it][2022-12-20 15:29:23,130] [INFO] [logging.py:68:log_dist] [Rank 0] step=1540, skipped=3, lr=[7.697777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:29:23,131] [INFO] [timer.py:197:stop] 0/3080, RunningAvgSamplesPerSec=5.867379931724516, CurrSamplesPerSec=5.3407003510788025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1522/5000 [5:07:03<10:56:27, 11.32s/it][2022-12-20 15:29:34,192] [INFO] [timer.py:197:stop] 0/3082, RunningAvgSamplesPerSec=5.867369149413616, CurrSamplesPerSec=5.293720185590196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1523/5000 [5:07:14<10:52:15, 11.26s/it][2022-12-20 15:29:45,290] [INFO] [timer.py:197:stop] 0/3084, RunningAvgSamplesPerSec=5.867347823001926, CurrSamplesPerSec=5.29513303467783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1524/5000 [5:07:25<10:48:20, 11.19s/it][2022-12-20 15:29:56,259] [INFO] [timer.py:197:stop] 0/3086, RunningAvgSamplesPerSec=5.867368933240322, CurrSamplesPerSec=5.353656778601285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 30%|███ | 1525/5000 [5:07:36<10:44:30, 11.13s/it] {'loss': 0.0004, 'learning_rate': 7.691111111111112e-06, 'epoch': 37.19} + 30%|███ | 1525/5000 [5:07:36<10:44:30, 11.13s/it][2022-12-20 15:30:07,287] [INFO] [timer.py:197:stop] 0/3088, RunningAvgSamplesPerSec=5.867374850148722, CurrSamplesPerSec=5.314342335044032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1526/5000 [5:07:47<10:42:46, 11.10s/it][2022-12-20 15:30:18,342] [INFO] [timer.py:197:stop] 0/3090, RunningAvgSamplesPerSec=5.867366324211938, CurrSamplesPerSec=5.29861605953942, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1527/5000 [5:07:58<10:41:40, 11.09s/it][2022-12-20 15:30:29,364] [INFO] [timer.py:197:stop] 0/3092, RunningAvgSamplesPerSec=5.867369688964089, CurrSamplesPerSec=5.3219747310963506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1528/5000 [5:08:09<10:40:43, 11.07s/it][2022-12-20 15:30:40,371] [INFO] [timer.py:197:stop] 0/3094, RunningAvgSamplesPerSec=5.8673812788436726, CurrSamplesPerSec=5.353147306653666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1529/5000 [5:08:20<10:38:42, 11.04s/it][2022-12-20 15:30:51,422] [INFO] [timer.py:197:stop] 0/3096, RunningAvgSamplesPerSec=5.867374205248553, CurrSamplesPerSec=5.27852841795515, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1530/5000 [5:08:31<10:39:14, 11.05s/it][2022-12-20 15:31:02,415] [INFO] [timer.py:197:stop] 0/3098, RunningAvgSamplesPerSec=5.867387605115355, CurrSamplesPerSec=5.356932225917254, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1531/5000 [5:08:42<10:38:01, 11.04s/it][2022-12-20 15:31:13,446] [INFO] [logging.py:68:log_dist] [Rank 0] step=1550, skipped=3, lr=[7.675555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:31:13,448] [INFO] [timer.py:197:stop] 0/3100, RunningAvgSamplesPerSec=5.867387245830778, CurrSamplesPerSec=5.3225557487866615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1532/5000 [5:08:53<10:37:13, 11.02s/it][2022-12-20 15:31:24,606] [INFO] [timer.py:197:stop] 0/3102, RunningAvgSamplesPerSec=5.867342952325706, CurrSamplesPerSec=5.185365986521761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1533/5000 [5:09:05<10:39:51, 11.07s/it][2022-12-20 15:31:35,638] [INFO] [timer.py:197:stop] 0/3104, RunningAvgSamplesPerSec=5.867342723093399, CurrSamplesPerSec=5.320308570436001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1534/5000 [5:09:16<10:38:19, 11.05s/it][2022-12-20 15:31:46,590] [INFO] [timer.py:197:stop] 0/3106, RunningAvgSamplesPerSec=5.867369856408551, CurrSamplesPerSec=5.358584182359116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1535/5000 [5:09:27<10:36:53, 11.03s/it][2022-12-20 15:31:58,001] [INFO] [timer.py:197:stop] 0/3108, RunningAvgSamplesPerSec=5.867238432312703, CurrSamplesPerSec=4.996465448998019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1536/5000 [5:09:38<10:42:37, 11.13s/it][2022-12-20 15:32:08,955] [INFO] [timer.py:197:stop] 0/3110, RunningAvgSamplesPerSec=5.867264888302843, CurrSamplesPerSec=5.342848876581525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1537/5000 [5:09:49<10:40:55, 11.10s/it][2022-12-20 15:32:20,029] [INFO] [timer.py:197:stop] 0/3112, RunningAvgSamplesPerSec=5.867250022897016, CurrSamplesPerSec=5.315813585337739, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1538/5000 [5:10:00<10:39:28, 11.08s/it][2022-12-20 15:32:31,214] [INFO] [timer.py:197:stop] 0/3114, RunningAvgSamplesPerSec=5.86719718814154, CurrSamplesPerSec=5.183987472320314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1539/5000 [5:10:11<10:40:40, 11.11s/it][2022-12-20 15:32:42,178] [INFO] [timer.py:197:stop] 0/3116, RunningAvgSamplesPerSec=5.867220329416286, CurrSamplesPerSec=5.356111331902911, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1540/5000 [5:10:22<10:37:06, 11.05s/it][2022-12-20 15:32:53,132] [INFO] [timer.py:197:stop] 0/3118, RunningAvgSamplesPerSec=5.867246327999638, CurrSamplesPerSec=5.317448069815289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1541/5000 [5:10:33<10:36:09, 11.03s/it][2022-12-20 15:33:04,488] [INFO] [logging.py:68:log_dist] [Rank 0] step=1560, skipped=3, lr=[7.653333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:33:04,490] [INFO] [timer.py:197:stop] 0/3120, RunningAvgSamplesPerSec=5.8671330803190855, CurrSamplesPerSec=5.022460492732595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1542/5000 [5:10:45<10:42:07, 11.14s/it][2022-12-20 15:33:15,521] [INFO] [timer.py:197:stop] 0/3122, RunningAvgSamplesPerSec=5.8671325232473075, CurrSamplesPerSec=5.322616327130317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1543/5000 [5:10:56<10:39:46, 11.10s/it][2022-12-20 15:33:26,543] [INFO] [timer.py:197:stop] 0/3124, RunningAvgSamplesPerSec=5.867135794683141, CurrSamplesPerSec=5.317427424523699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1544/5000 [5:11:07<10:40:53, 11.13s/it][2022-12-20 15:33:37,983] [INFO] [timer.py:197:stop] 0/3126, RunningAvgSamplesPerSec=5.867089641406597, CurrSamplesPerSec=5.320040959697411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1545/5000 [5:11:18<10:42:46, 11.16s/it][2022-12-20 15:33:48,980] [INFO] [timer.py:197:stop] 0/3128, RunningAvgSamplesPerSec=5.867101211428546, CurrSamplesPerSec=5.307833996520625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1546/5000 [5:11:29<10:40:28, 11.13s/it][2022-12-20 15:34:00,027] [INFO] [timer.py:197:stop] 0/3130, RunningAvgSamplesPerSec=5.867095764038956, CurrSamplesPerSec=5.3021901750277785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1547/5000 [5:11:40<10:39:10, 11.11s/it][2022-12-20 15:34:11,443] [INFO] [timer.py:197:stop] 0/3132, RunningAvgSamplesPerSec=5.86696327746274, CurrSamplesPerSec=5.008381138957689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1548/5000 [5:11:51<10:44:00, 11.19s/it][2022-12-20 15:34:22,446] [INFO] [timer.py:197:stop] 0/3134, RunningAvgSamplesPerSec=5.866972619652037, CurrSamplesPerSec=5.336698110458002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1549/5000 [5:12:03<10:41:05, 11.15s/it][2022-12-20 15:34:33,530] [INFO] [timer.py:197:stop] 0/3136, RunningAvgSamplesPerSec=5.866954316963651, CurrSamplesPerSec=5.2938504744726345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1550/5000 [5:12:14<10:39:18, 11.12s/it] {'loss': 0.0004, 'learning_rate': 7.635555555555556e-06, 'epoch': 37.8} + 31%|███ | 1550/5000 [5:12:14<10:39:18, 11.12s/it][2022-12-20 15:34:44,898] [INFO] [timer.py:197:stop] 0/3138, RunningAvgSamplesPerSec=5.866839421662756, CurrSamplesPerSec=5.033475428352372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1551/5000 [5:12:25<10:42:50, 11.18s/it][2022-12-20 15:34:55,864] [INFO] [logging.py:68:log_dist] [Rank 0] step=1570, skipped=3, lr=[7.631111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:34:55,866] [INFO] [timer.py:197:stop] 0/3140, RunningAvgSamplesPerSec=5.866861278404587, CurrSamplesPerSec=5.338900118633625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1552/5000 [5:12:36<10:40:15, 11.14s/it][2022-12-20 15:35:06,899] [INFO] [timer.py:197:stop] 0/3142, RunningAvgSamplesPerSec=5.86686099948287, CurrSamplesPerSec=5.349256228768708, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1553/5000 [5:12:47<10:37:47, 11.10s/it][2022-12-20 15:35:17,967] [INFO] [timer.py:197:stop] 0/3144, RunningAvgSamplesPerSec=5.866848782413336, CurrSamplesPerSec=5.297122325696289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1554/5000 [5:12:58<10:37:06, 11.09s/it][2022-12-20 15:35:29,003] [INFO] [timer.py:197:stop] 0/3146, RunningAvgSamplesPerSec=5.866847256798957, CurrSamplesPerSec=5.329595009044819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1555/5000 [5:13:09<10:34:54, 11.06s/it][2022-12-20 15:35:40,015] [INFO] [timer.py:197:stop] 0/3148, RunningAvgSamplesPerSec=5.866853990259572, CurrSamplesPerSec=5.2978100119962575, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1556/5000 [5:13:20<10:35:10, 11.07s/it][2022-12-20 15:35:51,153] [INFO] [timer.py:197:stop] 0/3150, RunningAvgSamplesPerSec=5.8668173622743085, CurrSamplesPerSec=5.250824743970036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1557/5000 [5:13:31<10:35:24, 11.07s/it][2022-12-20 15:36:02,190] [INFO] [timer.py:197:stop] 0/3152, RunningAvgSamplesPerSec=5.866815552610913, CurrSamplesPerSec=5.297470223723035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1558/5000 [5:13:42<10:34:15, 11.06s/it][2022-12-20 15:36:12,277] [INFO] [timer.py:197:stop] 0/3154, RunningAvgSamplesPerSec=5.86713429495683, CurrSamplesPerSec=6.24804235631808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 15:36:23,356] [INFO] [timer.py:197:stop] 0/3156, RunningAvgSamplesPerSec=5.867117181613105, CurrSamplesPerSec=5.25697197048839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1559/5000 [5:13:58<12:03:29, 12.62s/it][2022-12-20 15:36:34,378] [INFO] [timer.py:197:stop] 0/3158, RunningAvgSamplesPerSec=5.867120232068444, CurrSamplesPerSec=5.307669855132011, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1560/5000 [5:14:09<11:35:53, 12.14s/it][2022-12-20 15:36:45,366] [INFO] [logging.py:68:log_dist] [Rank 0] step=1580, skipped=3, lr=[7.608888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:36:45,367] [INFO] [timer.py:197:stop] 0/3160, RunningAvgSamplesPerSec=5.867134621129926, CurrSamplesPerSec=5.361038331913165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1561/5000 [5:14:20<11:15:56, 11.79s/it][2022-12-20 15:36:56,393] [INFO] [timer.py:197:stop] 0/3162, RunningAvgSamplesPerSec=5.867136629268005, CurrSamplesPerSec=5.310088924328765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███ | 1562/5000 [5:14:31<11:02:33, 11.56s/it][2022-12-20 15:37:07,454] [INFO] [timer.py:197:stop] 0/3164, RunningAvgSamplesPerSec=5.867126930946481, CurrSamplesPerSec=5.301556215972514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1563/5000 [5:14:43<10:53:44, 11.41s/it][2022-12-20 15:37:18,431] [INFO] [timer.py:197:stop] 0/3166, RunningAvgSamplesPerSec=5.8671455042925995, CurrSamplesPerSec=5.332084736214061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1564/5000 [5:14:54<10:46:03, 11.28s/it][2022-12-20 15:37:29,469] [INFO] [timer.py:197:stop] 0/3168, RunningAvgSamplesPerSec=5.867143193945456, CurrSamplesPerSec=5.3121220584991615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1565/5000 [5:15:05<10:41:41, 11.21s/it][2022-12-20 15:37:40,476] [INFO] [timer.py:197:stop] 0/3170, RunningAvgSamplesPerSec=5.867151760285315, CurrSamplesPerSec=5.347842480382645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1566/5000 [5:15:16<10:38:02, 11.15s/it][2022-12-20 15:37:51,524] [INFO] [timer.py:197:stop] 0/3172, RunningAvgSamplesPerSec=5.867146126950085, CurrSamplesPerSec=5.297617216008058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1567/5000 [5:15:27<10:36:08, 11.12s/it][2022-12-20 15:38:02,476] [INFO] [timer.py:197:stop] 0/3174, RunningAvgSamplesPerSec=5.867173263021302, CurrSamplesPerSec=5.351576796108012, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1568/5000 [5:15:38<10:33:05, 11.07s/it][2022-12-20 15:38:13,454] [INFO] [timer.py:197:stop] 0/3176, RunningAvgSamplesPerSec=5.867191318574457, CurrSamplesPerSec=5.328544046966301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1569/5000 [5:15:49<10:31:21, 11.04s/it][2022-12-20 15:38:24,456] [INFO] [timer.py:197:stop] 0/3178, RunningAvgSamplesPerSec=5.8672017471547875, CurrSamplesPerSec=5.342220680356176, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1570/5000 [5:16:00<10:30:31, 11.03s/it][2022-12-20 15:38:35,435] [INFO] [logging.py:68:log_dist] [Rank 0] step=1590, skipped=3, lr=[7.586666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:38:35,437] [INFO] [timer.py:197:stop] 0/3180, RunningAvgSamplesPerSec=5.867219361530512, CurrSamplesPerSec=5.301566686479741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1571/5000 [5:16:11<10:29:29, 11.01s/it][2022-12-20 15:38:46,465] [INFO] [timer.py:197:stop] 0/3182, RunningAvgSamplesPerSec=5.8672202113364555, CurrSamplesPerSec=5.333258735852439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1572/5000 [5:16:22<10:29:32, 11.02s/it][2022-12-20 15:38:57,465] [INFO] [timer.py:197:stop] 0/3184, RunningAvgSamplesPerSec=5.867231058896733, CurrSamplesPerSec=5.351747718919474, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1573/5000 [5:16:33<10:29:01, 11.01s/it][2022-12-20 15:39:08,700] [INFO] [timer.py:197:stop] 0/3186, RunningAvgSamplesPerSec=5.86716228901257, CurrSamplesPerSec=5.154021646873344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 31%|███▏ | 1574/5000 [5:16:44<10:32:39, 11.08s/it][2022-12-20 15:39:19,708] [INFO] [timer.py:197:stop] 0/3188, RunningAvgSamplesPerSec=5.867170208009518, CurrSamplesPerSec=5.340776431998533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1575/5000 [5:16:55<10:31:14, 11.06s/it] {'loss': 0.0004, 'learning_rate': 7.5777777777777785e-06, 'epoch': 38.41} + 32%|███▏ | 1575/5000 [5:16:55<10:31:14, 11.06s/it][2022-12-20 15:39:30,710] [INFO] [timer.py:197:stop] 0/3190, RunningAvgSamplesPerSec=5.867180775106877, CurrSamplesPerSec=5.326864680265231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1576/5000 [5:17:06<10:30:05, 11.04s/it][2022-12-20 15:39:41,797] [INFO] [timer.py:197:stop] 0/3192, RunningAvgSamplesPerSec=5.86716179843869, CurrSamplesPerSec=5.296212445560194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1577/5000 [5:17:17<10:30:41, 11.06s/it][2022-12-20 15:39:52,752] [INFO] [timer.py:197:stop] 0/3194, RunningAvgSamplesPerSec=5.867187393996425, CurrSamplesPerSec=5.374365317034259, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1578/5000 [5:17:28<10:28:47, 11.03s/it][2022-12-20 15:40:03,775] [INFO] [timer.py:197:stop] 0/3196, RunningAvgSamplesPerSec=5.8671907073075715, CurrSamplesPerSec=5.30478078240873, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1579/5000 [5:17:39<10:28:34, 11.02s/it][2022-12-20 15:40:15,050] [INFO] [timer.py:197:stop] 0/3198, RunningAvgSamplesPerSec=5.867108892358042, CurrSamplesPerSec=5.105812858253662, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1580/5000 [5:17:50<10:32:40, 11.10s/it][2022-12-20 15:40:26,024] [INFO] [logging.py:68:log_dist] [Rank 0] step=1600, skipped=3, lr=[7.564444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:40:26,025] [INFO] [timer.py:197:stop] 0/3200, RunningAvgSamplesPerSec=5.867127992926814, CurrSamplesPerSec=5.364184537626203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1581/5000 [5:18:01<10:30:21, 11.06s/it][2022-12-20 15:40:37,000] [INFO] [timer.py:197:stop] 0/3202, RunningAvgSamplesPerSec=5.8671467671042, CurrSamplesPerSec=5.350589242518427, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1582/5000 [5:18:12<10:28:41, 11.04s/it][2022-12-20 15:40:48,082] [INFO] [timer.py:197:stop] 0/3204, RunningAvgSamplesPerSec=5.867130520231536, CurrSamplesPerSec=5.282294791750996, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1583/5000 [5:18:23<10:29:16, 11.05s/it][2022-12-20 15:40:59,129] [INFO] [timer.py:197:stop] 0/3206, RunningAvgSamplesPerSec=5.867130480542053, CurrSamplesPerSec=5.326724305164637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1584/5000 [5:18:34<10:29:03, 11.05s/it][2022-12-20 15:41:10,137] [INFO] [timer.py:197:stop] 0/3208, RunningAvgSamplesPerSec=5.86714413135889, CurrSamplesPerSec=5.347153887136331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1585/5000 [5:18:45<10:28:10, 11.04s/it][2022-12-20 15:41:21,452] [INFO] [timer.py:197:stop] 0/3210, RunningAvgSamplesPerSec=5.8670514666614375, CurrSamplesPerSec=5.084840765489478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1586/5000 [5:18:57<10:33:02, 11.13s/it][2022-12-20 15:41:32,564] [INFO] [timer.py:197:stop] 0/3212, RunningAvgSamplesPerSec=5.867064017678457, CurrSamplesPerSec=5.312880942030667, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1587/5000 [5:19:08<10:32:19, 11.12s/it][2022-12-20 15:41:43,598] [INFO] [timer.py:197:stop] 0/3214, RunningAvgSamplesPerSec=5.867068841035827, CurrSamplesPerSec=5.304525842572113, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1588/5000 [5:19:19<10:30:44, 11.09s/it][2022-12-20 15:41:54,773] [INFO] [timer.py:197:stop] 0/3216, RunningAvgSamplesPerSec=5.86702143610355, CurrSamplesPerSec=5.152992685137411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1589/5000 [5:19:30<10:32:10, 11.12s/it][2022-12-20 15:42:05,891] [INFO] [timer.py:197:stop] 0/3218, RunningAvgSamplesPerSec=5.867028760220661, CurrSamplesPerSec=5.319026645239607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1590/5000 [5:19:41<10:31:45, 11.12s/it][2022-12-20 15:42:16,922] [INFO] [logging.py:68:log_dist] [Rank 0] step=1610, skipped=3, lr=[7.542222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:42:16,924] [INFO] [timer.py:197:stop] 0/3220, RunningAvgSamplesPerSec=5.867028295885633, CurrSamplesPerSec=5.311424766522087, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1591/5000 [5:19:52<10:30:09, 11.09s/it][2022-12-20 15:42:28,112] [INFO] [timer.py:197:stop] 0/3222, RunningAvgSamplesPerSec=5.866980144467691, CurrSamplesPerSec=5.153907055611606, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1592/5000 [5:20:03<10:31:52, 11.12s/it][2022-12-20 15:42:39,277] [INFO] [timer.py:197:stop] 0/3224, RunningAvgSamplesPerSec=5.866989059831445, CurrSamplesPerSec=5.2976627999099435, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1593/5000 [5:20:14<10:32:07, 11.13s/it][2022-12-20 15:42:50,346] [INFO] [timer.py:197:stop] 0/3226, RunningAvgSamplesPerSec=5.866982210521485, CurrSamplesPerSec=5.3007815131898175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1594/5000 [5:20:25<10:30:51, 11.11s/it][2022-12-20 15:43:01,407] [INFO] [timer.py:197:stop] 0/3228, RunningAvgSamplesPerSec=5.8669723317190465, CurrSamplesPerSec=5.268118402867236, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1595/5000 [5:20:37<10:29:47, 11.10s/it][2022-12-20 15:43:12,752] [INFO] [timer.py:197:stop] 0/3230, RunningAvgSamplesPerSec=5.866945840496718, CurrSamplesPerSec=5.28435038458257, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1596/5000 [5:20:48<10:33:49, 11.17s/it][2022-12-20 15:43:23,814] [INFO] [timer.py:197:stop] 0/3232, RunningAvgSamplesPerSec=5.866935915107837, CurrSamplesPerSec=5.305842736223007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1597/5000 [5:20:59<10:31:45, 11.14s/it][2022-12-20 15:43:34,849] [INFO] [timer.py:197:stop] 0/3234, RunningAvgSamplesPerSec=5.86693479579333, CurrSamplesPerSec=5.278347816918693, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1598/5000 [5:21:10<10:29:48, 11.11s/it][2022-12-20 15:43:45,857] [INFO] [timer.py:197:stop] 0/3236, RunningAvgSamplesPerSec=5.866943393858082, CurrSamplesPerSec=5.335462784298479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1599/5000 [5:21:21<10:27:55, 11.08s/it][2022-12-20 15:43:55,988] [INFO] [timer.py:197:stop] 0/3238, RunningAvgSamplesPerSec=5.867239463671098, CurrSamplesPerSec=5.295807458195084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1600/5000 [5:21:36<11:34:58, 12.26s/it] {'loss': 0.0003, 'learning_rate': 7.5222222222222226e-06, 'epoch': 39.02} + 32%|███▏ | 1600/5000 [5:21:36<11:34:58, 12.26s/it][2022-12-20 15:44:07,093] [INFO] [logging.py:68:log_dist] [Rank 0] step=1620, skipped=3, lr=[7.520000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:44:07,094] [INFO] [timer.py:197:stop] 0/3240, RunningAvgSamplesPerSec=5.867214607599297, CurrSamplesPerSec=5.212364659041231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1601/5000 [5:21:47<11:16:38, 11.94s/it][2022-12-20 15:44:18,143] [INFO] [timer.py:197:stop] 0/3242, RunningAvgSamplesPerSec=5.867217130881242, CurrSamplesPerSec=5.3400947562580665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1602/5000 [5:21:58<10:59:51, 11.65s/it][2022-12-20 15:44:29,170] [INFO] [timer.py:197:stop] 0/3244, RunningAvgSamplesPerSec=5.867218672121656, CurrSamplesPerSec=5.288177787275803, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1603/5000 [5:22:09<10:49:38, 11.47s/it][2022-12-20 15:44:40,366] [INFO] [timer.py:197:stop] 0/3246, RunningAvgSamplesPerSec=5.867165421614602, CurrSamplesPerSec=5.177446780408352, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1604/5000 [5:22:21<10:49:42, 11.48s/it][2022-12-20 15:44:51,705] [INFO] [timer.py:197:stop] 0/3248, RunningAvgSamplesPerSec=5.867153315118779, CurrSamplesPerSec=5.3017388275477355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1605/5000 [5:22:32<10:42:11, 11.35s/it][2022-12-20 15:45:02,750] [INFO] [timer.py:197:stop] 0/3250, RunningAvgSamplesPerSec=5.867148549917278, CurrSamplesPerSec=5.303427742952233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1606/5000 [5:22:43<10:36:48, 11.26s/it][2022-12-20 15:45:14,057] [INFO] [timer.py:197:stop] 0/3252, RunningAvgSamplesPerSec=5.867059051494024, CurrSamplesPerSec=5.0872456552056535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1607/5000 [5:22:54<10:38:22, 11.29s/it][2022-12-20 15:45:25,104] [INFO] [timer.py:197:stop] 0/3254, RunningAvgSamplesPerSec=5.86707599163612, CurrSamplesPerSec=5.34845196515103, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1608/5000 [5:23:05<10:32:54, 11.20s/it][2022-12-20 15:45:36,104] [INFO] [timer.py:197:stop] 0/3256, RunningAvgSamplesPerSec=5.86708635248154, CurrSamplesPerSec=5.329766858661181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1609/5000 [5:23:16<10:28:59, 11.13s/it][2022-12-20 15:45:47,465] [INFO] [timer.py:197:stop] 0/3258, RunningAvgSamplesPerSec=5.866977081469949, CurrSamplesPerSec=5.007083340996541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1610/5000 [5:23:27<10:33:02, 11.20s/it][2022-12-20 15:45:58,417] [INFO] [logging.py:68:log_dist] [Rank 0] step=1630, skipped=3, lr=[7.4977777777777785e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:45:58,419] [INFO] [timer.py:197:stop] 0/3260, RunningAvgSamplesPerSec=5.867002709649023, CurrSamplesPerSec=5.363881412173787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1611/5000 [5:23:38<10:29:02, 11.14s/it][2022-12-20 15:46:09,454] [INFO] [timer.py:197:stop] 0/3262, RunningAvgSamplesPerSec=5.867001103606801, CurrSamplesPerSec=5.314345912204189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1612/5000 [5:23:50<10:27:24, 11.11s/it][2022-12-20 15:46:20,459] [INFO] [timer.py:197:stop] 0/3264, RunningAvgSamplesPerSec=5.867010073049012, CurrSamplesPerSec=5.356330853650448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1613/5000 [5:24:01<10:26:06, 11.09s/it][2022-12-20 15:46:31,534] [INFO] [timer.py:197:stop] 0/3266, RunningAvgSamplesPerSec=5.866995595406668, CurrSamplesPerSec=5.328680075274538, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1614/5000 [5:24:12<10:24:59, 11.07s/it][2022-12-20 15:46:42,595] [INFO] [timer.py:197:stop] 0/3268, RunningAvgSamplesPerSec=5.866985727428108, CurrSamplesPerSec=5.307075714612886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1615/5000 [5:24:23<10:24:07, 11.06s/it][2022-12-20 15:46:53,611] [INFO] [timer.py:197:stop] 0/3270, RunningAvgSamplesPerSec=5.866990808636021, CurrSamplesPerSec=5.323398903762646, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1616/5000 [5:24:34<10:23:43, 11.06s/it][2022-12-20 15:47:04,619] [INFO] [timer.py:197:stop] 0/3272, RunningAvgSamplesPerSec=5.866998901489133, CurrSamplesPerSec=5.361418234571171, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1617/5000 [5:24:45<10:22:21, 11.04s/it][2022-12-20 15:47:15,673] [INFO] [timer.py:197:stop] 0/3274, RunningAvgSamplesPerSec=5.86699155327955, CurrSamplesPerSec=5.3029602611430136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1618/5000 [5:24:56<10:22:22, 11.04s/it][2022-12-20 15:47:26,676] [INFO] [timer.py:197:stop] 0/3276, RunningAvgSamplesPerSec=5.8670009866415755, CurrSamplesPerSec=5.344717538126158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1619/5000 [5:25:07<10:21:26, 11.03s/it][2022-12-20 15:47:37,698] [INFO] [timer.py:197:stop] 0/3278, RunningAvgSamplesPerSec=5.8670042558495545, CurrSamplesPerSec=5.322593108786849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1620/5000 [5:25:18<10:20:45, 11.02s/it][2022-12-20 15:47:48,690] [INFO] [logging.py:68:log_dist] [Rank 0] step=1640, skipped=3, lr=[7.475555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:47:48,691] [INFO] [timer.py:197:stop] 0/3280, RunningAvgSamplesPerSec=5.867016854887844, CurrSamplesPerSec=5.3272296055775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1621/5000 [5:25:29<10:20:25, 11.02s/it][2022-12-20 15:47:59,677] [INFO] [timer.py:197:stop] 0/3282, RunningAvgSamplesPerSec=5.867031804107175, CurrSamplesPerSec=5.349545336749256, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1622/5000 [5:25:40<10:19:27, 11.00s/it][2022-12-20 15:48:10,639] [INFO] [timer.py:197:stop] 0/3284, RunningAvgSamplesPerSec=5.867054659997944, CurrSamplesPerSec=5.357792938355677, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1623/5000 [5:25:51<10:19:35, 11.01s/it][2022-12-20 15:48:21,705] [INFO] [timer.py:197:stop] 0/3286, RunningAvgSamplesPerSec=5.867043729721724, CurrSamplesPerSec=5.318710686565158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▏ | 1624/5000 [5:26:02<10:19:40, 11.01s/it][2022-12-20 15:48:32,687] [INFO] [timer.py:197:stop] 0/3288, RunningAvgSamplesPerSec=5.867060091493495, CurrSamplesPerSec=5.356515120458578, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 32%|███▎ | 1625/5000 [5:26:13<10:19:01, 11.00s/it] {'loss': 0.0003, 'learning_rate': 7.4666666666666675e-06, 'epoch': 39.63} + 32%|███▎ | 1625/5000 [5:26:13<10:19:01, 11.00s/it][2022-12-20 15:48:43,737] [INFO] [timer.py:197:stop] 0/3290, RunningAvgSamplesPerSec=5.867054133980714, CurrSamplesPerSec=5.298373006405052, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1626/5000 [5:26:24<10:19:46, 11.02s/it][2022-12-20 15:48:54,786] [INFO] [timer.py:197:stop] 0/3292, RunningAvgSamplesPerSec=5.867048637020643, CurrSamplesPerSec=5.30793937140706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1627/5000 [5:26:35<10:20:09, 11.03s/it][2022-12-20 15:49:05,820] [INFO] [timer.py:197:stop] 0/3294, RunningAvgSamplesPerSec=5.867047904217109, CurrSamplesPerSec=5.326971657824347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1628/5000 [5:26:46<10:21:55, 11.07s/it][2022-12-20 15:49:16,989] [INFO] [timer.py:197:stop] 0/3296, RunningAvgSamplesPerSec=5.867003141429918, CurrSamplesPerSec=5.307828958782243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1629/5000 [5:26:57<10:21:15, 11.06s/it][2022-12-20 15:49:28,023] [INFO] [timer.py:197:stop] 0/3298, RunningAvgSamplesPerSec=5.867002203236517, CurrSamplesPerSec=5.310883372103201, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1630/5000 [5:27:08<10:21:13, 11.06s/it][2022-12-20 15:49:39,060] [INFO] [logging.py:68:log_dist] [Rank 0] step=1650, skipped=3, lr=[7.453333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:49:39,061] [INFO] [timer.py:197:stop] 0/3300, RunningAvgSamplesPerSec=5.866999972675358, CurrSamplesPerSec=5.335414850831249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1631/5000 [5:27:19<10:19:46, 11.04s/it][2022-12-20 15:49:50,063] [INFO] [timer.py:197:stop] 0/3302, RunningAvgSamplesPerSec=5.867009506783383, CurrSamplesPerSec=5.321571280188643, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1632/5000 [5:27:30<10:19:49, 11.04s/it][2022-12-20 15:50:01,131] [INFO] [timer.py:197:stop] 0/3304, RunningAvgSamplesPerSec=5.866998024627807, CurrSamplesPerSec=5.307377070572465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1633/5000 [5:27:41<10:20:17, 11.05s/it][2022-12-20 15:50:12,227] [INFO] [timer.py:197:stop] 0/3306, RunningAvgSamplesPerSec=5.8669770335560925, CurrSamplesPerSec=5.29417956597485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1634/5000 [5:27:52<10:23:34, 11.12s/it][2022-12-20 15:50:23,463] [INFO] [timer.py:197:stop] 0/3308, RunningAvgSamplesPerSec=5.86691056327345, CurrSamplesPerSec=5.31471859450834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1635/5000 [5:28:03<10:21:14, 11.08s/it][2022-12-20 15:50:34,421] [INFO] [timer.py:197:stop] 0/3310, RunningAvgSamplesPerSec=5.866934121566561, CurrSamplesPerSec=5.339968554842867, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1636/5000 [5:28:14<10:19:31, 11.05s/it][2022-12-20 15:50:45,449] [INFO] [timer.py:197:stop] 0/3312, RunningAvgSamplesPerSec=5.86693569096524, CurrSamplesPerSec=5.304163182564428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1637/5000 [5:28:26<10:24:54, 11.15s/it][2022-12-20 15:50:56,802] [INFO] [timer.py:197:stop] 0/3314, RunningAvgSamplesPerSec=5.866831364847706, CurrSamplesPerSec=5.328584241233403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1638/5000 [5:28:37<10:22:25, 11.11s/it][2022-12-20 15:51:07,812] [INFO] [timer.py:197:stop] 0/3316, RunningAvgSamplesPerSec=5.866838339461951, CurrSamplesPerSec=5.330329256858428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1639/5000 [5:28:48<10:19:52, 11.07s/it][2022-12-20 15:51:18,799] [INFO] [timer.py:197:stop] 0/3318, RunningAvgSamplesPerSec=5.866852634178701, CurrSamplesPerSec=5.312430506200632, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1640/5000 [5:28:59<10:24:12, 11.15s/it][2022-12-20 15:51:29,210] [INFO] [logging.py:68:log_dist] [Rank 0] step=1660, skipped=3, lr=[7.431111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:51:29,212] [INFO] [timer.py:197:stop] 0/3320, RunningAvgSamplesPerSec=5.867049929712862, CurrSamplesPerSec=6.260503581192741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 15:51:40,187] [INFO] [timer.py:197:stop] 0/3322, RunningAvgSamplesPerSec=5.867067046209449, CurrSamplesPerSec=5.342708295807623, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1641/5000 [5:29:15<11:47:53, 12.64s/it][2022-12-20 15:51:51,207] [INFO] [timer.py:197:stop] 0/3324, RunningAvgSamplesPerSec=5.8670708194298005, CurrSamplesPerSec=5.32413714752481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1642/5000 [5:29:26<11:20:23, 12.16s/it][2022-12-20 15:52:02,373] [INFO] [timer.py:197:stop] 0/3326, RunningAvgSamplesPerSec=5.867027155249485, CurrSamplesPerSec=5.305424950110097, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1643/5000 [5:29:37<11:03:33, 11.86s/it][2022-12-20 15:52:13,374] [INFO] [timer.py:197:stop] 0/3328, RunningAvgSamplesPerSec=5.86703695720371, CurrSamplesPerSec=5.3382299622077785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1644/5000 [5:29:48<10:48:57, 11.60s/it][2022-12-20 15:52:24,424] [INFO] [timer.py:197:stop] 0/3330, RunningAvgSamplesPerSec=5.867031115838578, CurrSamplesPerSec=5.296681664714217, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1645/5000 [5:30:00<10:39:29, 11.44s/it][2022-12-20 15:52:35,800] [INFO] [timer.py:197:stop] 0/3332, RunningAvgSamplesPerSec=5.866919742317112, CurrSamplesPerSec=5.30389048651986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1646/5000 [5:30:11<10:38:16, 11.42s/it][2022-12-20 15:52:46,838] [INFO] [timer.py:197:stop] 0/3334, RunningAvgSamplesPerSec=5.8669172277168204, CurrSamplesPerSec=5.307308444026384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1647/5000 [5:30:22<10:31:43, 11.30s/it][2022-12-20 15:52:57,861] [INFO] [timer.py:197:stop] 0/3336, RunningAvgSamplesPerSec=5.866919954219362, CurrSamplesPerSec=5.3232327426792905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1648/5000 [5:30:33<10:26:48, 11.22s/it][2022-12-20 15:53:08,974] [INFO] [timer.py:197:stop] 0/3338, RunningAvgSamplesPerSec=5.866893396781395, CurrSamplesPerSec=5.347613639809135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1649/5000 [5:30:44<10:24:50, 11.19s/it][2022-12-20 15:53:20,009] [INFO] [logging.py:68:log_dist] [Rank 0] step=1670, skipped=3, lr=[7.40888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:53:20,010] [INFO] [timer.py:197:stop] 0/3340, RunningAvgSamplesPerSec=5.86689157467742, CurrSamplesPerSec=5.296723051952293, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1650/5000 [5:30:55<10:22:06, 11.14s/it] {'loss': 0.0003, 'learning_rate': 7.40888888888889e-06, 'epoch': 40.24} + 33%|███▎ | 1650/5000 [5:30:55<10:22:06, 11.14s/it][2022-12-20 15:53:30,986] [INFO] [timer.py:197:stop] 0/3342, RunningAvgSamplesPerSec=5.866910007618742, CurrSamplesPerSec=5.352008498600626, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1651/5000 [5:31:06<10:19:08, 11.09s/it][2022-12-20 15:53:42,174] [INFO] [timer.py:197:stop] 0/3344, RunningAvgSamplesPerSec=5.866859783396251, CurrSamplesPerSec=5.302444890400259, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1652/5000 [5:31:17<10:20:33, 11.12s/it][2022-12-20 15:53:53,226] [INFO] [timer.py:197:stop] 0/3346, RunningAvgSamplesPerSec=5.866853200118688, CurrSamplesPerSec=5.309491511651904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1653/5000 [5:31:28<10:19:12, 11.10s/it][2022-12-20 15:54:04,203] [INFO] [timer.py:197:stop] 0/3348, RunningAvgSamplesPerSec=5.866870879257763, CurrSamplesPerSec=5.344957837755523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1654/5000 [5:31:39<10:16:57, 11.06s/it][2022-12-20 15:54:15,227] [INFO] [timer.py:197:stop] 0/3350, RunningAvgSamplesPerSec=5.866873430967599, CurrSamplesPerSec=5.364052050010181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1655/5000 [5:31:50<10:16:07, 11.05s/it][2022-12-20 15:54:26,293] [INFO] [timer.py:197:stop] 0/3352, RunningAvgSamplesPerSec=5.866862451849964, CurrSamplesPerSec=5.289379013554706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1656/5000 [5:32:01<10:16:11, 11.06s/it][2022-12-20 15:54:37,252] [INFO] [timer.py:197:stop] 0/3354, RunningAvgSamplesPerSec=5.86688604176015, CurrSamplesPerSec=5.345163886884828, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1657/5000 [5:32:12<10:14:23, 11.03s/it][2022-12-20 15:54:48,526] [INFO] [timer.py:197:stop] 0/3356, RunningAvgSamplesPerSec=5.866808674888217, CurrSamplesPerSec=5.318992707918792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1658/5000 [5:32:24<10:18:19, 11.10s/it][2022-12-20 15:54:59,570] [INFO] [timer.py:197:stop] 0/3358, RunningAvgSamplesPerSec=5.866805271501486, CurrSamplesPerSec=5.3131995737175775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1659/5000 [5:32:35<10:17:10, 11.08s/it][2022-12-20 15:55:10,556] [INFO] [logging.py:68:log_dist] [Rank 0] step=1680, skipped=3, lr=[7.386666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:55:10,558] [INFO] [timer.py:197:stop] 0/3360, RunningAvgSamplesPerSec=5.866819183279674, CurrSamplesPerSec=5.3356748896233634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1660/5000 [5:32:46<10:15:24, 11.06s/it][2022-12-20 15:55:21,791] [INFO] [timer.py:197:stop] 0/3362, RunningAvgSamplesPerSec=5.866754658964823, CurrSamplesPerSec=5.2952943124057175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1661/5000 [5:32:57<10:18:11, 11.11s/it][2022-12-20 15:55:32,774] [INFO] [timer.py:197:stop] 0/3364, RunningAvgSamplesPerSec=5.866770100969544, CurrSamplesPerSec=5.332643809995742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1662/5000 [5:33:08<10:15:54, 11.07s/it][2022-12-20 15:55:43,828] [INFO] [timer.py:197:stop] 0/3366, RunningAvgSamplesPerSec=5.866763078028928, CurrSamplesPerSec=5.297346237995616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1663/5000 [5:33:19<10:15:26, 11.07s/it][2022-12-20 15:55:54,886] [INFO] [timer.py:197:stop] 0/3368, RunningAvgSamplesPerSec=5.866759937331425, CurrSamplesPerSec=5.323342108019666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1664/5000 [5:33:30<10:15:07, 11.06s/it][2022-12-20 15:56:05,862] [INFO] [timer.py:197:stop] 0/3370, RunningAvgSamplesPerSec=5.866777545837157, CurrSamplesPerSec=5.327645969034905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1665/5000 [5:33:41<10:13:29, 11.04s/it][2022-12-20 15:56:16,908] [INFO] [timer.py:197:stop] 0/3372, RunningAvgSamplesPerSec=5.866772870137784, CurrSamplesPerSec=5.3051374457817, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1666/5000 [5:33:52<10:13:27, 11.04s/it][2022-12-20 15:56:27,989] [INFO] [timer.py:197:stop] 0/3374, RunningAvgSamplesPerSec=5.866762662477689, CurrSamplesPerSec=5.304680145335362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1667/5000 [5:34:03<10:13:56, 11.05s/it][2022-12-20 15:56:38,977] [INFO] [timer.py:197:stop] 0/3376, RunningAvgSamplesPerSec=5.8667765699747125, CurrSamplesPerSec=5.360375879657484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1668/5000 [5:34:14<10:12:41, 11.03s/it][2022-12-20 15:56:49,944] [INFO] [timer.py:197:stop] 0/3378, RunningAvgSamplesPerSec=5.866797242380901, CurrSamplesPerSec=5.3272900789112745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1669/5000 [5:34:25<10:11:24, 11.01s/it][2022-12-20 15:57:01,048] [INFO] [logging.py:68:log_dist] [Rank 0] step=1690, skipped=3, lr=[7.364444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:57:01,049] [INFO] [timer.py:197:stop] 0/3380, RunningAvgSamplesPerSec=5.866773808021009, CurrSamplesPerSec=5.299749835095702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1670/5000 [5:34:36<10:12:45, 11.04s/it][2022-12-20 15:57:12,043] [INFO] [timer.py:197:stop] 0/3382, RunningAvgSamplesPerSec=5.86678574973704, CurrSamplesPerSec=5.361083514808536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1671/5000 [5:34:47<10:11:48, 11.03s/it][2022-12-20 15:57:23,094] [INFO] [timer.py:197:stop] 0/3384, RunningAvgSamplesPerSec=5.8667798347917115, CurrSamplesPerSec=5.300819196193898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1672/5000 [5:34:58<10:12:00, 11.03s/it][2022-12-20 15:57:34,169] [INFO] [timer.py:197:stop] 0/3386, RunningAvgSamplesPerSec=5.86676600368796, CurrSamplesPerSec=5.335087187143136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1673/5000 [5:35:09<10:12:31, 11.05s/it][2022-12-20 15:57:45,186] [INFO] [timer.py:197:stop] 0/3388, RunningAvgSamplesPerSec=5.866770974975621, CurrSamplesPerSec=5.344950387929622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 33%|███▎ | 1674/5000 [5:35:20<10:11:50, 11.04s/it][2022-12-20 15:57:56,229] [INFO] [timer.py:197:stop] 0/3390, RunningAvgSamplesPerSec=5.866767455651793, CurrSamplesPerSec=5.303629345032969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1675/5000 [5:35:31<10:11:45, 11.04s/it] {'loss': 0.0003, 'learning_rate': 7.353333333333334e-06, 'epoch': 40.84} + 34%|███▎ | 1675/5000 [5:35:31<10:11:45, 11.04s/it][2022-12-20 15:58:07,248] [INFO] [timer.py:197:stop] 0/3392, RunningAvgSamplesPerSec=5.866771905898879, CurrSamplesPerSec=5.333319134220203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1676/5000 [5:35:42<10:11:13, 11.03s/it][2022-12-20 15:58:18,253] [INFO] [timer.py:197:stop] 0/3394, RunningAvgSamplesPerSec=5.866780427745965, CurrSamplesPerSec=5.350512881771158, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1677/5000 [5:35:53<10:10:34, 11.02s/it][2022-12-20 15:58:29,258] [INFO] [timer.py:197:stop] 0/3396, RunningAvgSamplesPerSec=5.866788779014053, CurrSamplesPerSec=5.3393452838247, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1678/5000 [5:36:04<10:10:04, 11.02s/it][2022-12-20 15:58:40,297] [INFO] [timer.py:197:stop] 0/3398, RunningAvgSamplesPerSec=5.866786278412618, CurrSamplesPerSec=5.344037622152068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1679/5000 [5:36:15<10:10:13, 11.02s/it][2022-12-20 15:58:51,283] [INFO] [logging.py:68:log_dist] [Rank 0] step=1700, skipped=3, lr=[7.342222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 15:58:51,284] [INFO] [timer.py:197:stop] 0/3400, RunningAvgSamplesPerSec=5.866800303620241, CurrSamplesPerSec=5.35636655168229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1680/5000 [5:36:26<10:09:24, 11.01s/it][2022-12-20 15:59:02,316] [INFO] [timer.py:197:stop] 0/3402, RunningAvgSamplesPerSec=5.866799861282996, CurrSamplesPerSec=5.322659387139845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1681/5000 [5:36:37<10:09:32, 11.02s/it][2022-12-20 15:59:12,454] [INFO] [timer.py:197:stop] 0/3404, RunningAvgSamplesPerSec=5.867079523543837, CurrSamplesPerSec=5.329056251916241, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1682/5000 [5:36:53<11:16:56, 12.24s/it][2022-12-20 15:59:23,497] [INFO] [timer.py:197:stop] 0/3406, RunningAvgSamplesPerSec=5.867075656315859, CurrSamplesPerSec=5.311733974108202, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1683/5000 [5:37:04<10:56:35, 11.88s/it][2022-12-20 15:59:34,504] [INFO] [timer.py:197:stop] 0/3408, RunningAvgSamplesPerSec=5.867082976361793, CurrSamplesPerSec=5.328866464373828, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1684/5000 [5:37:15<10:42:04, 11.62s/it][2022-12-20 15:59:45,475] [INFO] [timer.py:197:stop] 0/3410, RunningAvgSamplesPerSec=5.86710159900228, CurrSamplesPerSec=5.36688008726626, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1685/5000 [5:37:26<10:31:12, 11.42s/it][2022-12-20 15:59:56,476] [INFO] [timer.py:197:stop] 0/3412, RunningAvgSamplesPerSec=5.8671108087659904, CurrSamplesPerSec=5.342346988245683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1686/5000 [5:37:36<10:23:19, 11.29s/it][2022-12-20 16:00:07,467] [INFO] [timer.py:197:stop] 0/3414, RunningAvgSamplesPerSec=5.867123191414505, CurrSamplesPerSec=5.314206827347002, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▎ | 1687/5000 [5:37:48<10:19:03, 11.21s/it][2022-12-20 16:00:18,514] [INFO] [timer.py:197:stop] 0/3416, RunningAvgSamplesPerSec=5.867117852702607, CurrSamplesPerSec=5.307745417681593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1688/5000 [5:37:59<10:16:06, 11.16s/it][2022-12-20 16:00:29,511] [INFO] [timer.py:197:stop] 0/3418, RunningAvgSamplesPerSec=5.867128167001346, CurrSamplesPerSec=5.34911914792693, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1689/5000 [5:38:09<10:11:59, 11.09s/it][2022-12-20 16:00:40,519] [INFO] [logging.py:68:log_dist] [Rank 0] step=1710, skipped=3, lr=[7.32e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:00:40,520] [INFO] [timer.py:197:stop] 0/3420, RunningAvgSamplesPerSec=5.867135298763588, CurrSamplesPerSec=5.275708508357985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1690/5000 [5:38:21<10:12:10, 11.10s/it][2022-12-20 16:00:51,574] [INFO] [timer.py:197:stop] 0/3422, RunningAvgSamplesPerSec=5.867132899307006, CurrSamplesPerSec=5.32589425607541, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1691/5000 [5:38:32<10:10:25, 11.07s/it][2022-12-20 16:01:02,597] [INFO] [timer.py:197:stop] 0/3424, RunningAvgSamplesPerSec=5.867136115388486, CurrSamplesPerSec=5.309805955786789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1692/5000 [5:38:43<10:10:03, 11.07s/it][2022-12-20 16:01:13,684] [INFO] [timer.py:197:stop] 0/3426, RunningAvgSamplesPerSec=5.867117980474201, CurrSamplesPerSec=5.280994546696535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1693/5000 [5:38:54<10:09:47, 11.06s/it][2022-12-20 16:01:24,696] [INFO] [timer.py:197:stop] 0/3428, RunningAvgSamplesPerSec=5.867123741437511, CurrSamplesPerSec=5.323413050102763, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1694/5000 [5:39:05<10:09:17, 11.06s/it][2022-12-20 16:01:35,741] [INFO] [timer.py:197:stop] 0/3430, RunningAvgSamplesPerSec=5.867119110127389, CurrSamplesPerSec=5.3240250040629, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1695/5000 [5:39:16<10:07:56, 11.04s/it][2022-12-20 16:01:46,836] [INFO] [timer.py:197:stop] 0/3432, RunningAvgSamplesPerSec=5.867099109953734, CurrSamplesPerSec=5.230543069489109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1696/5000 [5:39:27<10:08:59, 11.06s/it][2022-12-20 16:01:57,811] [INFO] [timer.py:197:stop] 0/3434, RunningAvgSamplesPerSec=5.867116230895215, CurrSamplesPerSec=5.3497376662982665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1697/5000 [5:39:38<10:07:22, 11.03s/it][2022-12-20 16:02:08,843] [INFO] [timer.py:197:stop] 0/3436, RunningAvgSamplesPerSec=5.867115586251071, CurrSamplesPerSec=5.2971902709387795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1698/5000 [5:39:49<10:07:35, 11.04s/it][2022-12-20 16:02:19,912] [INFO] [timer.py:197:stop] 0/3438, RunningAvgSamplesPerSec=5.867103056385145, CurrSamplesPerSec=5.286448180075338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1699/5000 [5:40:00<10:07:48, 11.05s/it][2022-12-20 16:02:30,928] [INFO] [logging.py:68:log_dist] [Rank 0] step=1720, skipped=3, lr=[7.297777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:02:30,930] [INFO] [timer.py:197:stop] 0/3440, RunningAvgSamplesPerSec=5.867106655356168, CurrSamplesPerSec=5.327902713120213, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1700/5000 [5:40:11<10:07:06, 11.04s/it] {'loss': 0.0003, 'learning_rate': 7.297777777777778e-06, 'epoch': 41.46} + 34%|███▍ | 1700/5000 [5:40:11<10:07:06, 11.04s/it][2022-12-20 16:02:41,971] [INFO] [timer.py:197:stop] 0/3442, RunningAvgSamplesPerSec=5.867103893097264, CurrSamplesPerSec=5.3084610597104795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1701/5000 [5:40:22<10:06:56, 11.04s/it][2022-12-20 16:02:53,070] [INFO] [timer.py:197:stop] 0/3444, RunningAvgSamplesPerSec=5.8670821997854725, CurrSamplesPerSec=5.2537850929094505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1702/5000 [5:40:33<10:08:01, 11.06s/it][2022-12-20 16:03:04,157] [INFO] [timer.py:197:stop] 0/3446, RunningAvgSamplesPerSec=5.867065006970918, CurrSamplesPerSec=5.280127172099811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1703/5000 [5:40:44<10:07:48, 11.06s/it][2022-12-20 16:03:15,219] [INFO] [timer.py:197:stop] 0/3448, RunningAvgSamplesPerSec=5.867056278394256, CurrSamplesPerSec=5.279830979608147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1704/5000 [5:40:55<10:07:48, 11.06s/it][2022-12-20 16:03:26,284] [INFO] [timer.py:197:stop] 0/3450, RunningAvgSamplesPerSec=5.867045188108756, CurrSamplesPerSec=5.282643239808394, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1705/5000 [5:41:06<10:07:00, 11.05s/it][2022-12-20 16:03:37,235] [INFO] [timer.py:197:stop] 0/3452, RunningAvgSamplesPerSec=5.867069530550082, CurrSamplesPerSec=5.350043033856596, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1706/5000 [5:41:17<10:05:34, 11.03s/it][2022-12-20 16:03:48,230] [INFO] [timer.py:197:stop] 0/3454, RunningAvgSamplesPerSec=5.867080637491387, CurrSamplesPerSec=5.3358267673963145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1707/5000 [5:41:28<10:05:17, 11.03s/it][2022-12-20 16:03:59,540] [INFO] [timer.py:197:stop] 0/3456, RunningAvgSamplesPerSec=5.866993210407371, CurrSamplesPerSec=5.092445190596961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1708/5000 [5:41:40<10:09:26, 11.11s/it][2022-12-20 16:04:10,533] [INFO] [timer.py:197:stop] 0/3458, RunningAvgSamplesPerSec=5.867004910643209, CurrSamplesPerSec=5.346908064809192, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1709/5000 [5:41:51<10:06:35, 11.06s/it][2022-12-20 16:04:21,514] [INFO] [logging.py:68:log_dist] [Rank 0] step=1730, skipped=3, lr=[7.2755555555555554e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:04:21,516] [INFO] [timer.py:197:stop] 0/3460, RunningAvgSamplesPerSec=5.86702412783182, CurrSamplesPerSec=5.314215454183853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1710/5000 [5:42:02<10:06:39, 11.06s/it][2022-12-20 16:04:32,577] [INFO] [timer.py:197:stop] 0/3462, RunningAvgSamplesPerSec=5.867015131148, CurrSamplesPerSec=5.328174922711971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1711/5000 [5:42:13<10:05:08, 11.04s/it][2022-12-20 16:04:43,604] [INFO] [timer.py:197:stop] 0/3464, RunningAvgSamplesPerSec=5.867015909501113, CurrSamplesPerSec=5.286568740992702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1712/5000 [5:42:24<10:05:34, 11.05s/it][2022-12-20 16:04:54,579] [INFO] [timer.py:197:stop] 0/3466, RunningAvgSamplesPerSec=5.867036179172931, CurrSamplesPerSec=5.377537100004852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1713/5000 [5:42:35<10:04:19, 11.03s/it][2022-12-20 16:05:05,663] [INFO] [timer.py:197:stop] 0/3468, RunningAvgSamplesPerSec=5.867019779969231, CurrSamplesPerSec=5.291153508336211, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1714/5000 [5:42:46<10:04:13, 11.03s/it][2022-12-20 16:05:16,616] [INFO] [timer.py:197:stop] 0/3470, RunningAvgSamplesPerSec=5.86704756131417, CurrSamplesPerSec=5.363740151123999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1715/5000 [5:42:57<10:03:08, 11.02s/it][2022-12-20 16:05:27,602] [INFO] [timer.py:197:stop] 0/3472, RunningAvgSamplesPerSec=5.867061725803188, CurrSamplesPerSec=5.357493314895133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1716/5000 [5:43:08<10:02:26, 11.01s/it][2022-12-20 16:05:38,639] [INFO] [timer.py:197:stop] 0/3474, RunningAvgSamplesPerSec=5.8670598794668445, CurrSamplesPerSec=5.310546527478033, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1717/5000 [5:43:19<10:03:06, 11.02s/it][2022-12-20 16:05:49,655] [INFO] [timer.py:197:stop] 0/3476, RunningAvgSamplesPerSec=5.867064609861847, CurrSamplesPerSec=5.348741838816351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1718/5000 [5:43:30<10:02:26, 11.01s/it][2022-12-20 16:06:00,694] [INFO] [timer.py:197:stop] 0/3478, RunningAvgSamplesPerSec=5.8670666838942465, CurrSamplesPerSec=5.307431007666168, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1719/5000 [5:43:41<10:02:50, 11.02s/it][2022-12-20 16:06:11,788] [INFO] [logging.py:68:log_dist] [Rank 0] step=1740, skipped=3, lr=[7.253333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:06:11,790] [INFO] [timer.py:197:stop] 0/3480, RunningAvgSamplesPerSec=5.867046585928956, CurrSamplesPerSec=5.266333286144012, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1720/5000 [5:43:52<10:04:07, 11.05s/it][2022-12-20 16:06:22,793] [INFO] [timer.py:197:stop] 0/3482, RunningAvgSamplesPerSec=5.867055216136801, CurrSamplesPerSec=5.364685176942479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1721/5000 [5:44:03<10:02:28, 11.02s/it][2022-12-20 16:06:33,820] [INFO] [timer.py:197:stop] 0/3484, RunningAvgSamplesPerSec=5.867056337121522, CurrSamplesPerSec=5.306165139256972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1722/5000 [5:44:14<10:02:51, 11.03s/it][2022-12-20 16:06:44,027] [INFO] [timer.py:197:stop] 0/3486, RunningAvgSamplesPerSec=5.867307515851994, CurrSamplesPerSec=6.167097632570031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 16:06:55,055] [INFO] [timer.py:197:stop] 0/3488, RunningAvgSamplesPerSec=5.867307366416999, CurrSamplesPerSec=5.319168090538513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1723/5000 [5:44:30<11:28:27, 12.61s/it][2022-12-20 16:07:06,023] [INFO] [timer.py:197:stop] 0/3490, RunningAvgSamplesPerSec=5.867326994845612, CurrSamplesPerSec=5.376976974875648, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1724/5000 [5:44:41<11:01:25, 12.11s/it][2022-12-20 16:07:17,057] [INFO] [timer.py:197:stop] 0/3492, RunningAvgSamplesPerSec=5.867326367003451, CurrSamplesPerSec=5.335825070390194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 34%|███▍ | 1725/5000 [5:44:52<10:43:32, 11.79s/it] {'loss': 0.0003, 'learning_rate': 7.24e-06, 'epoch': 42.07} + 34%|███▍ | 1725/5000 [5:44:52<10:43:32, 11.79s/it][2022-12-20 16:07:28,070] [INFO] [timer.py:197:stop] 0/3494, RunningAvgSamplesPerSec=5.8673325273995465, CurrSamplesPerSec=5.316817618065792, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1726/5000 [5:45:03<10:30:38, 11.56s/it][2022-12-20 16:07:39,087] [INFO] [timer.py:197:stop] 0/3496, RunningAvgSamplesPerSec=5.867337238415593, CurrSamplesPerSec=5.331703048747376, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1727/5000 [5:45:14<10:21:35, 11.40s/it][2022-12-20 16:07:50,075] [INFO] [timer.py:197:stop] 0/3498, RunningAvgSamplesPerSec=5.86735077812428, CurrSamplesPerSec=5.357768342733404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1728/5000 [5:45:25<10:14:44, 11.27s/it][2022-12-20 16:08:01,073] [INFO] [logging.py:68:log_dist] [Rank 0] step=1750, skipped=3, lr=[7.231111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:08:01,074] [INFO] [timer.py:197:stop] 0/3500, RunningAvgSamplesPerSec=5.867360417984349, CurrSamplesPerSec=5.352733565509722, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1729/5000 [5:45:36<10:10:05, 11.19s/it][2022-12-20 16:08:12,099] [INFO] [timer.py:197:stop] 0/3502, RunningAvgSamplesPerSec=5.867362219554452, CurrSamplesPerSec=5.323440920724167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1730/5000 [5:45:47<10:07:11, 11.14s/it][2022-12-20 16:08:23,169] [INFO] [timer.py:197:stop] 0/3504, RunningAvgSamplesPerSec=5.867350506540717, CurrSamplesPerSec=5.301670974989135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1731/5000 [5:45:58<10:05:50, 11.12s/it][2022-12-20 16:08:34,166] [INFO] [timer.py:197:stop] 0/3506, RunningAvgSamplesPerSec=5.867361741987509, CurrSamplesPerSec=5.337939103829883, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1732/5000 [5:46:09<10:03:38, 11.08s/it][2022-12-20 16:08:45,214] [INFO] [timer.py:197:stop] 0/3508, RunningAvgSamplesPerSec=5.867356261126641, CurrSamplesPerSec=5.307091243261932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1733/5000 [5:46:20<10:02:53, 11.07s/it][2022-12-20 16:08:56,248] [INFO] [timer.py:197:stop] 0/3510, RunningAvgSamplesPerSec=5.86736042817816, CurrSamplesPerSec=5.325260109754112, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1734/5000 [5:46:31<10:02:04, 11.06s/it][2022-12-20 16:09:07,316] [INFO] [timer.py:197:stop] 0/3512, RunningAvgSamplesPerSec=5.8673541519529815, CurrSamplesPerSec=5.310776409323368, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1735/5000 [5:46:42<10:02:00, 11.06s/it][2022-12-20 16:09:18,319] [INFO] [timer.py:197:stop] 0/3514, RunningAvgSamplesPerSec=5.8673628485629905, CurrSamplesPerSec=5.320781858165661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1736/5000 [5:46:53<10:00:51, 11.05s/it][2022-12-20 16:09:29,364] [INFO] [timer.py:197:stop] 0/3516, RunningAvgSamplesPerSec=5.8673638928230805, CurrSamplesPerSec=5.335972926098386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1737/5000 [5:47:04<10:00:39, 11.04s/it][2022-12-20 16:09:40,436] [INFO] [timer.py:197:stop] 0/3518, RunningAvgSamplesPerSec=5.867351391670739, CurrSamplesPerSec=5.257804771763849, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1738/5000 [5:47:16<10:00:55, 11.05s/it][2022-12-20 16:09:51,488] [INFO] [logging.py:68:log_dist] [Rank 0] step=1760, skipped=3, lr=[7.20888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:09:51,489] [INFO] [timer.py:197:stop] 0/3520, RunningAvgSamplesPerSec=5.867344446400452, CurrSamplesPerSec=5.302402785266356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1739/5000 [5:47:27<10:00:44, 11.05s/it][2022-12-20 16:10:02,530] [INFO] [timer.py:197:stop] 0/3522, RunningAvgSamplesPerSec=5.8673469009264805, CurrSamplesPerSec=5.28829905218775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1740/5000 [5:47:38<10:00:21, 11.05s/it][2022-12-20 16:10:13,599] [INFO] [timer.py:197:stop] 0/3524, RunningAvgSamplesPerSec=5.867340676590109, CurrSamplesPerSec=5.315060388653837, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1741/5000 [5:47:49<10:00:29, 11.06s/it][2022-12-20 16:10:24,616] [INFO] [timer.py:197:stop] 0/3526, RunningAvgSamplesPerSec=5.867344985008762, CurrSamplesPerSec=5.307992690174752, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1742/5000 [5:48:00<9:59:40, 11.04s/it] [2022-12-20 16:10:35,625] [INFO] [timer.py:197:stop] 0/3528, RunningAvgSamplesPerSec=5.867351714819881, CurrSamplesPerSec=5.316517926816032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1743/5000 [5:48:11<9:58:55, 11.03s/it][2022-12-20 16:10:46,671] [INFO] [timer.py:197:stop] 0/3530, RunningAvgSamplesPerSec=5.867352419472016, CurrSamplesPerSec=5.300346313320061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1744/5000 [5:48:22<9:58:57, 11.04s/it][2022-12-20 16:10:57,726] [INFO] [timer.py:197:stop] 0/3532, RunningAvgSamplesPerSec=5.867345420424728, CurrSamplesPerSec=5.326640591005977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1745/5000 [5:48:33<9:59:03, 11.04s/it][2022-12-20 16:11:08,736] [INFO] [timer.py:197:stop] 0/3534, RunningAvgSamplesPerSec=5.867351700807187, CurrSamplesPerSec=5.343238330338344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1746/5000 [5:48:44<9:58:20, 11.03s/it][2022-12-20 16:11:19,756] [INFO] [timer.py:197:stop] 0/3536, RunningAvgSamplesPerSec=5.867360731575583, CurrSamplesPerSec=5.356464242741094, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1747/5000 [5:48:55<9:57:56, 11.03s/it][2022-12-20 16:11:30,781] [INFO] [timer.py:197:stop] 0/3538, RunningAvgSamplesPerSec=5.867362170290342, CurrSamplesPerSec=5.313764160924343, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1748/5000 [5:49:06<9:57:43, 11.03s/it][2022-12-20 16:11:41,737] [INFO] [logging.py:68:log_dist] [Rank 0] step=1770, skipped=3, lr=[7.186666666666668e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:11:41,738] [INFO] [timer.py:197:stop] 0/3540, RunningAvgSamplesPerSec=5.867386589578564, CurrSamplesPerSec=5.3594370794750255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▍ | 1749/5000 [5:49:17<9:56:22, 11.01s/it][2022-12-20 16:11:52,799] [INFO] [timer.py:197:stop] 0/3542, RunningAvgSamplesPerSec=5.86737765083096, CurrSamplesPerSec=5.320919810811181, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1750/5000 [5:49:28<9:57:04, 11.02s/it] {'loss': 0.0003, 'learning_rate': 7.184444444444445e-06, 'epoch': 42.67} + 35%|███▌ | 1750/5000 [5:49:28<9:57:04, 11.02s/it][2022-12-20 16:12:03,837] [INFO] [timer.py:197:stop] 0/3544, RunningAvgSamplesPerSec=5.86737590577841, CurrSamplesPerSec=5.314646831740549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1751/5000 [5:49:39<9:57:08, 11.03s/it][2022-12-20 16:12:14,821] [INFO] [timer.py:197:stop] 0/3546, RunningAvgSamplesPerSec=5.867392357996859, CurrSamplesPerSec=5.3599719369450245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1752/5000 [5:49:50<9:56:13, 11.01s/it][2022-12-20 16:12:25,971] [INFO] [timer.py:197:stop] 0/3548, RunningAvgSamplesPerSec=5.867355870596253, CurrSamplesPerSec=5.220298132220232, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1753/5000 [5:50:01<9:58:15, 11.06s/it][2022-12-20 16:12:37,030] [INFO] [timer.py:197:stop] 0/3550, RunningAvgSamplesPerSec=5.867347242026122, CurrSamplesPerSec=5.316565942536702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1754/5000 [5:50:12<9:58:09, 11.06s/it][2022-12-20 16:12:48,091] [INFO] [timer.py:197:stop] 0/3552, RunningAvgSamplesPerSec=5.867343450082506, CurrSamplesPerSec=5.310175690583778, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1755/5000 [5:50:23<9:58:01, 11.06s/it][2022-12-20 16:12:59,086] [INFO] [timer.py:197:stop] 0/3554, RunningAvgSamplesPerSec=5.867354390696369, CurrSamplesPerSec=5.327222627973488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1756/5000 [5:50:34<9:56:49, 11.04s/it][2022-12-20 16:13:10,110] [INFO] [timer.py:197:stop] 0/3556, RunningAvgSamplesPerSec=5.867356406948019, CurrSamplesPerSec=5.305559800584736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1757/5000 [5:50:45<9:56:24, 11.03s/it][2022-12-20 16:13:21,154] [INFO] [timer.py:197:stop] 0/3558, RunningAvgSamplesPerSec=5.8673526526614355, CurrSamplesPerSec=5.322515223277781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1758/5000 [5:50:56<9:56:22, 11.04s/it][2022-12-20 16:13:32,165] [INFO] [logging.py:68:log_dist] [Rank 0] step=1780, skipped=3, lr=[7.164444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:13:32,167] [INFO] [timer.py:197:stop] 0/3560, RunningAvgSamplesPerSec=5.867358384820443, CurrSamplesPerSec=5.319745965345125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1759/5000 [5:51:07<9:55:47, 11.03s/it][2022-12-20 16:13:43,236] [INFO] [timer.py:197:stop] 0/3562, RunningAvgSamplesPerSec=5.867346569984162, CurrSamplesPerSec=5.313920895116661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1760/5000 [5:51:18<9:56:15, 11.04s/it][2022-12-20 16:13:54,264] [INFO] [timer.py:197:stop] 0/3564, RunningAvgSamplesPerSec=5.867351644557277, CurrSamplesPerSec=5.337596695867524, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1761/5000 [5:51:29<9:55:50, 11.04s/it][2022-12-20 16:14:05,357] [INFO] [timer.py:197:stop] 0/3566, RunningAvgSamplesPerSec=5.867333113542462, CurrSamplesPerSec=5.298407099433031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1762/5000 [5:51:40<9:56:33, 11.05s/it][2022-12-20 16:14:16,411] [INFO] [timer.py:197:stop] 0/3568, RunningAvgSamplesPerSec=5.8673263654851615, CurrSamplesPerSec=5.291144539036019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1763/5000 [5:51:52<9:56:23, 11.05s/it][2022-12-20 16:14:26,551] [INFO] [timer.py:197:stop] 0/3570, RunningAvgSamplesPerSec=5.867596458832462, CurrSamplesPerSec=5.317581425407733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1764/5000 [5:52:07<11:00:42, 12.25s/it][2022-12-20 16:14:37,623] [INFO] [timer.py:197:stop] 0/3572, RunningAvgSamplesPerSec=5.867583405405419, CurrSamplesPerSec=5.240314497593815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1765/5000 [5:52:18<10:42:04, 11.91s/it][2022-12-20 16:14:48,598] [INFO] [timer.py:197:stop] 0/3574, RunningAvgSamplesPerSec=5.8676001486299985, CurrSamplesPerSec=5.359767741063648, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1766/5000 [5:52:29<10:27:05, 11.63s/it][2022-12-20 16:14:59,602] [INFO] [timer.py:197:stop] 0/3576, RunningAvgSamplesPerSec=5.867607874296465, CurrSamplesPerSec=5.351342088439861, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1767/5000 [5:52:40<10:16:05, 11.43s/it][2022-12-20 16:15:10,639] [INFO] [timer.py:197:stop] 0/3578, RunningAvgSamplesPerSec=5.867605761137408, CurrSamplesPerSec=5.288008817437093, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1768/5000 [5:52:51<10:09:29, 11.31s/it][2022-12-20 16:15:21,644] [INFO] [logging.py:68:log_dist] [Rank 0] step=1790, skipped=3, lr=[7.142222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:15:21,645] [INFO] [timer.py:197:stop] 0/3580, RunningAvgSamplesPerSec=5.8676176118390355, CurrSamplesPerSec=5.3152957133762815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1769/5000 [5:53:02<10:04:38, 11.23s/it][2022-12-20 16:15:32,639] [INFO] [timer.py:197:stop] 0/3582, RunningAvgSamplesPerSec=5.867628478955337, CurrSamplesPerSec=5.3450721419737635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1770/5000 [5:53:13<10:00:41, 11.16s/it][2022-12-20 16:15:43,718] [INFO] [timer.py:197:stop] 0/3584, RunningAvgSamplesPerSec=5.867613445663599, CurrSamplesPerSec=5.270697754492382, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1771/5000 [5:53:24<9:59:25, 11.14s/it] [2022-12-20 16:15:54,720] [INFO] [timer.py:197:stop] 0/3586, RunningAvgSamplesPerSec=5.86762205608351, CurrSamplesPerSec=5.3497896957368205, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1772/5000 [5:53:35<9:57:02, 11.10s/it][2022-12-20 16:16:05,717] [INFO] [timer.py:197:stop] 0/3588, RunningAvgSamplesPerSec=5.867635507783562, CurrSamplesPerSec=5.354506398867133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1773/5000 [5:53:46<9:55:00, 11.06s/it][2022-12-20 16:16:16,702] [INFO] [timer.py:197:stop] 0/3590, RunningAvgSamplesPerSec=5.867649107553814, CurrSamplesPerSec=5.352905843137422, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 35%|███▌ | 1774/5000 [5:53:57<9:54:00, 11.05s/it][2022-12-20 16:16:27,718] [INFO] [timer.py:197:stop] 0/3592, RunningAvgSamplesPerSec=5.867653749220247, CurrSamplesPerSec=5.3501549964784125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1775/5000 [5:54:08<9:52:18, 11.02s/it] {'loss': 0.0003, 'learning_rate': 7.12888888888889e-06, 'epoch': 43.29} + 36%|███▌ | 1775/5000 [5:54:08<9:52:18, 11.02s/it][2022-12-20 16:16:38,700] [INFO] [timer.py:197:stop] 0/3594, RunningAvgSamplesPerSec=5.867668523604353, CurrSamplesPerSec=5.325266659643956, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1776/5000 [5:54:19<9:52:15, 11.02s/it][2022-12-20 16:16:49,687] [INFO] [timer.py:197:stop] 0/3596, RunningAvgSamplesPerSec=5.867681340865713, CurrSamplesPerSec=5.360628294483764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1777/5000 [5:54:30<9:50:59, 11.00s/it][2022-12-20 16:17:00,651] [INFO] [timer.py:197:stop] 0/3598, RunningAvgSamplesPerSec=5.867701318782148, CurrSamplesPerSec=5.352783304999633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1778/5000 [5:54:41<9:50:16, 10.99s/it][2022-12-20 16:17:11,574] [INFO] [logging.py:68:log_dist] [Rank 0] step=1800, skipped=3, lr=[7.1200000000000004e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:17:11,576] [INFO] [timer.py:197:stop] 0/3600, RunningAvgSamplesPerSec=5.867733005480635, CurrSamplesPerSec=5.393124441521702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1779/5000 [5:54:52<9:48:56, 10.97s/it][2022-12-20 16:17:22,589] [INFO] [timer.py:197:stop] 0/3602, RunningAvgSamplesPerSec=5.867737649483979, CurrSamplesPerSec=5.309725923385553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1780/5000 [5:55:03<9:48:51, 10.97s/it][2022-12-20 16:17:33,525] [INFO] [timer.py:197:stop] 0/3604, RunningAvgSamplesPerSec=5.867765906315899, CurrSamplesPerSec=5.347261894213736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1781/5000 [5:55:14<9:49:00, 10.98s/it][2022-12-20 16:17:44,546] [INFO] [timer.py:197:stop] 0/3606, RunningAvgSamplesPerSec=5.867768586479001, CurrSamplesPerSec=5.321789667829376, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1782/5000 [5:55:25<9:48:56, 10.98s/it][2022-12-20 16:17:55,491] [INFO] [timer.py:197:stop] 0/3608, RunningAvgSamplesPerSec=5.867793552327495, CurrSamplesPerSec=5.357194152600634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1783/5000 [5:55:36<9:48:50, 10.98s/it][2022-12-20 16:18:06,530] [INFO] [timer.py:197:stop] 0/3610, RunningAvgSamplesPerSec=5.8677905521950295, CurrSamplesPerSec=5.31032107768662, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1784/5000 [5:55:47<9:49:33, 11.00s/it][2022-12-20 16:18:17,517] [INFO] [timer.py:197:stop] 0/3612, RunningAvgSamplesPerSec=5.867802799163721, CurrSamplesPerSec=5.355282997103622, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1785/5000 [5:55:58<9:49:15, 11.00s/it][2022-12-20 16:18:28,566] [INFO] [timer.py:197:stop] 0/3614, RunningAvgSamplesPerSec=5.867796786673767, CurrSamplesPerSec=5.305290316195042, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1786/5000 [5:56:09<9:49:48, 11.01s/it][2022-12-20 16:18:39,551] [INFO] [timer.py:197:stop] 0/3616, RunningAvgSamplesPerSec=5.8678097220916206, CurrSamplesPerSec=5.355854640394681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1787/5000 [5:56:20<9:49:04, 11.00s/it][2022-12-20 16:18:50,588] [INFO] [timer.py:197:stop] 0/3618, RunningAvgSamplesPerSec=5.867807372672665, CurrSamplesPerSec=5.30306858553917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1788/5000 [5:56:31<9:49:12, 11.01s/it][2022-12-20 16:19:01,587] [INFO] [logging.py:68:log_dist] [Rank 0] step=1810, skipped=3, lr=[7.097777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:19:01,589] [INFO] [timer.py:197:stop] 0/3620, RunningAvgSamplesPerSec=5.8678156441538025, CurrSamplesPerSec=5.320339572001522, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1789/5000 [5:56:42<9:49:17, 11.01s/it][2022-12-20 16:19:12,587] [INFO] [timer.py:197:stop] 0/3622, RunningAvgSamplesPerSec=5.867824806299585, CurrSamplesPerSec=5.341856037050231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1790/5000 [5:56:53<9:48:20, 11.00s/it][2022-12-20 16:19:23,570] [INFO] [timer.py:197:stop] 0/3624, RunningAvgSamplesPerSec=5.867838705663901, CurrSamplesPerSec=5.325347372512371, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1791/5000 [5:57:04<9:48:47, 11.01s/it][2022-12-20 16:19:34,614] [INFO] [timer.py:197:stop] 0/3626, RunningAvgSamplesPerSec=5.867834194811847, CurrSamplesPerSec=5.31771499837736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1792/5000 [5:57:15<9:48:45, 11.01s/it][2022-12-20 16:19:45,633] [INFO] [timer.py:197:stop] 0/3628, RunningAvgSamplesPerSec=5.867837066394823, CurrSamplesPerSec=5.316711047945764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1793/5000 [5:57:26<9:48:51, 11.02s/it][2022-12-20 16:19:56,676] [INFO] [timer.py:197:stop] 0/3630, RunningAvgSamplesPerSec=5.86783311217931, CurrSamplesPerSec=5.305689414500028, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1794/5000 [5:57:37<9:49:32, 11.03s/it][2022-12-20 16:20:07,752] [INFO] [timer.py:197:stop] 0/3632, RunningAvgSamplesPerSec=5.8678192171607195, CurrSamplesPerSec=5.30120422158682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1795/5000 [5:57:48<9:49:13, 11.03s/it][2022-12-20 16:20:18,776] [INFO] [timer.py:197:stop] 0/3634, RunningAvgSamplesPerSec=5.867826088633927, CurrSamplesPerSec=5.317065525959461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1796/5000 [5:57:59<9:49:56, 11.05s/it][2022-12-20 16:20:29,866] [INFO] [timer.py:197:stop] 0/3636, RunningAvgSamplesPerSec=5.8678083359500075, CurrSamplesPerSec=5.29959895784635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1797/5000 [5:58:10<9:49:12, 11.04s/it][2022-12-20 16:20:40,868] [INFO] [timer.py:197:stop] 0/3638, RunningAvgSamplesPerSec=5.867816582888875, CurrSamplesPerSec=5.309409388422586, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1798/5000 [5:58:21<9:47:55, 11.02s/it][2022-12-20 16:20:51,786] [INFO] [logging.py:68:log_dist] [Rank 0] step=1820, skipped=3, lr=[7.0755555555555565e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:20:51,787] [INFO] [timer.py:197:stop] 0/3640, RunningAvgSamplesPerSec=5.867848963113875, CurrSamplesPerSec=5.352412949437624, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1799/5000 [5:58:32<9:46:52, 11.00s/it][2022-12-20 16:21:02,768] [INFO] [timer.py:197:stop] 0/3642, RunningAvgSamplesPerSec=5.867863518411578, CurrSamplesPerSec=5.337413516093198, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1800/5000 [5:58:43<9:46:27, 11.00s/it] {'loss': 0.0003, 'learning_rate': 7.073333333333334e-06, 'epoch': 43.89} + 36%|███▌ | 1800/5000 [5:58:43<9:46:27, 11.00s/it][2022-12-20 16:21:13,797] [INFO] [timer.py:197:stop] 0/3644, RunningAvgSamplesPerSec=5.867863865032752, CurrSamplesPerSec=5.3000020138927, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1801/5000 [5:58:54<9:47:29, 11.02s/it][2022-12-20 16:21:24,902] [INFO] [timer.py:197:stop] 0/3646, RunningAvgSamplesPerSec=5.867855906001333, CurrSamplesPerSec=5.299130267792619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1802/5000 [5:59:05<9:48:35, 11.04s/it][2022-12-20 16:21:35,952] [INFO] [timer.py:197:stop] 0/3648, RunningAvgSamplesPerSec=5.867850075133944, CurrSamplesPerSec=5.313642987537273, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1803/5000 [5:59:16<9:48:26, 11.04s/it][2022-12-20 16:21:46,988] [INFO] [timer.py:197:stop] 0/3650, RunningAvgSamplesPerSec=5.867848148493058, CurrSamplesPerSec=5.321127175109992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1804/5000 [5:59:27<9:47:14, 11.02s/it][2022-12-20 16:21:57,042] [INFO] [timer.py:197:stop] 0/3652, RunningAvgSamplesPerSec=5.868132621584865, CurrSamplesPerSec=6.277680904409208, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 16:22:08,095] [INFO] [timer.py:197:stop] 0/3654, RunningAvgSamplesPerSec=5.868125109190584, CurrSamplesPerSec=5.301034208916655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1805/5000 [5:59:43<11:09:47, 12.58s/it][2022-12-20 16:22:19,093] [INFO] [timer.py:197:stop] 0/3656, RunningAvgSamplesPerSec=5.868134462380748, CurrSamplesPerSec=5.355363340341336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1806/5000 [5:59:54<10:44:21, 12.10s/it][2022-12-20 16:22:30,155] [INFO] [timer.py:197:stop] 0/3658, RunningAvgSamplesPerSec=5.868125109133783, CurrSamplesPerSec=5.30359707090439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1807/5000 [6:00:05<10:27:30, 11.79s/it][2022-12-20 16:22:41,190] [INFO] [logging.py:68:log_dist] [Rank 0] step=1830, skipped=3, lr=[7.053333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:22:41,192] [INFO] [timer.py:197:stop] 0/3660, RunningAvgSamplesPerSec=5.868122547434797, CurrSamplesPerSec=5.321938645914429, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1808/5000 [6:00:16<10:15:16, 11.57s/it][2022-12-20 16:22:52,141] [INFO] [timer.py:197:stop] 0/3662, RunningAvgSamplesPerSec=5.868145908661473, CurrSamplesPerSec=5.351630995168219, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1809/5000 [6:00:27<10:05:15, 11.38s/it][2022-12-20 16:23:03,180] [INFO] [timer.py:197:stop] 0/3664, RunningAvgSamplesPerSec=5.868148154905698, CurrSamplesPerSec=5.329684530306949, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1810/5000 [6:00:38<9:59:36, 11.28s/it] [2022-12-20 16:23:14,210] [INFO] [timer.py:197:stop] 0/3666, RunningAvgSamplesPerSec=5.8681475644032295, CurrSamplesPerSec=5.334633614325137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1811/5000 [6:00:49<9:55:28, 11.20s/it][2022-12-20 16:23:25,158] [INFO] [timer.py:197:stop] 0/3668, RunningAvgSamplesPerSec=5.868171638389008, CurrSamplesPerSec=5.346339181450542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▌ | 1812/5000 [6:01:00<9:51:12, 11.13s/it][2022-12-20 16:23:36,165] [INFO] [timer.py:197:stop] 0/3670, RunningAvgSamplesPerSec=5.86817780599577, CurrSamplesPerSec=5.312544895366188, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1813/5000 [6:01:11<9:49:06, 11.09s/it][2022-12-20 16:23:47,196] [INFO] [timer.py:197:stop] 0/3672, RunningAvgSamplesPerSec=5.868177330254089, CurrSamplesPerSec=5.319049200096095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1814/5000 [6:01:22<9:47:58, 11.07s/it][2022-12-20 16:23:58,179] [INFO] [timer.py:197:stop] 0/3674, RunningAvgSamplesPerSec=5.868190599847353, CurrSamplesPerSec=5.333014189427325, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1815/5000 [6:01:33<9:46:21, 11.05s/it][2022-12-20 16:24:09,158] [INFO] [timer.py:197:stop] 0/3676, RunningAvgSamplesPerSec=5.86820545758403, CurrSamplesPerSec=5.308064903126967, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1816/5000 [6:01:44<9:45:06, 11.03s/it][2022-12-20 16:24:20,135] [INFO] [timer.py:197:stop] 0/3678, RunningAvgSamplesPerSec=5.868220665096575, CurrSamplesPerSec=5.328915972864234, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1817/5000 [6:01:55<9:44:09, 11.01s/it][2022-12-20 16:24:31,114] [INFO] [logging.py:68:log_dist] [Rank 0] step=1840, skipped=3, lr=[7.031111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:24:31,116] [INFO] [timer.py:197:stop] 0/3680, RunningAvgSamplesPerSec=5.868234756206267, CurrSamplesPerSec=5.33764657898069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1818/5000 [6:02:06<9:43:28, 11.00s/it][2022-12-20 16:24:42,103] [INFO] [timer.py:197:stop] 0/3682, RunningAvgSamplesPerSec=5.8682466593737255, CurrSamplesPerSec=5.321677201230961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1819/5000 [6:02:17<9:43:03, 11.00s/it][2022-12-20 16:24:53,135] [INFO] [timer.py:197:stop] 0/3684, RunningAvgSamplesPerSec=5.868245948061289, CurrSamplesPerSec=5.3332663650444605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1820/5000 [6:02:28<9:43:25, 11.01s/it][2022-12-20 16:25:04,128] [INFO] [timer.py:197:stop] 0/3686, RunningAvgSamplesPerSec=5.868256603460851, CurrSamplesPerSec=5.344668161250516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1821/5000 [6:02:39<9:42:59, 11.00s/it][2022-12-20 16:25:15,101] [INFO] [timer.py:197:stop] 0/3688, RunningAvgSamplesPerSec=5.868272661115379, CurrSamplesPerSec=5.346490815049714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1822/5000 [6:02:50<9:42:20, 10.99s/it][2022-12-20 16:25:26,085] [INFO] [timer.py:197:stop] 0/3690, RunningAvgSamplesPerSec=5.868285621717364, CurrSamplesPerSec=5.321523595996189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1823/5000 [6:03:01<9:41:59, 10.99s/it][2022-12-20 16:25:37,094] [INFO] [timer.py:197:stop] 0/3692, RunningAvgSamplesPerSec=5.868294990879574, CurrSamplesPerSec=5.308947151348811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1824/5000 [6:03:12<9:42:05, 11.00s/it][2022-12-20 16:25:48,087] [INFO] [timer.py:197:stop] 0/3694, RunningAvgSamplesPerSec=5.868305565250043, CurrSamplesPerSec=5.354673449952983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 36%|███▋ | 1825/5000 [6:03:23<9:41:50, 11.00s/it] {'loss': 0.0003, 'learning_rate': 7.015555555555556e-06, 'epoch': 44.51} + 36%|███▋ | 1825/5000 [6:03:23<9:41:50, 11.00s/it][2022-12-20 16:25:59,075] [INFO] [timer.py:197:stop] 0/3696, RunningAvgSamplesPerSec=5.86831799793993, CurrSamplesPerSec=5.314430292496196, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1826/5000 [6:03:34<9:41:32, 10.99s/it][2022-12-20 16:26:10,088] [INFO] [timer.py:197:stop] 0/3698, RunningAvgSamplesPerSec=5.8683228582549685, CurrSamplesPerSec=5.294283772891498, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1827/5000 [6:03:45<9:41:40, 11.00s/it][2022-12-20 16:26:21,066] [INFO] [logging.py:68:log_dist] [Rank 0] step=1850, skipped=3, lr=[7.008888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:26:21,068] [INFO] [timer.py:197:stop] 0/3700, RunningAvgSamplesPerSec=5.868336823992637, CurrSamplesPerSec=5.329232298823237, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1828/5000 [6:03:56<9:41:11, 10.99s/it][2022-12-20 16:26:32,089] [INFO] [timer.py:197:stop] 0/3702, RunningAvgSamplesPerSec=5.868339276831172, CurrSamplesPerSec=5.3270109826711645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1829/5000 [6:04:07<9:41:26, 11.00s/it][2022-12-20 16:26:43,137] [INFO] [timer.py:197:stop] 0/3704, RunningAvgSamplesPerSec=5.8683339059503234, CurrSamplesPerSec=5.31480025068922, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1830/5000 [6:04:18<9:41:59, 11.02s/it][2022-12-20 16:26:54,190] [INFO] [timer.py:197:stop] 0/3706, RunningAvgSamplesPerSec=5.8683269539631775, CurrSamplesPerSec=5.306354781722552, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1831/5000 [6:04:29<9:42:23, 11.03s/it][2022-12-20 16:27:05,187] [INFO] [timer.py:197:stop] 0/3708, RunningAvgSamplesPerSec=5.86833620414536, CurrSamplesPerSec=5.307387983825914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1832/5000 [6:04:40<9:41:45, 11.02s/it][2022-12-20 16:27:16,129] [INFO] [timer.py:197:stop] 0/3710, RunningAvgSamplesPerSec=5.8683618261204415, CurrSamplesPerSec=5.357746527674794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1833/5000 [6:04:51<9:40:21, 11.00s/it][2022-12-20 16:27:27,173] [INFO] [timer.py:197:stop] 0/3712, RunningAvgSamplesPerSec=5.868357495813902, CurrSamplesPerSec=5.3143345494768095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1834/5000 [6:05:02<9:40:57, 11.01s/it][2022-12-20 16:27:38,211] [INFO] [timer.py:197:stop] 0/3714, RunningAvgSamplesPerSec=5.8683550285875965, CurrSamplesPerSec=5.319246299738143, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1835/5000 [6:05:13<9:41:12, 11.02s/it][2022-12-20 16:27:49,192] [INFO] [timer.py:197:stop] 0/3716, RunningAvgSamplesPerSec=5.86836907975285, CurrSamplesPerSec=5.360113855825353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1836/5000 [6:05:24<9:40:25, 11.01s/it][2022-12-20 16:28:00,148] [INFO] [timer.py:197:stop] 0/3718, RunningAvgSamplesPerSec=5.868390048636477, CurrSamplesPerSec=5.349072674212964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1837/5000 [6:05:35<9:39:26, 10.99s/it][2022-12-20 16:28:11,101] [INFO] [logging.py:68:log_dist] [Rank 0] step=1860, skipped=3, lr=[6.986666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:28:11,103] [INFO] [timer.py:197:stop] 0/3720, RunningAvgSamplesPerSec=5.868411506969105, CurrSamplesPerSec=5.357620987604054, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1838/5000 [6:05:46<9:38:41, 10.98s/it][2022-12-20 16:28:22,109] [INFO] [timer.py:197:stop] 0/3722, RunningAvgSamplesPerSec=5.8684182052315625, CurrSamplesPerSec=5.3540307235296005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1839/5000 [6:05:57<9:38:53, 10.99s/it][2022-12-20 16:28:33,149] [INFO] [timer.py:197:stop] 0/3724, RunningAvgSamplesPerSec=5.868415356657042, CurrSamplesPerSec=5.323421495714795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1840/5000 [6:06:08<9:39:32, 11.00s/it][2022-12-20 16:28:44,160] [INFO] [timer.py:197:stop] 0/3726, RunningAvgSamplesPerSec=5.868425675215126, CurrSamplesPerSec=5.326175560257915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1841/5000 [6:06:19<9:39:27, 11.01s/it][2022-12-20 16:28:55,185] [INFO] [timer.py:197:stop] 0/3728, RunningAvgSamplesPerSec=5.86842671145372, CurrSamplesPerSec=5.325337441739497, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1842/5000 [6:06:30<9:39:34, 11.01s/it][2022-12-20 16:29:06,206] [INFO] [timer.py:197:stop] 0/3730, RunningAvgSamplesPerSec=5.868428850838629, CurrSamplesPerSec=5.323535091800804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1843/5000 [6:06:41<9:39:33, 11.01s/it][2022-12-20 16:29:17,188] [INFO] [timer.py:197:stop] 0/3732, RunningAvgSamplesPerSec=5.868442745815802, CurrSamplesPerSec=5.361874445881324, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1844/5000 [6:06:52<9:38:51, 11.00s/it][2022-12-20 16:29:28,229] [INFO] [timer.py:197:stop] 0/3734, RunningAvgSamplesPerSec=5.868444475831424, CurrSamplesPerSec=5.317454811169712, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1845/5000 [6:07:03<9:39:13, 11.02s/it][2022-12-20 16:29:38,341] [INFO] [timer.py:197:stop] 0/3736, RunningAvgSamplesPerSec=5.868706050036927, CurrSamplesPerSec=5.343482538933694, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1846/5000 [6:07:18<10:42:43, 12.23s/it][2022-12-20 16:29:49,334] [INFO] [timer.py:197:stop] 0/3738, RunningAvgSamplesPerSec=5.868716611851059, CurrSamplesPerSec=5.344501095398371, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1847/5000 [6:07:29<10:23:20, 11.86s/it][2022-12-20 16:30:00,322] [INFO] [logging.py:68:log_dist] [Rank 0] step=1870, skipped=3, lr=[6.964444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:30:00,324] [INFO] [timer.py:197:stop] 0/3740, RunningAvgSamplesPerSec=5.86872799107172, CurrSamplesPerSec=5.362258323237146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1848/5000 [6:07:40<10:09:15, 11.60s/it][2022-12-20 16:30:11,350] [INFO] [timer.py:197:stop] 0/3742, RunningAvgSamplesPerSec=5.868728892944803, CurrSamplesPerSec=5.322625614524423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1849/5000 [6:07:51<10:00:23, 11.43s/it][2022-12-20 16:30:22,386] [INFO] [timer.py:197:stop] 0/3744, RunningAvgSamplesPerSec=5.868726741246671, CurrSamplesPerSec=5.331778026322872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1850/5000 [6:08:02<9:52:53, 11.29s/it] {'loss': 0.0003, 'learning_rate': 6.96e-06, 'epoch': 45.12} + 37%|███▋ | 1850/5000 [6:08:02<9:52:53, 11.29s/it][2022-12-20 16:30:33,352] [INFO] [timer.py:197:stop] 0/3746, RunningAvgSamplesPerSec=5.868744658743884, CurrSamplesPerSec=5.334493253512511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1851/5000 [6:08:13<9:48:12, 11.21s/it][2022-12-20 16:30:44,342] [INFO] [timer.py:197:stop] 0/3748, RunningAvgSamplesPerSec=5.868755344175072, CurrSamplesPerSec=5.349467726523821, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1852/5000 [6:08:24<9:44:56, 11.15s/it][2022-12-20 16:30:55,385] [INFO] [timer.py:197:stop] 0/3750, RunningAvgSamplesPerSec=5.868751074232292, CurrSamplesPerSec=5.321030557789979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1853/5000 [6:08:35<9:43:20, 11.12s/it][2022-12-20 16:31:06,471] [INFO] [timer.py:197:stop] 0/3752, RunningAvgSamplesPerSec=5.868734432402666, CurrSamplesPerSec=5.297403107625619, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1854/5000 [6:08:46<9:41:39, 11.09s/it][2022-12-20 16:31:17,470] [INFO] [timer.py:197:stop] 0/3754, RunningAvgSamplesPerSec=5.86874245227039, CurrSamplesPerSec=5.321080342695744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1855/5000 [6:08:58<9:40:16, 11.07s/it][2022-12-20 16:31:28,485] [INFO] [timer.py:197:stop] 0/3756, RunningAvgSamplesPerSec=5.868746021327581, CurrSamplesPerSec=5.323925536132185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1856/5000 [6:09:09<9:39:34, 11.06s/it][2022-12-20 16:31:39,627] [INFO] [timer.py:197:stop] 0/3758, RunningAvgSamplesPerSec=5.868713093712055, CurrSamplesPerSec=5.232180599438258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1857/5000 [6:09:20<9:40:03, 11.07s/it][2022-12-20 16:31:50,602] [INFO] [logging.py:68:log_dist] [Rank 0] step=1880, skipped=3, lr=[6.942222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:31:50,604] [INFO] [timer.py:197:stop] 0/3760, RunningAvgSamplesPerSec=5.868727513110509, CurrSamplesPerSec=5.342600897931986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1858/5000 [6:09:31<9:38:54, 11.05s/it][2022-12-20 16:32:01,632] [INFO] [timer.py:197:stop] 0/3762, RunningAvgSamplesPerSec=5.868727241214993, CurrSamplesPerSec=5.328374391530989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1859/5000 [6:09:42<9:38:05, 11.04s/it][2022-12-20 16:32:12,634] [INFO] [timer.py:197:stop] 0/3764, RunningAvgSamplesPerSec=5.868736391370496, CurrSamplesPerSec=5.345233283039553, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1860/5000 [6:09:53<9:37:17, 11.03s/it][2022-12-20 16:32:23,667] [INFO] [timer.py:197:stop] 0/3766, RunningAvgSamplesPerSec=5.868735681107274, CurrSamplesPerSec=5.315751898546076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1861/5000 [6:10:04<9:36:25, 11.02s/it][2022-12-20 16:32:34,616] [INFO] [timer.py:197:stop] 0/3768, RunningAvgSamplesPerSec=5.868758489828963, CurrSamplesPerSec=5.348920680962366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1862/5000 [6:10:15<9:35:27, 11.00s/it][2022-12-20 16:32:45,580] [INFO] [timer.py:197:stop] 0/3770, RunningAvgSamplesPerSec=5.868776839356115, CurrSamplesPerSec=5.351711015472656, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1863/5000 [6:10:26<9:35:09, 11.00s/it][2022-12-20 16:32:56,635] [INFO] [timer.py:197:stop] 0/3772, RunningAvgSamplesPerSec=5.868768565999091, CurrSamplesPerSec=5.298422368231982, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1864/5000 [6:10:37<9:35:35, 11.01s/it][2022-12-20 16:33:07,622] [INFO] [timer.py:197:stop] 0/3774, RunningAvgSamplesPerSec=5.8687804552120175, CurrSamplesPerSec=5.346630743009816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1865/5000 [6:10:48<9:34:46, 11.00s/it][2022-12-20 16:33:18,617] [INFO] [timer.py:197:stop] 0/3776, RunningAvgSamplesPerSec=5.868789928065298, CurrSamplesPerSec=5.325936523777543, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1866/5000 [6:10:59<9:34:16, 10.99s/it][2022-12-20 16:33:29,580] [INFO] [timer.py:197:stop] 0/3778, RunningAvgSamplesPerSec=5.868808120047668, CurrSamplesPerSec=5.34097769560886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1867/5000 [6:11:10<9:34:33, 11.00s/it][2022-12-20 16:33:40,600] [INFO] [logging.py:68:log_dist] [Rank 0] step=1890, skipped=3, lr=[6.92e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:33:40,602] [INFO] [timer.py:197:stop] 0/3780, RunningAvgSamplesPerSec=5.868809453467568, CurrSamplesPerSec=5.3425175346367935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1868/5000 [6:11:21<9:34:12, 11.00s/it][2022-12-20 16:33:51,599] [INFO] [timer.py:197:stop] 0/3782, RunningAvgSamplesPerSec=5.868818229839722, CurrSamplesPerSec=5.34016359590789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1869/5000 [6:11:32<9:34:04, 11.00s/it][2022-12-20 16:34:02,628] [INFO] [timer.py:197:stop] 0/3784, RunningAvgSamplesPerSec=5.868817758535052, CurrSamplesPerSec=5.316844998430708, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1870/5000 [6:11:43<9:33:57, 11.00s/it][2022-12-20 16:34:13,633] [INFO] [timer.py:197:stop] 0/3786, RunningAvgSamplesPerSec=5.868823979332049, CurrSamplesPerSec=5.315866641334482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1871/5000 [6:11:54<9:34:40, 11.02s/it][2022-12-20 16:34:24,602] [INFO] [timer.py:197:stop] 0/3788, RunningAvgSamplesPerSec=5.868845354753321, CurrSamplesPerSec=5.398444182432372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1872/5000 [6:12:05<9:33:13, 11.00s/it][2022-12-20 16:34:35,609] [INFO] [timer.py:197:stop] 0/3790, RunningAvgSamplesPerSec=5.868851344861927, CurrSamplesPerSec=5.336599653739395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1873/5000 [6:12:16<9:33:08, 11.00s/it][2022-12-20 16:34:46,671] [INFO] [timer.py:197:stop] 0/3792, RunningAvgSamplesPerSec=5.868841521140663, CurrSamplesPerSec=5.283556990771648, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 37%|███▋ | 1874/5000 [6:12:27<9:33:43, 11.01s/it][2022-12-20 16:34:57,682] [INFO] [timer.py:197:stop] 0/3794, RunningAvgSamplesPerSec=5.8688459956298775, CurrSamplesPerSec=5.314820665126496, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1875/5000 [6:12:38<9:33:45, 11.02s/it] {'loss': 0.0002, 'learning_rate': 6.904444444444444e-06, 'epoch': 45.72} + 38%|███▊ | 1875/5000 [6:12:38<9:33:45, 11.02s/it][2022-12-20 16:35:08,713] [INFO] [timer.py:197:stop] 0/3796, RunningAvgSamplesPerSec=5.86884505569269, CurrSamplesPerSec=5.311306221970929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1876/5000 [6:12:49<9:33:36, 11.02s/it][2022-12-20 16:35:19,700] [INFO] [timer.py:197:stop] 0/3798, RunningAvgSamplesPerSec=5.868857147887066, CurrSamplesPerSec=5.339707460077929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1877/5000 [6:13:00<9:33:14, 11.01s/it][2022-12-20 16:35:30,766] [INFO] [logging.py:68:log_dist] [Rank 0] step=1900, skipped=3, lr=[6.897777777777779e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:35:30,767] [INFO] [timer.py:197:stop] 0/3800, RunningAvgSamplesPerSec=5.868845747112066, CurrSamplesPerSec=5.282470881175296, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1878/5000 [6:13:11<9:34:03, 11.03s/it][2022-12-20 16:35:41,804] [INFO] [timer.py:197:stop] 0/3802, RunningAvgSamplesPerSec=5.868843133515691, CurrSamplesPerSec=5.319080819375827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1879/5000 [6:13:22<9:33:42, 11.03s/it][2022-12-20 16:35:52,835] [INFO] [timer.py:197:stop] 0/3804, RunningAvgSamplesPerSec=5.868841949957179, CurrSamplesPerSec=5.3108411328482585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1880/5000 [6:13:33<9:33:14, 11.02s/it][2022-12-20 16:36:03,804] [INFO] [timer.py:197:stop] 0/3806, RunningAvgSamplesPerSec=5.868858912477242, CurrSamplesPerSec=5.348922812645288, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1881/5000 [6:13:44<9:31:54, 11.00s/it][2022-12-20 16:36:14,757] [INFO] [timer.py:197:stop] 0/3808, RunningAvgSamplesPerSec=5.868879766394498, CurrSamplesPerSec=5.345230728547926, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1882/5000 [6:13:55<9:32:02, 11.01s/it][2022-12-20 16:36:25,782] [INFO] [timer.py:197:stop] 0/3810, RunningAvgSamplesPerSec=5.868880605851189, CurrSamplesPerSec=5.34352189519481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1883/5000 [6:14:06<9:32:01, 11.01s/it][2022-12-20 16:36:36,783] [INFO] [timer.py:197:stop] 0/3812, RunningAvgSamplesPerSec=5.86888787766575, CurrSamplesPerSec=5.358793423001814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1884/5000 [6:14:17<9:31:01, 11.00s/it][2022-12-20 16:36:47,727] [INFO] [timer.py:197:stop] 0/3814, RunningAvgSamplesPerSec=5.868911516982396, CurrSamplesPerSec=5.372213307296083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1885/5000 [6:14:28<9:29:52, 10.98s/it][2022-12-20 16:36:58,664] [INFO] [timer.py:197:stop] 0/3816, RunningAvgSamplesPerSec=5.86893729650693, CurrSamplesPerSec=5.369476095965548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1886/5000 [6:14:39<9:29:01, 10.96s/it][2022-12-20 16:37:08,748] [INFO] [timer.py:197:stop] 0/3818, RunningAvgSamplesPerSec=5.8692004493378125, CurrSamplesPerSec=6.248256142802676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 16:37:19,733] [INFO] [logging.py:68:log_dist] [Rank 0] step=1910, skipped=3, lr=[6.875555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:37:19,734] [INFO] [timer.py:197:stop] 0/3820, RunningAvgSamplesPerSec=5.869211360605502, CurrSamplesPerSec=5.316808140312871, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1887/5000 [6:14:55<10:49:43, 12.52s/it][2022-12-20 16:37:30,715] [INFO] [timer.py:197:stop] 0/3822, RunningAvgSamplesPerSec=5.8692244754562966, CurrSamplesPerSec=5.361287168495422, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1888/5000 [6:15:06<10:25:31, 12.06s/it][2022-12-20 16:37:41,727] [INFO] [timer.py:197:stop] 0/3824, RunningAvgSamplesPerSec=5.869228957885405, CurrSamplesPerSec=5.316804138605116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1889/5000 [6:15:17<10:09:00, 11.75s/it][2022-12-20 16:37:52,701] [INFO] [timer.py:197:stop] 0/3826, RunningAvgSamplesPerSec=5.869243675413664, CurrSamplesPerSec=5.348302990836642, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1890/5000 [6:15:28<9:56:49, 11.51s/it] [2022-12-20 16:38:03,784] [INFO] [timer.py:197:stop] 0/3828, RunningAvgSamplesPerSec=5.869227979587994, CurrSamplesPerSec=5.286715545388966, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1891/5000 [6:15:39<9:49:55, 11.38s/it][2022-12-20 16:38:14,821] [INFO] [timer.py:197:stop] 0/3830, RunningAvgSamplesPerSec=5.869225616368315, CurrSamplesPerSec=5.3182169050599795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1892/5000 [6:15:50<9:44:19, 11.28s/it][2022-12-20 16:38:25,878] [INFO] [timer.py:197:stop] 0/3832, RunningAvgSamplesPerSec=5.86921717053297, CurrSamplesPerSec=5.318021988820157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1893/5000 [6:16:01<9:40:40, 11.21s/it][2022-12-20 16:38:36,883] [INFO] [timer.py:197:stop] 0/3834, RunningAvgSamplesPerSec=5.869223571325087, CurrSamplesPerSec=5.3388340722982335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1894/5000 [6:16:12<9:37:14, 11.15s/it][2022-12-20 16:38:47,958] [INFO] [timer.py:197:stop] 0/3836, RunningAvgSamplesPerSec=5.869210283027008, CurrSamplesPerSec=5.269363905321959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1895/5000 [6:16:23<9:35:53, 11.13s/it][2022-12-20 16:38:58,908] [INFO] [timer.py:197:stop] 0/3838, RunningAvgSamplesPerSec=5.869232230158236, CurrSamplesPerSec=5.350949959199412, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1896/5000 [6:16:34<9:32:55, 11.07s/it][2022-12-20 16:39:09,961] [INFO] [logging.py:68:log_dist] [Rank 0] step=1920, skipped=3, lr=[6.853333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:39:09,963] [INFO] [timer.py:197:stop] 0/3840, RunningAvgSamplesPerSec=5.869224439634074, CurrSamplesPerSec=5.306866296574155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1897/5000 [6:16:45<9:32:26, 11.07s/it][2022-12-20 16:39:20,990] [INFO] [timer.py:197:stop] 0/3842, RunningAvgSamplesPerSec=5.869224567054846, CurrSamplesPerSec=5.333898604217269, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1898/5000 [6:16:56<9:31:36, 11.06s/it][2022-12-20 16:39:31,991] [INFO] [timer.py:197:stop] 0/3844, RunningAvgSamplesPerSec=5.869231945721757, CurrSamplesPerSec=5.32162318519249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1899/5000 [6:17:07<9:30:34, 11.04s/it][2022-12-20 16:39:42,964] [INFO] [timer.py:197:stop] 0/3846, RunningAvgSamplesPerSec=5.869247350035886, CurrSamplesPerSec=5.34918331708863, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1900/5000 [6:17:18<9:29:21, 11.02s/it] {'loss': 0.0002, 'learning_rate': 6.846666666666667e-06, 'epoch': 46.34} + 38%|███▊ | 1900/5000 [6:17:18<9:29:21, 11.02s/it][2022-12-20 16:39:53,976] [INFO] [timer.py:197:stop] 0/3848, RunningAvgSamplesPerSec=5.869252167410342, CurrSamplesPerSec=5.308627769663831, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1901/5000 [6:17:29<9:29:03, 11.02s/it][2022-12-20 16:40:04,999] [INFO] [timer.py:197:stop] 0/3850, RunningAvgSamplesPerSec=5.869253645555402, CurrSamplesPerSec=5.323418539747536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1902/5000 [6:17:40<9:28:57, 11.02s/it][2022-12-20 16:40:15,973] [INFO] [timer.py:197:stop] 0/3852, RunningAvgSamplesPerSec=5.869268764376935, CurrSamplesPerSec=5.359081209063046, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1903/5000 [6:17:51<9:28:03, 11.01s/it][2022-12-20 16:40:26,946] [INFO] [timer.py:197:stop] 0/3854, RunningAvgSamplesPerSec=5.869283832861956, CurrSamplesPerSec=5.359023221429588, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1904/5000 [6:18:02<9:27:22, 11.00s/it][2022-12-20 16:40:37,969] [INFO] [timer.py:197:stop] 0/3856, RunningAvgSamplesPerSec=5.869285350627309, CurrSamplesPerSec=5.336374956861386, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1905/5000 [6:18:13<9:27:37, 11.00s/it][2022-12-20 16:40:48,982] [INFO] [timer.py:197:stop] 0/3858, RunningAvgSamplesPerSec=5.869289213648897, CurrSamplesPerSec=5.308052097779914, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1906/5000 [6:18:24<9:27:34, 11.01s/it][2022-12-20 16:40:59,941] [INFO] [logging.py:68:log_dist] [Rank 0] step=1930, skipped=3, lr=[6.831111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:40:59,943] [INFO] [timer.py:197:stop] 0/3860, RunningAvgSamplesPerSec=5.869307706769644, CurrSamplesPerSec=5.353391567774957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1907/5000 [6:18:35<9:26:41, 10.99s/it][2022-12-20 16:41:10,899] [INFO] [timer.py:197:stop] 0/3862, RunningAvgSamplesPerSec=5.869327569959282, CurrSamplesPerSec=5.361225064259297, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1908/5000 [6:18:46<9:25:55, 10.98s/it][2022-12-20 16:41:21,895] [INFO] [timer.py:197:stop] 0/3864, RunningAvgSamplesPerSec=5.869336043827087, CurrSamplesPerSec=5.313138578555798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1909/5000 [6:18:57<9:25:58, 10.99s/it][2022-12-20 16:41:32,914] [INFO] [timer.py:197:stop] 0/3866, RunningAvgSamplesPerSec=5.869342909431282, CurrSamplesPerSec=5.322120976054332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1910/5000 [6:19:08<9:26:18, 11.00s/it][2022-12-20 16:41:43,892] [INFO] [timer.py:197:stop] 0/3868, RunningAvgSamplesPerSec=5.8693570598635665, CurrSamplesPerSec=5.341944907623823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1911/5000 [6:19:19<9:25:49, 10.99s/it][2022-12-20 16:41:54,890] [INFO] [timer.py:197:stop] 0/3870, RunningAvgSamplesPerSec=5.8693651980584844, CurrSamplesPerSec=5.325207077368411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1912/5000 [6:19:30<9:25:45, 10.99s/it][2022-12-20 16:42:05,881] [INFO] [timer.py:197:stop] 0/3872, RunningAvgSamplesPerSec=5.86937528936078, CurrSamplesPerSec=5.364937139995612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1913/5000 [6:19:41<9:25:33, 10.99s/it][2022-12-20 16:42:16,906] [INFO] [timer.py:197:stop] 0/3874, RunningAvgSamplesPerSec=5.869375984128186, CurrSamplesPerSec=5.329702307936765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1914/5000 [6:19:52<9:25:52, 11.00s/it][2022-12-20 16:42:27,887] [INFO] [timer.py:197:stop] 0/3876, RunningAvgSamplesPerSec=5.869388822495246, CurrSamplesPerSec=5.339384366756276, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1915/5000 [6:20:03<9:25:21, 11.00s/it][2022-12-20 16:42:38,829] [INFO] [timer.py:197:stop] 0/3878, RunningAvgSamplesPerSec=5.869412413194206, CurrSamplesPerSec=5.3402938436812795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1916/5000 [6:20:14<9:24:21, 10.98s/it][2022-12-20 16:42:49,879] [INFO] [logging.py:68:log_dist] [Rank 0] step=1940, skipped=3, lr=[6.80888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:42:49,881] [INFO] [timer.py:197:stop] 0/3880, RunningAvgSamplesPerSec=5.869405436560416, CurrSamplesPerSec=5.298367986609769, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1917/5000 [6:20:25<9:25:17, 11.00s/it][2022-12-20 16:43:00,901] [INFO] [timer.py:197:stop] 0/3882, RunningAvgSamplesPerSec=5.8694072961784345, CurrSamplesPerSec=5.335487387651544, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1918/5000 [6:20:36<9:25:23, 11.01s/it][2022-12-20 16:43:11,947] [INFO] [timer.py:197:stop] 0/3884, RunningAvgSamplesPerSec=5.869402042839468, CurrSamplesPerSec=5.307456402552905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1919/5000 [6:20:47<9:25:48, 11.02s/it][2022-12-20 16:43:22,963] [INFO] [timer.py:197:stop] 0/3886, RunningAvgSamplesPerSec=5.869405125708295, CurrSamplesPerSec=5.307123769780808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1920/5000 [6:20:58<9:25:35, 11.02s/it][2022-12-20 16:43:34,045] [INFO] [timer.py:197:stop] 0/3888, RunningAvgSamplesPerSec=5.869391301553758, CurrSamplesPerSec=5.276328210939081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1921/5000 [6:21:09<9:26:23, 11.04s/it][2022-12-20 16:43:45,150] [INFO] [timer.py:197:stop] 0/3890, RunningAvgSamplesPerSec=5.869369935923021, CurrSamplesPerSec=5.2670712880852255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1922/5000 [6:21:20<9:27:14, 11.06s/it][2022-12-20 16:43:56,273] [INFO] [timer.py:197:stop] 0/3892, RunningAvgSamplesPerSec=5.869344162309848, CurrSamplesPerSec=5.249545282025271, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1923/5000 [6:21:31<9:28:04, 11.08s/it][2022-12-20 16:44:07,263] [INFO] [timer.py:197:stop] 0/3894, RunningAvgSamplesPerSec=5.869354858142882, CurrSamplesPerSec=5.352553827934376, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1924/5000 [6:21:42<9:26:32, 11.05s/it][2022-12-20 16:44:18,300] [INFO] [timer.py:197:stop] 0/3896, RunningAvgSamplesPerSec=5.869352430820795, CurrSamplesPerSec=5.3142569054734805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 38%|███▊ | 1925/5000 [6:21:53<9:26:08, 11.05s/it] {'loss': 0.0002, 'learning_rate': 6.7911111111111115e-06, 'epoch': 46.94} + 38%|███▊ | 1925/5000 [6:21:53<9:26:08, 11.05s/it][2022-12-20 16:44:29,342] [INFO] [timer.py:197:stop] 0/3898, RunningAvgSamplesPerSec=5.86934956407227, CurrSamplesPerSec=5.329271445449385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1926/5000 [6:22:04<9:25:53, 11.05s/it][2022-12-20 16:44:40,396] [INFO] [logging.py:68:log_dist] [Rank 0] step=1950, skipped=3, lr=[6.786666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:44:40,397] [INFO] [timer.py:197:stop] 0/3900, RunningAvgSamplesPerSec=5.8693419947365815, CurrSamplesPerSec=5.282200618872502, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1927/5000 [6:22:15<9:25:51, 11.05s/it][2022-12-20 16:44:50,510] [INFO] [timer.py:197:stop] 0/3902, RunningAvgSamplesPerSec=5.869591892280097, CurrSamplesPerSec=5.318891952461886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1928/5000 [6:22:31<10:27:46, 12.26s/it][2022-12-20 16:45:01,546] [INFO] [timer.py:197:stop] 0/3904, RunningAvgSamplesPerSec=5.869593820351264, CurrSamplesPerSec=5.338881430115715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1929/5000 [6:22:42<10:08:25, 11.89s/it][2022-12-20 16:45:12,603] [INFO] [timer.py:197:stop] 0/3906, RunningAvgSamplesPerSec=5.869586007882187, CurrSamplesPerSec=5.30195873203379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1930/5000 [6:22:53<9:54:47, 11.62s/it] [2022-12-20 16:45:23,582] [INFO] [timer.py:197:stop] 0/3908, RunningAvgSamplesPerSec=5.86959881910179, CurrSamplesPerSec=5.329788234819466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1931/5000 [6:23:04<9:45:10, 11.44s/it][2022-12-20 16:45:34,643] [INFO] [timer.py:197:stop] 0/3910, RunningAvgSamplesPerSec=5.869589249658905, CurrSamplesPerSec=5.285602117931175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1932/5000 [6:23:15<9:39:38, 11.34s/it][2022-12-20 16:45:45,642] [INFO] [timer.py:197:stop] 0/3912, RunningAvgSamplesPerSec=5.869596773633829, CurrSamplesPerSec=5.367759885341876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1933/5000 [6:23:26<9:34:01, 11.23s/it][2022-12-20 16:45:56,700] [INFO] [timer.py:197:stop] 0/3914, RunningAvgSamplesPerSec=5.869587751762808, CurrSamplesPerSec=5.300550402484532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1934/5000 [6:23:37<9:31:25, 11.18s/it][2022-12-20 16:46:07,761] [INFO] [timer.py:197:stop] 0/3916, RunningAvgSamplesPerSec=5.869577869459803, CurrSamplesPerSec=5.309664167748668, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1935/5000 [6:23:48<9:29:19, 11.14s/it][2022-12-20 16:46:18,780] [INFO] [timer.py:197:stop] 0/3918, RunningAvgSamplesPerSec=5.869579894212139, CurrSamplesPerSec=5.344679654062521, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1936/5000 [6:23:59<9:27:11, 11.11s/it][2022-12-20 16:46:29,819] [INFO] [logging.py:68:log_dist] [Rank 0] step=1960, skipped=3, lr=[6.764444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:46:29,821] [INFO] [timer.py:197:stop] 0/3920, RunningAvgSamplesPerSec=5.8695752995091395, CurrSamplesPerSec=5.323166238926243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▊ | 1937/5000 [6:24:10<9:25:55, 11.09s/it][2022-12-20 16:46:40,871] [INFO] [timer.py:197:stop] 0/3922, RunningAvgSamplesPerSec=5.869568734523539, CurrSamplesPerSec=5.312787568104147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1938/5000 [6:24:21<9:25:21, 11.08s/it][2022-12-20 16:46:51,886] [INFO] [timer.py:197:stop] 0/3924, RunningAvgSamplesPerSec=5.869571431539732, CurrSamplesPerSec=5.3528223714273935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1939/5000 [6:24:32<9:23:46, 11.05s/it][2022-12-20 16:47:02,908] [INFO] [timer.py:197:stop] 0/3926, RunningAvgSamplesPerSec=5.869572475989458, CurrSamplesPerSec=5.3214356145743675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1940/5000 [6:24:43<9:23:15, 11.04s/it][2022-12-20 16:47:13,934] [INFO] [timer.py:197:stop] 0/3928, RunningAvgSamplesPerSec=5.86957576868585, CurrSamplesPerSec=5.334026426729179, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1941/5000 [6:24:54<9:22:10, 11.03s/it][2022-12-20 16:47:24,944] [INFO] [timer.py:197:stop] 0/3930, RunningAvgSamplesPerSec=5.869580090139969, CurrSamplesPerSec=5.302932185522639, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1942/5000 [6:25:05<9:21:50, 11.02s/it][2022-12-20 16:47:35,916] [INFO] [timer.py:197:stop] 0/3932, RunningAvgSamplesPerSec=5.86959479072205, CurrSamplesPerSec=5.342879077955207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1943/5000 [6:25:16<9:20:48, 11.01s/it][2022-12-20 16:47:46,911] [INFO] [timer.py:197:stop] 0/3934, RunningAvgSamplesPerSec=5.869603626291639, CurrSamplesPerSec=5.319073230714407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1944/5000 [6:25:27<9:20:58, 11.01s/it][2022-12-20 16:47:58,016] [INFO] [timer.py:197:stop] 0/3936, RunningAvgSamplesPerSec=5.869582091734974, CurrSamplesPerSec=5.253546135170918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1945/5000 [6:25:38<9:22:14, 11.04s/it][2022-12-20 16:48:09,019] [INFO] [timer.py:197:stop] 0/3938, RunningAvgSamplesPerSec=5.869592221354327, CurrSamplesPerSec=5.349160505947804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1946/5000 [6:25:49<9:21:15, 11.03s/it][2022-12-20 16:48:20,121] [INFO] [logging.py:68:log_dist] [Rank 0] step=1970, skipped=3, lr=[6.742222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:48:20,122] [INFO] [timer.py:197:stop] 0/3940, RunningAvgSamplesPerSec=5.8695713544378565, CurrSamplesPerSec=5.24730946709312, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1947/5000 [6:26:00<9:22:19, 11.05s/it][2022-12-20 16:48:31,168] [INFO] [timer.py:197:stop] 0/3942, RunningAvgSamplesPerSec=5.869566223596272, CurrSamplesPerSec=5.301972974096133, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1948/5000 [6:26:11<9:22:29, 11.06s/it][2022-12-20 16:48:42,202] [INFO] [timer.py:197:stop] 0/3944, RunningAvgSamplesPerSec=5.869568570836033, CurrSamplesPerSec=5.336716783693784, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1949/5000 [6:26:22<9:22:48, 11.07s/it][2022-12-20 16:48:53,381] [INFO] [timer.py:197:stop] 0/3946, RunningAvgSamplesPerSec=5.869546987859969, CurrSamplesPerSec=5.31634482470698, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1950/5000 [6:26:33<9:22:39, 11.07s/it] {'loss': 0.0002, 'learning_rate': 6.735555555555556e-06, 'epoch': 47.55} + 39%|███▉ | 1950/5000 [6:26:33<9:22:39, 11.07s/it][2022-12-20 16:49:04,422] [INFO] [timer.py:197:stop] 0/3948, RunningAvgSamplesPerSec=5.869543442685055, CurrSamplesPerSec=5.287364914245699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1951/5000 [6:26:44<9:22:15, 11.06s/it][2022-12-20 16:49:15,432] [INFO] [timer.py:197:stop] 0/3950, RunningAvgSamplesPerSec=5.869552740584047, CurrSamplesPerSec=5.325567338062437, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1952/5000 [6:26:55<9:21:17, 11.05s/it][2022-12-20 16:49:26,460] [INFO] [timer.py:197:stop] 0/3952, RunningAvgSamplesPerSec=5.86955243940339, CurrSamplesPerSec=5.311936418424618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1953/5000 [6:27:06<9:20:49, 11.04s/it][2022-12-20 16:49:37,442] [INFO] [timer.py:197:stop] 0/3954, RunningAvgSamplesPerSec=5.869564487154895, CurrSamplesPerSec=5.35502894884612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1954/5000 [6:27:17<9:19:43, 11.03s/it][2022-12-20 16:49:48,450] [INFO] [timer.py:197:stop] 0/3956, RunningAvgSamplesPerSec=5.869574362158543, CurrSamplesPerSec=5.334361168436438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1955/5000 [6:27:28<9:18:41, 11.01s/it][2022-12-20 16:49:59,405] [INFO] [timer.py:197:stop] 0/3958, RunningAvgSamplesPerSec=5.869594160762788, CurrSamplesPerSec=5.347177959341796, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1956/5000 [6:27:39<9:18:24, 11.01s/it][2022-12-20 16:50:10,422] [INFO] [logging.py:68:log_dist] [Rank 0] step=1980, skipped=3, lr=[6.720000000000001e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:50:10,423] [INFO] [timer.py:197:stop] 0/3960, RunningAvgSamplesPerSec=5.869596322675665, CurrSamplesPerSec=5.331401465337975, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1957/5000 [6:27:50<9:18:40, 11.02s/it][2022-12-20 16:50:21,433] [INFO] [timer.py:197:stop] 0/3962, RunningAvgSamplesPerSec=5.869601262165809, CurrSamplesPerSec=5.356232526202764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1958/5000 [6:28:01<9:18:11, 11.01s/it][2022-12-20 16:50:32,488] [INFO] [timer.py:197:stop] 0/3964, RunningAvgSamplesPerSec=5.869593648334074, CurrSamplesPerSec=5.304033852812347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1959/5000 [6:28:13<9:18:49, 11.03s/it][2022-12-20 16:50:43,505] [INFO] [timer.py:197:stop] 0/3966, RunningAvgSamplesPerSec=5.86959648903757, CurrSamplesPerSec=5.344486836758444, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1960/5000 [6:28:24<9:18:16, 11.02s/it][2022-12-20 16:50:54,555] [INFO] [timer.py:197:stop] 0/3968, RunningAvgSamplesPerSec=5.869590105769698, CurrSamplesPerSec=5.302128175647129, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1961/5000 [6:28:35<9:18:03, 11.02s/it][2022-12-20 16:51:05,556] [INFO] [timer.py:197:stop] 0/3970, RunningAvgSamplesPerSec=5.869601622349099, CurrSamplesPerSec=5.328819495630727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1962/5000 [6:28:46<9:18:24, 11.03s/it][2022-12-20 16:51:16,577] [INFO] [timer.py:197:stop] 0/3972, RunningAvgSamplesPerSec=5.869603423984077, CurrSamplesPerSec=5.34412465025393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1963/5000 [6:28:57<9:17:25, 11.01s/it][2022-12-20 16:51:27,561] [INFO] [timer.py:197:stop] 0/3974, RunningAvgSamplesPerSec=5.869615143522958, CurrSamplesPerSec=5.336657157114371, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1964/5000 [6:29:08<9:18:03, 11.03s/it][2022-12-20 16:51:38,615] [INFO] [timer.py:197:stop] 0/3976, RunningAvgSamplesPerSec=5.869607727422724, CurrSamplesPerSec=5.347221204644711, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1965/5000 [6:29:19<9:17:27, 11.02s/it][2022-12-20 16:51:49,620] [INFO] [timer.py:197:stop] 0/3978, RunningAvgSamplesPerSec=5.869613895587286, CurrSamplesPerSec=5.344482793277385, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1966/5000 [6:29:30<9:17:12, 11.02s/it][2022-12-20 16:52:00,647] [INFO] [logging.py:68:log_dist] [Rank 0] step=1990, skipped=3, lr=[6.6977777777777785e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:52:00,649] [INFO] [timer.py:197:stop] 0/3980, RunningAvgSamplesPerSec=5.869613029615043, CurrSamplesPerSec=5.332084736214061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1967/5000 [6:29:41<9:17:16, 11.02s/it][2022-12-20 16:52:11,722] [INFO] [timer.py:197:stop] 0/3982, RunningAvgSamplesPerSec=5.869600435932706, CurrSamplesPerSec=5.299258103151714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1968/5000 [6:29:52<9:17:30, 11.03s/it][2022-12-20 16:52:21,935] [INFO] [timer.py:197:stop] 0/3984, RunningAvgSamplesPerSec=5.869817961050262, CurrSamplesPerSec=6.14320397814663, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-20 16:52:32,952] [INFO] [timer.py:197:stop] 0/3986, RunningAvgSamplesPerSec=5.869819792663639, CurrSamplesPerSec=5.305473814264637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1969/5000 [6:30:08<10:36:50, 12.61s/it][2022-12-20 16:52:44,007] [INFO] [timer.py:197:stop] 0/3988, RunningAvgSamplesPerSec=5.869811964981253, CurrSamplesPerSec=5.30674480811317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1970/5000 [6:30:19<10:13:08, 12.14s/it][2022-12-20 16:52:55,012] [INFO] [timer.py:197:stop] 0/3990, RunningAvgSamplesPerSec=5.869817733474977, CurrSamplesPerSec=5.32188019317856, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1971/5000 [6:30:30<9:55:43, 11.80s/it] [2022-12-20 16:53:06,047] [INFO] [timer.py:197:stop] 0/3992, RunningAvgSamplesPerSec=5.8698159209907566, CurrSamplesPerSec=5.302158965698852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1972/5000 [6:30:41<9:43:55, 11.57s/it][2022-12-20 16:53:17,069] [INFO] [timer.py:197:stop] 0/3994, RunningAvgSamplesPerSec=5.869817049151985, CurrSamplesPerSec=5.314862757244781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1973/5000 [6:30:52<9:35:26, 11.41s/it][2022-12-20 16:53:28,132] [INFO] [timer.py:197:stop] 0/3996, RunningAvgSamplesPerSec=5.869807233205918, CurrSamplesPerSec=5.306065708248419, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 39%|███▉ | 1974/5000 [6:31:03<9:30:03, 11.30s/it][2022-12-20 16:53:39,525] [INFO] [timer.py:197:stop] 0/3998, RunningAvgSamplesPerSec=5.869708698504063, CurrSamplesPerSec=5.027969838713019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1975/5000 [6:31:15<9:31:14, 11.33s/it] {'loss': 0.0002, 'learning_rate': 6.677777777777779e-06, 'epoch': 48.17} + 40%|███▉ | 1975/5000 [6:31:15<9:31:14, 11.33s/it][2022-12-20 16:53:50,575] [INFO] [logging.py:68:log_dist] [Rank 0] step=2000, skipped=3, lr=[6.675555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:53:50,577] [INFO] [timer.py:197:stop] 0/4000, RunningAvgSamplesPerSec=5.869712959722663, CurrSamplesPerSec=5.320165799181944, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1976/5000 [6:31:26<9:26:49, 11.25s/it][2022-12-20 16:54:01,622] [INFO] [timer.py:197:stop] 0/4002, RunningAvgSamplesPerSec=5.869707890884144, CurrSamplesPerSec=5.313586189414462, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1977/5000 [6:31:37<9:23:35, 11.19s/it][2022-12-20 16:54:12,689] [INFO] [timer.py:197:stop] 0/4004, RunningAvgSamplesPerSec=5.869697027754707, CurrSamplesPerSec=5.288194455662534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1978/5000 [6:31:48<9:21:37, 11.15s/it][2022-12-20 16:54:23,765] [INFO] [timer.py:197:stop] 0/4006, RunningAvgSamplesPerSec=5.869687648830099, CurrSamplesPerSec=5.3106139770350635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1979/5000 [6:31:59<9:20:17, 11.13s/it][2022-12-20 16:54:34,803] [INFO] [timer.py:197:stop] 0/4008, RunningAvgSamplesPerSec=5.869689189018972, CurrSamplesPerSec=5.320263861331488, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1980/5000 [6:32:10<9:18:44, 11.10s/it][2022-12-20 16:54:45,962] [INFO] [timer.py:197:stop] 0/4010, RunningAvgSamplesPerSec=5.869654010771513, CurrSamplesPerSec=5.213757295916811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1981/5000 [6:32:21<9:19:27, 11.12s/it][2022-12-20 16:54:56,988] [INFO] [timer.py:197:stop] 0/4012, RunningAvgSamplesPerSec=5.8696706573841855, CurrSamplesPerSec=5.345509608299978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1982/5000 [6:32:32<9:17:51, 11.09s/it][2022-12-20 16:55:08,008] [INFO] [timer.py:197:stop] 0/4014, RunningAvgSamplesPerSec=5.869672388766631, CurrSamplesPerSec=5.30914980331051, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1983/5000 [6:32:43<9:16:36, 11.07s/it][2022-12-20 16:55:19,149] [INFO] [timer.py:197:stop] 0/4016, RunningAvgSamplesPerSec=5.869641747035894, CurrSamplesPerSec=5.216913627561566, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1984/5000 [6:32:54<9:17:30, 11.09s/it][2022-12-20 16:55:30,160] [INFO] [timer.py:197:stop] 0/4018, RunningAvgSamplesPerSec=5.869645889132529, CurrSamplesPerSec=5.339104426646775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1985/5000 [6:33:05<9:16:07, 11.07s/it][2022-12-20 16:55:41,210] [INFO] [logging.py:68:log_dist] [Rank 0] step=2010, skipped=3, lr=[6.653333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:55:41,211] [INFO] [timer.py:197:stop] 0/4020, RunningAvgSamplesPerSec=5.869639377105949, CurrSamplesPerSec=5.321645973074997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1986/5000 [6:33:16<9:15:41, 11.06s/it][2022-12-20 16:55:52,276] [INFO] [timer.py:197:stop] 0/4022, RunningAvgSamplesPerSec=5.86962927610254, CurrSamplesPerSec=5.249277762322744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1987/5000 [6:33:27<9:15:33, 11.06s/it][2022-12-20 16:56:03,323] [INFO] [timer.py:197:stop] 0/4024, RunningAvgSamplesPerSec=5.869632974194579, CurrSamplesPerSec=5.303785062744144, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1988/5000 [6:33:38<9:15:07, 11.06s/it][2022-12-20 16:56:14,306] [INFO] [timer.py:197:stop] 0/4026, RunningAvgSamplesPerSec=5.869644635905818, CurrSamplesPerSec=5.339517762810007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1989/5000 [6:33:49<9:13:48, 11.04s/it][2022-12-20 16:56:25,471] [INFO] [timer.py:197:stop] 0/4028, RunningAvgSamplesPerSec=5.8696079325847865, CurrSamplesPerSec=5.183891165950706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1990/5000 [6:34:01<9:15:34, 11.07s/it][2022-12-20 16:56:36,505] [INFO] [timer.py:197:stop] 0/4030, RunningAvgSamplesPerSec=5.869617051723272, CurrSamplesPerSec=5.3520750847196945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1991/5000 [6:34:12<9:14:46, 11.06s/it][2022-12-20 16:56:47,521] [INFO] [timer.py:197:stop] 0/4032, RunningAvgSamplesPerSec=5.869619648895697, CurrSamplesPerSec=5.3190930455981045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1992/5000 [6:34:23<9:13:53, 11.05s/it][2022-12-20 16:56:58,802] [INFO] [timer.py:197:stop] 0/4034, RunningAvgSamplesPerSec=5.869551547210776, CurrSamplesPerSec=5.097142153163929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1993/5000 [6:34:34<9:17:13, 11.12s/it][2022-12-20 16:57:09,875] [INFO] [timer.py:197:stop] 0/4036, RunningAvgSamplesPerSec=5.869554301741147, CurrSamplesPerSec=5.328986428840508, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1994/5000 [6:34:45<9:16:20, 11.10s/it][2022-12-20 16:57:20,889] [INFO] [timer.py:197:stop] 0/4038, RunningAvgSamplesPerSec=5.869557895466522, CurrSamplesPerSec=5.345931815233471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1995/5000 [6:34:56<9:14:46, 11.08s/it][2022-12-20 16:57:32,254] [INFO] [logging.py:68:log_dist] [Rank 0] step=2020, skipped=3, lr=[6.631111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-20 16:57:32,255] [INFO] [timer.py:197:stop] 0/4040, RunningAvgSamplesPerSec=5.86946724439319, CurrSamplesPerSec=5.045618774681076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1996/5000 [6:35:07<9:18:56, 11.16s/it][2022-12-20 16:57:43,330] [INFO] [timer.py:197:stop] 0/4042, RunningAvgSamplesPerSec=5.869454816246396, CurrSamplesPerSec=5.312724479471249, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1997/5000 [6:35:18<9:17:25, 11.14s/it][2022-12-20 16:57:54,315] [INFO] [timer.py:197:stop] 0/4044, RunningAvgSamplesPerSec=5.869466283782735, CurrSamplesPerSec=5.317001703790771, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1998/5000 [6:35:29<9:14:56, 11.09s/it][2022-12-20 16:58:05,347] [INFO] [timer.py:197:stop] 0/4046, RunningAvgSamplesPerSec=5.869465102326746, CurrSamplesPerSec=5.327271894477746, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|███▉ | 1999/5000 [6:35:40<9:13:51, 11.07s/it][2022-12-20 16:58:16,506] [INFO] [timer.py:197:stop] 0/4048, RunningAvgSamplesPerSec=5.869473178920971, CurrSamplesPerSec=5.321830393304674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB + 40%|████ | 2000/5000 [6:35:52<9:14:57, 11.10s/it] {'loss': 0.0002, 'learning_rate': 6.6222222222222236e-06, 'epoch': 48.77} + 40%|████ | 2000/5000 [6:35:52<9:14:57, 11.10s/it][INFO|trainer.py:2955] 2022-12-20 16:58:16,513 >> ***** Running Evaluation ***** +[INFO|trainer.py:2957] 2022-12-20 16:58:16,513 >> Num examples = 1325 +[INFO|trainer.py:2960] 2022-12-20 16:58:16,513 >> Batch size = 8 + + 0%| | 0/166 [00:00> Saving model checkpoint to ./checkpoint-2000 +[INFO|configuration_utils.py:447] 2022-12-20 17:19:45,605 >> Configuration saved in ./checkpoint-2000/config.json +[INFO|modeling_utils.py:1680] 2022-12-20 17:19:49,244 >> Model weights saved in ./checkpoint-2000/pytorch_model.bin +[INFO|feature_extraction_utils.py:368] 2022-12-20 17:19:49,259 >> Feature extractor saved in ./checkpoint-2000/preprocessor_config.json +[2022-12-20 17:19:49,260] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step2024 is begin to save! +/home/milan/hf_env/lib/python3.8/site-packages/torch/nn/modules/module.py:1428: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details. + warnings.warn( +[2022-12-20 17:19:49,270] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-2000/global_step2024/mp_rank_00_model_states.pt +[2022-12-20 17:19:49,270] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-2000/global_step2024/mp_rank_00_model_states.pt... +[2022-12-20 17:19:52,982] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-2000/global_step2024/mp_rank_00_model_states.pt. +[2022-12-20 17:19:52,984] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-2000/global_step2024/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-20 17:20:08,330] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-2000/global_step2024/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-20 17:20:08,330] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-2000/global_step2024/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-20 17:20:08,330] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2024 is ready now! +[INFO|feature_extraction_utils.py:368] 2022-12-20 17:20:13,382 >> Feature extractor saved in ./preprocessor_config.json