Namespace(aa='rand-m9-mstd0.5-inc1', auto_resume=True, batch_size=512, cfg_path='iFormer_t.yaml', clip_grad=None, color_jitter=0.4, crop_pct=None, cutmix=1.0, cutmix_minmax=None, data_path='imagenet', data_set='IMNET', device='cuda', disable_eval=False, dist_backend='nccl', dist_eval=True, dist_on_itp=False, dist_url='env://', distributed=True, drop_path=0, enable_wandb=False, epochs=300, eval=False, eval_data_path=None, finetune='', gpu=0, head_init_scale=1.0, imagenet_default_mean_and_std=True, input_size=224, layer_decay=1.0, layer_scale_init_value=0, local_rank=-1, log_dir=None, lr=0.004, min_lr=1e-06, mixup=0.8, mixup_mode='batch', mixup_prob=1.0, mixup_switch_prob=0.5, model='iFormer_t', model_ema=False, model_ema_decay=0.9999, model_ema_eval=False, model_ema_force_cpu=False, model_key='model|module', model_prefix='', momentum=0.9, nb_classes=1000, num_workers=16, opt='adamw', opt_betas=None, opt_eps=1e-08, output_dir='', pin_mem=True, project='iFormer', rank=0, recount=1, remode='pixel', reprob=0.25, resplit=False, resume='', save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, seed=0, smoothing=0.1, start_epoch=0, train_interpolation='bicubic', update_freq=1, use_amp=False, wandb_ckpt=False, warmup_epochs=20, warmup_steps=-1, weight_decay=0.05, weight_decay_end=None, world_size=8) Transform = RandomResizedCropAndInterpolation(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BICUBIC) RandomHorizontalFlip(p=0.5) ToTensor() Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250])) --------------------------- reading from datapath imagenet Number of the class = 1000 Transform = Resize(size=256, interpolation=bicubic, max_size=None, antialias=warn) CenterCrop(size=(224, 224)) ToTensor() Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) --------------------------- reading from datapath imagenet Number of the class = 1000 Sampler_train = Mixup is activated! Model = iFormer( (downsample_layers): ModuleList( (0): Sequential( (0): Conv2d_BN( (c): Conv2d(3, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False) (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): EdgeResidual( (conv_exp_bn1): Conv2d_BN( (c): Conv2d(16, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (act): GELU(approximate='none') (conv_pwl_bn2): Conv2d_BN( (c): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (1): Sequential( (0): Conv2d_BN( (c): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (2): Sequential( (0): Conv2d_BN( (c): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (3): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (stages): ModuleList( (0): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(32, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=32, bias=False) (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(32, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(96, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(32, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=32, bias=False) (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(32, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(96, 32, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) ) (1): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=64, bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(64, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=64, bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) ) (2): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (2): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (3): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (4): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (5): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (6): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (7): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (8): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (9): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (10): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (11): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (12): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (13): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (14): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (15): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128, bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) ) (3): Sequential( (0): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (1): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (2): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (3): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) (4): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): GAU2dv2( (q): Conv2d_BN( (c): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) (5): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) ) ) ) ) (classifier): Classfier( (classifier): BN_Linear( (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (l): Linear(in_features=256, out_features=1000, bias=True) ) ) ) number of params: 2886456 LR = 0.00400000 Batch size = 4096 Update frequent = 1 Number of training examples = 1281167 Number of training training per epoch = 312 Param groups = { "decay": { "weight_decay": 0.05, "params": [ "downsample_layers.0.0.c.weight", "downsample_layers.0.2.conv_exp_bn1.c.weight", "downsample_layers.0.2.conv_pwl_bn2.c.weight", "downsample_layers.1.0.c.weight", "downsample_layers.2.0.c.weight", "downsample_layers.3.0.c.weight", "stages.0.0.block.token_channel_mixer.m.0.c.weight", "stages.0.0.block.token_channel_mixer.m.1.c.weight", "stages.0.0.block.token_channel_mixer.m.3.c.weight", "stages.0.1.block.token_channel_mixer.m.0.c.weight", "stages.0.1.block.token_channel_mixer.m.1.c.weight", "stages.0.1.block.token_channel_mixer.m.3.c.weight", "stages.1.0.block.token_channel_mixer.m.0.c.weight", "stages.1.0.block.token_channel_mixer.m.1.c.weight", "stages.1.0.block.token_channel_mixer.m.3.c.weight", "stages.1.1.block.token_channel_mixer.m.0.c.weight", "stages.1.1.block.token_channel_mixer.m.1.c.weight", "stages.1.1.block.token_channel_mixer.m.3.c.weight", "stages.2.0.block.token_channel_mixer.m.0.c.weight", "stages.2.0.block.token_channel_mixer.m.1.c.weight", "stages.2.0.block.token_channel_mixer.m.3.c.weight", "stages.2.1.block.token_channel_mixer.m.0.c.weight", "stages.2.1.block.token_channel_mixer.m.1.c.weight", "stages.2.1.block.token_channel_mixer.m.3.c.weight", "stages.2.2.block.token_channel_mixer.m.0.c.weight", "stages.2.2.block.token_channel_mixer.m.1.c.weight", "stages.2.2.block.token_channel_mixer.m.3.c.weight", "stages.2.3.block.token_channel_mixer.m.0.c.weight", "stages.2.3.block.token_channel_mixer.m.1.c.weight", "stages.2.3.block.token_channel_mixer.m.3.c.weight", "stages.2.4.block.token_channel_mixer.m.0.c.weight", "stages.2.4.block.token_channel_mixer.m.1.c.weight", "stages.2.4.block.token_channel_mixer.m.3.c.weight", "stages.2.5.block.token_channel_mixer.m.0.c.weight", "stages.2.5.block.token_channel_mixer.m.1.c.weight", "stages.2.5.block.token_channel_mixer.m.3.c.weight", "stages.2.6.block.cpe.m.c.weight", "stages.2.7.block.token_channel_mixer.m.q.c.weight", "stages.2.7.block.token_channel_mixer.m.k.c.weight", "stages.2.7.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.7.block.token_channel_mixer.m.proj.c.weight", "stages.2.8.block.channel_mixer.m.0.c.weight", "stages.2.8.block.channel_mixer.m.2.c.weight", "stages.2.9.block.cpe.m.c.weight", "stages.2.10.block.token_channel_mixer.m.q.c.weight", "stages.2.10.block.token_channel_mixer.m.k.c.weight", "stages.2.10.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.10.block.token_channel_mixer.m.proj.c.weight", "stages.2.11.block.channel_mixer.m.0.c.weight", "stages.2.11.block.channel_mixer.m.2.c.weight", "stages.2.12.block.cpe.m.c.weight", "stages.2.13.block.token_channel_mixer.m.q.c.weight", "stages.2.13.block.token_channel_mixer.m.k.c.weight", "stages.2.13.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.13.block.token_channel_mixer.m.proj.c.weight", "stages.2.14.block.channel_mixer.m.0.c.weight", "stages.2.14.block.channel_mixer.m.2.c.weight", "stages.2.15.block.token_channel_mixer.m.0.c.weight", "stages.2.15.block.token_channel_mixer.m.1.c.weight", "stages.2.15.block.token_channel_mixer.m.3.c.weight", "stages.3.0.block.cpe.m.c.weight", "stages.3.1.block.token_channel_mixer.m.q.c.weight", "stages.3.1.block.token_channel_mixer.m.k.c.weight", "stages.3.1.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.1.block.token_channel_mixer.m.proj.c.weight", "stages.3.2.block.channel_mixer.m.0.c.weight", "stages.3.2.block.channel_mixer.m.2.c.weight", "stages.3.3.block.cpe.m.c.weight", "stages.3.4.block.token_channel_mixer.m.q.c.weight", "stages.3.4.block.token_channel_mixer.m.k.c.weight", "stages.3.4.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.4.block.token_channel_mixer.m.proj.c.weight", "stages.3.5.block.channel_mixer.m.0.c.weight", "stages.3.5.block.channel_mixer.m.2.c.weight", "classifier.classifier.l.weight" ], "lr_scale": 1.0 }, "no_decay": { "weight_decay": 0.0, "params": [ "downsample_layers.0.0.bn.weight", "downsample_layers.0.0.bn.bias", "downsample_layers.0.2.conv_exp_bn1.bn.weight", "downsample_layers.0.2.conv_exp_bn1.bn.bias", "downsample_layers.0.2.conv_pwl_bn2.bn.weight", "downsample_layers.0.2.conv_pwl_bn2.bn.bias", "downsample_layers.1.0.bn.weight", "downsample_layers.1.0.bn.bias", "downsample_layers.2.0.bn.weight", "downsample_layers.2.0.bn.bias", "downsample_layers.3.0.bn.weight", "downsample_layers.3.0.bn.bias", "stages.0.0.block.token_channel_mixer.m.0.bn.weight", "stages.0.0.block.token_channel_mixer.m.0.bn.bias", "stages.0.0.block.token_channel_mixer.m.1.bn.weight", "stages.0.0.block.token_channel_mixer.m.1.bn.bias", "stages.0.0.block.token_channel_mixer.m.3.bn.weight", "stages.0.0.block.token_channel_mixer.m.3.bn.bias", "stages.0.1.block.token_channel_mixer.m.0.bn.weight", "stages.0.1.block.token_channel_mixer.m.0.bn.bias", "stages.0.1.block.token_channel_mixer.m.1.bn.weight", "stages.0.1.block.token_channel_mixer.m.1.bn.bias", "stages.0.1.block.token_channel_mixer.m.3.bn.weight", "stages.0.1.block.token_channel_mixer.m.3.bn.bias", "stages.1.0.block.token_channel_mixer.m.0.bn.weight", "stages.1.0.block.token_channel_mixer.m.0.bn.bias", "stages.1.0.block.token_channel_mixer.m.1.bn.weight", "stages.1.0.block.token_channel_mixer.m.1.bn.bias", "stages.1.0.block.token_channel_mixer.m.3.bn.weight", "stages.1.0.block.token_channel_mixer.m.3.bn.bias", "stages.1.1.block.token_channel_mixer.m.0.bn.weight", "stages.1.1.block.token_channel_mixer.m.0.bn.bias", "stages.1.1.block.token_channel_mixer.m.1.bn.weight", "stages.1.1.block.token_channel_mixer.m.1.bn.bias", "stages.1.1.block.token_channel_mixer.m.3.bn.weight", "stages.1.1.block.token_channel_mixer.m.3.bn.bias", "stages.2.0.block.token_channel_mixer.m.0.bn.weight", "stages.2.0.block.token_channel_mixer.m.0.bn.bias", "stages.2.0.block.token_channel_mixer.m.1.bn.weight", "stages.2.0.block.token_channel_mixer.m.1.bn.bias", "stages.2.0.block.token_channel_mixer.m.3.bn.weight", "stages.2.0.block.token_channel_mixer.m.3.bn.bias", "stages.2.1.block.token_channel_mixer.m.0.bn.weight", "stages.2.1.block.token_channel_mixer.m.0.bn.bias", "stages.2.1.block.token_channel_mixer.m.1.bn.weight", "stages.2.1.block.token_channel_mixer.m.1.bn.bias", "stages.2.1.block.token_channel_mixer.m.3.bn.weight", "stages.2.1.block.token_channel_mixer.m.3.bn.bias", "stages.2.2.block.token_channel_mixer.m.0.bn.weight", "stages.2.2.block.token_channel_mixer.m.0.bn.bias", "stages.2.2.block.token_channel_mixer.m.1.bn.weight", "stages.2.2.block.token_channel_mixer.m.1.bn.bias", "stages.2.2.block.token_channel_mixer.m.3.bn.weight", "stages.2.2.block.token_channel_mixer.m.3.bn.bias", "stages.2.3.block.token_channel_mixer.m.0.bn.weight", "stages.2.3.block.token_channel_mixer.m.0.bn.bias", "stages.2.3.block.token_channel_mixer.m.1.bn.weight", "stages.2.3.block.token_channel_mixer.m.1.bn.bias", "stages.2.3.block.token_channel_mixer.m.3.bn.weight", "stages.2.3.block.token_channel_mixer.m.3.bn.bias", "stages.2.4.block.token_channel_mixer.m.0.bn.weight", "stages.2.4.block.token_channel_mixer.m.0.bn.bias", "stages.2.4.block.token_channel_mixer.m.1.bn.weight", "stages.2.4.block.token_channel_mixer.m.1.bn.bias", "stages.2.4.block.token_channel_mixer.m.3.bn.weight", "stages.2.4.block.token_channel_mixer.m.3.bn.bias", "stages.2.5.block.token_channel_mixer.m.0.bn.weight", "stages.2.5.block.token_channel_mixer.m.0.bn.bias", "stages.2.5.block.token_channel_mixer.m.1.bn.weight", "stages.2.5.block.token_channel_mixer.m.1.bn.bias", "stages.2.5.block.token_channel_mixer.m.3.bn.weight", "stages.2.5.block.token_channel_mixer.m.3.bn.bias", "stages.2.6.block.cpe.m.bn.weight", "stages.2.6.block.cpe.m.bn.bias", "stages.2.7.block.token_channel_mixer.m.q.bn.weight", "stages.2.7.block.token_channel_mixer.m.q.bn.bias", "stages.2.7.block.token_channel_mixer.m.k.bn.weight", "stages.2.7.block.token_channel_mixer.m.k.bn.bias", "stages.2.7.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.7.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.7.block.token_channel_mixer.m.proj.bn.weight", "stages.2.7.block.token_channel_mixer.m.proj.bn.bias", "stages.2.8.block.channel_mixer.m.0.bn.weight", "stages.2.8.block.channel_mixer.m.0.bn.bias", "stages.2.8.block.channel_mixer.m.2.bn.weight", "stages.2.8.block.channel_mixer.m.2.bn.bias", "stages.2.9.block.cpe.m.bn.weight", "stages.2.9.block.cpe.m.bn.bias", "stages.2.10.block.token_channel_mixer.m.q.bn.weight", "stages.2.10.block.token_channel_mixer.m.q.bn.bias", "stages.2.10.block.token_channel_mixer.m.k.bn.weight", "stages.2.10.block.token_channel_mixer.m.k.bn.bias", "stages.2.10.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.10.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.10.block.token_channel_mixer.m.proj.bn.weight", "stages.2.10.block.token_channel_mixer.m.proj.bn.bias", "stages.2.11.block.channel_mixer.m.0.bn.weight", "stages.2.11.block.channel_mixer.m.0.bn.bias", "stages.2.11.block.channel_mixer.m.2.bn.weight", "stages.2.11.block.channel_mixer.m.2.bn.bias", "stages.2.12.block.cpe.m.bn.weight", "stages.2.12.block.cpe.m.bn.bias", "stages.2.13.block.token_channel_mixer.m.q.bn.weight", "stages.2.13.block.token_channel_mixer.m.q.bn.bias", "stages.2.13.block.token_channel_mixer.m.k.bn.weight", "stages.2.13.block.token_channel_mixer.m.k.bn.bias", "stages.2.13.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.13.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.13.block.token_channel_mixer.m.proj.bn.weight", "stages.2.13.block.token_channel_mixer.m.proj.bn.bias", "stages.2.14.block.channel_mixer.m.0.bn.weight", "stages.2.14.block.channel_mixer.m.0.bn.bias", "stages.2.14.block.channel_mixer.m.2.bn.weight", "stages.2.14.block.channel_mixer.m.2.bn.bias", "stages.2.15.block.token_channel_mixer.m.0.bn.weight", "stages.2.15.block.token_channel_mixer.m.0.bn.bias", "stages.2.15.block.token_channel_mixer.m.1.bn.weight", "stages.2.15.block.token_channel_mixer.m.1.bn.bias", "stages.2.15.block.token_channel_mixer.m.3.bn.weight", "stages.2.15.block.token_channel_mixer.m.3.bn.bias", "stages.3.0.block.cpe.m.bn.weight", "stages.3.0.block.cpe.m.bn.bias", "stages.3.1.block.token_channel_mixer.m.q.bn.weight", "stages.3.1.block.token_channel_mixer.m.q.bn.bias", "stages.3.1.block.token_channel_mixer.m.k.bn.weight", "stages.3.1.block.token_channel_mixer.m.k.bn.bias", "stages.3.1.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.1.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.1.block.token_channel_mixer.m.proj.bn.weight", "stages.3.1.block.token_channel_mixer.m.proj.bn.bias", "stages.3.2.block.channel_mixer.m.0.bn.weight", "stages.3.2.block.channel_mixer.m.0.bn.bias", "stages.3.2.block.channel_mixer.m.2.bn.weight", "stages.3.2.block.channel_mixer.m.2.bn.bias", "stages.3.3.block.cpe.m.bn.weight", "stages.3.3.block.cpe.m.bn.bias", "stages.3.4.block.token_channel_mixer.m.q.bn.weight", "stages.3.4.block.token_channel_mixer.m.q.bn.bias", "stages.3.4.block.token_channel_mixer.m.k.bn.weight", "stages.3.4.block.token_channel_mixer.m.k.bn.bias", "stages.3.4.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.4.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.4.block.token_channel_mixer.m.proj.bn.weight", "stages.3.4.block.token_channel_mixer.m.proj.bn.bias", "stages.3.5.block.channel_mixer.m.0.bn.weight", "stages.3.5.block.channel_mixer.m.0.bn.bias", "stages.3.5.block.channel_mixer.m.2.bn.weight", "stages.3.5.block.channel_mixer.m.2.bn.bias", "classifier.classifier.bn.weight", "classifier.classifier.bn.bias", "classifier.classifier.l.bias" ], "lr_scale": 1.0 } } Use Cosine LR scheduler Set warmup steps = 6240 Set warmup steps = 0 Max WD = 0.0500000, Min WD = 0.0500000 criterion = SoftTargetCrossEntropy() Auto resume checkpoint: ./checkpoint-184.pth Resume checkpoint ./checkpoint-184.pth With optim & sched! Start training for 300 epochs Epoch: [185] [ 0/312] eta: 3:28:39 lr: 0.001447 min_lr: 0.001447 loss: 4.7776 (4.7776) weight_decay: 0.0500 (0.0500) time: 40.1251 data: 21.4799 max mem: 21002 Epoch: [185] [ 10/312] eta: 0:20:32 lr: 0.001446 min_lr: 0.001446 loss: 4.3197 (4.2576) weight_decay: 0.0500 (0.0500) time: 4.0813 data: 1.9532 max mem: 21002 Epoch: [185] [ 20/312] eta: 0:11:02 lr: 0.001446 min_lr: 0.001446 loss: 4.1258 (4.1144) weight_decay: 0.0500 (0.0500) time: 0.3774 data: 0.0006 max mem: 21002 Epoch: [185] [ 30/312] eta: 0:07:39 lr: 0.001445 min_lr: 0.001445 loss: 4.1043 (4.1046) weight_decay: 0.0500 (0.0500) time: 0.2782 data: 0.0006 max mem: 21002 Epoch: [185] [ 40/312] eta: 0:06:49 lr: 0.001444 min_lr: 0.001444 loss: 3.9541 (4.0304) weight_decay: 0.0500 (0.0500) time: 0.7042 data: 0.3828 max mem: 21002 Epoch: [185] [ 50/312] eta: 0:06:26 lr: 0.001444 min_lr: 0.001444 loss: 3.8703 (4.0131) weight_decay: 0.0500 (0.0500) time: 1.2351 data: 0.8635 max mem: 21002 Epoch: [185] [ 60/312] eta: 0:05:35 lr: 0.001443 min_lr: 0.001443 loss: 3.9302 (3.9807) weight_decay: 0.0500 (0.0500) time: 0.9770 data: 0.5809 max mem: 21002 Epoch: [185] [ 70/312] eta: 0:05:24 lr: 0.001442 min_lr: 0.001442 loss: 3.9200 (3.9395) weight_decay: 0.0500 (0.0500) time: 0.9999 data: 0.6010 max mem: 21002 Epoch: [185] [ 80/312] eta: 0:04:42 lr: 0.001441 min_lr: 0.001441 loss: 3.8287 (3.9173) weight_decay: 0.0500 (0.0500) time: 0.8643 data: 0.5015 max mem: 21002 Epoch: [185] [ 90/312] eta: 0:04:31 lr: 0.001441 min_lr: 0.001441 loss: 3.9179 (3.9136) weight_decay: 0.0500 (0.0500) time: 0.8141 data: 0.4236 max mem: 21002 Epoch: [185] [100/312] eta: 0:04:17 lr: 0.001440 min_lr: 0.001440 loss: 4.0469 (3.9254) weight_decay: 0.0500 (0.0500) time: 1.2128 data: 0.7309 max mem: 21002 Epoch: [185] [110/312] eta: 0:03:53 lr: 0.001439 min_lr: 0.001439 loss: 4.0469 (3.9285) weight_decay: 0.0500 (0.0500) time: 0.8341 data: 0.4102 max mem: 21002 Epoch: [185] [120/312] eta: 0:03:44 lr: 0.001439 min_lr: 0.001439 loss: 3.9577 (3.9325) weight_decay: 0.0500 (0.0500) time: 0.9251 data: 0.5630 max mem: 21002 Epoch: [185] [130/312] eta: 0:03:31 lr: 0.001438 min_lr: 0.001438 loss: 3.9283 (3.9191) weight_decay: 0.0500 (0.0500) time: 1.1961 data: 0.7978 max mem: 21002 Epoch: [185] [140/312] eta: 0:03:13 lr: 0.001437 min_lr: 0.001437 loss: 3.7867 (3.9053) weight_decay: 0.0500 (0.0500) time: 0.8616 data: 0.4782 max mem: 21002 Epoch: [185] [150/312] eta: 0:03:05 lr: 0.001437 min_lr: 0.001437 loss: 3.7464 (3.8951) weight_decay: 0.0500 (0.0500) time: 1.0321 data: 0.5708 max mem: 21002 Epoch: [185] [160/312] eta: 0:02:45 lr: 0.001436 min_lr: 0.001436 loss: 3.9646 (3.9011) weight_decay: 0.0500 (0.0500) time: 0.8511 data: 0.4303 max mem: 21002 Epoch: [185] [170/312] eta: 0:02:37 lr: 0.001435 min_lr: 0.001435 loss: 3.9700 (3.8935) weight_decay: 0.0500 (0.0500) time: 0.8449 data: 0.4511 max mem: 21002 Epoch: [185] [180/312] eta: 0:02:25 lr: 0.001435 min_lr: 0.001435 loss: 3.7780 (3.8790) weight_decay: 0.0500 (0.0500) time: 1.2164 data: 0.7146 max mem: 21002 Epoch: [185] [190/312] eta: 0:02:10 lr: 0.001434 min_lr: 0.001434 loss: 3.7688 (3.8758) weight_decay: 0.0500 (0.0500) time: 0.7711 data: 0.3790 max mem: 21002 Epoch: [185] [200/312] eta: 0:02:01 lr: 0.001433 min_lr: 0.001433 loss: 3.6115 (3.8608) weight_decay: 0.0500 (0.0500) time: 0.8918 data: 0.5042 max mem: 21002 Epoch: [185] [210/312] eta: 0:01:50 lr: 0.001432 min_lr: 0.001432 loss: 3.8248 (3.8548) weight_decay: 0.0500 (0.0500) time: 1.1313 data: 0.6378 max mem: 21002 Epoch: [185] [220/312] eta: 0:01:37 lr: 0.001432 min_lr: 0.001432 loss: 3.9037 (3.8480) weight_decay: 0.0500 (0.0500) time: 0.8406 data: 0.4300 max mem: 21002 Epoch: [185] [230/312] eta: 0:01:28 lr: 0.001431 min_lr: 0.001431 loss: 3.8900 (3.8443) weight_decay: 0.0500 (0.0500) time: 1.0816 data: 0.6117 max mem: 21002 Epoch: [185] [240/312] eta: 0:01:15 lr: 0.001430 min_lr: 0.001430 loss: 3.9297 (3.8436) weight_decay: 0.0500 (0.0500) time: 0.8735 data: 0.4309 max mem: 21002 Epoch: [185] [250/312] eta: 0:01:05 lr: 0.001430 min_lr: 0.001430 loss: 3.7176 (3.8357) weight_decay: 0.0500 (0.0500) time: 0.8808 data: 0.4207 max mem: 21002 Epoch: [185] [260/312] eta: 0:00:55 lr: 0.001429 min_lr: 0.001429 loss: 3.6910 (3.8268) weight_decay: 0.0500 (0.0500) time: 1.3171 data: 0.6734 max mem: 21002 Epoch: [185] [270/312] eta: 0:00:43 lr: 0.001428 min_lr: 0.001428 loss: 3.6211 (3.8215) weight_decay: 0.0500 (0.0500) time: 0.8236 data: 0.3524 max mem: 21002 Epoch: [185] [280/312] eta: 0:00:33 lr: 0.001428 min_lr: 0.001428 loss: 4.0236 (3.8273) weight_decay: 0.0500 (0.0500) time: 0.9296 data: 0.3895 max mem: 21002 Epoch: [185] [290/312] eta: 0:00:23 lr: 0.001427 min_lr: 0.001427 loss: 4.0236 (3.8202) weight_decay: 0.0500 (0.0500) time: 1.1625 data: 0.3881 max mem: 21002 Epoch: [185] [300/312] eta: 0:00:12 lr: 0.001426 min_lr: 0.001426 loss: 3.4791 (3.8090) weight_decay: 0.0500 (0.0500) time: 0.8002 data: 0.2368 max mem: 21002 Epoch: [185] [310/312] eta: 0:00:02 lr: 0.001426 min_lr: 0.001426 loss: 3.6150 (3.8066) weight_decay: 0.0500 (0.0500) time: 0.6982 data: 0.2672 max mem: 21002 Epoch: [185] [311/312] eta: 0:00:01 lr: 0.001426 min_lr: 0.001426 loss: 3.6150 (3.8073) weight_decay: 0.0500 (0.0500) time: 0.6775 data: 0.2672 max mem: 21002 Epoch: [185] Total time: 0:05:22 (1.0321 s / it) Averaged stats: lr: 0.001426 min_lr: 0.001426 loss: 3.6150 (3.8315) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:03:17 loss: 1.4889 (1.4889) acc1: 70.3125 (70.3125) acc5: 90.3646 (90.3646) time: 21.9968 data: 17.9279 max mem: 21002 Test: [8/9] eta: 0:00:02 loss: 1.9074 (1.8183) acc1: 63.2812 (63.7280) acc5: 85.1562 (85.2160) time: 2.6415 data: 1.9921 max mem: 21002 Test: Total time: 0:00:23 (2.6533 s / it) * Acc@1 63.080 Acc@5 85.666 loss 1.814 Accuracy of the model on the 50000 test images: 63.1% Max accuracy: 63.08% Epoch: [186] [ 0/312] eta: 1:26:41 lr: 0.001425 min_lr: 0.001425 loss: 4.6332 (4.6332) weight_decay: 0.0500 (0.0500) time: 16.6699 data: 11.4587 max mem: 21002 Epoch: [186] [ 10/312] eta: 0:10:00 lr: 0.001425 min_lr: 0.001425 loss: 4.2259 (4.2220) weight_decay: 0.0500 (0.0500) time: 1.9887 data: 1.0421 max mem: 21002 Epoch: [186] [ 20/312] eta: 0:06:17 lr: 0.001424 min_lr: 0.001424 loss: 4.1311 (4.0367) weight_decay: 0.0500 (0.0500) time: 0.5222 data: 0.0006 max mem: 21002 Epoch: [186] [ 30/312] eta: 0:04:52 lr: 0.001423 min_lr: 0.001423 loss: 4.0918 (3.9926) weight_decay: 0.0500 (0.0500) time: 0.5145 data: 0.0007 max mem: 21002 Epoch: [186] [ 40/312] eta: 0:04:06 lr: 0.001423 min_lr: 0.001423 loss: 4.0401 (3.9831) weight_decay: 0.0500 (0.0500) time: 0.4990 data: 0.0007 max mem: 21002 Epoch: [186] [ 50/312] eta: 0:03:34 lr: 0.001422 min_lr: 0.001422 loss: 3.8079 (3.9437) weight_decay: 0.0500 (0.0500) time: 0.4822 data: 0.0009 max mem: 21002 Epoch: [186] [ 60/312] eta: 0:03:10 lr: 0.001421 min_lr: 0.001421 loss: 3.8025 (3.9034) weight_decay: 0.0500 (0.0500) time: 0.4567 data: 0.0010 max mem: 21002 Epoch: [186] [ 70/312] eta: 0:02:52 lr: 0.001421 min_lr: 0.001421 loss: 3.8025 (3.8713) weight_decay: 0.0500 (0.0500) time: 0.4410 data: 0.0011 max mem: 21002 Epoch: [186] [ 80/312] eta: 0:02:37 lr: 0.001420 min_lr: 0.001420 loss: 3.7597 (3.8502) weight_decay: 0.0500 (0.0500) time: 0.4466 data: 0.0010 max mem: 21002 Epoch: [186] [ 90/312] eta: 0:02:25 lr: 0.001419 min_lr: 0.001419 loss: 4.0322 (3.8707) weight_decay: 0.0500 (0.0500) time: 0.4486 data: 0.0008 max mem: 21002 Epoch: [186] [100/312] eta: 0:02:14 lr: 0.001419 min_lr: 0.001419 loss: 3.9802 (3.8827) weight_decay: 0.0500 (0.0500) time: 0.4435 data: 0.0016 max mem: 21002 Epoch: [186] [110/312] eta: 0:02:04 lr: 0.001418 min_lr: 0.001418 loss: 3.9384 (3.8691) weight_decay: 0.0500 (0.0500) time: 0.4428 data: 0.0028 max mem: 21002 Epoch: [186] [120/312] eta: 0:01:55 lr: 0.001417 min_lr: 0.001417 loss: 4.0920 (3.9020) weight_decay: 0.0500 (0.0500) time: 0.4441 data: 0.0020 max mem: 21002 Epoch: [186] [130/312] eta: 0:01:48 lr: 0.001416 min_lr: 0.001416 loss: 4.1042 (3.9061) weight_decay: 0.0500 (0.0500) time: 0.4662 data: 0.0239 max mem: 21002 Epoch: [186] [140/312] eta: 0:01:41 lr: 0.001416 min_lr: 0.001416 loss: 3.8015 (3.8862) weight_decay: 0.0500 (0.0500) time: 0.5135 data: 0.0946 max mem: 21002 Epoch: [186] [150/312] eta: 0:01:34 lr: 0.001415 min_lr: 0.001415 loss: 3.6452 (3.8728) weight_decay: 0.0500 (0.0500) time: 0.5211 data: 0.1418 max mem: 21002 Epoch: [186] [160/312] eta: 0:01:29 lr: 0.001414 min_lr: 0.001414 loss: 3.5607 (3.8486) weight_decay: 0.0500 (0.0500) time: 0.5732 data: 0.1631 max mem: 21002 Epoch: [186] [170/312] eta: 0:01:21 lr: 0.001414 min_lr: 0.001414 loss: 3.7257 (3.8534) weight_decay: 0.0500 (0.0500) time: 0.5125 data: 0.1352 max mem: 21002 Epoch: [186] [180/312] eta: 0:01:16 lr: 0.001413 min_lr: 0.001413 loss: 3.9088 (3.8550) weight_decay: 0.0500 (0.0500) time: 0.5313 data: 0.1064 max mem: 21002 Epoch: [186] [190/312] eta: 0:01:10 lr: 0.001412 min_lr: 0.001412 loss: 3.8104 (3.8486) weight_decay: 0.0500 (0.0500) time: 0.6336 data: 0.0822 max mem: 21002 Epoch: [186] [200/312] eta: 0:01:04 lr: 0.001412 min_lr: 0.001412 loss: 3.9454 (3.8691) weight_decay: 0.0500 (0.0500) time: 0.4885 data: 0.0713 max mem: 21002 Epoch: [186] [210/312] eta: 0:00:59 lr: 0.001411 min_lr: 0.001411 loss: 3.9158 (3.8575) weight_decay: 0.0500 (0.0500) time: 0.5529 data: 0.1342 max mem: 21002 Epoch: [186] [220/312] eta: 0:00:52 lr: 0.001410 min_lr: 0.001410 loss: 3.8995 (3.8535) weight_decay: 0.0500 (0.0500) time: 0.5961 data: 0.0960 max mem: 21002 Epoch: [186] [230/312] eta: 0:00:47 lr: 0.001410 min_lr: 0.001410 loss: 4.0158 (3.8465) weight_decay: 0.0500 (0.0500) time: 0.5277 data: 0.0810 max mem: 21002 Epoch: [186] [240/312] eta: 0:00:41 lr: 0.001409 min_lr: 0.001409 loss: 4.0709 (3.8547) weight_decay: 0.0500 (0.0500) time: 0.6248 data: 0.1249 max mem: 21002 Epoch: [186] [250/312] eta: 0:00:35 lr: 0.001408 min_lr: 0.001408 loss: 4.1024 (3.8606) weight_decay: 0.0500 (0.0500) time: 0.4797 data: 0.0588 max mem: 21002 Epoch: [186] [260/312] eta: 0:00:29 lr: 0.001408 min_lr: 0.001408 loss: 4.0443 (3.8614) weight_decay: 0.0500 (0.0500) time: 0.5083 data: 0.0483 max mem: 21002 Epoch: [186] [270/312] eta: 0:00:24 lr: 0.001407 min_lr: 0.001407 loss: 4.0904 (3.8701) weight_decay: 0.0500 (0.0500) time: 0.6736 data: 0.0484 max mem: 21002 Epoch: [186] [280/312] eta: 0:00:18 lr: 0.001406 min_lr: 0.001406 loss: 4.1406 (3.8694) weight_decay: 0.0500 (0.0500) time: 0.5016 data: 0.0506 max mem: 21002 Epoch: [186] [290/312] eta: 0:00:12 lr: 0.001406 min_lr: 0.001406 loss: 3.8005 (3.8631) weight_decay: 0.0500 (0.0500) time: 0.5189 data: 0.0844 max mem: 21002 Epoch: [186] [300/312] eta: 0:00:06 lr: 0.001405 min_lr: 0.001405 loss: 3.8368 (3.8659) weight_decay: 0.0500 (0.0500) time: 0.4800 data: 0.0343 max mem: 21002 Epoch: [186] [310/312] eta: 0:00:01 lr: 0.001404 min_lr: 0.001404 loss: 3.9907 (3.8746) weight_decay: 0.0500 (0.0500) time: 0.3121 data: 0.0002 max mem: 21002 Epoch: [186] [311/312] eta: 0:00:00 lr: 0.001404 min_lr: 0.001404 loss: 4.1389 (3.8757) weight_decay: 0.0500 (0.0500) time: 0.3119 data: 0.0002 max mem: 21002 Epoch: [186] Total time: 0:02:53 (0.5575 s / it) Averaged stats: lr: 0.001404 min_lr: 0.001404 loss: 4.1389 (3.8393) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:23 loss: 1.6118 (1.6118) acc1: 69.6615 (69.6615) acc5: 89.8438 (89.8438) time: 9.2424 data: 9.1235 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 2.1096 (1.9515) acc1: 58.7240 (61.9520) acc5: 84.5052 (85.0880) time: 1.1154 data: 1.0175 max mem: 21002 Test: Total time: 0:00:10 (1.1301 s / it) * Acc@1 62.208 Acc@5 85.350 loss 1.968 Accuracy of the model on the 50000 test images: 62.2% Max accuracy: 63.08% Epoch: [187] [ 0/312] eta: 1:02:12 lr: 0.001404 min_lr: 0.001404 loss: 4.1415 (4.1415) weight_decay: 0.0500 (0.0500) time: 11.9629 data: 10.0203 max mem: 21002 Epoch: [187] [ 10/312] eta: 0:07:49 lr: 0.001403 min_lr: 0.001403 loss: 3.7426 (3.8852) weight_decay: 0.0500 (0.0500) time: 1.5546 data: 1.0413 max mem: 21002 Epoch: [187] [ 20/312] eta: 0:05:08 lr: 0.001403 min_lr: 0.001403 loss: 3.7066 (3.7140) weight_decay: 0.0500 (0.0500) time: 0.5100 data: 0.1489 max mem: 21002 Epoch: [187] [ 30/312] eta: 0:03:47 lr: 0.001402 min_lr: 0.001402 loss: 3.7508 (3.7773) weight_decay: 0.0500 (0.0500) time: 0.3979 data: 0.0775 max mem: 21002 Epoch: [187] [ 40/312] eta: 0:03:25 lr: 0.001401 min_lr: 0.001401 loss: 3.6976 (3.7060) weight_decay: 0.0500 (0.0500) time: 0.4371 data: 0.1506 max mem: 21002 Epoch: [187] [ 50/312] eta: 0:03:13 lr: 0.001401 min_lr: 0.001401 loss: 4.0243 (3.8134) weight_decay: 0.0500 (0.0500) time: 0.6318 data: 0.3473 max mem: 21002 Epoch: [187] [ 60/312] eta: 0:02:47 lr: 0.001400 min_lr: 0.001400 loss: 4.0504 (3.8140) weight_decay: 0.0500 (0.0500) time: 0.4828 data: 0.1973 max mem: 21002 Epoch: [187] [ 70/312] eta: 0:02:42 lr: 0.001399 min_lr: 0.001399 loss: 3.6309 (3.7916) weight_decay: 0.0500 (0.0500) time: 0.5045 data: 0.2183 max mem: 21002 Epoch: [187] [ 80/312] eta: 0:02:35 lr: 0.001399 min_lr: 0.001399 loss: 3.6131 (3.7790) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.3965 max mem: 21002 Epoch: [187] [ 90/312] eta: 0:02:19 lr: 0.001398 min_lr: 0.001398 loss: 3.4239 (3.7468) weight_decay: 0.0500 (0.0500) time: 0.4648 data: 0.1797 max mem: 21002 Epoch: [187] [100/312] eta: 0:02:14 lr: 0.001397 min_lr: 0.001397 loss: 3.7394 (3.7540) weight_decay: 0.0500 (0.0500) time: 0.4840 data: 0.1873 max mem: 21002 Epoch: [187] [110/312] eta: 0:02:01 lr: 0.001396 min_lr: 0.001396 loss: 3.9885 (3.7726) weight_decay: 0.0500 (0.0500) time: 0.4860 data: 0.1864 max mem: 21002 Epoch: [187] [120/312] eta: 0:01:56 lr: 0.001396 min_lr: 0.001396 loss: 4.0255 (3.7903) weight_decay: 0.0500 (0.0500) time: 0.4618 data: 0.1725 max mem: 21002 Epoch: [187] [130/312] eta: 0:01:51 lr: 0.001395 min_lr: 0.001395 loss: 4.0255 (3.7809) weight_decay: 0.0500 (0.0500) time: 0.6589 data: 0.3712 max mem: 21002 Epoch: [187] [140/312] eta: 0:01:41 lr: 0.001394 min_lr: 0.001394 loss: 4.0443 (3.8058) weight_decay: 0.0500 (0.0500) time: 0.4944 data: 0.1994 max mem: 21002 Epoch: [187] [150/312] eta: 0:01:36 lr: 0.001394 min_lr: 0.001394 loss: 4.1434 (3.8161) weight_decay: 0.0500 (0.0500) time: 0.4997 data: 0.1798 max mem: 21002 Epoch: [187] [160/312] eta: 0:01:30 lr: 0.001393 min_lr: 0.001393 loss: 4.0065 (3.8116) weight_decay: 0.0500 (0.0500) time: 0.6642 data: 0.3537 max mem: 21002 Epoch: [187] [170/312] eta: 0:01:22 lr: 0.001392 min_lr: 0.001392 loss: 3.8901 (3.8165) weight_decay: 0.0500 (0.0500) time: 0.4881 data: 0.1745 max mem: 21002 Epoch: [187] [180/312] eta: 0:01:17 lr: 0.001392 min_lr: 0.001392 loss: 3.8901 (3.8091) weight_decay: 0.0500 (0.0500) time: 0.5140 data: 0.1920 max mem: 21002 Epoch: [187] [190/312] eta: 0:01:09 lr: 0.001391 min_lr: 0.001391 loss: 3.4707 (3.7880) weight_decay: 0.0500 (0.0500) time: 0.4864 data: 0.1919 max mem: 21002 Epoch: [187] [200/312] eta: 0:01:04 lr: 0.001390 min_lr: 0.001390 loss: 3.3687 (3.7881) weight_decay: 0.0500 (0.0500) time: 0.4917 data: 0.2062 max mem: 21002 Epoch: [187] [210/312] eta: 0:00:59 lr: 0.001390 min_lr: 0.001390 loss: 3.8016 (3.7775) weight_decay: 0.0500 (0.0500) time: 0.6833 data: 0.3937 max mem: 21002 Epoch: [187] [220/312] eta: 0:00:52 lr: 0.001389 min_lr: 0.001389 loss: 3.4292 (3.7720) weight_decay: 0.0500 (0.0500) time: 0.4791 data: 0.1882 max mem: 21002 Epoch: [187] [230/312] eta: 0:00:47 lr: 0.001388 min_lr: 0.001388 loss: 3.9478 (3.7801) weight_decay: 0.0500 (0.0500) time: 0.4921 data: 0.2045 max mem: 21002 Epoch: [187] [240/312] eta: 0:00:41 lr: 0.001388 min_lr: 0.001388 loss: 3.9204 (3.7781) weight_decay: 0.0500 (0.0500) time: 0.6763 data: 0.3900 max mem: 21002 Epoch: [187] [250/312] eta: 0:00:35 lr: 0.001387 min_lr: 0.001387 loss: 3.9003 (3.7847) weight_decay: 0.0500 (0.0500) time: 0.4725 data: 0.1862 max mem: 21002 Epoch: [187] [260/312] eta: 0:00:29 lr: 0.001386 min_lr: 0.001386 loss: 3.8030 (3.7776) weight_decay: 0.0500 (0.0500) time: 0.4835 data: 0.1971 max mem: 21002 Epoch: [187] [270/312] eta: 0:00:23 lr: 0.001385 min_lr: 0.001385 loss: 3.7837 (3.7791) weight_decay: 0.0500 (0.0500) time: 0.4864 data: 0.1971 max mem: 21002 Epoch: [187] [280/312] eta: 0:00:18 lr: 0.001385 min_lr: 0.001385 loss: 4.0251 (3.7828) weight_decay: 0.0500 (0.0500) time: 0.4744 data: 0.1818 max mem: 21002 Epoch: [187] [290/312] eta: 0:00:12 lr: 0.001384 min_lr: 0.001384 loss: 4.0251 (3.7819) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.3048 max mem: 21002 Epoch: [187] [300/312] eta: 0:00:06 lr: 0.001383 min_lr: 0.001383 loss: 4.1027 (3.7945) weight_decay: 0.0500 (0.0500) time: 0.4040 data: 0.1233 max mem: 21002 Epoch: [187] [310/312] eta: 0:00:01 lr: 0.001383 min_lr: 0.001383 loss: 4.1027 (3.7997) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [187] [311/312] eta: 0:00:00 lr: 0.001383 min_lr: 0.001383 loss: 4.0232 (3.7965) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [187] Total time: 0:02:51 (0.5481 s / it) Averaged stats: lr: 0.001383 min_lr: 0.001383 loss: 4.0232 (3.8194) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:23 loss: 1.7346 (1.7346) acc1: 69.0104 (69.0104) acc5: 91.2760 (91.2760) time: 9.3126 data: 9.1906 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 2.1113 (2.0165) acc1: 62.1094 (62.6400) acc5: 85.8073 (85.5520) time: 1.1290 data: 1.0261 max mem: 21002 Test: Total time: 0:00:10 (1.1463 s / it) * Acc@1 62.462 Acc@5 85.344 loss 2.040 Accuracy of the model on the 50000 test images: 62.5% Max accuracy: 63.08% Epoch: [188] [ 0/312] eta: 1:01:05 lr: 0.001383 min_lr: 0.001383 loss: 4.2814 (4.2814) weight_decay: 0.0500 (0.0500) time: 11.7491 data: 10.9142 max mem: 21002 Epoch: [188] [ 10/312] eta: 0:07:35 lr: 0.001382 min_lr: 0.001382 loss: 3.8155 (3.6153) weight_decay: 0.0500 (0.0500) time: 1.5074 data: 0.9928 max mem: 21002 Epoch: [188] [ 20/312] eta: 0:05:09 lr: 0.001381 min_lr: 0.001381 loss: 3.7004 (3.5595) weight_decay: 0.0500 (0.0500) time: 0.5265 data: 0.1310 max mem: 21002 Epoch: [188] [ 30/312] eta: 0:03:48 lr: 0.001381 min_lr: 0.001381 loss: 3.7375 (3.6529) weight_decay: 0.0500 (0.0500) time: 0.4294 data: 0.1310 max mem: 21002 Epoch: [188] [ 40/312] eta: 0:03:28 lr: 0.001380 min_lr: 0.001380 loss: 3.9737 (3.7171) weight_decay: 0.0500 (0.0500) time: 0.4567 data: 0.1065 max mem: 21002 Epoch: [188] [ 50/312] eta: 0:03:16 lr: 0.001379 min_lr: 0.001379 loss: 3.9737 (3.7365) weight_decay: 0.0500 (0.0500) time: 0.6552 data: 0.2441 max mem: 21002 Epoch: [188] [ 60/312] eta: 0:02:49 lr: 0.001379 min_lr: 0.001379 loss: 3.9066 (3.7338) weight_decay: 0.0500 (0.0500) time: 0.4858 data: 0.1382 max mem: 21002 Epoch: [188] [ 70/312] eta: 0:02:43 lr: 0.001378 min_lr: 0.001378 loss: 3.8160 (3.7431) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.1161 max mem: 21002 Epoch: [188] [ 80/312] eta: 0:02:34 lr: 0.001377 min_lr: 0.001377 loss: 4.0151 (3.7777) weight_decay: 0.0500 (0.0500) time: 0.6475 data: 0.2306 max mem: 21002 Epoch: [188] [ 90/312] eta: 0:02:20 lr: 0.001376 min_lr: 0.001376 loss: 4.0068 (3.7508) weight_decay: 0.0500 (0.0500) time: 0.4705 data: 0.1153 max mem: 21002 Epoch: [188] [100/312] eta: 0:02:15 lr: 0.001376 min_lr: 0.001376 loss: 3.6596 (3.7638) weight_decay: 0.0500 (0.0500) time: 0.5126 data: 0.1240 max mem: 21002 Epoch: [188] [110/312] eta: 0:02:02 lr: 0.001375 min_lr: 0.001375 loss: 4.0125 (3.7800) weight_decay: 0.0500 (0.0500) time: 0.4887 data: 0.1244 max mem: 21002 Epoch: [188] [120/312] eta: 0:01:57 lr: 0.001374 min_lr: 0.001374 loss: 4.1170 (3.7970) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.1167 max mem: 21002 Epoch: [188] [130/312] eta: 0:01:53 lr: 0.001374 min_lr: 0.001374 loss: 4.0867 (3.8041) weight_decay: 0.0500 (0.0500) time: 0.7184 data: 0.2387 max mem: 21002 Epoch: [188] [140/312] eta: 0:01:42 lr: 0.001373 min_lr: 0.001373 loss: 3.5936 (3.7876) weight_decay: 0.0500 (0.0500) time: 0.5061 data: 0.1232 max mem: 21002 Epoch: [188] [150/312] eta: 0:01:38 lr: 0.001372 min_lr: 0.001372 loss: 3.7967 (3.7879) weight_decay: 0.0500 (0.0500) time: 0.5127 data: 0.1170 max mem: 21002 Epoch: [188] [160/312] eta: 0:01:31 lr: 0.001372 min_lr: 0.001372 loss: 3.9163 (3.7905) weight_decay: 0.0500 (0.0500) time: 0.6132 data: 0.1177 max mem: 21002 Epoch: [188] [170/312] eta: 0:01:24 lr: 0.001371 min_lr: 0.001371 loss: 3.9163 (3.7894) weight_decay: 0.0500 (0.0500) time: 0.4977 data: 0.0887 max mem: 21002 Epoch: [188] [180/312] eta: 0:01:18 lr: 0.001370 min_lr: 0.001370 loss: 4.0368 (3.7982) weight_decay: 0.0500 (0.0500) time: 0.5365 data: 0.1430 max mem: 21002 Epoch: [188] [190/312] eta: 0:01:10 lr: 0.001370 min_lr: 0.001370 loss: 4.0933 (3.8048) weight_decay: 0.0500 (0.0500) time: 0.4277 data: 0.0559 max mem: 21002 Epoch: [188] [200/312] eta: 0:01:05 lr: 0.001369 min_lr: 0.001369 loss: 3.9175 (3.8030) weight_decay: 0.0500 (0.0500) time: 0.4818 data: 0.1137 max mem: 21002 Epoch: [188] [210/312] eta: 0:01:00 lr: 0.001368 min_lr: 0.001368 loss: 3.9379 (3.8058) weight_decay: 0.0500 (0.0500) time: 0.7212 data: 0.2499 max mem: 21002 Epoch: [188] [220/312] eta: 0:00:53 lr: 0.001368 min_lr: 0.001368 loss: 3.9808 (3.8081) weight_decay: 0.0500 (0.0500) time: 0.5274 data: 0.1369 max mem: 21002 Epoch: [188] [230/312] eta: 0:00:47 lr: 0.001367 min_lr: 0.001367 loss: 3.8129 (3.7962) weight_decay: 0.0500 (0.0500) time: 0.4974 data: 0.0896 max mem: 21002 Epoch: [188] [240/312] eta: 0:00:41 lr: 0.001366 min_lr: 0.001366 loss: 4.0012 (3.8147) weight_decay: 0.0500 (0.0500) time: 0.5794 data: 0.1028 max mem: 21002 Epoch: [188] [250/312] eta: 0:00:35 lr: 0.001366 min_lr: 0.001366 loss: 4.2199 (3.8290) weight_decay: 0.0500 (0.0500) time: 0.4528 data: 0.0722 max mem: 21002 Epoch: [188] [260/312] eta: 0:00:30 lr: 0.001365 min_lr: 0.001365 loss: 3.9395 (3.8268) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.1773 max mem: 21002 Epoch: [188] [270/312] eta: 0:00:23 lr: 0.001364 min_lr: 0.001364 loss: 3.8643 (3.8356) weight_decay: 0.0500 (0.0500) time: 0.5079 data: 0.1189 max mem: 21002 Epoch: [188] [280/312] eta: 0:00:18 lr: 0.001364 min_lr: 0.001364 loss: 4.0621 (3.8380) weight_decay: 0.0500 (0.0500) time: 0.5012 data: 0.0881 max mem: 21002 Epoch: [188] [290/312] eta: 0:00:12 lr: 0.001363 min_lr: 0.001363 loss: 3.8255 (3.8293) weight_decay: 0.0500 (0.0500) time: 0.6001 data: 0.1287 max mem: 21002 Epoch: [188] [300/312] eta: 0:00:06 lr: 0.001362 min_lr: 0.001362 loss: 3.6892 (3.8251) weight_decay: 0.0500 (0.0500) time: 0.3822 data: 0.0410 max mem: 21002 Epoch: [188] [310/312] eta: 0:00:01 lr: 0.001361 min_lr: 0.001361 loss: 3.9144 (3.8286) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [188] [311/312] eta: 0:00:00 lr: 0.001361 min_lr: 0.001361 loss: 3.9144 (3.8270) weight_decay: 0.0500 (0.0500) time: 0.2769 data: 0.0001 max mem: 21002 Epoch: [188] Total time: 0:02:52 (0.5542 s / it) Averaged stats: lr: 0.001361 min_lr: 0.001361 loss: 3.9144 (3.8198) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:24 loss: 1.5904 (1.5904) acc1: 72.2656 (72.2656) acc5: 90.3646 (90.3646) time: 9.4206 data: 9.3020 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8049 (1.8085) acc1: 62.3698 (62.0960) acc5: 85.2865 (84.6400) time: 1.1332 data: 1.0337 max mem: 21002 Test: Total time: 0:00:10 (1.1434 s / it) * Acc@1 62.020 Acc@5 84.650 loss 1.810 Accuracy of the model on the 50000 test images: 62.0% Max accuracy: 63.08% Epoch: [189] [ 0/312] eta: 1:02:35 lr: 0.001361 min_lr: 0.001361 loss: 2.6549 (2.6549) weight_decay: 0.0500 (0.0500) time: 12.0359 data: 10.3591 max mem: 21002 Epoch: [189] [ 10/312] eta: 0:08:43 lr: 0.001361 min_lr: 0.001361 loss: 3.9349 (3.7189) weight_decay: 0.0500 (0.0500) time: 1.7331 data: 1.1040 max mem: 21002 Epoch: [189] [ 20/312] eta: 0:05:11 lr: 0.001360 min_lr: 0.001360 loss: 3.9512 (3.8681) weight_decay: 0.0500 (0.0500) time: 0.5193 data: 0.0993 max mem: 21002 Epoch: [189] [ 30/312] eta: 0:03:50 lr: 0.001359 min_lr: 0.001359 loss: 4.0370 (3.8947) weight_decay: 0.0500 (0.0500) time: 0.3124 data: 0.0105 max mem: 21002 Epoch: [189] [ 40/312] eta: 0:03:16 lr: 0.001359 min_lr: 0.001359 loss: 4.0656 (3.9249) weight_decay: 0.0500 (0.0500) time: 0.3589 data: 0.0014 max mem: 21002 Epoch: [189] [ 50/312] eta: 0:03:03 lr: 0.001358 min_lr: 0.001358 loss: 3.9855 (3.9124) weight_decay: 0.0500 (0.0500) time: 0.5186 data: 0.0234 max mem: 21002 Epoch: [189] [ 60/312] eta: 0:02:41 lr: 0.001357 min_lr: 0.001357 loss: 3.8250 (3.8827) weight_decay: 0.0500 (0.0500) time: 0.4702 data: 0.0227 max mem: 21002 Epoch: [189] [ 70/312] eta: 0:02:35 lr: 0.001357 min_lr: 0.001357 loss: 3.6131 (3.8310) weight_decay: 0.0500 (0.0500) time: 0.4901 data: 0.0617 max mem: 21002 Epoch: [189] [ 80/312] eta: 0:02:26 lr: 0.001356 min_lr: 0.001356 loss: 3.6131 (3.8235) weight_decay: 0.0500 (0.0500) time: 0.6123 data: 0.1413 max mem: 21002 Epoch: [189] [ 90/312] eta: 0:02:16 lr: 0.001355 min_lr: 0.001355 loss: 4.0239 (3.8261) weight_decay: 0.0500 (0.0500) time: 0.5244 data: 0.1205 max mem: 21002 Epoch: [189] [100/312] eta: 0:02:09 lr: 0.001355 min_lr: 0.001355 loss: 3.8915 (3.8301) weight_decay: 0.0500 (0.0500) time: 0.5216 data: 0.1021 max mem: 21002 Epoch: [189] [110/312] eta: 0:01:58 lr: 0.001354 min_lr: 0.001354 loss: 3.9432 (3.8481) weight_decay: 0.0500 (0.0500) time: 0.4584 data: 0.0921 max mem: 21002 Epoch: [189] [120/312] eta: 0:01:54 lr: 0.001353 min_lr: 0.001353 loss: 4.0814 (3.8608) weight_decay: 0.0500 (0.0500) time: 0.5326 data: 0.1430 max mem: 21002 Epoch: [189] [130/312] eta: 0:01:49 lr: 0.001352 min_lr: 0.001352 loss: 4.0683 (3.8680) weight_decay: 0.0500 (0.0500) time: 0.6674 data: 0.1959 max mem: 21002 Epoch: [189] [140/312] eta: 0:01:40 lr: 0.001352 min_lr: 0.001352 loss: 4.0683 (3.8723) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.1013 max mem: 21002 Epoch: [189] [150/312] eta: 0:01:34 lr: 0.001351 min_lr: 0.001351 loss: 4.0054 (3.8704) weight_decay: 0.0500 (0.0500) time: 0.4925 data: 0.0965 max mem: 21002 Epoch: [189] [160/312] eta: 0:01:28 lr: 0.001350 min_lr: 0.001350 loss: 3.9960 (3.8826) weight_decay: 0.0500 (0.0500) time: 0.5780 data: 0.0913 max mem: 21002 Epoch: [189] [170/312] eta: 0:01:22 lr: 0.001350 min_lr: 0.001350 loss: 3.9607 (3.8761) weight_decay: 0.0500 (0.0500) time: 0.5391 data: 0.0622 max mem: 21002 Epoch: [189] [180/312] eta: 0:01:16 lr: 0.001349 min_lr: 0.001349 loss: 3.8411 (3.8686) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0909 max mem: 21002 Epoch: [189] [190/312] eta: 0:01:09 lr: 0.001348 min_lr: 0.001348 loss: 3.8411 (3.8666) weight_decay: 0.0500 (0.0500) time: 0.5095 data: 0.0664 max mem: 21002 Epoch: [189] [200/312] eta: 0:01:04 lr: 0.001348 min_lr: 0.001348 loss: 4.0771 (3.8720) weight_decay: 0.0500 (0.0500) time: 0.5301 data: 0.0847 max mem: 21002 Epoch: [189] [210/312] eta: 0:00:58 lr: 0.001347 min_lr: 0.001347 loss: 4.1394 (3.8749) weight_decay: 0.0500 (0.0500) time: 0.6138 data: 0.0722 max mem: 21002 Epoch: [189] [220/312] eta: 0:00:52 lr: 0.001346 min_lr: 0.001346 loss: 4.0132 (3.8781) weight_decay: 0.0500 (0.0500) time: 0.5489 data: 0.0370 max mem: 21002 Epoch: [189] [230/312] eta: 0:00:46 lr: 0.001346 min_lr: 0.001346 loss: 3.9125 (3.8618) weight_decay: 0.0500 (0.0500) time: 0.5239 data: 0.0389 max mem: 21002 Epoch: [189] [240/312] eta: 0:00:41 lr: 0.001345 min_lr: 0.001345 loss: 3.4789 (3.8444) weight_decay: 0.0500 (0.0500) time: 0.5281 data: 0.0399 max mem: 21002 Epoch: [189] [250/312] eta: 0:00:35 lr: 0.001344 min_lr: 0.001344 loss: 3.4442 (3.8321) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0798 max mem: 21002 Epoch: [189] [260/312] eta: 0:00:29 lr: 0.001344 min_lr: 0.001344 loss: 3.9300 (3.8406) weight_decay: 0.0500 (0.0500) time: 0.5681 data: 0.0697 max mem: 21002 Epoch: [189] [270/312] eta: 0:00:23 lr: 0.001343 min_lr: 0.001343 loss: 3.9939 (3.8428) weight_decay: 0.0500 (0.0500) time: 0.5069 data: 0.0613 max mem: 21002 Epoch: [189] [280/312] eta: 0:00:18 lr: 0.001342 min_lr: 0.001342 loss: 3.9149 (3.8445) weight_decay: 0.0500 (0.0500) time: 0.5411 data: 0.0927 max mem: 21002 Epoch: [189] [290/312] eta: 0:00:12 lr: 0.001342 min_lr: 0.001342 loss: 3.8324 (3.8412) weight_decay: 0.0500 (0.0500) time: 0.5265 data: 0.0625 max mem: 21002 Epoch: [189] [300/312] eta: 0:00:06 lr: 0.001341 min_lr: 0.001341 loss: 3.6636 (3.8334) weight_decay: 0.0500 (0.0500) time: 0.4333 data: 0.0305 max mem: 21002 Epoch: [189] [310/312] eta: 0:00:01 lr: 0.001340 min_lr: 0.001340 loss: 3.8734 (3.8343) weight_decay: 0.0500 (0.0500) time: 0.3250 data: 0.0184 max mem: 21002 Epoch: [189] [311/312] eta: 0:00:00 lr: 0.001340 min_lr: 0.001340 loss: 3.7811 (3.8317) weight_decay: 0.0500 (0.0500) time: 0.3249 data: 0.0184 max mem: 21002 Epoch: [189] Total time: 0:02:52 (0.5528 s / it) Averaged stats: lr: 0.001340 min_lr: 0.001340 loss: 3.7811 (3.8132) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:23 loss: 1.7435 (1.7435) acc1: 69.6615 (69.6615) acc5: 90.3646 (90.3646) time: 9.3195 data: 9.2007 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 2.1031 (1.9596) acc1: 60.4167 (61.2960) acc5: 85.8073 (84.6560) time: 1.1276 data: 1.0224 max mem: 21002 Test: Total time: 0:00:10 (1.1500 s / it) * Acc@1 61.702 Acc@5 84.888 loss 1.957 Accuracy of the model on the 50000 test images: 61.7% Max accuracy: 63.08% Epoch: [190] [ 0/312] eta: 1:07:47 lr: 0.001340 min_lr: 0.001340 loss: 2.5269 (2.5269) weight_decay: 0.0500 (0.0500) time: 13.0374 data: 12.7440 max mem: 21002 Epoch: [190] [ 10/312] eta: 0:08:24 lr: 0.001339 min_lr: 0.001339 loss: 3.5821 (3.4284) weight_decay: 0.0500 (0.0500) time: 1.6712 data: 1.1603 max mem: 21002 Epoch: [190] [ 20/312] eta: 0:05:10 lr: 0.001339 min_lr: 0.001339 loss: 3.5888 (3.5853) weight_decay: 0.0500 (0.0500) time: 0.4664 data: 0.0581 max mem: 21002 Epoch: [190] [ 30/312] eta: 0:03:49 lr: 0.001338 min_lr: 0.001338 loss: 4.0021 (3.7416) weight_decay: 0.0500 (0.0500) time: 0.3445 data: 0.0576 max mem: 21002 Epoch: [190] [ 40/312] eta: 0:03:18 lr: 0.001337 min_lr: 0.001337 loss: 4.0966 (3.8253) weight_decay: 0.0500 (0.0500) time: 0.3803 data: 0.0928 max mem: 21002 Epoch: [190] [ 50/312] eta: 0:03:08 lr: 0.001337 min_lr: 0.001337 loss: 4.0169 (3.8439) weight_decay: 0.0500 (0.0500) time: 0.5662 data: 0.2819 max mem: 21002 Epoch: [190] [ 60/312] eta: 0:02:43 lr: 0.001336 min_lr: 0.001336 loss: 3.9110 (3.8311) weight_decay: 0.0500 (0.0500) time: 0.4775 data: 0.1944 max mem: 21002 Epoch: [190] [ 70/312] eta: 0:02:38 lr: 0.001335 min_lr: 0.001335 loss: 3.9848 (3.8451) weight_decay: 0.0500 (0.0500) time: 0.4954 data: 0.2104 max mem: 21002 Epoch: [190] [ 80/312] eta: 0:02:31 lr: 0.001335 min_lr: 0.001335 loss: 3.9910 (3.8423) weight_decay: 0.0500 (0.0500) time: 0.6718 data: 0.3844 max mem: 21002 Epoch: [190] [ 90/312] eta: 0:02:16 lr: 0.001334 min_lr: 0.001334 loss: 3.7193 (3.7826) weight_decay: 0.0500 (0.0500) time: 0.4727 data: 0.1794 max mem: 21002 Epoch: [190] [100/312] eta: 0:02:11 lr: 0.001333 min_lr: 0.001333 loss: 3.2921 (3.7506) weight_decay: 0.0500 (0.0500) time: 0.4951 data: 0.2007 max mem: 21002 Epoch: [190] [110/312] eta: 0:02:00 lr: 0.001333 min_lr: 0.001333 loss: 3.5748 (3.7572) weight_decay: 0.0500 (0.0500) time: 0.5031 data: 0.2007 max mem: 21002 Epoch: [190] [120/312] eta: 0:01:56 lr: 0.001332 min_lr: 0.001332 loss: 4.0577 (3.7634) weight_decay: 0.0500 (0.0500) time: 0.5184 data: 0.2182 max mem: 21002 Epoch: [190] [130/312] eta: 0:01:50 lr: 0.001331 min_lr: 0.001331 loss: 3.9018 (3.7638) weight_decay: 0.0500 (0.0500) time: 0.6779 data: 0.3915 max mem: 21002 Epoch: [190] [140/312] eta: 0:01:41 lr: 0.001331 min_lr: 0.001331 loss: 3.9675 (3.7940) weight_decay: 0.0500 (0.0500) time: 0.4882 data: 0.1745 max mem: 21002 Epoch: [190] [150/312] eta: 0:01:35 lr: 0.001330 min_lr: 0.001330 loss: 4.0010 (3.7907) weight_decay: 0.0500 (0.0500) time: 0.4925 data: 0.1791 max mem: 21002 Epoch: [190] [160/312] eta: 0:01:30 lr: 0.001329 min_lr: 0.001329 loss: 3.6965 (3.7820) weight_decay: 0.0500 (0.0500) time: 0.6481 data: 0.3479 max mem: 21002 Epoch: [190] [170/312] eta: 0:01:22 lr: 0.001329 min_lr: 0.001329 loss: 4.0714 (3.7993) weight_decay: 0.0500 (0.0500) time: 0.5006 data: 0.1699 max mem: 21002 Epoch: [190] [180/312] eta: 0:01:17 lr: 0.001328 min_lr: 0.001328 loss: 4.0680 (3.7957) weight_decay: 0.0500 (0.0500) time: 0.4995 data: 0.1837 max mem: 21002 Epoch: [190] [190/312] eta: 0:01:09 lr: 0.001327 min_lr: 0.001327 loss: 3.5809 (3.7884) weight_decay: 0.0500 (0.0500) time: 0.4893 data: 0.1838 max mem: 21002 Epoch: [190] [200/312] eta: 0:01:04 lr: 0.001327 min_lr: 0.001327 loss: 3.4890 (3.7892) weight_decay: 0.0500 (0.0500) time: 0.5053 data: 0.1889 max mem: 21002 Epoch: [190] [210/312] eta: 0:00:59 lr: 0.001326 min_lr: 0.001326 loss: 3.9044 (3.7952) weight_decay: 0.0500 (0.0500) time: 0.6502 data: 0.3543 max mem: 21002 Epoch: [190] [220/312] eta: 0:00:52 lr: 0.001325 min_lr: 0.001325 loss: 4.0180 (3.7982) weight_decay: 0.0500 (0.0500) time: 0.4824 data: 0.1665 max mem: 21002 Epoch: [190] [230/312] eta: 0:00:46 lr: 0.001325 min_lr: 0.001325 loss: 3.8735 (3.7926) weight_decay: 0.0500 (0.0500) time: 0.5055 data: 0.1918 max mem: 21002 Epoch: [190] [240/312] eta: 0:00:41 lr: 0.001324 min_lr: 0.001324 loss: 4.0118 (3.8037) weight_decay: 0.0500 (0.0500) time: 0.6732 data: 0.3771 max mem: 21002 Epoch: [190] [250/312] eta: 0:00:35 lr: 0.001323 min_lr: 0.001323 loss: 4.0118 (3.7992) weight_decay: 0.0500 (0.0500) time: 0.5193 data: 0.1864 max mem: 21002 Epoch: [190] [260/312] eta: 0:00:29 lr: 0.001322 min_lr: 0.001322 loss: 3.8818 (3.8041) weight_decay: 0.0500 (0.0500) time: 0.4939 data: 0.1711 max mem: 21002 Epoch: [190] [270/312] eta: 0:00:23 lr: 0.001322 min_lr: 0.001322 loss: 3.9297 (3.8108) weight_decay: 0.0500 (0.0500) time: 0.4945 data: 0.1712 max mem: 21002 Epoch: [190] [280/312] eta: 0:00:18 lr: 0.001321 min_lr: 0.001321 loss: 3.9180 (3.8073) weight_decay: 0.0500 (0.0500) time: 0.5256 data: 0.1923 max mem: 21002 Epoch: [190] [290/312] eta: 0:00:12 lr: 0.001320 min_lr: 0.001320 loss: 3.6733 (3.8007) weight_decay: 0.0500 (0.0500) time: 0.6234 data: 0.3233 max mem: 21002 Epoch: [190] [300/312] eta: 0:00:06 lr: 0.001320 min_lr: 0.001320 loss: 3.8414 (3.8014) weight_decay: 0.0500 (0.0500) time: 0.4160 data: 0.1317 max mem: 21002 Epoch: [190] [310/312] eta: 0:00:01 lr: 0.001319 min_lr: 0.001319 loss: 3.8414 (3.7946) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0002 max mem: 21002 Epoch: [190] [311/312] eta: 0:00:00 lr: 0.001319 min_lr: 0.001319 loss: 3.8414 (3.7932) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0002 max mem: 21002 Epoch: [190] Total time: 0:02:52 (0.5517 s / it) Averaged stats: lr: 0.001319 min_lr: 0.001319 loss: 3.8414 (3.8157) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:22 loss: 1.4860 (1.4860) acc1: 71.7448 (71.7448) acc5: 88.6719 (88.6719) time: 9.1168 data: 9.0006 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6496 (1.5995) acc1: 64.3229 (65.2320) acc5: 86.3281 (86.9920) time: 1.1010 data: 1.0001 max mem: 21002 Test: Total time: 0:00:10 (1.1242 s / it) * Acc@1 65.106 Acc@5 87.048 loss 1.602 Accuracy of the model on the 50000 test images: 65.1% Max accuracy: 65.11% Epoch: [191] [ 0/312] eta: 1:00:36 lr: 0.001319 min_lr: 0.001319 loss: 4.7072 (4.7072) weight_decay: 0.0500 (0.0500) time: 11.6570 data: 10.0700 max mem: 21002 Epoch: [191] [ 10/312] eta: 0:07:54 lr: 0.001318 min_lr: 0.001318 loss: 4.0483 (4.0673) weight_decay: 0.0500 (0.0500) time: 1.5698 data: 1.0015 max mem: 21002 Epoch: [191] [ 20/312] eta: 0:04:54 lr: 0.001318 min_lr: 0.001318 loss: 3.9375 (3.9358) weight_decay: 0.0500 (0.0500) time: 0.4775 data: 0.0908 max mem: 21002 Epoch: [191] [ 30/312] eta: 0:03:39 lr: 0.001317 min_lr: 0.001317 loss: 3.8413 (3.8750) weight_decay: 0.0500 (0.0500) time: 0.3432 data: 0.0482 max mem: 21002 Epoch: [191] [ 40/312] eta: 0:03:18 lr: 0.001316 min_lr: 0.001316 loss: 3.8413 (3.8509) weight_decay: 0.0500 (0.0500) time: 0.4325 data: 0.0982 max mem: 21002 Epoch: [191] [ 50/312] eta: 0:03:03 lr: 0.001316 min_lr: 0.001316 loss: 3.6076 (3.7595) weight_decay: 0.0500 (0.0500) time: 0.5759 data: 0.2091 max mem: 21002 Epoch: [191] [ 60/312] eta: 0:02:44 lr: 0.001315 min_lr: 0.001315 loss: 3.4332 (3.7363) weight_decay: 0.0500 (0.0500) time: 0.4967 data: 0.1831 max mem: 21002 Epoch: [191] [ 70/312] eta: 0:02:37 lr: 0.001314 min_lr: 0.001314 loss: 3.8348 (3.7418) weight_decay: 0.0500 (0.0500) time: 0.5255 data: 0.2376 max mem: 21002 Epoch: [191] [ 80/312] eta: 0:02:33 lr: 0.001314 min_lr: 0.001314 loss: 3.7276 (3.7136) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.3923 max mem: 21002 Epoch: [191] [ 90/312] eta: 0:02:17 lr: 0.001313 min_lr: 0.001313 loss: 3.8742 (3.7269) weight_decay: 0.0500 (0.0500) time: 0.5079 data: 0.2229 max mem: 21002 Epoch: [191] [100/312] eta: 0:02:13 lr: 0.001312 min_lr: 0.001312 loss: 3.8884 (3.7075) weight_decay: 0.0500 (0.0500) time: 0.5014 data: 0.2177 max mem: 21002 Epoch: [191] [110/312] eta: 0:02:00 lr: 0.001312 min_lr: 0.001312 loss: 3.7323 (3.7046) weight_decay: 0.0500 (0.0500) time: 0.5019 data: 0.2173 max mem: 21002 Epoch: [191] [120/312] eta: 0:01:55 lr: 0.001311 min_lr: 0.001311 loss: 3.7549 (3.7036) weight_decay: 0.0500 (0.0500) time: 0.4758 data: 0.1909 max mem: 21002 Epoch: [191] [130/312] eta: 0:01:49 lr: 0.001310 min_lr: 0.001310 loss: 3.8552 (3.7072) weight_decay: 0.0500 (0.0500) time: 0.6235 data: 0.3364 max mem: 21002 Epoch: [191] [140/312] eta: 0:01:39 lr: 0.001310 min_lr: 0.001310 loss: 3.9293 (3.7321) weight_decay: 0.0500 (0.0500) time: 0.4342 data: 0.1462 max mem: 21002 Epoch: [191] [150/312] eta: 0:01:35 lr: 0.001309 min_lr: 0.001309 loss: 3.9601 (3.7391) weight_decay: 0.0500 (0.0500) time: 0.5164 data: 0.2294 max mem: 21002 Epoch: [191] [160/312] eta: 0:01:29 lr: 0.001308 min_lr: 0.001308 loss: 4.0674 (3.7556) weight_decay: 0.0500 (0.0500) time: 0.6724 data: 0.3867 max mem: 21002 Epoch: [191] [170/312] eta: 0:01:21 lr: 0.001307 min_lr: 0.001307 loss: 4.0663 (3.7615) weight_decay: 0.0500 (0.0500) time: 0.4686 data: 0.1872 max mem: 21002 Epoch: [191] [180/312] eta: 0:01:16 lr: 0.001307 min_lr: 0.001307 loss: 3.8075 (3.7598) weight_decay: 0.0500 (0.0500) time: 0.5090 data: 0.2140 max mem: 21002 Epoch: [191] [190/312] eta: 0:01:09 lr: 0.001306 min_lr: 0.001306 loss: 3.5206 (3.7470) weight_decay: 0.0500 (0.0500) time: 0.5170 data: 0.2012 max mem: 21002 Epoch: [191] [200/312] eta: 0:01:04 lr: 0.001305 min_lr: 0.001305 loss: 3.8651 (3.7627) weight_decay: 0.0500 (0.0500) time: 0.5010 data: 0.1644 max mem: 21002 Epoch: [191] [210/312] eta: 0:00:58 lr: 0.001305 min_lr: 0.001305 loss: 3.9345 (3.7572) weight_decay: 0.0500 (0.0500) time: 0.6574 data: 0.2618 max mem: 21002 Epoch: [191] [220/312] eta: 0:00:52 lr: 0.001304 min_lr: 0.001304 loss: 3.9345 (3.7733) weight_decay: 0.0500 (0.0500) time: 0.5625 data: 0.1380 max mem: 21002 Epoch: [191] [230/312] eta: 0:00:46 lr: 0.001303 min_lr: 0.001303 loss: 4.0720 (3.7712) weight_decay: 0.0500 (0.0500) time: 0.5125 data: 0.1288 max mem: 21002 Epoch: [191] [240/312] eta: 0:00:41 lr: 0.001303 min_lr: 0.001303 loss: 3.7359 (3.7723) weight_decay: 0.0500 (0.0500) time: 0.5602 data: 0.1423 max mem: 21002 Epoch: [191] [250/312] eta: 0:00:35 lr: 0.001302 min_lr: 0.001302 loss: 3.8765 (3.7756) weight_decay: 0.0500 (0.0500) time: 0.5521 data: 0.1237 max mem: 21002 Epoch: [191] [260/312] eta: 0:00:29 lr: 0.001301 min_lr: 0.001301 loss: 4.0639 (3.7858) weight_decay: 0.0500 (0.0500) time: 0.5676 data: 0.2049 max mem: 21002 Epoch: [191] [270/312] eta: 0:00:23 lr: 0.001301 min_lr: 0.001301 loss: 4.1689 (3.7906) weight_decay: 0.0500 (0.0500) time: 0.4836 data: 0.1191 max mem: 21002 Epoch: [191] [280/312] eta: 0:00:18 lr: 0.001300 min_lr: 0.001300 loss: 4.0748 (3.7919) weight_decay: 0.0500 (0.0500) time: 0.4856 data: 0.1094 max mem: 21002 Epoch: [191] [290/312] eta: 0:00:12 lr: 0.001299 min_lr: 0.001299 loss: 4.0648 (3.7974) weight_decay: 0.0500 (0.0500) time: 0.5728 data: 0.1687 max mem: 21002 Epoch: [191] [300/312] eta: 0:00:06 lr: 0.001299 min_lr: 0.001299 loss: 4.0102 (3.7989) weight_decay: 0.0500 (0.0500) time: 0.4180 data: 0.0598 max mem: 21002 Epoch: [191] [310/312] eta: 0:00:01 lr: 0.001298 min_lr: 0.001298 loss: 3.9497 (3.7990) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [191] [311/312] eta: 0:00:00 lr: 0.001298 min_lr: 0.001298 loss: 3.9347 (3.7968) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [191] Total time: 0:02:51 (0.5494 s / it) Averaged stats: lr: 0.001298 min_lr: 0.001298 loss: 3.9347 (3.8026) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:24 loss: 1.3352 (1.3352) acc1: 72.3958 (72.3958) acc5: 91.0156 (91.0156) time: 9.3880 data: 9.2693 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7483 (1.6087) acc1: 64.0625 (65.5200) acc5: 85.1562 (86.5440) time: 1.1280 data: 1.0300 max mem: 21002 Test: Total time: 0:00:10 (1.1427 s / it) * Acc@1 64.778 Acc@5 86.934 loss 1.620 Accuracy of the model on the 50000 test images: 64.8% Max accuracy: 65.11% Epoch: [192] [ 0/312] eta: 1:02:22 lr: 0.001298 min_lr: 0.001298 loss: 4.2022 (4.2022) weight_decay: 0.0500 (0.0500) time: 11.9942 data: 9.2953 max mem: 21002 Epoch: [192] [ 10/312] eta: 0:07:39 lr: 0.001297 min_lr: 0.001297 loss: 4.1816 (4.1044) weight_decay: 0.0500 (0.0500) time: 1.5203 data: 1.0126 max mem: 21002 Epoch: [192] [ 20/312] eta: 0:04:53 lr: 0.001297 min_lr: 0.001297 loss: 3.8714 (3.8251) weight_decay: 0.0500 (0.0500) time: 0.4548 data: 0.1528 max mem: 21002 Epoch: [192] [ 30/312] eta: 0:03:37 lr: 0.001296 min_lr: 0.001296 loss: 3.7059 (3.8173) weight_decay: 0.0500 (0.0500) time: 0.3611 data: 0.0610 max mem: 21002 Epoch: [192] [ 40/312] eta: 0:03:18 lr: 0.001295 min_lr: 0.001295 loss: 3.5190 (3.7165) weight_decay: 0.0500 (0.0500) time: 0.4410 data: 0.1346 max mem: 21002 Epoch: [192] [ 50/312] eta: 0:03:08 lr: 0.001295 min_lr: 0.001295 loss: 3.5190 (3.7139) weight_decay: 0.0500 (0.0500) time: 0.6353 data: 0.3046 max mem: 21002 Epoch: [192] [ 60/312] eta: 0:02:43 lr: 0.001294 min_lr: 0.001294 loss: 3.6690 (3.7088) weight_decay: 0.0500 (0.0500) time: 0.4801 data: 0.1706 max mem: 21002 Epoch: [192] [ 70/312] eta: 0:02:39 lr: 0.001293 min_lr: 0.001293 loss: 3.5038 (3.7141) weight_decay: 0.0500 (0.0500) time: 0.5001 data: 0.1918 max mem: 21002 Epoch: [192] [ 80/312] eta: 0:02:31 lr: 0.001293 min_lr: 0.001293 loss: 4.0026 (3.7471) weight_decay: 0.0500 (0.0500) time: 0.6629 data: 0.3232 max mem: 21002 Epoch: [192] [ 90/312] eta: 0:02:16 lr: 0.001292 min_lr: 0.001292 loss: 4.0470 (3.7702) weight_decay: 0.0500 (0.0500) time: 0.4606 data: 0.1323 max mem: 21002 Epoch: [192] [100/312] eta: 0:02:12 lr: 0.001291 min_lr: 0.001291 loss: 3.8334 (3.7527) weight_decay: 0.0500 (0.0500) time: 0.5207 data: 0.1233 max mem: 21002 Epoch: [192] [110/312] eta: 0:02:00 lr: 0.001291 min_lr: 0.001291 loss: 3.8334 (3.7555) weight_decay: 0.0500 (0.0500) time: 0.5122 data: 0.1281 max mem: 21002 Epoch: [192] [120/312] eta: 0:01:55 lr: 0.001290 min_lr: 0.001290 loss: 3.8790 (3.7543) weight_decay: 0.0500 (0.0500) time: 0.4897 data: 0.0917 max mem: 21002 Epoch: [192] [130/312] eta: 0:01:50 lr: 0.001289 min_lr: 0.001289 loss: 3.8670 (3.7602) weight_decay: 0.0500 (0.0500) time: 0.6732 data: 0.2126 max mem: 21002 Epoch: [192] [140/312] eta: 0:01:40 lr: 0.001289 min_lr: 0.001289 loss: 4.0390 (3.7818) weight_decay: 0.0500 (0.0500) time: 0.4718 data: 0.1267 max mem: 21002 Epoch: [192] [150/312] eta: 0:01:35 lr: 0.001288 min_lr: 0.001288 loss: 4.0907 (3.7950) weight_decay: 0.0500 (0.0500) time: 0.4871 data: 0.1318 max mem: 21002 Epoch: [192] [160/312] eta: 0:01:29 lr: 0.001287 min_lr: 0.001287 loss: 3.8245 (3.7894) weight_decay: 0.0500 (0.0500) time: 0.6159 data: 0.2234 max mem: 21002 Epoch: [192] [170/312] eta: 0:01:21 lr: 0.001286 min_lr: 0.001286 loss: 3.8013 (3.7925) weight_decay: 0.0500 (0.0500) time: 0.4442 data: 0.1212 max mem: 21002 Epoch: [192] [180/312] eta: 0:01:16 lr: 0.001286 min_lr: 0.001286 loss: 3.8364 (3.7895) weight_decay: 0.0500 (0.0500) time: 0.5165 data: 0.1766 max mem: 21002 Epoch: [192] [190/312] eta: 0:01:08 lr: 0.001285 min_lr: 0.001285 loss: 3.8167 (3.7838) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.1476 max mem: 21002 Epoch: [192] [200/312] eta: 0:01:03 lr: 0.001284 min_lr: 0.001284 loss: 3.9653 (3.7927) weight_decay: 0.0500 (0.0500) time: 0.4545 data: 0.1464 max mem: 21002 Epoch: [192] [210/312] eta: 0:00:58 lr: 0.001284 min_lr: 0.001284 loss: 3.8082 (3.7824) weight_decay: 0.0500 (0.0500) time: 0.6675 data: 0.3588 max mem: 21002 Epoch: [192] [220/312] eta: 0:00:51 lr: 0.001283 min_lr: 0.001283 loss: 4.0406 (3.7894) weight_decay: 0.0500 (0.0500) time: 0.4975 data: 0.2130 max mem: 21002 Epoch: [192] [230/312] eta: 0:00:46 lr: 0.001282 min_lr: 0.001282 loss: 3.9705 (3.7888) weight_decay: 0.0500 (0.0500) time: 0.4793 data: 0.1940 max mem: 21002 Epoch: [192] [240/312] eta: 0:00:41 lr: 0.001282 min_lr: 0.001282 loss: 3.5772 (3.7824) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.3916 max mem: 21002 Epoch: [192] [250/312] eta: 0:00:34 lr: 0.001281 min_lr: 0.001281 loss: 3.7431 (3.7862) weight_decay: 0.0500 (0.0500) time: 0.4847 data: 0.1994 max mem: 21002 Epoch: [192] [260/312] eta: 0:00:29 lr: 0.001280 min_lr: 0.001280 loss: 4.0431 (3.7910) weight_decay: 0.0500 (0.0500) time: 0.4788 data: 0.1942 max mem: 21002 Epoch: [192] [270/312] eta: 0:00:23 lr: 0.001280 min_lr: 0.001280 loss: 4.0550 (3.8027) weight_decay: 0.0500 (0.0500) time: 0.4797 data: 0.1929 max mem: 21002 Epoch: [192] [280/312] eta: 0:00:17 lr: 0.001279 min_lr: 0.001279 loss: 3.7773 (3.7900) weight_decay: 0.0500 (0.0500) time: 0.4583 data: 0.1716 max mem: 21002 Epoch: [192] [290/312] eta: 0:00:12 lr: 0.001278 min_lr: 0.001278 loss: 3.4477 (3.7876) weight_decay: 0.0500 (0.0500) time: 0.6132 data: 0.3288 max mem: 21002 Epoch: [192] [300/312] eta: 0:00:06 lr: 0.001278 min_lr: 0.001278 loss: 3.9121 (3.7892) weight_decay: 0.0500 (0.0500) time: 0.4381 data: 0.1577 max mem: 21002 Epoch: [192] [310/312] eta: 0:00:01 lr: 0.001277 min_lr: 0.001277 loss: 3.7944 (3.7935) weight_decay: 0.0500 (0.0500) time: 0.2816 data: 0.0050 max mem: 21002 Epoch: [192] [311/312] eta: 0:00:00 lr: 0.001277 min_lr: 0.001277 loss: 3.7944 (3.7923) weight_decay: 0.0500 (0.0500) time: 0.2816 data: 0.0050 max mem: 21002 Epoch: [192] Total time: 0:02:49 (0.5431 s / it) Averaged stats: lr: 0.001277 min_lr: 0.001277 loss: 3.7944 (3.8010) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 1.4336 (1.4336) acc1: 71.0938 (71.0938) acc5: 90.1042 (90.1042) time: 8.9478 data: 8.8291 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7625 (1.6643) acc1: 63.6719 (64.8000) acc5: 86.5885 (86.7520) time: 1.0795 data: 0.9811 max mem: 21002 Test: Total time: 0:00:09 (1.0926 s / it) * Acc@1 65.116 Acc@5 86.672 loss 1.662 Accuracy of the model on the 50000 test images: 65.1% Max accuracy: 65.12% Epoch: [193] [ 0/312] eta: 0:59:48 lr: 0.001277 min_lr: 0.001277 loss: 3.2973 (3.2973) weight_decay: 0.0500 (0.0500) time: 11.5005 data: 11.2005 max mem: 21002 Epoch: [193] [ 10/312] eta: 0:07:26 lr: 0.001276 min_lr: 0.001276 loss: 3.3256 (3.5984) weight_decay: 0.0500 (0.0500) time: 1.4798 data: 1.0460 max mem: 21002 Epoch: [193] [ 20/312] eta: 0:04:58 lr: 0.001276 min_lr: 0.001276 loss: 3.8071 (3.7713) weight_decay: 0.0500 (0.0500) time: 0.4973 data: 0.1296 max mem: 21002 Epoch: [193] [ 30/312] eta: 0:03:41 lr: 0.001275 min_lr: 0.001275 loss: 4.0342 (3.8389) weight_decay: 0.0500 (0.0500) time: 0.4035 data: 0.1146 max mem: 21002 Epoch: [193] [ 40/312] eta: 0:03:23 lr: 0.001274 min_lr: 0.001274 loss: 4.0194 (3.8412) weight_decay: 0.0500 (0.0500) time: 0.4605 data: 0.1690 max mem: 21002 Epoch: [193] [ 50/312] eta: 0:03:12 lr: 0.001274 min_lr: 0.001274 loss: 3.8554 (3.8199) weight_decay: 0.0500 (0.0500) time: 0.6567 data: 0.3667 max mem: 21002 Epoch: [193] [ 60/312] eta: 0:02:46 lr: 0.001273 min_lr: 0.001273 loss: 4.0419 (3.8605) weight_decay: 0.0500 (0.0500) time: 0.4843 data: 0.1984 max mem: 21002 Epoch: [193] [ 70/312] eta: 0:02:38 lr: 0.001272 min_lr: 0.001272 loss: 4.0693 (3.8976) weight_decay: 0.0500 (0.0500) time: 0.4558 data: 0.1592 max mem: 21002 Epoch: [193] [ 80/312] eta: 0:02:32 lr: 0.001272 min_lr: 0.001272 loss: 3.9503 (3.8939) weight_decay: 0.0500 (0.0500) time: 0.6520 data: 0.3551 max mem: 21002 Epoch: [193] [ 90/312] eta: 0:02:17 lr: 0.001271 min_lr: 0.001271 loss: 3.9387 (3.8952) weight_decay: 0.0500 (0.0500) time: 0.4825 data: 0.1965 max mem: 21002 Epoch: [193] [100/312] eta: 0:02:12 lr: 0.001270 min_lr: 0.001270 loss: 3.9003 (3.8734) weight_decay: 0.0500 (0.0500) time: 0.4900 data: 0.2043 max mem: 21002 Epoch: [193] [110/312] eta: 0:02:00 lr: 0.001270 min_lr: 0.001270 loss: 3.7904 (3.8782) weight_decay: 0.0500 (0.0500) time: 0.4900 data: 0.2043 max mem: 21002 Epoch: [193] [120/312] eta: 0:01:55 lr: 0.001269 min_lr: 0.001269 loss: 3.9167 (3.8650) weight_decay: 0.0500 (0.0500) time: 0.4832 data: 0.1973 max mem: 21002 Epoch: [193] [130/312] eta: 0:01:50 lr: 0.001268 min_lr: 0.001268 loss: 3.8963 (3.8676) weight_decay: 0.0500 (0.0500) time: 0.6799 data: 0.3936 max mem: 21002 Epoch: [193] [140/312] eta: 0:01:40 lr: 0.001268 min_lr: 0.001268 loss: 3.8655 (3.8668) weight_decay: 0.0500 (0.0500) time: 0.4831 data: 0.1969 max mem: 21002 Epoch: [193] [150/312] eta: 0:01:35 lr: 0.001267 min_lr: 0.001267 loss: 4.0210 (3.8898) weight_decay: 0.0500 (0.0500) time: 0.4810 data: 0.1946 max mem: 21002 Epoch: [193] [160/312] eta: 0:01:30 lr: 0.001266 min_lr: 0.001266 loss: 4.1889 (3.8778) weight_decay: 0.0500 (0.0500) time: 0.6606 data: 0.3743 max mem: 21002 Epoch: [193] [170/312] eta: 0:01:21 lr: 0.001266 min_lr: 0.001266 loss: 4.0633 (3.8766) weight_decay: 0.0500 (0.0500) time: 0.4642 data: 0.1803 max mem: 21002 Epoch: [193] [180/312] eta: 0:01:16 lr: 0.001265 min_lr: 0.001265 loss: 4.0633 (3.8776) weight_decay: 0.0500 (0.0500) time: 0.4787 data: 0.1940 max mem: 21002 Epoch: [193] [190/312] eta: 0:01:09 lr: 0.001264 min_lr: 0.001264 loss: 4.0728 (3.8810) weight_decay: 0.0500 (0.0500) time: 0.4793 data: 0.1940 max mem: 21002 Epoch: [193] [200/312] eta: 0:01:04 lr: 0.001264 min_lr: 0.001264 loss: 4.0791 (3.8853) weight_decay: 0.0500 (0.0500) time: 0.4883 data: 0.2017 max mem: 21002 Epoch: [193] [210/312] eta: 0:00:58 lr: 0.001263 min_lr: 0.001263 loss: 4.2136 (3.9028) weight_decay: 0.0500 (0.0500) time: 0.6936 data: 0.4029 max mem: 21002 Epoch: [193] [220/312] eta: 0:00:51 lr: 0.001262 min_lr: 0.001262 loss: 4.0113 (3.9005) weight_decay: 0.0500 (0.0500) time: 0.4918 data: 0.2019 max mem: 21002 Epoch: [193] [230/312] eta: 0:00:46 lr: 0.001262 min_lr: 0.001262 loss: 3.8386 (3.9041) weight_decay: 0.0500 (0.0500) time: 0.4889 data: 0.2014 max mem: 21002 Epoch: [193] [240/312] eta: 0:00:41 lr: 0.001261 min_lr: 0.001261 loss: 4.0428 (3.8982) weight_decay: 0.0500 (0.0500) time: 0.6880 data: 0.4007 max mem: 21002 Epoch: [193] [250/312] eta: 0:00:34 lr: 0.001260 min_lr: 0.001260 loss: 3.6252 (3.8868) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.2000 max mem: 21002 Epoch: [193] [260/312] eta: 0:00:29 lr: 0.001260 min_lr: 0.001260 loss: 3.7720 (3.8797) weight_decay: 0.0500 (0.0500) time: 0.4673 data: 0.1815 max mem: 21002 Epoch: [193] [270/312] eta: 0:00:23 lr: 0.001259 min_lr: 0.001259 loss: 3.9519 (3.8812) weight_decay: 0.0500 (0.0500) time: 0.4692 data: 0.1815 max mem: 21002 Epoch: [193] [280/312] eta: 0:00:17 lr: 0.001258 min_lr: 0.001258 loss: 4.0274 (3.8820) weight_decay: 0.0500 (0.0500) time: 0.4764 data: 0.1899 max mem: 21002 Epoch: [193] [290/312] eta: 0:00:12 lr: 0.001258 min_lr: 0.001258 loss: 3.6068 (3.8691) weight_decay: 0.0500 (0.0500) time: 0.6127 data: 0.3274 max mem: 21002 Epoch: [193] [300/312] eta: 0:00:06 lr: 0.001257 min_lr: 0.001257 loss: 3.4837 (3.8609) weight_decay: 0.0500 (0.0500) time: 0.4197 data: 0.1378 max mem: 21002 Epoch: [193] [310/312] eta: 0:00:01 lr: 0.001256 min_lr: 0.001256 loss: 3.8924 (3.8610) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [193] [311/312] eta: 0:00:00 lr: 0.001256 min_lr: 0.001256 loss: 3.8924 (3.8613) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [193] Total time: 0:02:50 (0.5451 s / it) Averaged stats: lr: 0.001256 min_lr: 0.001256 loss: 3.8924 (3.7965) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.5504 (1.5504) acc1: 71.3542 (71.3542) acc5: 91.4062 (91.4062) time: 8.6917 data: 8.5735 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.9494 (1.8396) acc1: 63.8021 (64.5600) acc5: 85.9375 (86.4800) time: 1.0509 data: 0.9527 max mem: 21002 Test: Total time: 0:00:09 (1.0613 s / it) * Acc@1 64.366 Acc@5 86.394 loss 1.843 Accuracy of the model on the 50000 test images: 64.4% Max accuracy: 65.12% Epoch: [194] [ 0/312] eta: 1:03:19 lr: 0.001256 min_lr: 0.001256 loss: 3.7694 (3.7694) weight_decay: 0.0500 (0.0500) time: 12.1780 data: 10.9782 max mem: 21002 Epoch: [194] [ 10/312] eta: 0:08:30 lr: 0.001255 min_lr: 0.001255 loss: 4.1025 (3.8242) weight_decay: 0.0500 (0.0500) time: 1.6905 data: 1.0120 max mem: 21002 Epoch: [194] [ 20/312] eta: 0:05:10 lr: 0.001255 min_lr: 0.001255 loss: 3.9985 (3.8168) weight_decay: 0.0500 (0.0500) time: 0.5062 data: 0.0122 max mem: 21002 Epoch: [194] [ 30/312] eta: 0:03:49 lr: 0.001254 min_lr: 0.001254 loss: 3.7735 (3.7881) weight_decay: 0.0500 (0.0500) time: 0.3302 data: 0.0050 max mem: 21002 Epoch: [194] [ 40/312] eta: 0:03:07 lr: 0.001253 min_lr: 0.001253 loss: 3.7229 (3.7429) weight_decay: 0.0500 (0.0500) time: 0.2999 data: 0.0011 max mem: 21002 Epoch: [194] [ 50/312] eta: 0:02:58 lr: 0.001253 min_lr: 0.001253 loss: 3.3267 (3.6615) weight_decay: 0.0500 (0.0500) time: 0.4813 data: 0.1111 max mem: 21002 Epoch: [194] [ 60/312] eta: 0:02:38 lr: 0.001252 min_lr: 0.001252 loss: 3.3594 (3.6457) weight_decay: 0.0500 (0.0500) time: 0.5047 data: 0.1425 max mem: 21002 Epoch: [194] [ 70/312] eta: 0:02:31 lr: 0.001251 min_lr: 0.001251 loss: 3.5588 (3.6451) weight_decay: 0.0500 (0.0500) time: 0.4833 data: 0.1370 max mem: 21002 Epoch: [194] [ 80/312] eta: 0:02:25 lr: 0.001251 min_lr: 0.001251 loss: 3.5710 (3.6321) weight_decay: 0.0500 (0.0500) time: 0.6220 data: 0.2369 max mem: 21002 Epoch: [194] [ 90/312] eta: 0:02:11 lr: 0.001250 min_lr: 0.001250 loss: 3.9135 (3.6809) weight_decay: 0.0500 (0.0500) time: 0.4762 data: 0.1322 max mem: 21002 Epoch: [194] [100/312] eta: 0:02:07 lr: 0.001249 min_lr: 0.001249 loss: 3.9703 (3.6697) weight_decay: 0.0500 (0.0500) time: 0.5033 data: 0.1279 max mem: 21002 Epoch: [194] [110/312] eta: 0:01:56 lr: 0.001249 min_lr: 0.001249 loss: 3.8182 (3.6908) weight_decay: 0.0500 (0.0500) time: 0.4877 data: 0.1279 max mem: 21002 Epoch: [194] [120/312] eta: 0:01:51 lr: 0.001248 min_lr: 0.001248 loss: 3.9460 (3.7062) weight_decay: 0.0500 (0.0500) time: 0.4642 data: 0.1159 max mem: 21002 Epoch: [194] [130/312] eta: 0:01:47 lr: 0.001247 min_lr: 0.001247 loss: 4.0404 (3.7355) weight_decay: 0.0500 (0.0500) time: 0.6663 data: 0.2362 max mem: 21002 Epoch: [194] [140/312] eta: 0:01:37 lr: 0.001247 min_lr: 0.001247 loss: 3.9221 (3.7247) weight_decay: 0.0500 (0.0500) time: 0.5040 data: 0.1210 max mem: 21002 Epoch: [194] [150/312] eta: 0:01:33 lr: 0.001246 min_lr: 0.001246 loss: 3.7126 (3.7272) weight_decay: 0.0500 (0.0500) time: 0.5000 data: 0.0972 max mem: 21002 Epoch: [194] [160/312] eta: 0:01:27 lr: 0.001245 min_lr: 0.001245 loss: 3.8106 (3.7320) weight_decay: 0.0500 (0.0500) time: 0.6252 data: 0.1867 max mem: 21002 Epoch: [194] [170/312] eta: 0:01:20 lr: 0.001245 min_lr: 0.001245 loss: 3.9555 (3.7453) weight_decay: 0.0500 (0.0500) time: 0.4749 data: 0.0903 max mem: 21002 Epoch: [194] [180/312] eta: 0:01:15 lr: 0.001244 min_lr: 0.001244 loss: 3.9300 (3.7359) weight_decay: 0.0500 (0.0500) time: 0.5309 data: 0.0975 max mem: 21002 Epoch: [194] [190/312] eta: 0:01:07 lr: 0.001243 min_lr: 0.001243 loss: 3.8922 (3.7429) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.0976 max mem: 21002 Epoch: [194] [200/312] eta: 0:01:02 lr: 0.001243 min_lr: 0.001243 loss: 3.8922 (3.7429) weight_decay: 0.0500 (0.0500) time: 0.4778 data: 0.1055 max mem: 21002 Epoch: [194] [210/312] eta: 0:00:57 lr: 0.001242 min_lr: 0.001242 loss: 3.8491 (3.7553) weight_decay: 0.0500 (0.0500) time: 0.6765 data: 0.2335 max mem: 21002 Epoch: [194] [220/312] eta: 0:00:51 lr: 0.001241 min_lr: 0.001241 loss: 3.8173 (3.7484) weight_decay: 0.0500 (0.0500) time: 0.4869 data: 0.1291 max mem: 21002 Epoch: [194] [230/312] eta: 0:00:45 lr: 0.001241 min_lr: 0.001241 loss: 4.0237 (3.7624) weight_decay: 0.0500 (0.0500) time: 0.4812 data: 0.1392 max mem: 21002 Epoch: [194] [240/312] eta: 0:00:40 lr: 0.001240 min_lr: 0.001240 loss: 4.0569 (3.7583) weight_decay: 0.0500 (0.0500) time: 0.6259 data: 0.2769 max mem: 21002 Epoch: [194] [250/312] eta: 0:00:34 lr: 0.001239 min_lr: 0.001239 loss: 3.9282 (3.7610) weight_decay: 0.0500 (0.0500) time: 0.4901 data: 0.1388 max mem: 21002 Epoch: [194] [260/312] eta: 0:00:29 lr: 0.001239 min_lr: 0.001239 loss: 3.8069 (3.7547) weight_decay: 0.0500 (0.0500) time: 0.5973 data: 0.1319 max mem: 21002 Epoch: [194] [270/312] eta: 0:00:23 lr: 0.001238 min_lr: 0.001238 loss: 3.7739 (3.7537) weight_decay: 0.0500 (0.0500) time: 0.5402 data: 0.1318 max mem: 21002 Epoch: [194] [280/312] eta: 0:00:17 lr: 0.001237 min_lr: 0.001237 loss: 3.7413 (3.7454) weight_decay: 0.0500 (0.0500) time: 0.4728 data: 0.0692 max mem: 21002 Epoch: [194] [290/312] eta: 0:00:12 lr: 0.001237 min_lr: 0.001237 loss: 3.8781 (3.7534) weight_decay: 0.0500 (0.0500) time: 0.6286 data: 0.1429 max mem: 21002 Epoch: [194] [300/312] eta: 0:00:06 lr: 0.001236 min_lr: 0.001236 loss: 4.0101 (3.7496) weight_decay: 0.0500 (0.0500) time: 0.4400 data: 0.0741 max mem: 21002 Epoch: [194] [310/312] eta: 0:00:01 lr: 0.001235 min_lr: 0.001235 loss: 3.7303 (3.7542) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [194] [311/312] eta: 0:00:00 lr: 0.001235 min_lr: 0.001235 loss: 3.7303 (3.7554) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [194] Total time: 0:02:49 (0.5435 s / it) Averaged stats: lr: 0.001235 min_lr: 0.001235 loss: 3.7303 (3.8011) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:23 loss: 1.4876 (1.4876) acc1: 72.7865 (72.7865) acc5: 90.7552 (90.7552) time: 9.2467 data: 9.1281 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.9474 (1.8391) acc1: 63.9323 (65.1840) acc5: 86.4583 (86.3680) time: 1.1130 data: 1.0143 max mem: 21002 Test: Total time: 0:00:10 (1.1270 s / it) * Acc@1 64.890 Acc@5 86.890 loss 1.830 Accuracy of the model on the 50000 test images: 64.9% Max accuracy: 65.12% Epoch: [195] [ 0/312] eta: 0:54:07 lr: 0.001235 min_lr: 0.001235 loss: 3.8809 (3.8809) weight_decay: 0.0500 (0.0500) time: 10.4095 data: 9.3041 max mem: 21002 Epoch: [195] [ 10/312] eta: 0:07:23 lr: 0.001235 min_lr: 0.001235 loss: 3.7823 (3.7627) weight_decay: 0.0500 (0.0500) time: 1.4689 data: 1.0904 max mem: 21002 Epoch: [195] [ 20/312] eta: 0:05:17 lr: 0.001234 min_lr: 0.001234 loss: 3.6219 (3.5999) weight_decay: 0.0500 (0.0500) time: 0.6202 data: 0.2332 max mem: 21002 Epoch: [195] [ 30/312] eta: 0:03:53 lr: 0.001233 min_lr: 0.001233 loss: 3.6219 (3.6464) weight_decay: 0.0500 (0.0500) time: 0.4783 data: 0.0990 max mem: 21002 Epoch: [195] [ 40/312] eta: 0:03:39 lr: 0.001233 min_lr: 0.001233 loss: 3.9045 (3.6647) weight_decay: 0.0500 (0.0500) time: 0.5147 data: 0.1004 max mem: 21002 Epoch: [195] [ 50/312] eta: 0:03:27 lr: 0.001232 min_lr: 0.001232 loss: 3.9314 (3.7028) weight_decay: 0.0500 (0.0500) time: 0.7324 data: 0.1984 max mem: 21002 Epoch: [195] [ 60/312] eta: 0:02:58 lr: 0.001231 min_lr: 0.001231 loss: 3.9314 (3.7244) weight_decay: 0.0500 (0.0500) time: 0.5077 data: 0.0988 max mem: 21002 Epoch: [195] [ 70/312] eta: 0:02:43 lr: 0.001231 min_lr: 0.001231 loss: 3.7779 (3.6997) weight_decay: 0.0500 (0.0500) time: 0.3796 data: 0.0007 max mem: 21002 Epoch: [195] [ 80/312] eta: 0:02:29 lr: 0.001230 min_lr: 0.001230 loss: 3.9179 (3.7283) weight_decay: 0.0500 (0.0500) time: 0.4422 data: 0.0008 max mem: 21002 Epoch: [195] [ 90/312] eta: 0:02:20 lr: 0.001229 min_lr: 0.001229 loss: 4.0858 (3.7401) weight_decay: 0.0500 (0.0500) time: 0.4916 data: 0.0590 max mem: 21002 Epoch: [195] [100/312] eta: 0:02:18 lr: 0.001229 min_lr: 0.001229 loss: 4.0333 (3.7605) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.1225 max mem: 21002 Epoch: [195] [110/312] eta: 0:02:05 lr: 0.001228 min_lr: 0.001228 loss: 3.9890 (3.7744) weight_decay: 0.0500 (0.0500) time: 0.5464 data: 0.0648 max mem: 21002 Epoch: [195] [120/312] eta: 0:01:59 lr: 0.001227 min_lr: 0.001227 loss: 4.1793 (3.8174) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.0322 max mem: 21002 Epoch: [195] [130/312] eta: 0:01:54 lr: 0.001227 min_lr: 0.001227 loss: 4.1793 (3.8214) weight_decay: 0.0500 (0.0500) time: 0.6998 data: 0.0461 max mem: 21002 Epoch: [195] [140/312] eta: 0:01:44 lr: 0.001226 min_lr: 0.001226 loss: 3.6900 (3.8240) weight_decay: 0.0500 (0.0500) time: 0.5060 data: 0.0149 max mem: 21002 Epoch: [195] [150/312] eta: 0:01:39 lr: 0.001225 min_lr: 0.001225 loss: 3.8238 (3.8156) weight_decay: 0.0500 (0.0500) time: 0.4782 data: 0.0542 max mem: 21002 Epoch: [195] [160/312] eta: 0:01:29 lr: 0.001225 min_lr: 0.001225 loss: 3.5737 (3.8089) weight_decay: 0.0500 (0.0500) time: 0.4815 data: 0.0541 max mem: 21002 Epoch: [195] [170/312] eta: 0:01:24 lr: 0.001224 min_lr: 0.001224 loss: 3.8740 (3.8135) weight_decay: 0.0500 (0.0500) time: 0.4856 data: 0.0342 max mem: 21002 Epoch: [195] [180/312] eta: 0:01:20 lr: 0.001223 min_lr: 0.001223 loss: 3.8740 (3.8129) weight_decay: 0.0500 (0.0500) time: 0.7369 data: 0.0395 max mem: 21002 Epoch: [195] [190/312] eta: 0:01:12 lr: 0.001223 min_lr: 0.001223 loss: 4.1172 (3.8302) weight_decay: 0.0500 (0.0500) time: 0.5439 data: 0.0059 max mem: 21002 Epoch: [195] [200/312] eta: 0:01:05 lr: 0.001222 min_lr: 0.001222 loss: 4.1522 (3.8370) weight_decay: 0.0500 (0.0500) time: 0.4122 data: 0.0200 max mem: 21002 Epoch: [195] [210/312] eta: 0:01:00 lr: 0.001221 min_lr: 0.001221 loss: 3.9931 (3.8190) weight_decay: 0.0500 (0.0500) time: 0.6027 data: 0.0201 max mem: 21002 Epoch: [195] [220/312] eta: 0:00:53 lr: 0.001221 min_lr: 0.001221 loss: 3.4733 (3.8042) weight_decay: 0.0500 (0.0500) time: 0.4793 data: 0.0007 max mem: 21002 Epoch: [195] [230/312] eta: 0:00:47 lr: 0.001220 min_lr: 0.001220 loss: 3.8192 (3.8072) weight_decay: 0.0500 (0.0500) time: 0.4874 data: 0.0009 max mem: 21002 Epoch: [195] [240/312] eta: 0:00:41 lr: 0.001219 min_lr: 0.001219 loss: 3.9203 (3.8064) weight_decay: 0.0500 (0.0500) time: 0.4911 data: 0.0010 max mem: 21002 Epoch: [195] [250/312] eta: 0:00:35 lr: 0.001219 min_lr: 0.001219 loss: 3.8645 (3.8058) weight_decay: 0.0500 (0.0500) time: 0.5311 data: 0.0094 max mem: 21002 Epoch: [195] [260/312] eta: 0:00:30 lr: 0.001218 min_lr: 0.001218 loss: 3.9867 (3.8103) weight_decay: 0.0500 (0.0500) time: 0.7679 data: 0.0179 max mem: 21002 Epoch: [195] [270/312] eta: 0:00:24 lr: 0.001217 min_lr: 0.001217 loss: 4.0727 (3.8155) weight_decay: 0.0500 (0.0500) time: 0.5285 data: 0.0092 max mem: 21002 Epoch: [195] [280/312] eta: 0:00:18 lr: 0.001217 min_lr: 0.001217 loss: 3.6840 (3.8060) weight_decay: 0.0500 (0.0500) time: 0.4144 data: 0.0093 max mem: 21002 Epoch: [195] [290/312] eta: 0:00:12 lr: 0.001216 min_lr: 0.001216 loss: 3.8917 (3.8121) weight_decay: 0.0500 (0.0500) time: 0.5284 data: 0.0091 max mem: 21002 Epoch: [195] [300/312] eta: 0:00:06 lr: 0.001215 min_lr: 0.001215 loss: 4.1307 (3.8102) weight_decay: 0.0500 (0.0500) time: 0.3948 data: 0.0002 max mem: 21002 Epoch: [195] [310/312] eta: 0:00:01 lr: 0.001215 min_lr: 0.001215 loss: 3.8568 (3.8054) weight_decay: 0.0500 (0.0500) time: 0.2769 data: 0.0001 max mem: 21002 Epoch: [195] [311/312] eta: 0:00:00 lr: 0.001215 min_lr: 0.001215 loss: 3.8568 (3.8047) weight_decay: 0.0500 (0.0500) time: 0.2768 data: 0.0001 max mem: 21002 Epoch: [195] Total time: 0:02:53 (0.5557 s / it) Averaged stats: lr: 0.001215 min_lr: 0.001215 loss: 3.8568 (3.7983) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:21 loss: 1.4826 (1.4826) acc1: 72.3958 (72.3958) acc5: 91.1458 (91.1458) time: 9.0695 data: 8.9519 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.9016 (1.7686) acc1: 62.2396 (63.5840) acc5: 85.4167 (86.2720) time: 1.0965 data: 0.9947 max mem: 21002 Test: Total time: 0:00:10 (1.1395 s / it) * Acc@1 63.670 Acc@5 86.188 loss 1.768 Accuracy of the model on the 50000 test images: 63.7% Max accuracy: 65.12% Epoch: [196] [ 0/312] eta: 0:58:29 lr: 0.001215 min_lr: 0.001215 loss: 4.1501 (4.1501) weight_decay: 0.0500 (0.0500) time: 11.2498 data: 8.8349 max mem: 21002 Epoch: [196] [ 10/312] eta: 0:07:51 lr: 0.001214 min_lr: 0.001214 loss: 3.4934 (3.7382) weight_decay: 0.0500 (0.0500) time: 1.5606 data: 1.0296 max mem: 21002 Epoch: [196] [ 20/312] eta: 0:05:31 lr: 0.001213 min_lr: 0.001213 loss: 3.6328 (3.7424) weight_decay: 0.0500 (0.0500) time: 0.6282 data: 0.1822 max mem: 21002 Epoch: [196] [ 30/312] eta: 0:04:03 lr: 0.001213 min_lr: 0.001213 loss: 4.0224 (3.8078) weight_decay: 0.0500 (0.0500) time: 0.4810 data: 0.0581 max mem: 21002 Epoch: [196] [ 40/312] eta: 0:03:30 lr: 0.001212 min_lr: 0.001212 loss: 3.6152 (3.7244) weight_decay: 0.0500 (0.0500) time: 0.3938 data: 0.0008 max mem: 21002 Epoch: [196] [ 50/312] eta: 0:03:20 lr: 0.001211 min_lr: 0.001211 loss: 3.6538 (3.7276) weight_decay: 0.0500 (0.0500) time: 0.6100 data: 0.0360 max mem: 21002 Epoch: [196] [ 60/312] eta: 0:02:54 lr: 0.001211 min_lr: 0.001211 loss: 3.7025 (3.7579) weight_decay: 0.0500 (0.0500) time: 0.5244 data: 0.0502 max mem: 21002 Epoch: [196] [ 70/312] eta: 0:02:46 lr: 0.001210 min_lr: 0.001210 loss: 3.8564 (3.7520) weight_decay: 0.0500 (0.0500) time: 0.4886 data: 0.0617 max mem: 21002 Epoch: [196] [ 80/312] eta: 0:02:37 lr: 0.001209 min_lr: 0.001209 loss: 3.8679 (3.7694) weight_decay: 0.0500 (0.0500) time: 0.6363 data: 0.0483 max mem: 21002 Epoch: [196] [ 90/312] eta: 0:02:21 lr: 0.001209 min_lr: 0.001209 loss: 3.7587 (3.7528) weight_decay: 0.0500 (0.0500) time: 0.4642 data: 0.0163 max mem: 21002 Epoch: [196] [100/312] eta: 0:02:17 lr: 0.001208 min_lr: 0.001208 loss: 3.7628 (3.7615) weight_decay: 0.0500 (0.0500) time: 0.5185 data: 0.0661 max mem: 21002 Epoch: [196] [110/312] eta: 0:02:04 lr: 0.001207 min_lr: 0.001207 loss: 3.6681 (3.7444) weight_decay: 0.0500 (0.0500) time: 0.5079 data: 0.0514 max mem: 21002 Epoch: [196] [120/312] eta: 0:01:59 lr: 0.001207 min_lr: 0.001207 loss: 3.6072 (3.7469) weight_decay: 0.0500 (0.0500) time: 0.4916 data: 0.0378 max mem: 21002 Epoch: [196] [130/312] eta: 0:01:52 lr: 0.001206 min_lr: 0.001206 loss: 4.0878 (3.7720) weight_decay: 0.0500 (0.0500) time: 0.6499 data: 0.0539 max mem: 21002 Epoch: [196] [140/312] eta: 0:01:42 lr: 0.001205 min_lr: 0.001205 loss: 4.1070 (3.7786) weight_decay: 0.0500 (0.0500) time: 0.4632 data: 0.0323 max mem: 21002 Epoch: [196] [150/312] eta: 0:01:37 lr: 0.001205 min_lr: 0.001205 loss: 3.9826 (3.7911) weight_decay: 0.0500 (0.0500) time: 0.4998 data: 0.0491 max mem: 21002 Epoch: [196] [160/312] eta: 0:01:32 lr: 0.001204 min_lr: 0.001204 loss: 3.8819 (3.7954) weight_decay: 0.0500 (0.0500) time: 0.6781 data: 0.0529 max mem: 21002 Epoch: [196] [170/312] eta: 0:01:24 lr: 0.001203 min_lr: 0.001203 loss: 3.8267 (3.7850) weight_decay: 0.0500 (0.0500) time: 0.5083 data: 0.0491 max mem: 21002 Epoch: [196] [180/312] eta: 0:01:18 lr: 0.001203 min_lr: 0.001203 loss: 3.9672 (3.8017) weight_decay: 0.0500 (0.0500) time: 0.5036 data: 0.0340 max mem: 21002 Epoch: [196] [190/312] eta: 0:01:10 lr: 0.001202 min_lr: 0.001202 loss: 3.9672 (3.8038) weight_decay: 0.0500 (0.0500) time: 0.4763 data: 0.0051 max mem: 21002 Epoch: [196] [200/312] eta: 0:01:06 lr: 0.001201 min_lr: 0.001201 loss: 3.9278 (3.8157) weight_decay: 0.0500 (0.0500) time: 0.5411 data: 0.0483 max mem: 21002 Epoch: [196] [210/312] eta: 0:01:00 lr: 0.001201 min_lr: 0.001201 loss: 3.9672 (3.8150) weight_decay: 0.0500 (0.0500) time: 0.6842 data: 0.0483 max mem: 21002 Epoch: [196] [220/312] eta: 0:00:53 lr: 0.001200 min_lr: 0.001200 loss: 3.9590 (3.8236) weight_decay: 0.0500 (0.0500) time: 0.4321 data: 0.0007 max mem: 21002 Epoch: [196] [230/312] eta: 0:00:47 lr: 0.001199 min_lr: 0.001199 loss: 4.0654 (3.8205) weight_decay: 0.0500 (0.0500) time: 0.4700 data: 0.0007 max mem: 21002 Epoch: [196] [240/312] eta: 0:00:42 lr: 0.001199 min_lr: 0.001199 loss: 4.0213 (3.8233) weight_decay: 0.0500 (0.0500) time: 0.6934 data: 0.0007 max mem: 21002 Epoch: [196] [250/312] eta: 0:00:35 lr: 0.001198 min_lr: 0.001198 loss: 3.9056 (3.8262) weight_decay: 0.0500 (0.0500) time: 0.5433 data: 0.0303 max mem: 21002 Epoch: [196] [260/312] eta: 0:00:30 lr: 0.001197 min_lr: 0.001197 loss: 3.8548 (3.8242) weight_decay: 0.0500 (0.0500) time: 0.5252 data: 0.0595 max mem: 21002 Epoch: [196] [270/312] eta: 0:00:23 lr: 0.001197 min_lr: 0.001197 loss: 3.8748 (3.8195) weight_decay: 0.0500 (0.0500) time: 0.4998 data: 0.0346 max mem: 21002 Epoch: [196] [280/312] eta: 0:00:18 lr: 0.001196 min_lr: 0.001196 loss: 3.8727 (3.8180) weight_decay: 0.0500 (0.0500) time: 0.4911 data: 0.0261 max mem: 21002 Epoch: [196] [290/312] eta: 0:00:12 lr: 0.001196 min_lr: 0.001196 loss: 3.9290 (3.8253) weight_decay: 0.0500 (0.0500) time: 0.5750 data: 0.0382 max mem: 21002 Epoch: [196] [300/312] eta: 0:00:06 lr: 0.001195 min_lr: 0.001195 loss: 4.1144 (3.8293) weight_decay: 0.0500 (0.0500) time: 0.3720 data: 0.0173 max mem: 21002 Epoch: [196] [310/312] eta: 0:00:01 lr: 0.001194 min_lr: 0.001194 loss: 3.9929 (3.8353) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [196] [311/312] eta: 0:00:00 lr: 0.001194 min_lr: 0.001194 loss: 3.9929 (3.8358) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [196] Total time: 0:02:53 (0.5554 s / it) Averaged stats: lr: 0.001194 min_lr: 0.001194 loss: 3.9929 (3.7915) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.6824 (1.6824) acc1: 73.1771 (73.1771) acc5: 90.7552 (90.7552) time: 8.8083 data: 8.6895 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 2.1923 (2.0166) acc1: 58.7240 (62.0320) acc5: 83.0729 (84.6400) time: 1.0652 data: 0.9656 max mem: 21002 Test: Total time: 0:00:09 (1.0965 s / it) * Acc@1 61.992 Acc@5 84.776 loss 2.027 Accuracy of the model on the 50000 test images: 62.0% Max accuracy: 65.12% Epoch: [197] [ 0/312] eta: 1:06:21 lr: 0.001194 min_lr: 0.001194 loss: 3.8870 (3.8870) weight_decay: 0.0500 (0.0500) time: 12.7598 data: 8.7190 max mem: 21002 Epoch: [197] [ 10/312] eta: 0:08:41 lr: 0.001193 min_lr: 0.001193 loss: 3.7538 (3.6370) weight_decay: 0.0500 (0.0500) time: 1.7276 data: 1.1039 max mem: 21002 Epoch: [197] [ 20/312] eta: 0:05:12 lr: 0.001193 min_lr: 0.001193 loss: 3.9575 (3.7721) weight_decay: 0.0500 (0.0500) time: 0.4868 data: 0.1837 max mem: 21002 Epoch: [197] [ 30/312] eta: 0:03:51 lr: 0.001192 min_lr: 0.001192 loss: 4.0268 (3.7911) weight_decay: 0.0500 (0.0500) time: 0.3225 data: 0.0128 max mem: 21002 Epoch: [197] [ 40/312] eta: 0:03:09 lr: 0.001191 min_lr: 0.001191 loss: 3.9581 (3.7988) weight_decay: 0.0500 (0.0500) time: 0.3025 data: 0.0006 max mem: 21002 Epoch: [197] [ 50/312] eta: 0:03:00 lr: 0.001191 min_lr: 0.001191 loss: 3.9581 (3.8103) weight_decay: 0.0500 (0.0500) time: 0.4843 data: 0.0760 max mem: 21002 Epoch: [197] [ 60/312] eta: 0:02:41 lr: 0.001190 min_lr: 0.001190 loss: 3.7582 (3.7981) weight_decay: 0.0500 (0.0500) time: 0.5248 data: 0.0823 max mem: 21002 Epoch: [197] [ 70/312] eta: 0:02:36 lr: 0.001189 min_lr: 0.001189 loss: 3.7116 (3.7547) weight_decay: 0.0500 (0.0500) time: 0.5403 data: 0.0779 max mem: 21002 Epoch: [197] [ 80/312] eta: 0:02:25 lr: 0.001189 min_lr: 0.001189 loss: 3.8462 (3.7847) weight_decay: 0.0500 (0.0500) time: 0.5837 data: 0.0885 max mem: 21002 Epoch: [197] [ 90/312] eta: 0:02:16 lr: 0.001188 min_lr: 0.001188 loss: 4.0908 (3.7910) weight_decay: 0.0500 (0.0500) time: 0.4913 data: 0.0684 max mem: 21002 Epoch: [197] [100/312] eta: 0:02:12 lr: 0.001187 min_lr: 0.001187 loss: 3.9548 (3.7732) weight_decay: 0.0500 (0.0500) time: 0.6109 data: 0.1104 max mem: 21002 Epoch: [197] [110/312] eta: 0:02:00 lr: 0.001187 min_lr: 0.001187 loss: 3.9728 (3.7842) weight_decay: 0.0500 (0.0500) time: 0.5200 data: 0.0736 max mem: 21002 Epoch: [197] [120/312] eta: 0:01:55 lr: 0.001186 min_lr: 0.001186 loss: 3.9231 (3.7697) weight_decay: 0.0500 (0.0500) time: 0.5001 data: 0.0668 max mem: 21002 Epoch: [197] [130/312] eta: 0:01:50 lr: 0.001186 min_lr: 0.001186 loss: 3.8722 (3.7734) weight_decay: 0.0500 (0.0500) time: 0.6546 data: 0.1020 max mem: 21002 Epoch: [197] [140/312] eta: 0:01:41 lr: 0.001185 min_lr: 0.001185 loss: 3.8722 (3.7626) weight_decay: 0.0500 (0.0500) time: 0.5145 data: 0.0738 max mem: 21002 Epoch: [197] [150/312] eta: 0:01:35 lr: 0.001184 min_lr: 0.001184 loss: 3.6943 (3.7604) weight_decay: 0.0500 (0.0500) time: 0.5077 data: 0.0681 max mem: 21002 Epoch: [197] [160/312] eta: 0:01:28 lr: 0.001184 min_lr: 0.001184 loss: 3.7026 (3.7496) weight_decay: 0.0500 (0.0500) time: 0.5167 data: 0.0746 max mem: 21002 Epoch: [197] [170/312] eta: 0:01:22 lr: 0.001183 min_lr: 0.001183 loss: 3.7106 (3.7603) weight_decay: 0.0500 (0.0500) time: 0.5036 data: 0.1206 max mem: 21002 Epoch: [197] [180/312] eta: 0:01:17 lr: 0.001182 min_lr: 0.001182 loss: 3.8050 (3.7630) weight_decay: 0.0500 (0.0500) time: 0.6296 data: 0.1417 max mem: 21002 Epoch: [197] [190/312] eta: 0:01:09 lr: 0.001182 min_lr: 0.001182 loss: 3.9541 (3.7626) weight_decay: 0.0500 (0.0500) time: 0.5024 data: 0.0715 max mem: 21002 Epoch: [197] [200/312] eta: 0:01:04 lr: 0.001181 min_lr: 0.001181 loss: 3.8127 (3.7557) weight_decay: 0.0500 (0.0500) time: 0.5000 data: 0.0860 max mem: 21002 Epoch: [197] [210/312] eta: 0:00:59 lr: 0.001180 min_lr: 0.001180 loss: 3.8127 (3.7523) weight_decay: 0.0500 (0.0500) time: 0.6441 data: 0.1177 max mem: 21002 Epoch: [197] [220/312] eta: 0:00:52 lr: 0.001180 min_lr: 0.001180 loss: 3.9055 (3.7605) weight_decay: 0.0500 (0.0500) time: 0.5241 data: 0.0965 max mem: 21002 Epoch: [197] [230/312] eta: 0:00:47 lr: 0.001179 min_lr: 0.001179 loss: 3.9239 (3.7680) weight_decay: 0.0500 (0.0500) time: 0.5290 data: 0.1201 max mem: 21002 Epoch: [197] [240/312] eta: 0:00:40 lr: 0.001178 min_lr: 0.001178 loss: 3.8627 (3.7713) weight_decay: 0.0500 (0.0500) time: 0.4749 data: 0.0869 max mem: 21002 Epoch: [197] [250/312] eta: 0:00:35 lr: 0.001178 min_lr: 0.001178 loss: 3.8627 (3.7754) weight_decay: 0.0500 (0.0500) time: 0.4952 data: 0.0933 max mem: 21002 Epoch: [197] [260/312] eta: 0:00:29 lr: 0.001177 min_lr: 0.001177 loss: 3.9558 (3.7735) weight_decay: 0.0500 (0.0500) time: 0.6961 data: 0.1429 max mem: 21002 Epoch: [197] [270/312] eta: 0:00:23 lr: 0.001176 min_lr: 0.001176 loss: 3.6576 (3.7656) weight_decay: 0.0500 (0.0500) time: 0.5445 data: 0.0674 max mem: 21002 Epoch: [197] [280/312] eta: 0:00:18 lr: 0.001176 min_lr: 0.001176 loss: 3.8487 (3.7747) weight_decay: 0.0500 (0.0500) time: 0.5389 data: 0.0756 max mem: 21002 Epoch: [197] [290/312] eta: 0:00:12 lr: 0.001175 min_lr: 0.001175 loss: 3.7427 (3.7687) weight_decay: 0.0500 (0.0500) time: 0.5784 data: 0.1145 max mem: 21002 Epoch: [197] [300/312] eta: 0:00:06 lr: 0.001174 min_lr: 0.001174 loss: 3.8934 (3.7811) weight_decay: 0.0500 (0.0500) time: 0.3773 data: 0.0557 max mem: 21002 Epoch: [197] [310/312] eta: 0:00:01 lr: 0.001174 min_lr: 0.001174 loss: 4.0873 (3.7909) weight_decay: 0.0500 (0.0500) time: 0.2956 data: 0.0111 max mem: 21002 Epoch: [197] [311/312] eta: 0:00:00 lr: 0.001174 min_lr: 0.001174 loss: 4.0873 (3.7935) weight_decay: 0.0500 (0.0500) time: 0.2844 data: 0.0001 max mem: 21002 Epoch: [197] Total time: 0:02:52 (0.5537 s / it) Averaged stats: lr: 0.001174 min_lr: 0.001174 loss: 4.0873 (3.7989) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:23 loss: 1.6352 (1.6352) acc1: 69.1406 (69.1406) acc5: 89.1927 (89.1927) time: 9.2608 data: 9.1429 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 2.0134 (1.9224) acc1: 61.8490 (61.3920) acc5: 83.7240 (84.4480) time: 1.1185 data: 1.0159 max mem: 21002 Test: Total time: 0:00:10 (1.1390 s / it) * Acc@1 60.992 Acc@5 84.430 loss 1.937 Accuracy of the model on the 50000 test images: 61.0% Max accuracy: 65.12% Epoch: [198] [ 0/312] eta: 1:03:05 lr: 0.001174 min_lr: 0.001174 loss: 3.9752 (3.9752) weight_decay: 0.0500 (0.0500) time: 12.1317 data: 10.2066 max mem: 21002 Epoch: [198] [ 10/312] eta: 0:08:12 lr: 0.001173 min_lr: 0.001173 loss: 3.5113 (3.5129) weight_decay: 0.0500 (0.0500) time: 1.6296 data: 1.1246 max mem: 21002 Epoch: [198] [ 20/312] eta: 0:05:34 lr: 0.001172 min_lr: 0.001172 loss: 3.5113 (3.5810) weight_decay: 0.0500 (0.0500) time: 0.5954 data: 0.2132 max mem: 21002 Epoch: [198] [ 30/312] eta: 0:04:05 lr: 0.001172 min_lr: 0.001172 loss: 3.5028 (3.5049) weight_decay: 0.0500 (0.0500) time: 0.4515 data: 0.1055 max mem: 21002 Epoch: [198] [ 40/312] eta: 0:03:36 lr: 0.001171 min_lr: 0.001171 loss: 3.6734 (3.5815) weight_decay: 0.0500 (0.0500) time: 0.4311 data: 0.0976 max mem: 21002 Epoch: [198] [ 50/312] eta: 0:03:21 lr: 0.001170 min_lr: 0.001170 loss: 3.7814 (3.5531) weight_decay: 0.0500 (0.0500) time: 0.6100 data: 0.2111 max mem: 21002 Epoch: [198] [ 60/312] eta: 0:02:53 lr: 0.001170 min_lr: 0.001170 loss: 3.7405 (3.6092) weight_decay: 0.0500 (0.0500) time: 0.4686 data: 0.1144 max mem: 21002 Epoch: [198] [ 70/312] eta: 0:02:47 lr: 0.001169 min_lr: 0.001169 loss: 3.7827 (3.6400) weight_decay: 0.0500 (0.0500) time: 0.5046 data: 0.1169 max mem: 21002 Epoch: [198] [ 80/312] eta: 0:02:35 lr: 0.001168 min_lr: 0.001168 loss: 3.7239 (3.6357) weight_decay: 0.0500 (0.0500) time: 0.6073 data: 0.1228 max mem: 21002 Epoch: [198] [ 90/312] eta: 0:02:25 lr: 0.001168 min_lr: 0.001168 loss: 3.7239 (3.6411) weight_decay: 0.0500 (0.0500) time: 0.5283 data: 0.1442 max mem: 21002 Epoch: [198] [100/312] eta: 0:02:19 lr: 0.001167 min_lr: 0.001167 loss: 3.7652 (3.6797) weight_decay: 0.0500 (0.0500) time: 0.6221 data: 0.2429 max mem: 21002 Epoch: [198] [110/312] eta: 0:02:06 lr: 0.001166 min_lr: 0.001166 loss: 3.9628 (3.6924) weight_decay: 0.0500 (0.0500) time: 0.4875 data: 0.1054 max mem: 21002 Epoch: [198] [120/312] eta: 0:02:01 lr: 0.001166 min_lr: 0.001166 loss: 3.9628 (3.7172) weight_decay: 0.0500 (0.0500) time: 0.4892 data: 0.1219 max mem: 21002 Epoch: [198] [130/312] eta: 0:01:55 lr: 0.001165 min_lr: 0.001165 loss: 3.9767 (3.7165) weight_decay: 0.0500 (0.0500) time: 0.6927 data: 0.2442 max mem: 21002 Epoch: [198] [140/312] eta: 0:01:45 lr: 0.001164 min_lr: 0.001164 loss: 3.7218 (3.7214) weight_decay: 0.0500 (0.0500) time: 0.4957 data: 0.1230 max mem: 21002 Epoch: [198] [150/312] eta: 0:01:40 lr: 0.001164 min_lr: 0.001164 loss: 3.8626 (3.7246) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.1356 max mem: 21002 Epoch: [198] [160/312] eta: 0:01:32 lr: 0.001163 min_lr: 0.001163 loss: 3.9515 (3.7367) weight_decay: 0.0500 (0.0500) time: 0.5613 data: 0.1609 max mem: 21002 Epoch: [198] [170/312] eta: 0:01:25 lr: 0.001162 min_lr: 0.001162 loss: 3.9765 (3.7385) weight_decay: 0.0500 (0.0500) time: 0.4656 data: 0.1312 max mem: 21002 Epoch: [198] [180/312] eta: 0:01:19 lr: 0.001162 min_lr: 0.001162 loss: 3.6891 (3.7254) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.2232 max mem: 21002 Epoch: [198] [190/312] eta: 0:01:11 lr: 0.001161 min_lr: 0.001161 loss: 3.6891 (3.7335) weight_decay: 0.0500 (0.0500) time: 0.4912 data: 0.1180 max mem: 21002 Epoch: [198] [200/312] eta: 0:01:06 lr: 0.001161 min_lr: 0.001161 loss: 4.1217 (3.7468) weight_decay: 0.0500 (0.0500) time: 0.4650 data: 0.1252 max mem: 21002 Epoch: [198] [210/312] eta: 0:01:00 lr: 0.001160 min_lr: 0.001160 loss: 3.9197 (3.7381) weight_decay: 0.0500 (0.0500) time: 0.6700 data: 0.2894 max mem: 21002 Epoch: [198] [220/312] eta: 0:00:53 lr: 0.001159 min_lr: 0.001159 loss: 3.5707 (3.7349) weight_decay: 0.0500 (0.0500) time: 0.4999 data: 0.1654 max mem: 21002 Epoch: [198] [230/312] eta: 0:00:48 lr: 0.001159 min_lr: 0.001159 loss: 3.8341 (3.7395) weight_decay: 0.0500 (0.0500) time: 0.5019 data: 0.1434 max mem: 21002 Epoch: [198] [240/312] eta: 0:00:41 lr: 0.001158 min_lr: 0.001158 loss: 4.0026 (3.7457) weight_decay: 0.0500 (0.0500) time: 0.5614 data: 0.1489 max mem: 21002 Epoch: [198] [250/312] eta: 0:00:36 lr: 0.001157 min_lr: 0.001157 loss: 3.6138 (3.7323) weight_decay: 0.0500 (0.0500) time: 0.5246 data: 0.1782 max mem: 21002 Epoch: [198] [260/312] eta: 0:00:30 lr: 0.001157 min_lr: 0.001157 loss: 3.8197 (3.7404) weight_decay: 0.0500 (0.0500) time: 0.6631 data: 0.3312 max mem: 21002 Epoch: [198] [270/312] eta: 0:00:24 lr: 0.001156 min_lr: 0.001156 loss: 4.0256 (3.7442) weight_decay: 0.0500 (0.0500) time: 0.4906 data: 0.1598 max mem: 21002 Epoch: [198] [280/312] eta: 0:00:18 lr: 0.001155 min_lr: 0.001155 loss: 3.9235 (3.7396) weight_decay: 0.0500 (0.0500) time: 0.4986 data: 0.1695 max mem: 21002 Epoch: [198] [290/312] eta: 0:00:12 lr: 0.001155 min_lr: 0.001155 loss: 3.5161 (3.7347) weight_decay: 0.0500 (0.0500) time: 0.5987 data: 0.2520 max mem: 21002 Epoch: [198] [300/312] eta: 0:00:06 lr: 0.001154 min_lr: 0.001154 loss: 3.9366 (3.7376) weight_decay: 0.0500 (0.0500) time: 0.3844 data: 0.0830 max mem: 21002 Epoch: [198] [310/312] eta: 0:00:01 lr: 0.001153 min_lr: 0.001153 loss: 3.9366 (3.7362) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [198] [311/312] eta: 0:00:00 lr: 0.001153 min_lr: 0.001153 loss: 3.9366 (3.7362) weight_decay: 0.0500 (0.0500) time: 0.2769 data: 0.0001 max mem: 21002 Epoch: [198] Total time: 0:02:55 (0.5612 s / it) Averaged stats: lr: 0.001153 min_lr: 0.001153 loss: 3.9366 (3.7996) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.5282 (1.5282) acc1: 71.4844 (71.4844) acc5: 90.3646 (90.3646) time: 8.7553 data: 8.6373 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.9388 (1.7852) acc1: 60.6771 (63.3440) acc5: 84.7656 (85.8080) time: 1.0891 data: 0.9913 max mem: 21002 Test: Total time: 0:00:09 (1.0991 s / it) * Acc@1 63.380 Acc@5 85.766 loss 1.796 Accuracy of the model on the 50000 test images: 63.4% Max accuracy: 65.12% Epoch: [199] [ 0/312] eta: 1:06:14 lr: 0.001153 min_lr: 0.001153 loss: 3.9370 (3.9370) weight_decay: 0.0500 (0.0500) time: 12.7389 data: 11.0115 max mem: 21002 Epoch: [199] [ 10/312] eta: 0:07:57 lr: 0.001153 min_lr: 0.001153 loss: 3.9370 (3.7451) weight_decay: 0.0500 (0.0500) time: 1.5823 data: 1.0775 max mem: 21002 Epoch: [199] [ 20/312] eta: 0:05:53 lr: 0.001152 min_lr: 0.001152 loss: 3.8905 (3.7569) weight_decay: 0.0500 (0.0500) time: 0.6325 data: 0.1418 max mem: 21002 Epoch: [199] [ 30/312] eta: 0:04:16 lr: 0.001151 min_lr: 0.001151 loss: 3.9854 (3.8179) weight_decay: 0.0500 (0.0500) time: 0.5402 data: 0.1000 max mem: 21002 Epoch: [199] [ 40/312] eta: 0:03:29 lr: 0.001151 min_lr: 0.001151 loss: 3.9854 (3.8417) weight_decay: 0.0500 (0.0500) time: 0.3059 data: 0.0039 max mem: 21002 Epoch: [199] [ 50/312] eta: 0:03:10 lr: 0.001150 min_lr: 0.001150 loss: 3.7799 (3.7851) weight_decay: 0.0500 (0.0500) time: 0.4463 data: 0.0042 max mem: 21002 Epoch: [199] [ 60/312] eta: 0:02:46 lr: 0.001149 min_lr: 0.001149 loss: 3.7799 (3.7691) weight_decay: 0.0500 (0.0500) time: 0.4422 data: 0.0187 max mem: 21002 Epoch: [199] [ 70/312] eta: 0:02:44 lr: 0.001149 min_lr: 0.001149 loss: 4.0534 (3.7912) weight_decay: 0.0500 (0.0500) time: 0.5528 data: 0.0640 max mem: 21002 Epoch: [199] [ 80/312] eta: 0:02:30 lr: 0.001148 min_lr: 0.001148 loss: 4.0814 (3.8045) weight_decay: 0.0500 (0.0500) time: 0.6073 data: 0.0463 max mem: 21002 Epoch: [199] [ 90/312] eta: 0:02:18 lr: 0.001147 min_lr: 0.001147 loss: 4.0204 (3.7759) weight_decay: 0.0500 (0.0500) time: 0.4384 data: 0.0057 max mem: 21002 Epoch: [199] [100/312] eta: 0:02:16 lr: 0.001147 min_lr: 0.001147 loss: 4.0204 (3.8082) weight_decay: 0.0500 (0.0500) time: 0.6199 data: 0.0339 max mem: 21002 Epoch: [199] [110/312] eta: 0:02:03 lr: 0.001146 min_lr: 0.001146 loss: 4.1146 (3.8176) weight_decay: 0.0500 (0.0500) time: 0.5410 data: 0.0289 max mem: 21002 Epoch: [199] [120/312] eta: 0:01:57 lr: 0.001145 min_lr: 0.001145 loss: 4.0210 (3.8125) weight_decay: 0.0500 (0.0500) time: 0.4698 data: 0.0403 max mem: 21002 Epoch: [199] [130/312] eta: 0:01:52 lr: 0.001145 min_lr: 0.001145 loss: 3.9811 (3.8126) weight_decay: 0.0500 (0.0500) time: 0.6622 data: 0.0404 max mem: 21002 Epoch: [199] [140/312] eta: 0:01:43 lr: 0.001144 min_lr: 0.001144 loss: 4.0683 (3.8150) weight_decay: 0.0500 (0.0500) time: 0.5399 data: 0.0639 max mem: 21002 Epoch: [199] [150/312] eta: 0:01:38 lr: 0.001143 min_lr: 0.001143 loss: 4.0763 (3.8107) weight_decay: 0.0500 (0.0500) time: 0.5175 data: 0.1239 max mem: 21002 Epoch: [199] [160/312] eta: 0:01:30 lr: 0.001143 min_lr: 0.001143 loss: 4.0589 (3.8144) weight_decay: 0.0500 (0.0500) time: 0.5348 data: 0.0607 max mem: 21002 Epoch: [199] [170/312] eta: 0:01:23 lr: 0.001142 min_lr: 0.001142 loss: 3.8263 (3.7981) weight_decay: 0.0500 (0.0500) time: 0.4640 data: 0.0214 max mem: 21002 Epoch: [199] [180/312] eta: 0:01:18 lr: 0.001142 min_lr: 0.001142 loss: 3.8263 (3.8037) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0381 max mem: 21002 Epoch: [199] [190/312] eta: 0:01:10 lr: 0.001141 min_lr: 0.001141 loss: 4.0518 (3.8123) weight_decay: 0.0500 (0.0500) time: 0.5202 data: 0.0175 max mem: 21002 Epoch: [199] [200/312] eta: 0:01:05 lr: 0.001140 min_lr: 0.001140 loss: 3.9881 (3.8148) weight_decay: 0.0500 (0.0500) time: 0.5196 data: 0.0519 max mem: 21002 Epoch: [199] [210/312] eta: 0:01:00 lr: 0.001140 min_lr: 0.001140 loss: 4.0026 (3.8174) weight_decay: 0.0500 (0.0500) time: 0.6607 data: 0.0521 max mem: 21002 Epoch: [199] [220/312] eta: 0:00:53 lr: 0.001139 min_lr: 0.001139 loss: 4.0026 (3.8234) weight_decay: 0.0500 (0.0500) time: 0.5049 data: 0.0069 max mem: 21002 Epoch: [199] [230/312] eta: 0:00:48 lr: 0.001138 min_lr: 0.001138 loss: 3.8974 (3.8268) weight_decay: 0.0500 (0.0500) time: 0.5541 data: 0.0642 max mem: 21002 Epoch: [199] [240/312] eta: 0:00:41 lr: 0.001138 min_lr: 0.001138 loss: 3.8238 (3.8155) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0582 max mem: 21002 Epoch: [199] [250/312] eta: 0:00:35 lr: 0.001137 min_lr: 0.001137 loss: 3.7372 (3.8235) weight_decay: 0.0500 (0.0500) time: 0.4849 data: 0.0437 max mem: 21002 Epoch: [199] [260/312] eta: 0:00:30 lr: 0.001136 min_lr: 0.001136 loss: 3.9547 (3.8218) weight_decay: 0.0500 (0.0500) time: 0.5748 data: 0.0863 max mem: 21002 Epoch: [199] [270/312] eta: 0:00:24 lr: 0.001136 min_lr: 0.001136 loss: 3.9555 (3.8267) weight_decay: 0.0500 (0.0500) time: 0.4956 data: 0.0434 max mem: 21002 Epoch: [199] [280/312] eta: 0:00:18 lr: 0.001135 min_lr: 0.001135 loss: 4.0119 (3.8253) weight_decay: 0.0500 (0.0500) time: 0.5332 data: 0.0695 max mem: 21002 Epoch: [199] [290/312] eta: 0:00:12 lr: 0.001134 min_lr: 0.001134 loss: 3.9426 (3.8335) weight_decay: 0.0500 (0.0500) time: 0.5764 data: 0.0692 max mem: 21002 Epoch: [199] [300/312] eta: 0:00:06 lr: 0.001134 min_lr: 0.001134 loss: 3.9974 (3.8361) weight_decay: 0.0500 (0.0500) time: 0.3755 data: 0.0035 max mem: 21002 Epoch: [199] [310/312] eta: 0:00:01 lr: 0.001133 min_lr: 0.001133 loss: 4.0204 (3.8358) weight_decay: 0.0500 (0.0500) time: 0.2908 data: 0.0034 max mem: 21002 Epoch: [199] [311/312] eta: 0:00:00 lr: 0.001133 min_lr: 0.001133 loss: 4.0204 (3.8386) weight_decay: 0.0500 (0.0500) time: 0.2880 data: 0.0001 max mem: 21002 Epoch: [199] Total time: 0:02:53 (0.5568 s / it) Averaged stats: lr: 0.001133 min_lr: 0.001133 loss: 4.0204 (3.8145) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.5540 (1.5540) acc1: 69.1406 (69.1406) acc5: 90.3646 (90.3646) time: 8.8336 data: 8.7148 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8866 (1.7930) acc1: 63.5417 (64.0960) acc5: 85.8073 (85.6160) time: 1.2225 data: 1.1260 max mem: 21002 Test: Total time: 0:00:11 (1.2331 s / it) * Acc@1 63.924 Acc@5 86.090 loss 1.798 Accuracy of the model on the 50000 test images: 63.9% Max accuracy: 65.12% Epoch: [200] [ 0/312] eta: 1:05:30 lr: 0.001133 min_lr: 0.001133 loss: 3.8477 (3.8477) weight_decay: 0.0500 (0.0500) time: 12.5973 data: 10.8452 max mem: 21002 Epoch: [200] [ 10/312] eta: 0:09:02 lr: 0.001132 min_lr: 0.001132 loss: 4.0023 (3.9353) weight_decay: 0.0500 (0.0500) time: 1.7963 data: 1.0675 max mem: 21002 Epoch: [200] [ 20/312] eta: 0:05:29 lr: 0.001132 min_lr: 0.001132 loss: 4.0878 (4.0155) weight_decay: 0.0500 (0.0500) time: 0.5541 data: 0.0497 max mem: 21002 Epoch: [200] [ 30/312] eta: 0:04:04 lr: 0.001131 min_lr: 0.001131 loss: 3.9155 (3.9220) weight_decay: 0.0500 (0.0500) time: 0.3571 data: 0.0235 max mem: 21002 Epoch: [200] [ 40/312] eta: 0:03:17 lr: 0.001130 min_lr: 0.001130 loss: 3.9155 (3.9061) weight_decay: 0.0500 (0.0500) time: 0.3048 data: 0.0191 max mem: 21002 Epoch: [200] [ 50/312] eta: 0:02:56 lr: 0.001130 min_lr: 0.001130 loss: 3.8912 (3.8517) weight_decay: 0.0500 (0.0500) time: 0.3692 data: 0.0008 max mem: 21002 Epoch: [200] [ 60/312] eta: 0:02:41 lr: 0.001129 min_lr: 0.001129 loss: 3.9204 (3.8725) weight_decay: 0.0500 (0.0500) time: 0.4708 data: 0.0777 max mem: 21002 Epoch: [200] [ 70/312] eta: 0:02:33 lr: 0.001128 min_lr: 0.001128 loss: 4.0447 (3.8543) weight_decay: 0.0500 (0.0500) time: 0.5428 data: 0.1074 max mem: 21002 Epoch: [200] [ 80/312] eta: 0:02:22 lr: 0.001128 min_lr: 0.001128 loss: 3.9007 (3.8164) weight_decay: 0.0500 (0.0500) time: 0.5323 data: 0.1204 max mem: 21002 Epoch: [200] [ 90/312] eta: 0:02:14 lr: 0.001127 min_lr: 0.001127 loss: 3.7108 (3.8052) weight_decay: 0.0500 (0.0500) time: 0.5041 data: 0.0907 max mem: 21002 Epoch: [200] [100/312] eta: 0:02:10 lr: 0.001126 min_lr: 0.001126 loss: 3.7939 (3.8028) weight_decay: 0.0500 (0.0500) time: 0.6070 data: 0.0757 max mem: 21002 Epoch: [200] [110/312] eta: 0:02:01 lr: 0.001126 min_lr: 0.001126 loss: 3.8995 (3.8034) weight_decay: 0.0500 (0.0500) time: 0.5701 data: 0.1577 max mem: 21002 Epoch: [200] [120/312] eta: 0:01:53 lr: 0.001125 min_lr: 0.001125 loss: 3.9605 (3.7978) weight_decay: 0.0500 (0.0500) time: 0.4722 data: 0.0954 max mem: 21002 Epoch: [200] [130/312] eta: 0:01:48 lr: 0.001125 min_lr: 0.001125 loss: 3.9110 (3.7973) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.1208 max mem: 21002 Epoch: [200] [140/312] eta: 0:01:42 lr: 0.001124 min_lr: 0.001124 loss: 3.9223 (3.7979) weight_decay: 0.0500 (0.0500) time: 0.6262 data: 0.2297 max mem: 21002 Epoch: [200] [150/312] eta: 0:01:34 lr: 0.001123 min_lr: 0.001123 loss: 3.8688 (3.7859) weight_decay: 0.0500 (0.0500) time: 0.4998 data: 0.1222 max mem: 21002 Epoch: [200] [160/312] eta: 0:01:29 lr: 0.001123 min_lr: 0.001123 loss: 3.8374 (3.7903) weight_decay: 0.0500 (0.0500) time: 0.5236 data: 0.1372 max mem: 21002 Epoch: [200] [170/312] eta: 0:01:21 lr: 0.001122 min_lr: 0.001122 loss: 3.9011 (3.7856) weight_decay: 0.0500 (0.0500) time: 0.5033 data: 0.1381 max mem: 21002 Epoch: [200] [180/312] eta: 0:01:16 lr: 0.001121 min_lr: 0.001121 loss: 3.8438 (3.7748) weight_decay: 0.0500 (0.0500) time: 0.5463 data: 0.1449 max mem: 21002 Epoch: [200] [190/312] eta: 0:01:11 lr: 0.001121 min_lr: 0.001121 loss: 3.5704 (3.7576) weight_decay: 0.0500 (0.0500) time: 0.6614 data: 0.2930 max mem: 21002 Epoch: [200] [200/312] eta: 0:01:04 lr: 0.001120 min_lr: 0.001120 loss: 3.6168 (3.7571) weight_decay: 0.0500 (0.0500) time: 0.5036 data: 0.1503 max mem: 21002 Epoch: [200] [210/312] eta: 0:00:58 lr: 0.001119 min_lr: 0.001119 loss: 3.7346 (3.7551) weight_decay: 0.0500 (0.0500) time: 0.5098 data: 0.1740 max mem: 21002 Epoch: [200] [220/312] eta: 0:00:53 lr: 0.001119 min_lr: 0.001119 loss: 3.6790 (3.7467) weight_decay: 0.0500 (0.0500) time: 0.6748 data: 0.3770 max mem: 21002 Epoch: [200] [230/312] eta: 0:00:46 lr: 0.001118 min_lr: 0.001118 loss: 3.7196 (3.7467) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.2044 max mem: 21002 Epoch: [200] [240/312] eta: 0:00:41 lr: 0.001117 min_lr: 0.001117 loss: 3.8623 (3.7532) weight_decay: 0.0500 (0.0500) time: 0.5114 data: 0.2170 max mem: 21002 Epoch: [200] [250/312] eta: 0:00:35 lr: 0.001117 min_lr: 0.001117 loss: 3.8926 (3.7511) weight_decay: 0.0500 (0.0500) time: 0.5066 data: 0.2172 max mem: 21002 Epoch: [200] [260/312] eta: 0:00:29 lr: 0.001116 min_lr: 0.001116 loss: 3.9524 (3.7640) weight_decay: 0.0500 (0.0500) time: 0.4843 data: 0.1963 max mem: 21002 Epoch: [200] [270/312] eta: 0:00:24 lr: 0.001115 min_lr: 0.001115 loss: 4.1254 (3.7663) weight_decay: 0.0500 (0.0500) time: 0.7136 data: 0.4261 max mem: 21002 Epoch: [200] [280/312] eta: 0:00:18 lr: 0.001115 min_lr: 0.001115 loss: 3.9853 (3.7703) weight_decay: 0.0500 (0.0500) time: 0.5205 data: 0.2325 max mem: 21002 Epoch: [200] [290/312] eta: 0:00:12 lr: 0.001114 min_lr: 0.001114 loss: 3.6386 (3.7629) weight_decay: 0.0500 (0.0500) time: 0.5045 data: 0.2178 max mem: 21002 Epoch: [200] [300/312] eta: 0:00:06 lr: 0.001114 min_lr: 0.001114 loss: 3.9554 (3.7752) weight_decay: 0.0500 (0.0500) time: 0.5293 data: 0.2484 max mem: 21002 Epoch: [200] [310/312] eta: 0:00:01 lr: 0.001113 min_lr: 0.001113 loss: 4.0613 (3.7774) weight_decay: 0.0500 (0.0500) time: 0.3097 data: 0.0328 max mem: 21002 Epoch: [200] [311/312] eta: 0:00:00 lr: 0.001113 min_lr: 0.001113 loss: 4.0346 (3.7777) weight_decay: 0.0500 (0.0500) time: 0.3096 data: 0.0328 max mem: 21002 Epoch: [200] Total time: 0:02:54 (0.5578 s / it) Averaged stats: lr: 0.001113 min_lr: 0.001113 loss: 4.0346 (3.7765) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:27 loss: 1.3601 (1.3601) acc1: 75.5208 (75.5208) acc5: 92.8385 (92.8385) time: 9.7563 data: 9.6375 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8377 (1.7516) acc1: 63.0208 (65.2640) acc5: 86.5885 (86.9280) time: 1.1693 data: 1.0709 max mem: 21002 Test: Total time: 0:00:10 (1.1804 s / it) * Acc@1 65.396 Acc@5 86.940 loss 1.761 Accuracy of the model on the 50000 test images: 65.4% Max accuracy: 65.40% Epoch: [201] [ 0/312] eta: 1:05:10 lr: 0.001113 min_lr: 0.001113 loss: 3.6972 (3.6972) weight_decay: 0.0500 (0.0500) time: 12.5327 data: 11.1358 max mem: 21002 Epoch: [201] [ 10/312] eta: 0:08:26 lr: 0.001112 min_lr: 0.001112 loss: 3.8623 (3.8790) weight_decay: 0.0500 (0.0500) time: 1.6760 data: 1.0134 max mem: 21002 Epoch: [201] [ 20/312] eta: 0:05:05 lr: 0.001112 min_lr: 0.001112 loss: 3.9351 (3.8665) weight_decay: 0.0500 (0.0500) time: 0.4719 data: 0.0359 max mem: 21002 Epoch: [201] [ 30/312] eta: 0:03:46 lr: 0.001111 min_lr: 0.001111 loss: 3.8633 (3.8094) weight_decay: 0.0500 (0.0500) time: 0.3224 data: 0.0357 max mem: 21002 Epoch: [201] [ 40/312] eta: 0:03:08 lr: 0.001110 min_lr: 0.001110 loss: 3.6941 (3.7734) weight_decay: 0.0500 (0.0500) time: 0.3254 data: 0.0008 max mem: 21002 Epoch: [201] [ 50/312] eta: 0:02:59 lr: 0.001110 min_lr: 0.001110 loss: 3.8396 (3.7766) weight_decay: 0.0500 (0.0500) time: 0.4980 data: 0.1414 max mem: 21002 Epoch: [201] [ 60/312] eta: 0:02:37 lr: 0.001109 min_lr: 0.001109 loss: 3.8386 (3.7620) weight_decay: 0.0500 (0.0500) time: 0.4852 data: 0.1651 max mem: 21002 Epoch: [201] [ 70/312] eta: 0:02:35 lr: 0.001108 min_lr: 0.001108 loss: 4.0086 (3.7906) weight_decay: 0.0500 (0.0500) time: 0.5331 data: 0.1659 max mem: 21002 Epoch: [201] [ 80/312] eta: 0:02:24 lr: 0.001108 min_lr: 0.001108 loss: 3.9117 (3.7663) weight_decay: 0.0500 (0.0500) time: 0.6105 data: 0.2471 max mem: 21002 Epoch: [201] [ 90/312] eta: 0:02:16 lr: 0.001107 min_lr: 0.001107 loss: 3.9117 (3.7749) weight_decay: 0.0500 (0.0500) time: 0.5165 data: 0.1204 max mem: 21002 Epoch: [201] [100/312] eta: 0:02:09 lr: 0.001106 min_lr: 0.001106 loss: 3.8502 (3.7527) weight_decay: 0.0500 (0.0500) time: 0.5598 data: 0.0908 max mem: 21002 Epoch: [201] [110/312] eta: 0:02:00 lr: 0.001106 min_lr: 0.001106 loss: 3.6280 (3.7491) weight_decay: 0.0500 (0.0500) time: 0.5275 data: 0.1164 max mem: 21002 Epoch: [201] [120/312] eta: 0:01:53 lr: 0.001105 min_lr: 0.001105 loss: 3.7150 (3.7557) weight_decay: 0.0500 (0.0500) time: 0.4952 data: 0.1135 max mem: 21002 Epoch: [201] [130/312] eta: 0:01:48 lr: 0.001104 min_lr: 0.001104 loss: 3.6991 (3.7477) weight_decay: 0.0500 (0.0500) time: 0.5682 data: 0.1728 max mem: 21002 Epoch: [201] [140/312] eta: 0:01:40 lr: 0.001104 min_lr: 0.001104 loss: 3.7016 (3.7647) weight_decay: 0.0500 (0.0500) time: 0.5509 data: 0.1341 max mem: 21002 Epoch: [201] [150/312] eta: 0:01:34 lr: 0.001103 min_lr: 0.001103 loss: 3.8887 (3.7753) weight_decay: 0.0500 (0.0500) time: 0.5072 data: 0.1155 max mem: 21002 Epoch: [201] [160/312] eta: 0:01:28 lr: 0.001102 min_lr: 0.001102 loss: 3.8184 (3.7686) weight_decay: 0.0500 (0.0500) time: 0.5434 data: 0.1642 max mem: 21002 Epoch: [201] [170/312] eta: 0:01:21 lr: 0.001102 min_lr: 0.001102 loss: 3.8295 (3.7828) weight_decay: 0.0500 (0.0500) time: 0.5308 data: 0.1387 max mem: 21002 Epoch: [201] [180/312] eta: 0:01:16 lr: 0.001101 min_lr: 0.001101 loss: 3.8295 (3.7772) weight_decay: 0.0500 (0.0500) time: 0.5598 data: 0.1300 max mem: 21002 Epoch: [201] [190/312] eta: 0:01:09 lr: 0.001101 min_lr: 0.001101 loss: 3.8182 (3.7889) weight_decay: 0.0500 (0.0500) time: 0.5430 data: 0.1448 max mem: 21002 Epoch: [201] [200/312] eta: 0:01:04 lr: 0.001100 min_lr: 0.001100 loss: 3.8082 (3.7770) weight_decay: 0.0500 (0.0500) time: 0.5248 data: 0.1345 max mem: 21002 Epoch: [201] [210/312] eta: 0:00:58 lr: 0.001099 min_lr: 0.001099 loss: 3.8082 (3.7868) weight_decay: 0.0500 (0.0500) time: 0.5685 data: 0.1418 max mem: 21002 Epoch: [201] [220/312] eta: 0:00:52 lr: 0.001099 min_lr: 0.001099 loss: 4.0145 (3.7877) weight_decay: 0.0500 (0.0500) time: 0.5088 data: 0.1495 max mem: 21002 Epoch: [201] [230/312] eta: 0:00:46 lr: 0.001098 min_lr: 0.001098 loss: 3.8725 (3.7988) weight_decay: 0.0500 (0.0500) time: 0.5361 data: 0.1681 max mem: 21002 Epoch: [201] [240/312] eta: 0:00:40 lr: 0.001097 min_lr: 0.001097 loss: 3.8725 (3.7934) weight_decay: 0.0500 (0.0500) time: 0.5451 data: 0.1841 max mem: 21002 Epoch: [201] [250/312] eta: 0:00:34 lr: 0.001097 min_lr: 0.001097 loss: 3.9714 (3.8021) weight_decay: 0.0500 (0.0500) time: 0.5047 data: 0.1412 max mem: 21002 Epoch: [201] [260/312] eta: 0:00:29 lr: 0.001096 min_lr: 0.001096 loss: 4.1127 (3.8034) weight_decay: 0.0500 (0.0500) time: 0.5790 data: 0.1343 max mem: 21002 Epoch: [201] [270/312] eta: 0:00:23 lr: 0.001095 min_lr: 0.001095 loss: 3.8601 (3.8075) weight_decay: 0.0500 (0.0500) time: 0.5180 data: 0.1459 max mem: 21002 Epoch: [201] [280/312] eta: 0:00:18 lr: 0.001095 min_lr: 0.001095 loss: 3.8601 (3.8075) weight_decay: 0.0500 (0.0500) time: 0.5285 data: 0.1266 max mem: 21002 Epoch: [201] [290/312] eta: 0:00:12 lr: 0.001094 min_lr: 0.001094 loss: 3.8808 (3.8063) weight_decay: 0.0500 (0.0500) time: 0.6016 data: 0.0945 max mem: 21002 Epoch: [201] [300/312] eta: 0:00:06 lr: 0.001094 min_lr: 0.001094 loss: 3.9220 (3.8115) weight_decay: 0.0500 (0.0500) time: 0.4600 data: 0.0735 max mem: 21002 Epoch: [201] [310/312] eta: 0:00:01 lr: 0.001093 min_lr: 0.001093 loss: 4.1031 (3.8181) weight_decay: 0.0500 (0.0500) time: 0.3160 data: 0.0404 max mem: 21002 Epoch: [201] [311/312] eta: 0:00:00 lr: 0.001093 min_lr: 0.001093 loss: 4.0226 (3.8183) weight_decay: 0.0500 (0.0500) time: 0.3126 data: 0.0364 max mem: 21002 Epoch: [201] Total time: 0:02:51 (0.5504 s / it) Averaged stats: lr: 0.001093 min_lr: 0.001093 loss: 4.0226 (3.7976) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:25 loss: 1.5932 (1.5932) acc1: 69.0104 (69.0104) acc5: 89.4531 (89.4531) time: 9.4831 data: 9.3645 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.9804 (1.8702) acc1: 61.5885 (62.7680) acc5: 84.6354 (85.4400) time: 1.1384 data: 1.0405 max mem: 21002 Test: Total time: 0:00:10 (1.1491 s / it) * Acc@1 63.076 Acc@5 85.576 loss 1.876 Accuracy of the model on the 50000 test images: 63.1% Max accuracy: 65.40% Epoch: [202] [ 0/312] eta: 1:06:40 lr: 0.001093 min_lr: 0.001093 loss: 3.5896 (3.5896) weight_decay: 0.0500 (0.0500) time: 12.8211 data: 12.5253 max mem: 21002 Epoch: [202] [ 10/312] eta: 0:08:33 lr: 0.001092 min_lr: 0.001092 loss: 3.5896 (3.6127) weight_decay: 0.0500 (0.0500) time: 1.7014 data: 1.1391 max mem: 21002 Epoch: [202] [ 20/312] eta: 0:05:12 lr: 0.001091 min_lr: 0.001091 loss: 3.6969 (3.6568) weight_decay: 0.0500 (0.0500) time: 0.4816 data: 0.0452 max mem: 21002 Epoch: [202] [ 30/312] eta: 0:03:50 lr: 0.001091 min_lr: 0.001091 loss: 3.6171 (3.6235) weight_decay: 0.0500 (0.0500) time: 0.3305 data: 0.0453 max mem: 21002 Epoch: [202] [ 40/312] eta: 0:03:16 lr: 0.001090 min_lr: 0.001090 loss: 3.9925 (3.7031) weight_decay: 0.0500 (0.0500) time: 0.3573 data: 0.0153 max mem: 21002 Epoch: [202] [ 50/312] eta: 0:03:04 lr: 0.001090 min_lr: 0.001090 loss: 4.0069 (3.7347) weight_decay: 0.0500 (0.0500) time: 0.5269 data: 0.1319 max mem: 21002 Epoch: [202] [ 60/312] eta: 0:02:44 lr: 0.001089 min_lr: 0.001089 loss: 3.8598 (3.7289) weight_decay: 0.0500 (0.0500) time: 0.5083 data: 0.1174 max mem: 21002 Epoch: [202] [ 70/312] eta: 0:02:39 lr: 0.001088 min_lr: 0.001088 loss: 3.5990 (3.7234) weight_decay: 0.0500 (0.0500) time: 0.5480 data: 0.0996 max mem: 21002 Epoch: [202] [ 80/312] eta: 0:02:25 lr: 0.001088 min_lr: 0.001088 loss: 3.7934 (3.7159) weight_decay: 0.0500 (0.0500) time: 0.5569 data: 0.1262 max mem: 21002 Epoch: [202] [ 90/312] eta: 0:02:21 lr: 0.001087 min_lr: 0.001087 loss: 3.7934 (3.7202) weight_decay: 0.0500 (0.0500) time: 0.5514 data: 0.0603 max mem: 21002 Epoch: [202] [100/312] eta: 0:02:13 lr: 0.001086 min_lr: 0.001086 loss: 3.9384 (3.7456) weight_decay: 0.0500 (0.0500) time: 0.6246 data: 0.0651 max mem: 21002 Epoch: [202] [110/312] eta: 0:02:01 lr: 0.001086 min_lr: 0.001086 loss: 4.0143 (3.7590) weight_decay: 0.0500 (0.0500) time: 0.4365 data: 0.0483 max mem: 21002 Epoch: [202] [120/312] eta: 0:01:56 lr: 0.001085 min_lr: 0.001085 loss: 3.8614 (3.7415) weight_decay: 0.0500 (0.0500) time: 0.5050 data: 0.0719 max mem: 21002 Epoch: [202] [130/312] eta: 0:01:49 lr: 0.001084 min_lr: 0.001084 loss: 3.8614 (3.7458) weight_decay: 0.0500 (0.0500) time: 0.6248 data: 0.1069 max mem: 21002 Epoch: [202] [140/312] eta: 0:01:42 lr: 0.001084 min_lr: 0.001084 loss: 4.0024 (3.7480) weight_decay: 0.0500 (0.0500) time: 0.5307 data: 0.0565 max mem: 21002 Epoch: [202] [150/312] eta: 0:01:36 lr: 0.001083 min_lr: 0.001083 loss: 3.8573 (3.7508) weight_decay: 0.0500 (0.0500) time: 0.5614 data: 0.0640 max mem: 21002 Epoch: [202] [160/312] eta: 0:01:28 lr: 0.001083 min_lr: 0.001083 loss: 4.0030 (3.7627) weight_decay: 0.0500 (0.0500) time: 0.4919 data: 0.0951 max mem: 21002 Epoch: [202] [170/312] eta: 0:01:23 lr: 0.001082 min_lr: 0.001082 loss: 4.0072 (3.7657) weight_decay: 0.0500 (0.0500) time: 0.5174 data: 0.0368 max mem: 21002 Epoch: [202] [180/312] eta: 0:01:18 lr: 0.001081 min_lr: 0.001081 loss: 3.7875 (3.7517) weight_decay: 0.0500 (0.0500) time: 0.6723 data: 0.0331 max mem: 21002 Epoch: [202] [190/312] eta: 0:01:10 lr: 0.001081 min_lr: 0.001081 loss: 3.7907 (3.7613) weight_decay: 0.0500 (0.0500) time: 0.4947 data: 0.0459 max mem: 21002 Epoch: [202] [200/312] eta: 0:01:05 lr: 0.001080 min_lr: 0.001080 loss: 3.9850 (3.7618) weight_decay: 0.0500 (0.0500) time: 0.4747 data: 0.0993 max mem: 21002 Epoch: [202] [210/312] eta: 0:00:59 lr: 0.001079 min_lr: 0.001079 loss: 3.5696 (3.7580) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.1222 max mem: 21002 Epoch: [202] [220/312] eta: 0:00:53 lr: 0.001079 min_lr: 0.001079 loss: 3.9305 (3.7678) weight_decay: 0.0500 (0.0500) time: 0.5279 data: 0.0368 max mem: 21002 Epoch: [202] [230/312] eta: 0:00:47 lr: 0.001078 min_lr: 0.001078 loss: 3.8188 (3.7550) weight_decay: 0.0500 (0.0500) time: 0.5739 data: 0.0769 max mem: 21002 Epoch: [202] [240/312] eta: 0:00:41 lr: 0.001077 min_lr: 0.001077 loss: 3.7526 (3.7524) weight_decay: 0.0500 (0.0500) time: 0.5272 data: 0.1449 max mem: 21002 Epoch: [202] [250/312] eta: 0:00:35 lr: 0.001077 min_lr: 0.001077 loss: 3.8522 (3.7486) weight_decay: 0.0500 (0.0500) time: 0.4811 data: 0.0688 max mem: 21002 Epoch: [202] [260/312] eta: 0:00:29 lr: 0.001076 min_lr: 0.001076 loss: 3.9330 (3.7530) weight_decay: 0.0500 (0.0500) time: 0.6221 data: 0.0177 max mem: 21002 Epoch: [202] [270/312] eta: 0:00:23 lr: 0.001075 min_lr: 0.001075 loss: 3.7266 (3.7474) weight_decay: 0.0500 (0.0500) time: 0.4994 data: 0.0179 max mem: 21002 Epoch: [202] [280/312] eta: 0:00:18 lr: 0.001075 min_lr: 0.001075 loss: 3.8777 (3.7507) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.0733 max mem: 21002 Epoch: [202] [290/312] eta: 0:00:12 lr: 0.001074 min_lr: 0.001074 loss: 3.9874 (3.7597) weight_decay: 0.0500 (0.0500) time: 0.5563 data: 0.1117 max mem: 21002 Epoch: [202] [300/312] eta: 0:00:06 lr: 0.001074 min_lr: 0.001074 loss: 3.9874 (3.7542) weight_decay: 0.0500 (0.0500) time: 0.4091 data: 0.0391 max mem: 21002 Epoch: [202] [310/312] eta: 0:00:01 lr: 0.001073 min_lr: 0.001073 loss: 3.9029 (3.7573) weight_decay: 0.0500 (0.0500) time: 0.3299 data: 0.0001 max mem: 21002 Epoch: [202] [311/312] eta: 0:00:00 lr: 0.001073 min_lr: 0.001073 loss: 3.9582 (3.7587) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [202] Total time: 0:02:52 (0.5528 s / it) Averaged stats: lr: 0.001073 min_lr: 0.001073 loss: 3.9582 (3.7878) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.6591 (1.6591) acc1: 68.3594 (68.3594) acc5: 91.6667 (91.6667) time: 8.5244 data: 8.4057 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 2.0699 (1.9536) acc1: 62.5000 (63.3440) acc5: 84.5052 (85.6480) time: 1.0508 data: 0.9520 max mem: 21002 Test: Total time: 0:00:09 (1.0857 s / it) * Acc@1 63.510 Acc@5 85.934 loss 1.957 Accuracy of the model on the 50000 test images: 63.5% Max accuracy: 65.40% Epoch: [203] [ 0/312] eta: 1:00:29 lr: 0.001073 min_lr: 0.001073 loss: 3.0189 (3.0189) weight_decay: 0.0500 (0.0500) time: 11.6315 data: 7.7234 max mem: 21002 Epoch: [203] [ 10/312] eta: 0:08:54 lr: 0.001072 min_lr: 0.001072 loss: 4.0186 (3.8845) weight_decay: 0.0500 (0.0500) time: 1.7712 data: 0.9759 max mem: 21002 Epoch: [203] [ 20/312] eta: 0:05:10 lr: 0.001072 min_lr: 0.001072 loss: 4.0192 (3.9800) weight_decay: 0.0500 (0.0500) time: 0.5353 data: 0.1509 max mem: 21002 Epoch: [203] [ 30/312] eta: 0:03:49 lr: 0.001071 min_lr: 0.001071 loss: 3.9902 (3.8656) weight_decay: 0.0500 (0.0500) time: 0.2869 data: 0.0007 max mem: 21002 Epoch: [203] [ 40/312] eta: 0:03:09 lr: 0.001070 min_lr: 0.001070 loss: 3.8525 (3.8146) weight_decay: 0.0500 (0.0500) time: 0.3078 data: 0.0009 max mem: 21002 Epoch: [203] [ 50/312] eta: 0:03:01 lr: 0.001070 min_lr: 0.001070 loss: 3.8525 (3.8331) weight_decay: 0.0500 (0.0500) time: 0.5066 data: 0.0319 max mem: 21002 Epoch: [203] [ 60/312] eta: 0:02:43 lr: 0.001069 min_lr: 0.001069 loss: 3.7873 (3.8194) weight_decay: 0.0500 (0.0500) time: 0.5495 data: 0.0995 max mem: 21002 Epoch: [203] [ 70/312] eta: 0:02:34 lr: 0.001068 min_lr: 0.001068 loss: 3.8857 (3.8227) weight_decay: 0.0500 (0.0500) time: 0.5057 data: 0.0685 max mem: 21002 Epoch: [203] [ 80/312] eta: 0:02:21 lr: 0.001068 min_lr: 0.001068 loss: 3.9441 (3.8310) weight_decay: 0.0500 (0.0500) time: 0.5004 data: 0.0427 max mem: 21002 Epoch: [203] [ 90/312] eta: 0:02:18 lr: 0.001067 min_lr: 0.001067 loss: 4.0746 (3.8288) weight_decay: 0.0500 (0.0500) time: 0.5569 data: 0.0969 max mem: 21002 Epoch: [203] [100/312] eta: 0:02:11 lr: 0.001066 min_lr: 0.001066 loss: 3.7879 (3.8056) weight_decay: 0.0500 (0.0500) time: 0.6658 data: 0.0552 max mem: 21002 Epoch: [203] [110/312] eta: 0:02:01 lr: 0.001066 min_lr: 0.001066 loss: 3.6978 (3.7985) weight_decay: 0.0500 (0.0500) time: 0.5198 data: 0.0701 max mem: 21002 Epoch: [203] [120/312] eta: 0:01:56 lr: 0.001065 min_lr: 0.001065 loss: 3.6853 (3.7809) weight_decay: 0.0500 (0.0500) time: 0.5291 data: 0.1026 max mem: 21002 Epoch: [203] [130/312] eta: 0:01:50 lr: 0.001065 min_lr: 0.001065 loss: 3.8373 (3.7827) weight_decay: 0.0500 (0.0500) time: 0.6092 data: 0.0697 max mem: 21002 Epoch: [203] [140/312] eta: 0:01:41 lr: 0.001064 min_lr: 0.001064 loss: 3.9222 (3.7910) weight_decay: 0.0500 (0.0500) time: 0.4899 data: 0.0876 max mem: 21002 Epoch: [203] [150/312] eta: 0:01:35 lr: 0.001063 min_lr: 0.001063 loss: 3.9324 (3.8005) weight_decay: 0.0500 (0.0500) time: 0.4739 data: 0.0514 max mem: 21002 Epoch: [203] [160/312] eta: 0:01:27 lr: 0.001063 min_lr: 0.001063 loss: 3.9192 (3.7921) weight_decay: 0.0500 (0.0500) time: 0.4799 data: 0.0531 max mem: 21002 Epoch: [203] [170/312] eta: 0:01:23 lr: 0.001062 min_lr: 0.001062 loss: 3.7800 (3.7864) weight_decay: 0.0500 (0.0500) time: 0.5694 data: 0.1216 max mem: 21002 Epoch: [203] [180/312] eta: 0:01:16 lr: 0.001061 min_lr: 0.001061 loss: 3.7639 (3.7815) weight_decay: 0.0500 (0.0500) time: 0.6241 data: 0.0693 max mem: 21002 Epoch: [203] [190/312] eta: 0:01:10 lr: 0.001061 min_lr: 0.001061 loss: 3.7722 (3.7896) weight_decay: 0.0500 (0.0500) time: 0.4838 data: 0.0821 max mem: 21002 Epoch: [203] [200/312] eta: 0:01:04 lr: 0.001060 min_lr: 0.001060 loss: 3.6682 (3.7773) weight_decay: 0.0500 (0.0500) time: 0.5557 data: 0.1409 max mem: 21002 Epoch: [203] [210/312] eta: 0:00:58 lr: 0.001059 min_lr: 0.001059 loss: 4.0725 (3.7934) weight_decay: 0.0500 (0.0500) time: 0.6113 data: 0.0843 max mem: 21002 Epoch: [203] [220/312] eta: 0:00:52 lr: 0.001059 min_lr: 0.001059 loss: 4.0837 (3.7998) weight_decay: 0.0500 (0.0500) time: 0.4833 data: 0.0852 max mem: 21002 Epoch: [203] [230/312] eta: 0:00:46 lr: 0.001058 min_lr: 0.001058 loss: 3.8467 (3.7961) weight_decay: 0.0500 (0.0500) time: 0.4843 data: 0.0604 max mem: 21002 Epoch: [203] [240/312] eta: 0:00:40 lr: 0.001058 min_lr: 0.001058 loss: 3.8357 (3.7916) weight_decay: 0.0500 (0.0500) time: 0.5165 data: 0.0933 max mem: 21002 Epoch: [203] [250/312] eta: 0:00:35 lr: 0.001057 min_lr: 0.001057 loss: 4.0190 (3.8073) weight_decay: 0.0500 (0.0500) time: 0.5798 data: 0.1869 max mem: 21002 Epoch: [203] [260/312] eta: 0:00:29 lr: 0.001056 min_lr: 0.001056 loss: 4.1419 (3.8236) weight_decay: 0.0500 (0.0500) time: 0.5980 data: 0.0948 max mem: 21002 Epoch: [203] [270/312] eta: 0:00:23 lr: 0.001056 min_lr: 0.001056 loss: 4.0710 (3.8262) weight_decay: 0.0500 (0.0500) time: 0.4850 data: 0.0887 max mem: 21002 Epoch: [203] [280/312] eta: 0:00:18 lr: 0.001055 min_lr: 0.001055 loss: 3.7479 (3.8117) weight_decay: 0.0500 (0.0500) time: 0.5313 data: 0.1132 max mem: 21002 Epoch: [203] [290/312] eta: 0:00:12 lr: 0.001054 min_lr: 0.001054 loss: 3.4393 (3.8116) weight_decay: 0.0500 (0.0500) time: 0.6269 data: 0.0885 max mem: 21002 Epoch: [203] [300/312] eta: 0:00:06 lr: 0.001054 min_lr: 0.001054 loss: 3.8930 (3.8093) weight_decay: 0.0500 (0.0500) time: 0.5066 data: 0.1062 max mem: 21002 Epoch: [203] [310/312] eta: 0:00:01 lr: 0.001053 min_lr: 0.001053 loss: 4.0630 (3.8097) weight_decay: 0.0500 (0.0500) time: 0.3192 data: 0.0432 max mem: 21002 Epoch: [203] [311/312] eta: 0:00:00 lr: 0.001053 min_lr: 0.001053 loss: 4.0810 (3.8109) weight_decay: 0.0500 (0.0500) time: 0.3191 data: 0.0432 max mem: 21002 Epoch: [203] Total time: 0:02:53 (0.5556 s / it) Averaged stats: lr: 0.001053 min_lr: 0.001053 loss: 4.0810 (3.7872) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 1.4243 (1.4243) acc1: 73.0469 (73.0469) acc5: 91.2760 (91.2760) time: 8.9340 data: 8.8177 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7382 (1.6268) acc1: 63.2812 (65.8880) acc5: 85.8073 (87.1520) time: 1.0815 data: 0.9798 max mem: 21002 Test: Total time: 0:00:09 (1.1100 s / it) * Acc@1 65.806 Acc@5 87.274 loss 1.636 Accuracy of the model on the 50000 test images: 65.8% Max accuracy: 65.81% Epoch: [204] [ 0/312] eta: 1:02:38 lr: 0.001053 min_lr: 0.001053 loss: 4.1761 (4.1761) weight_decay: 0.0500 (0.0500) time: 12.0472 data: 11.7098 max mem: 21002 Epoch: [204] [ 10/312] eta: 0:07:48 lr: 0.001052 min_lr: 0.001052 loss: 4.0655 (3.9716) weight_decay: 0.0500 (0.0500) time: 1.5507 data: 1.0651 max mem: 21002 Epoch: [204] [ 20/312] eta: 0:04:59 lr: 0.001052 min_lr: 0.001052 loss: 3.9622 (3.8982) weight_decay: 0.0500 (0.0500) time: 0.4731 data: 0.0813 max mem: 21002 Epoch: [204] [ 30/312] eta: 0:03:41 lr: 0.001051 min_lr: 0.001051 loss: 3.8459 (3.7522) weight_decay: 0.0500 (0.0500) time: 0.3671 data: 0.0813 max mem: 21002 Epoch: [204] [ 40/312] eta: 0:03:17 lr: 0.001050 min_lr: 0.001050 loss: 3.9419 (3.8010) weight_decay: 0.0500 (0.0500) time: 0.4128 data: 0.1185 max mem: 21002 Epoch: [204] [ 50/312] eta: 0:03:05 lr: 0.001050 min_lr: 0.001050 loss: 3.9880 (3.8069) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.2719 max mem: 21002 Epoch: [204] [ 60/312] eta: 0:02:41 lr: 0.001049 min_lr: 0.001049 loss: 3.9422 (3.8285) weight_decay: 0.0500 (0.0500) time: 0.4627 data: 0.1541 max mem: 21002 Epoch: [204] [ 70/312] eta: 0:02:35 lr: 0.001049 min_lr: 0.001049 loss: 3.9066 (3.8213) weight_decay: 0.0500 (0.0500) time: 0.4755 data: 0.1722 max mem: 21002 Epoch: [204] [ 80/312] eta: 0:02:32 lr: 0.001048 min_lr: 0.001048 loss: 3.9149 (3.8343) weight_decay: 0.0500 (0.0500) time: 0.7103 data: 0.3611 max mem: 21002 Epoch: [204] [ 90/312] eta: 0:02:16 lr: 0.001047 min_lr: 0.001047 loss: 3.9805 (3.8372) weight_decay: 0.0500 (0.0500) time: 0.5205 data: 0.1897 max mem: 21002 Epoch: [204] [100/312] eta: 0:02:11 lr: 0.001047 min_lr: 0.001047 loss: 3.9235 (3.8170) weight_decay: 0.0500 (0.0500) time: 0.4813 data: 0.1052 max mem: 21002 Epoch: [204] [110/312] eta: 0:01:59 lr: 0.001046 min_lr: 0.001046 loss: 3.6507 (3.7949) weight_decay: 0.0500 (0.0500) time: 0.4806 data: 0.1052 max mem: 21002 Epoch: [204] [120/312] eta: 0:01:56 lr: 0.001045 min_lr: 0.001045 loss: 3.6507 (3.7841) weight_decay: 0.0500 (0.0500) time: 0.5141 data: 0.1166 max mem: 21002 Epoch: [204] [130/312] eta: 0:01:51 lr: 0.001045 min_lr: 0.001045 loss: 3.7984 (3.7844) weight_decay: 0.0500 (0.0500) time: 0.7349 data: 0.1919 max mem: 21002 Epoch: [204] [140/312] eta: 0:01:41 lr: 0.001044 min_lr: 0.001044 loss: 4.0809 (3.8122) weight_decay: 0.0500 (0.0500) time: 0.5072 data: 0.0759 max mem: 21002 Epoch: [204] [150/312] eta: 0:01:36 lr: 0.001044 min_lr: 0.001044 loss: 4.1063 (3.8207) weight_decay: 0.0500 (0.0500) time: 0.4890 data: 0.0361 max mem: 21002 Epoch: [204] [160/312] eta: 0:01:31 lr: 0.001043 min_lr: 0.001043 loss: 3.8868 (3.8194) weight_decay: 0.0500 (0.0500) time: 0.6904 data: 0.0553 max mem: 21002 Epoch: [204] [170/312] eta: 0:01:23 lr: 0.001042 min_lr: 0.001042 loss: 3.8265 (3.8169) weight_decay: 0.0500 (0.0500) time: 0.4886 data: 0.0199 max mem: 21002 Epoch: [204] [180/312] eta: 0:01:18 lr: 0.001042 min_lr: 0.001042 loss: 3.4655 (3.8047) weight_decay: 0.0500 (0.0500) time: 0.4947 data: 0.0205 max mem: 21002 Epoch: [204] [190/312] eta: 0:01:10 lr: 0.001041 min_lr: 0.001041 loss: 3.7373 (3.8011) weight_decay: 0.0500 (0.0500) time: 0.4949 data: 0.0204 max mem: 21002 Epoch: [204] [200/312] eta: 0:01:05 lr: 0.001040 min_lr: 0.001040 loss: 3.7373 (3.7932) weight_decay: 0.0500 (0.0500) time: 0.4994 data: 0.0244 max mem: 21002 Epoch: [204] [210/312] eta: 0:01:00 lr: 0.001040 min_lr: 0.001040 loss: 3.4495 (3.7826) weight_decay: 0.0500 (0.0500) time: 0.7149 data: 0.0890 max mem: 21002 Epoch: [204] [220/312] eta: 0:00:52 lr: 0.001039 min_lr: 0.001039 loss: 3.6513 (3.7798) weight_decay: 0.0500 (0.0500) time: 0.5042 data: 0.0654 max mem: 21002 Epoch: [204] [230/312] eta: 0:00:47 lr: 0.001038 min_lr: 0.001038 loss: 3.6833 (3.7765) weight_decay: 0.0500 (0.0500) time: 0.4958 data: 0.0436 max mem: 21002 Epoch: [204] [240/312] eta: 0:00:42 lr: 0.001038 min_lr: 0.001038 loss: 3.7521 (3.7801) weight_decay: 0.0500 (0.0500) time: 0.6864 data: 0.0633 max mem: 21002 Epoch: [204] [250/312] eta: 0:00:35 lr: 0.001037 min_lr: 0.001037 loss: 3.9821 (3.7885) weight_decay: 0.0500 (0.0500) time: 0.4837 data: 0.0270 max mem: 21002 Epoch: [204] [260/312] eta: 0:00:29 lr: 0.001037 min_lr: 0.001037 loss: 4.0222 (3.7930) weight_decay: 0.0500 (0.0500) time: 0.4761 data: 0.0558 max mem: 21002 Epoch: [204] [270/312] eta: 0:00:23 lr: 0.001036 min_lr: 0.001036 loss: 4.0496 (3.7972) weight_decay: 0.0500 (0.0500) time: 0.4716 data: 0.0492 max mem: 21002 Epoch: [204] [280/312] eta: 0:00:18 lr: 0.001035 min_lr: 0.001035 loss: 3.7656 (3.7867) weight_decay: 0.0500 (0.0500) time: 0.4953 data: 0.0333 max mem: 21002 Epoch: [204] [290/312] eta: 0:00:12 lr: 0.001035 min_lr: 0.001035 loss: 3.4798 (3.7780) weight_decay: 0.0500 (0.0500) time: 0.6022 data: 0.0834 max mem: 21002 Epoch: [204] [300/312] eta: 0:00:06 lr: 0.001034 min_lr: 0.001034 loss: 3.6844 (3.7748) weight_decay: 0.0500 (0.0500) time: 0.3893 data: 0.0506 max mem: 21002 Epoch: [204] [310/312] eta: 0:00:01 lr: 0.001033 min_lr: 0.001033 loss: 3.8532 (3.7804) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [204] [311/312] eta: 0:00:00 lr: 0.001033 min_lr: 0.001033 loss: 3.8532 (3.7809) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [204] Total time: 0:02:52 (0.5520 s / it) Averaged stats: lr: 0.001033 min_lr: 0.001033 loss: 3.8532 (3.7765) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.2874 (1.2874) acc1: 74.2188 (74.2188) acc5: 93.7500 (93.7500) time: 8.8676 data: 8.7487 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7578 (1.6174) acc1: 64.1927 (66.1440) acc5: 87.8906 (88.2720) time: 1.0721 data: 0.9722 max mem: 21002 Test: Total time: 0:00:09 (1.1073 s / it) * Acc@1 66.122 Acc@5 87.792 loss 1.633 Accuracy of the model on the 50000 test images: 66.1% Max accuracy: 66.12% Epoch: [205] [ 0/312] eta: 1:01:01 lr: 0.001033 min_lr: 0.001033 loss: 3.8379 (3.8379) weight_decay: 0.0500 (0.0500) time: 11.7364 data: 8.5670 max mem: 21002 Epoch: [205] [ 10/312] eta: 0:08:29 lr: 0.001033 min_lr: 0.001033 loss: 4.1277 (4.0822) weight_decay: 0.0500 (0.0500) time: 1.6871 data: 0.9889 max mem: 21002 Epoch: [205] [ 20/312] eta: 0:05:11 lr: 0.001032 min_lr: 0.001032 loss: 3.9807 (3.9551) weight_decay: 0.0500 (0.0500) time: 0.5334 data: 0.1648 max mem: 21002 Epoch: [205] [ 30/312] eta: 0:03:50 lr: 0.001031 min_lr: 0.001031 loss: 3.9807 (4.0048) weight_decay: 0.0500 (0.0500) time: 0.3369 data: 0.0495 max mem: 21002 Epoch: [205] [ 40/312] eta: 0:03:08 lr: 0.001031 min_lr: 0.001031 loss: 4.0551 (3.9408) weight_decay: 0.0500 (0.0500) time: 0.3016 data: 0.0006 max mem: 21002 Epoch: [205] [ 50/312] eta: 0:03:01 lr: 0.001030 min_lr: 0.001030 loss: 3.9050 (3.9410) weight_decay: 0.0500 (0.0500) time: 0.5050 data: 0.0989 max mem: 21002 Epoch: [205] [ 60/312] eta: 0:02:38 lr: 0.001030 min_lr: 0.001030 loss: 3.9398 (3.9586) weight_decay: 0.0500 (0.0500) time: 0.4905 data: 0.0989 max mem: 21002 Epoch: [205] [ 70/312] eta: 0:02:36 lr: 0.001029 min_lr: 0.001029 loss: 4.1286 (3.9932) weight_decay: 0.0500 (0.0500) time: 0.5201 data: 0.1057 max mem: 21002 Epoch: [205] [ 80/312] eta: 0:02:28 lr: 0.001028 min_lr: 0.001028 loss: 4.0717 (3.9547) weight_decay: 0.0500 (0.0500) time: 0.6836 data: 0.1404 max mem: 21002 Epoch: [205] [ 90/312] eta: 0:02:15 lr: 0.001028 min_lr: 0.001028 loss: 3.9276 (3.9391) weight_decay: 0.0500 (0.0500) time: 0.4840 data: 0.0724 max mem: 21002 Epoch: [205] [100/312] eta: 0:02:11 lr: 0.001027 min_lr: 0.001027 loss: 3.9384 (3.9232) weight_decay: 0.0500 (0.0500) time: 0.5336 data: 0.1047 max mem: 21002 Epoch: [205] [110/312] eta: 0:02:00 lr: 0.001026 min_lr: 0.001026 loss: 3.9384 (3.9088) weight_decay: 0.0500 (0.0500) time: 0.5369 data: 0.1063 max mem: 21002 Epoch: [205] [120/312] eta: 0:01:54 lr: 0.001026 min_lr: 0.001026 loss: 4.0627 (3.9009) weight_decay: 0.0500 (0.0500) time: 0.4697 data: 0.0697 max mem: 21002 Epoch: [205] [130/312] eta: 0:01:49 lr: 0.001025 min_lr: 0.001025 loss: 4.0125 (3.8972) weight_decay: 0.0500 (0.0500) time: 0.6363 data: 0.1088 max mem: 21002 Epoch: [205] [140/312] eta: 0:01:39 lr: 0.001025 min_lr: 0.001025 loss: 4.0125 (3.9047) weight_decay: 0.0500 (0.0500) time: 0.4983 data: 0.0861 max mem: 21002 Epoch: [205] [150/312] eta: 0:01:34 lr: 0.001024 min_lr: 0.001024 loss: 3.9050 (3.8931) weight_decay: 0.0500 (0.0500) time: 0.4768 data: 0.0599 max mem: 21002 Epoch: [205] [160/312] eta: 0:01:29 lr: 0.001023 min_lr: 0.001023 loss: 3.7261 (3.8845) weight_decay: 0.0500 (0.0500) time: 0.6524 data: 0.1053 max mem: 21002 Epoch: [205] [170/312] eta: 0:01:21 lr: 0.001023 min_lr: 0.001023 loss: 3.7327 (3.8781) weight_decay: 0.0500 (0.0500) time: 0.5094 data: 0.0743 max mem: 21002 Epoch: [205] [180/312] eta: 0:01:16 lr: 0.001022 min_lr: 0.001022 loss: 3.9566 (3.8884) weight_decay: 0.0500 (0.0500) time: 0.5269 data: 0.0728 max mem: 21002 Epoch: [205] [190/312] eta: 0:01:09 lr: 0.001021 min_lr: 0.001021 loss: 3.8287 (3.8736) weight_decay: 0.0500 (0.0500) time: 0.5077 data: 0.0733 max mem: 21002 Epoch: [205] [200/312] eta: 0:01:03 lr: 0.001021 min_lr: 0.001021 loss: 3.8254 (3.8706) weight_decay: 0.0500 (0.0500) time: 0.4636 data: 0.0604 max mem: 21002 Epoch: [205] [210/312] eta: 0:00:58 lr: 0.001020 min_lr: 0.001020 loss: 3.8272 (3.8531) weight_decay: 0.0500 (0.0500) time: 0.6122 data: 0.1036 max mem: 21002 Epoch: [205] [220/312] eta: 0:00:51 lr: 0.001019 min_lr: 0.001019 loss: 3.7027 (3.8421) weight_decay: 0.0500 (0.0500) time: 0.4632 data: 0.0735 max mem: 21002 Epoch: [205] [230/312] eta: 0:00:46 lr: 0.001019 min_lr: 0.001019 loss: 3.5394 (3.8312) weight_decay: 0.0500 (0.0500) time: 0.5092 data: 0.1154 max mem: 21002 Epoch: [205] [240/312] eta: 0:00:41 lr: 0.001018 min_lr: 0.001018 loss: 3.3993 (3.8163) weight_decay: 0.0500 (0.0500) time: 0.6852 data: 0.1965 max mem: 21002 Epoch: [205] [250/312] eta: 0:00:34 lr: 0.001018 min_lr: 0.001018 loss: 3.6561 (3.8181) weight_decay: 0.0500 (0.0500) time: 0.4811 data: 0.0901 max mem: 21002 Epoch: [205] [260/312] eta: 0:00:29 lr: 0.001017 min_lr: 0.001017 loss: 3.9824 (3.8082) weight_decay: 0.0500 (0.0500) time: 0.4947 data: 0.0881 max mem: 21002 Epoch: [205] [270/312] eta: 0:00:23 lr: 0.001016 min_lr: 0.001016 loss: 4.0020 (3.8147) weight_decay: 0.0500 (0.0500) time: 0.5049 data: 0.0972 max mem: 21002 Epoch: [205] [280/312] eta: 0:00:17 lr: 0.001016 min_lr: 0.001016 loss: 4.0139 (3.8099) weight_decay: 0.0500 (0.0500) time: 0.4949 data: 0.1061 max mem: 21002 Epoch: [205] [290/312] eta: 0:00:12 lr: 0.001015 min_lr: 0.001015 loss: 3.8220 (3.8111) weight_decay: 0.0500 (0.0500) time: 0.6155 data: 0.1583 max mem: 21002 Epoch: [205] [300/312] eta: 0:00:06 lr: 0.001014 min_lr: 0.001014 loss: 4.0103 (3.8159) weight_decay: 0.0500 (0.0500) time: 0.4386 data: 0.0655 max mem: 21002 Epoch: [205] [310/312] eta: 0:00:01 lr: 0.001014 min_lr: 0.001014 loss: 4.0078 (3.8170) weight_decay: 0.0500 (0.0500) time: 0.2993 data: 0.0040 max mem: 21002 Epoch: [205] [311/312] eta: 0:00:00 lr: 0.001014 min_lr: 0.001014 loss: 4.0103 (3.8184) weight_decay: 0.0500 (0.0500) time: 0.2959 data: 0.0001 max mem: 21002 Epoch: [205] Total time: 0:02:50 (0.5471 s / it) Averaged stats: lr: 0.001014 min_lr: 0.001014 loss: 4.0103 (3.7878) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:13 loss: 1.6281 (1.6281) acc1: 75.0000 (75.0000) acc5: 92.1875 (92.1875) time: 8.1805 data: 8.0642 max mem: 21002 Test: [8/9] eta: 0:00:00 loss: 1.8834 (1.7929) acc1: 63.6719 (65.5040) acc5: 88.4115 (87.5680) time: 0.9925 data: 0.8961 max mem: 21002 Test: Total time: 0:00:09 (1.0101 s / it) * Acc@1 65.650 Acc@5 87.526 loss 1.809 Accuracy of the model on the 50000 test images: 65.7% Max accuracy: 66.12% Epoch: [206] [ 0/312] eta: 0:56:19 lr: 0.001014 min_lr: 0.001014 loss: 3.2265 (3.2265) weight_decay: 0.0500 (0.0500) time: 10.8314 data: 10.5447 max mem: 21002 Epoch: [206] [ 10/312] eta: 0:07:22 lr: 0.001013 min_lr: 0.001013 loss: 3.4512 (3.6449) weight_decay: 0.0500 (0.0500) time: 1.4660 data: 1.0316 max mem: 21002 Epoch: [206] [ 20/312] eta: 0:05:09 lr: 0.001012 min_lr: 0.001012 loss: 3.9325 (3.8639) weight_decay: 0.0500 (0.0500) time: 0.5719 data: 0.1758 max mem: 21002 Epoch: [206] [ 30/312] eta: 0:03:48 lr: 0.001012 min_lr: 0.001012 loss: 3.9570 (3.7566) weight_decay: 0.0500 (0.0500) time: 0.4514 data: 0.1360 max mem: 21002 Epoch: [206] [ 40/312] eta: 0:03:26 lr: 0.001011 min_lr: 0.001011 loss: 3.8509 (3.8012) weight_decay: 0.0500 (0.0500) time: 0.4418 data: 0.1487 max mem: 21002 Epoch: [206] [ 50/312] eta: 0:03:15 lr: 0.001011 min_lr: 0.001011 loss: 3.9512 (3.7942) weight_decay: 0.0500 (0.0500) time: 0.6450 data: 0.3224 max mem: 21002 Epoch: [206] [ 60/312] eta: 0:02:49 lr: 0.001010 min_lr: 0.001010 loss: 3.8133 (3.7531) weight_decay: 0.0500 (0.0500) time: 0.4902 data: 0.1743 max mem: 21002 Epoch: [206] [ 70/312] eta: 0:02:43 lr: 0.001009 min_lr: 0.001009 loss: 3.8996 (3.7951) weight_decay: 0.0500 (0.0500) time: 0.5013 data: 0.1429 max mem: 21002 Epoch: [206] [ 80/312] eta: 0:02:32 lr: 0.001009 min_lr: 0.001009 loss: 3.9443 (3.7622) weight_decay: 0.0500 (0.0500) time: 0.6183 data: 0.2578 max mem: 21002 Epoch: [206] [ 90/312] eta: 0:02:19 lr: 0.001008 min_lr: 0.001008 loss: 3.9443 (3.7726) weight_decay: 0.0500 (0.0500) time: 0.4532 data: 0.1267 max mem: 21002 Epoch: [206] [100/312] eta: 0:02:14 lr: 0.001007 min_lr: 0.001007 loss: 3.8463 (3.7718) weight_decay: 0.0500 (0.0500) time: 0.5442 data: 0.1670 max mem: 21002 Epoch: [206] [110/312] eta: 0:02:01 lr: 0.001007 min_lr: 0.001007 loss: 3.7982 (3.7616) weight_decay: 0.0500 (0.0500) time: 0.4922 data: 0.1559 max mem: 21002 Epoch: [206] [120/312] eta: 0:01:56 lr: 0.001006 min_lr: 0.001006 loss: 3.7982 (3.7445) weight_decay: 0.0500 (0.0500) time: 0.4610 data: 0.1366 max mem: 21002 Epoch: [206] [130/312] eta: 0:01:51 lr: 0.001006 min_lr: 0.001006 loss: 3.5819 (3.7301) weight_decay: 0.0500 (0.0500) time: 0.6569 data: 0.3006 max mem: 21002 Epoch: [206] [140/312] eta: 0:01:41 lr: 0.001005 min_lr: 0.001005 loss: 3.5763 (3.7165) weight_decay: 0.0500 (0.0500) time: 0.4809 data: 0.1647 max mem: 21002 Epoch: [206] [150/312] eta: 0:01:36 lr: 0.001004 min_lr: 0.001004 loss: 3.9070 (3.7390) weight_decay: 0.0500 (0.0500) time: 0.4785 data: 0.1748 max mem: 21002 Epoch: [206] [160/312] eta: 0:01:30 lr: 0.001004 min_lr: 0.001004 loss: 4.0380 (3.7454) weight_decay: 0.0500 (0.0500) time: 0.6496 data: 0.3462 max mem: 21002 Epoch: [206] [170/312] eta: 0:01:22 lr: 0.001003 min_lr: 0.001003 loss: 3.9424 (3.7684) weight_decay: 0.0500 (0.0500) time: 0.4791 data: 0.1721 max mem: 21002 Epoch: [206] [180/312] eta: 0:01:17 lr: 0.001002 min_lr: 0.001002 loss: 4.0470 (3.7722) weight_decay: 0.0500 (0.0500) time: 0.5269 data: 0.1793 max mem: 21002 Epoch: [206] [190/312] eta: 0:01:09 lr: 0.001002 min_lr: 0.001002 loss: 3.7722 (3.7638) weight_decay: 0.0500 (0.0500) time: 0.5071 data: 0.1792 max mem: 21002 Epoch: [206] [200/312] eta: 0:01:04 lr: 0.001001 min_lr: 0.001001 loss: 3.4899 (3.7476) weight_decay: 0.0500 (0.0500) time: 0.4941 data: 0.1525 max mem: 21002 Epoch: [206] [210/312] eta: 0:00:59 lr: 0.001001 min_lr: 0.001001 loss: 3.5198 (3.7481) weight_decay: 0.0500 (0.0500) time: 0.6847 data: 0.3021 max mem: 21002 Epoch: [206] [220/312] eta: 0:00:52 lr: 0.001000 min_lr: 0.001000 loss: 3.6843 (3.7376) weight_decay: 0.0500 (0.0500) time: 0.4783 data: 0.1502 max mem: 21002 Epoch: [206] [230/312] eta: 0:00:47 lr: 0.000999 min_lr: 0.000999 loss: 3.7732 (3.7441) weight_decay: 0.0500 (0.0500) time: 0.4885 data: 0.1415 max mem: 21002 Epoch: [206] [240/312] eta: 0:00:41 lr: 0.000999 min_lr: 0.000999 loss: 4.1613 (3.7598) weight_decay: 0.0500 (0.0500) time: 0.6605 data: 0.2835 max mem: 21002 Epoch: [206] [250/312] eta: 0:00:35 lr: 0.000998 min_lr: 0.000998 loss: 4.0201 (3.7604) weight_decay: 0.0500 (0.0500) time: 0.4784 data: 0.1426 max mem: 21002 Epoch: [206] [260/312] eta: 0:00:29 lr: 0.000998 min_lr: 0.000998 loss: 3.9277 (3.7557) weight_decay: 0.0500 (0.0500) time: 0.5009 data: 0.1358 max mem: 21002 Epoch: [206] [270/312] eta: 0:00:23 lr: 0.000997 min_lr: 0.000997 loss: 3.8494 (3.7570) weight_decay: 0.0500 (0.0500) time: 0.4822 data: 0.1366 max mem: 21002 Epoch: [206] [280/312] eta: 0:00:18 lr: 0.000996 min_lr: 0.000996 loss: 3.7110 (3.7537) weight_decay: 0.0500 (0.0500) time: 0.4830 data: 0.1188 max mem: 21002 Epoch: [206] [290/312] eta: 0:00:12 lr: 0.000996 min_lr: 0.000996 loss: 3.8663 (3.7569) weight_decay: 0.0500 (0.0500) time: 0.6157 data: 0.2012 max mem: 21002 Epoch: [206] [300/312] eta: 0:00:06 lr: 0.000995 min_lr: 0.000995 loss: 3.6774 (3.7471) weight_decay: 0.0500 (0.0500) time: 0.4163 data: 0.0836 max mem: 21002 Epoch: [206] [310/312] eta: 0:00:01 lr: 0.000994 min_lr: 0.000994 loss: 3.3919 (3.7427) weight_decay: 0.0500 (0.0500) time: 0.2776 data: 0.0001 max mem: 21002 Epoch: [206] [311/312] eta: 0:00:00 lr: 0.000994 min_lr: 0.000994 loss: 3.3919 (3.7419) weight_decay: 0.0500 (0.0500) time: 0.2776 data: 0.0001 max mem: 21002 Epoch: [206] Total time: 0:02:51 (0.5493 s / it) Averaged stats: lr: 0.000994 min_lr: 0.000994 loss: 3.3919 (3.7539) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:21 loss: 1.4499 (1.4499) acc1: 71.8750 (71.8750) acc5: 91.0156 (91.0156) time: 9.1101 data: 8.9913 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8635 (1.6808) acc1: 62.8906 (64.5760) acc5: 84.5052 (86.2560) time: 1.0989 data: 0.9991 max mem: 21002 Test: Total time: 0:00:10 (1.1331 s / it) * Acc@1 64.860 Acc@5 86.818 loss 1.694 Accuracy of the model on the 50000 test images: 64.9% Max accuracy: 66.12% Epoch: [207] [ 0/312] eta: 1:01:38 lr: 0.000994 min_lr: 0.000994 loss: 3.3525 (3.3525) weight_decay: 0.0500 (0.0500) time: 11.8538 data: 9.5252 max mem: 21002 Epoch: [207] [ 10/312] eta: 0:08:19 lr: 0.000994 min_lr: 0.000994 loss: 3.8570 (3.8220) weight_decay: 0.0500 (0.0500) time: 1.6529 data: 1.0284 max mem: 21002 Epoch: [207] [ 20/312] eta: 0:05:04 lr: 0.000993 min_lr: 0.000993 loss: 3.8209 (3.7074) weight_decay: 0.0500 (0.0500) time: 0.5030 data: 0.0933 max mem: 21002 Epoch: [207] [ 30/312] eta: 0:03:45 lr: 0.000992 min_lr: 0.000992 loss: 3.8209 (3.7227) weight_decay: 0.0500 (0.0500) time: 0.3300 data: 0.0043 max mem: 21002 Epoch: [207] [ 40/312] eta: 0:03:10 lr: 0.000992 min_lr: 0.000992 loss: 3.6651 (3.6765) weight_decay: 0.0500 (0.0500) time: 0.3422 data: 0.0014 max mem: 21002 Epoch: [207] [ 50/312] eta: 0:03:04 lr: 0.000991 min_lr: 0.000991 loss: 3.5273 (3.6856) weight_decay: 0.0500 (0.0500) time: 0.5547 data: 0.0944 max mem: 21002 Epoch: [207] [ 60/312] eta: 0:02:44 lr: 0.000991 min_lr: 0.000991 loss: 3.5267 (3.6789) weight_decay: 0.0500 (0.0500) time: 0.5522 data: 0.1308 max mem: 21002 Epoch: [207] [ 70/312] eta: 0:02:35 lr: 0.000990 min_lr: 0.000990 loss: 3.8043 (3.7115) weight_decay: 0.0500 (0.0500) time: 0.4893 data: 0.0876 max mem: 21002 Epoch: [207] [ 80/312] eta: 0:02:26 lr: 0.000989 min_lr: 0.000989 loss: 3.7252 (3.6930) weight_decay: 0.0500 (0.0500) time: 0.5751 data: 0.0944 max mem: 21002 Epoch: [207] [ 90/312] eta: 0:02:15 lr: 0.000989 min_lr: 0.000989 loss: 3.5425 (3.6711) weight_decay: 0.0500 (0.0500) time: 0.4854 data: 0.1043 max mem: 21002 Epoch: [207] [100/312] eta: 0:02:10 lr: 0.000988 min_lr: 0.000988 loss: 3.8612 (3.6936) weight_decay: 0.0500 (0.0500) time: 0.5382 data: 0.1287 max mem: 21002 Epoch: [207] [110/312] eta: 0:01:58 lr: 0.000987 min_lr: 0.000987 loss: 3.8303 (3.6881) weight_decay: 0.0500 (0.0500) time: 0.4792 data: 0.0692 max mem: 21002 Epoch: [207] [120/312] eta: 0:01:53 lr: 0.000987 min_lr: 0.000987 loss: 3.7818 (3.6990) weight_decay: 0.0500 (0.0500) time: 0.4820 data: 0.0706 max mem: 21002 Epoch: [207] [130/312] eta: 0:01:49 lr: 0.000986 min_lr: 0.000986 loss: 3.7818 (3.6941) weight_decay: 0.0500 (0.0500) time: 0.6964 data: 0.1685 max mem: 21002 Epoch: [207] [140/312] eta: 0:01:39 lr: 0.000986 min_lr: 0.000986 loss: 3.4686 (3.6647) weight_decay: 0.0500 (0.0500) time: 0.5016 data: 0.0987 max mem: 21002 Epoch: [207] [150/312] eta: 0:01:34 lr: 0.000985 min_lr: 0.000985 loss: 3.4522 (3.6693) weight_decay: 0.0500 (0.0500) time: 0.4648 data: 0.0524 max mem: 21002 Epoch: [207] [160/312] eta: 0:01:28 lr: 0.000984 min_lr: 0.000984 loss: 3.8484 (3.6750) weight_decay: 0.0500 (0.0500) time: 0.6057 data: 0.0625 max mem: 21002 Epoch: [207] [170/312] eta: 0:01:21 lr: 0.000984 min_lr: 0.000984 loss: 3.9522 (3.6833) weight_decay: 0.0500 (0.0500) time: 0.4855 data: 0.0687 max mem: 21002 Epoch: [207] [180/312] eta: 0:01:16 lr: 0.000983 min_lr: 0.000983 loss: 3.9522 (3.6740) weight_decay: 0.0500 (0.0500) time: 0.5278 data: 0.1167 max mem: 21002 Epoch: [207] [190/312] eta: 0:01:08 lr: 0.000982 min_lr: 0.000982 loss: 3.7875 (3.6716) weight_decay: 0.0500 (0.0500) time: 0.4690 data: 0.0587 max mem: 21002 Epoch: [207] [200/312] eta: 0:01:03 lr: 0.000982 min_lr: 0.000982 loss: 3.9597 (3.6855) weight_decay: 0.0500 (0.0500) time: 0.5163 data: 0.0728 max mem: 21002 Epoch: [207] [210/312] eta: 0:00:58 lr: 0.000981 min_lr: 0.000981 loss: 3.8682 (3.6816) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.1180 max mem: 21002 Epoch: [207] [220/312] eta: 0:00:51 lr: 0.000981 min_lr: 0.000981 loss: 3.6750 (3.6724) weight_decay: 0.0500 (0.0500) time: 0.4713 data: 0.0459 max mem: 21002 Epoch: [207] [230/312] eta: 0:00:46 lr: 0.000980 min_lr: 0.000980 loss: 3.6789 (3.6809) weight_decay: 0.0500 (0.0500) time: 0.4971 data: 0.0520 max mem: 21002 Epoch: [207] [240/312] eta: 0:00:40 lr: 0.000979 min_lr: 0.000979 loss: 3.8968 (3.6854) weight_decay: 0.0500 (0.0500) time: 0.6138 data: 0.0520 max mem: 21002 Epoch: [207] [250/312] eta: 0:00:34 lr: 0.000979 min_lr: 0.000979 loss: 3.6966 (3.6814) weight_decay: 0.0500 (0.0500) time: 0.4826 data: 0.0532 max mem: 21002 Epoch: [207] [260/312] eta: 0:00:29 lr: 0.000978 min_lr: 0.000978 loss: 3.6765 (3.6834) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.1038 max mem: 21002 Epoch: [207] [270/312] eta: 0:00:23 lr: 0.000978 min_lr: 0.000978 loss: 3.8241 (3.6899) weight_decay: 0.0500 (0.0500) time: 0.5062 data: 0.0512 max mem: 21002 Epoch: [207] [280/312] eta: 0:00:17 lr: 0.000977 min_lr: 0.000977 loss: 3.7642 (3.6903) weight_decay: 0.0500 (0.0500) time: 0.4766 data: 0.0553 max mem: 21002 Epoch: [207] [290/312] eta: 0:00:12 lr: 0.000976 min_lr: 0.000976 loss: 4.1694 (3.7093) weight_decay: 0.0500 (0.0500) time: 0.6100 data: 0.0872 max mem: 21002 Epoch: [207] [300/312] eta: 0:00:06 lr: 0.000976 min_lr: 0.000976 loss: 4.2542 (3.7227) weight_decay: 0.0500 (0.0500) time: 0.4267 data: 0.0404 max mem: 21002 Epoch: [207] [310/312] eta: 0:00:01 lr: 0.000975 min_lr: 0.000975 loss: 4.0906 (3.7269) weight_decay: 0.0500 (0.0500) time: 0.2852 data: 0.0083 max mem: 21002 Epoch: [207] [311/312] eta: 0:00:00 lr: 0.000975 min_lr: 0.000975 loss: 4.0906 (3.7296) weight_decay: 0.0500 (0.0500) time: 0.2851 data: 0.0083 max mem: 21002 Epoch: [207] Total time: 0:02:50 (0.5461 s / it) Averaged stats: lr: 0.000975 min_lr: 0.000975 loss: 4.0906 (3.7689) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:12 loss: 1.5548 (1.5548) acc1: 75.3906 (75.3906) acc5: 92.8385 (92.8385) time: 8.0427 data: 7.9246 max mem: 21002 Test: [8/9] eta: 0:00:00 loss: 1.9774 (1.8569) acc1: 63.8021 (65.3440) acc5: 85.9375 (86.9600) time: 0.9953 data: 0.8918 max mem: 21002 Test: Total time: 0:00:09 (1.0050 s / it) * Acc@1 65.098 Acc@5 87.052 loss 1.870 Accuracy of the model on the 50000 test images: 65.1% Max accuracy: 66.12% Epoch: [208] [ 0/312] eta: 1:01:58 lr: 0.000975 min_lr: 0.000975 loss: 3.7652 (3.7652) weight_decay: 0.0500 (0.0500) time: 11.9175 data: 10.6560 max mem: 21002 Epoch: [208] [ 10/312] eta: 0:08:19 lr: 0.000974 min_lr: 0.000974 loss: 3.5572 (3.3909) weight_decay: 0.0500 (0.0500) time: 1.6532 data: 1.0261 max mem: 21002 Epoch: [208] [ 20/312] eta: 0:05:12 lr: 0.000974 min_lr: 0.000974 loss: 3.7883 (3.6524) weight_decay: 0.0500 (0.0500) time: 0.5280 data: 0.0691 max mem: 21002 Epoch: [208] [ 30/312] eta: 0:03:50 lr: 0.000973 min_lr: 0.000973 loss: 3.7851 (3.6308) weight_decay: 0.0500 (0.0500) time: 0.3599 data: 0.0378 max mem: 21002 Epoch: [208] [ 40/312] eta: 0:03:18 lr: 0.000972 min_lr: 0.000972 loss: 3.7563 (3.6801) weight_decay: 0.0500 (0.0500) time: 0.3688 data: 0.0009 max mem: 21002 Epoch: [208] [ 50/312] eta: 0:03:05 lr: 0.000972 min_lr: 0.000972 loss: 3.7907 (3.7028) weight_decay: 0.0500 (0.0500) time: 0.5386 data: 0.1089 max mem: 21002 Epoch: [208] [ 60/312] eta: 0:02:41 lr: 0.000971 min_lr: 0.000971 loss: 3.8066 (3.7201) weight_decay: 0.0500 (0.0500) time: 0.4613 data: 0.1088 max mem: 21002 Epoch: [208] [ 70/312] eta: 0:02:34 lr: 0.000971 min_lr: 0.000971 loss: 3.8604 (3.7318) weight_decay: 0.0500 (0.0500) time: 0.4611 data: 0.1045 max mem: 21002 Epoch: [208] [ 80/312] eta: 0:02:25 lr: 0.000970 min_lr: 0.000970 loss: 3.9537 (3.7444) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.2150 max mem: 21002 Epoch: [208] [ 90/312] eta: 0:02:15 lr: 0.000969 min_lr: 0.000969 loss: 4.0998 (3.7879) weight_decay: 0.0500 (0.0500) time: 0.5077 data: 0.1113 max mem: 21002 Epoch: [208] [100/312] eta: 0:02:10 lr: 0.000969 min_lr: 0.000969 loss: 4.0998 (3.7920) weight_decay: 0.0500 (0.0500) time: 0.5664 data: 0.0810 max mem: 21002 Epoch: [208] [110/312] eta: 0:01:58 lr: 0.000968 min_lr: 0.000968 loss: 3.5665 (3.7583) weight_decay: 0.0500 (0.0500) time: 0.4815 data: 0.0809 max mem: 21002 Epoch: [208] [120/312] eta: 0:01:54 lr: 0.000968 min_lr: 0.000968 loss: 3.7687 (3.7602) weight_decay: 0.0500 (0.0500) time: 0.4878 data: 0.0918 max mem: 21002 Epoch: [208] [130/312] eta: 0:01:49 lr: 0.000967 min_lr: 0.000967 loss: 3.7892 (3.7658) weight_decay: 0.0500 (0.0500) time: 0.6897 data: 0.1984 max mem: 21002 Epoch: [208] [140/312] eta: 0:01:39 lr: 0.000966 min_lr: 0.000966 loss: 3.7842 (3.7704) weight_decay: 0.0500 (0.0500) time: 0.4895 data: 0.1074 max mem: 21002 Epoch: [208] [150/312] eta: 0:01:35 lr: 0.000966 min_lr: 0.000966 loss: 3.6365 (3.7631) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.1123 max mem: 21002 Epoch: [208] [160/312] eta: 0:01:28 lr: 0.000965 min_lr: 0.000965 loss: 3.7412 (3.7557) weight_decay: 0.0500 (0.0500) time: 0.6016 data: 0.2299 max mem: 21002 Epoch: [208] [170/312] eta: 0:01:21 lr: 0.000964 min_lr: 0.000964 loss: 3.9019 (3.7769) weight_decay: 0.0500 (0.0500) time: 0.4858 data: 0.1183 max mem: 21002 Epoch: [208] [180/312] eta: 0:01:16 lr: 0.000964 min_lr: 0.000964 loss: 3.9879 (3.7788) weight_decay: 0.0500 (0.0500) time: 0.5648 data: 0.1080 max mem: 21002 Epoch: [208] [190/312] eta: 0:01:09 lr: 0.000963 min_lr: 0.000963 loss: 3.7782 (3.7740) weight_decay: 0.0500 (0.0500) time: 0.4824 data: 0.1081 max mem: 21002 Epoch: [208] [200/312] eta: 0:01:03 lr: 0.000963 min_lr: 0.000963 loss: 3.8715 (3.7766) weight_decay: 0.0500 (0.0500) time: 0.4807 data: 0.0482 max mem: 21002 Epoch: [208] [210/312] eta: 0:00:58 lr: 0.000962 min_lr: 0.000962 loss: 3.9257 (3.7786) weight_decay: 0.0500 (0.0500) time: 0.6789 data: 0.1567 max mem: 21002 Epoch: [208] [220/312] eta: 0:00:51 lr: 0.000961 min_lr: 0.000961 loss: 3.8528 (3.7751) weight_decay: 0.0500 (0.0500) time: 0.4850 data: 0.1093 max mem: 21002 Epoch: [208] [230/312] eta: 0:00:46 lr: 0.000961 min_lr: 0.000961 loss: 3.7235 (3.7618) weight_decay: 0.0500 (0.0500) time: 0.4991 data: 0.1087 max mem: 21002 Epoch: [208] [240/312] eta: 0:00:40 lr: 0.000960 min_lr: 0.000960 loss: 3.8018 (3.7615) weight_decay: 0.0500 (0.0500) time: 0.5985 data: 0.2081 max mem: 21002 Epoch: [208] [250/312] eta: 0:00:34 lr: 0.000960 min_lr: 0.000960 loss: 3.9350 (3.7688) weight_decay: 0.0500 (0.0500) time: 0.4902 data: 0.1001 max mem: 21002 Epoch: [208] [260/312] eta: 0:00:29 lr: 0.000959 min_lr: 0.000959 loss: 3.7846 (3.7588) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0917 max mem: 21002 Epoch: [208] [270/312] eta: 0:00:23 lr: 0.000958 min_lr: 0.000958 loss: 3.4471 (3.7540) weight_decay: 0.0500 (0.0500) time: 0.4900 data: 0.0918 max mem: 21002 Epoch: [208] [280/312] eta: 0:00:18 lr: 0.000958 min_lr: 0.000958 loss: 3.7248 (3.7515) weight_decay: 0.0500 (0.0500) time: 0.5048 data: 0.0835 max mem: 21002 Epoch: [208] [290/312] eta: 0:00:12 lr: 0.000957 min_lr: 0.000957 loss: 4.0017 (3.7606) weight_decay: 0.0500 (0.0500) time: 0.6419 data: 0.1398 max mem: 21002 Epoch: [208] [300/312] eta: 0:00:06 lr: 0.000956 min_lr: 0.000956 loss: 3.9113 (3.7608) weight_decay: 0.0500 (0.0500) time: 0.4199 data: 0.0567 max mem: 21002 Epoch: [208] [310/312] eta: 0:00:01 lr: 0.000956 min_lr: 0.000956 loss: 3.7783 (3.7603) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [208] [311/312] eta: 0:00:00 lr: 0.000956 min_lr: 0.000956 loss: 3.7045 (3.7566) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [208] Total time: 0:02:50 (0.5481 s / it) Averaged stats: lr: 0.000956 min_lr: 0.000956 loss: 3.7045 (3.7665) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:21 loss: 1.4237 (1.4237) acc1: 74.7396 (74.7396) acc5: 92.4479 (92.4479) time: 9.0625 data: 8.9442 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8327 (1.7396) acc1: 64.7135 (65.9200) acc5: 87.6302 (87.2160) time: 1.0906 data: 0.9939 max mem: 21002 Test: Total time: 0:00:09 (1.1021 s / it) * Acc@1 65.494 Acc@5 87.204 loss 1.750 Accuracy of the model on the 50000 test images: 65.5% Max accuracy: 66.12% Epoch: [209] [ 0/312] eta: 0:57:57 lr: 0.000956 min_lr: 0.000956 loss: 4.1161 (4.1161) weight_decay: 0.0500 (0.0500) time: 11.1442 data: 9.2657 max mem: 21002 Epoch: [209] [ 10/312] eta: 0:08:30 lr: 0.000955 min_lr: 0.000955 loss: 3.9729 (3.7712) weight_decay: 0.0500 (0.0500) time: 1.6916 data: 1.0783 max mem: 21002 Epoch: [209] [ 20/312] eta: 0:05:00 lr: 0.000955 min_lr: 0.000955 loss: 3.7419 (3.6669) weight_decay: 0.0500 (0.0500) time: 0.5238 data: 0.1301 max mem: 21002 Epoch: [209] [ 30/312] eta: 0:03:43 lr: 0.000954 min_lr: 0.000954 loss: 3.7017 (3.6903) weight_decay: 0.0500 (0.0500) time: 0.2968 data: 0.0007 max mem: 21002 Epoch: [209] [ 40/312] eta: 0:03:08 lr: 0.000953 min_lr: 0.000953 loss: 3.8939 (3.7004) weight_decay: 0.0500 (0.0500) time: 0.3374 data: 0.0120 max mem: 21002 Epoch: [209] [ 50/312] eta: 0:03:00 lr: 0.000953 min_lr: 0.000953 loss: 3.6971 (3.6764) weight_decay: 0.0500 (0.0500) time: 0.5259 data: 0.2027 max mem: 21002 Epoch: [209] [ 60/312] eta: 0:02:36 lr: 0.000952 min_lr: 0.000952 loss: 3.8147 (3.7254) weight_decay: 0.0500 (0.0500) time: 0.4797 data: 0.1915 max mem: 21002 Epoch: [209] [ 70/312] eta: 0:02:31 lr: 0.000951 min_lr: 0.000951 loss: 3.8180 (3.6902) weight_decay: 0.0500 (0.0500) time: 0.4630 data: 0.1665 max mem: 21002 Epoch: [209] [ 80/312] eta: 0:02:25 lr: 0.000951 min_lr: 0.000951 loss: 3.4533 (3.6522) weight_decay: 0.0500 (0.0500) time: 0.6504 data: 0.3343 max mem: 21002 Epoch: [209] [ 90/312] eta: 0:02:11 lr: 0.000950 min_lr: 0.000950 loss: 3.6819 (3.6702) weight_decay: 0.0500 (0.0500) time: 0.4802 data: 0.1685 max mem: 21002 Epoch: [209] [100/312] eta: 0:02:06 lr: 0.000950 min_lr: 0.000950 loss: 3.9346 (3.6854) weight_decay: 0.0500 (0.0500) time: 0.4730 data: 0.1675 max mem: 21002 Epoch: [209] [110/312] eta: 0:01:56 lr: 0.000949 min_lr: 0.000949 loss: 3.7848 (3.6826) weight_decay: 0.0500 (0.0500) time: 0.5065 data: 0.1676 max mem: 21002 Epoch: [209] [120/312] eta: 0:01:52 lr: 0.000948 min_lr: 0.000948 loss: 3.8646 (3.6931) weight_decay: 0.0500 (0.0500) time: 0.5246 data: 0.1504 max mem: 21002 Epoch: [209] [130/312] eta: 0:01:47 lr: 0.000948 min_lr: 0.000948 loss: 4.0140 (3.6913) weight_decay: 0.0500 (0.0500) time: 0.6797 data: 0.3443 max mem: 21002 Epoch: [209] [140/312] eta: 0:01:38 lr: 0.000947 min_lr: 0.000947 loss: 3.9949 (3.7032) weight_decay: 0.0500 (0.0500) time: 0.4832 data: 0.1946 max mem: 21002 Epoch: [209] [150/312] eta: 0:01:33 lr: 0.000947 min_lr: 0.000947 loss: 3.9949 (3.7031) weight_decay: 0.0500 (0.0500) time: 0.4794 data: 0.1898 max mem: 21002 Epoch: [209] [160/312] eta: 0:01:28 lr: 0.000946 min_lr: 0.000946 loss: 3.7173 (3.7087) weight_decay: 0.0500 (0.0500) time: 0.6646 data: 0.3647 max mem: 21002 Epoch: [209] [170/312] eta: 0:01:20 lr: 0.000945 min_lr: 0.000945 loss: 3.9387 (3.7131) weight_decay: 0.0500 (0.0500) time: 0.5049 data: 0.1756 max mem: 21002 Epoch: [209] [180/312] eta: 0:01:15 lr: 0.000945 min_lr: 0.000945 loss: 4.0223 (3.7317) weight_decay: 0.0500 (0.0500) time: 0.4920 data: 0.1738 max mem: 21002 Epoch: [209] [190/312] eta: 0:01:08 lr: 0.000944 min_lr: 0.000944 loss: 3.9526 (3.7178) weight_decay: 0.0500 (0.0500) time: 0.4732 data: 0.1739 max mem: 21002 Epoch: [209] [200/312] eta: 0:01:03 lr: 0.000944 min_lr: 0.000944 loss: 3.6424 (3.7223) weight_decay: 0.0500 (0.0500) time: 0.4914 data: 0.1921 max mem: 21002 Epoch: [209] [210/312] eta: 0:00:58 lr: 0.000943 min_lr: 0.000943 loss: 3.8955 (3.7326) weight_decay: 0.0500 (0.0500) time: 0.6956 data: 0.4081 max mem: 21002 Epoch: [209] [220/312] eta: 0:00:51 lr: 0.000942 min_lr: 0.000942 loss: 4.0074 (3.7453) weight_decay: 0.0500 (0.0500) time: 0.5026 data: 0.2168 max mem: 21002 Epoch: [209] [230/312] eta: 0:00:46 lr: 0.000942 min_lr: 0.000942 loss: 3.7292 (3.7304) weight_decay: 0.0500 (0.0500) time: 0.4655 data: 0.1755 max mem: 21002 Epoch: [209] [240/312] eta: 0:00:40 lr: 0.000941 min_lr: 0.000941 loss: 3.4930 (3.7209) weight_decay: 0.0500 (0.0500) time: 0.6783 data: 0.3879 max mem: 21002 Epoch: [209] [250/312] eta: 0:00:34 lr: 0.000940 min_lr: 0.000940 loss: 3.7636 (3.7242) weight_decay: 0.0500 (0.0500) time: 0.5011 data: 0.2138 max mem: 21002 Epoch: [209] [260/312] eta: 0:00:29 lr: 0.000940 min_lr: 0.000940 loss: 3.7196 (3.7209) weight_decay: 0.0500 (0.0500) time: 0.4828 data: 0.1910 max mem: 21002 Epoch: [209] [270/312] eta: 0:00:23 lr: 0.000939 min_lr: 0.000939 loss: 3.4626 (3.7195) weight_decay: 0.0500 (0.0500) time: 0.4819 data: 0.1902 max mem: 21002 Epoch: [209] [280/312] eta: 0:00:17 lr: 0.000939 min_lr: 0.000939 loss: 3.7520 (3.7181) weight_decay: 0.0500 (0.0500) time: 0.4916 data: 0.1963 max mem: 21002 Epoch: [209] [290/312] eta: 0:00:12 lr: 0.000938 min_lr: 0.000938 loss: 3.7520 (3.7181) weight_decay: 0.0500 (0.0500) time: 0.6598 data: 0.3655 max mem: 21002 Epoch: [209] [300/312] eta: 0:00:06 lr: 0.000937 min_lr: 0.000937 loss: 3.6942 (3.7215) weight_decay: 0.0500 (0.0500) time: 0.4505 data: 0.1696 max mem: 21002 Epoch: [209] [310/312] eta: 0:00:01 lr: 0.000937 min_lr: 0.000937 loss: 3.6942 (3.7214) weight_decay: 0.0500 (0.0500) time: 0.2955 data: 0.0187 max mem: 21002 Epoch: [209] [311/312] eta: 0:00:00 lr: 0.000937 min_lr: 0.000937 loss: 3.6216 (3.7177) weight_decay: 0.0500 (0.0500) time: 0.2954 data: 0.0187 max mem: 21002 Epoch: [209] Total time: 0:02:50 (0.5457 s / it) Averaged stats: lr: 0.000937 min_lr: 0.000937 loss: 3.6216 (3.7693) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 1.4554 (1.4554) acc1: 75.0000 (75.0000) acc5: 91.2760 (91.2760) time: 8.9301 data: 8.8114 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7152 (1.5846) acc1: 65.8854 (66.9920) acc5: 88.1510 (87.9200) time: 1.0906 data: 0.9791 max mem: 21002 Test: Total time: 0:00:10 (1.1193 s / it) * Acc@1 66.788 Acc@5 88.002 loss 1.592 Accuracy of the model on the 50000 test images: 66.8% Max accuracy: 66.79% Epoch: [210] [ 0/312] eta: 0:59:52 lr: 0.000937 min_lr: 0.000937 loss: 4.6661 (4.6661) weight_decay: 0.0500 (0.0500) time: 11.5156 data: 10.2976 max mem: 21002 Epoch: [210] [ 10/312] eta: 0:08:29 lr: 0.000936 min_lr: 0.000936 loss: 3.8867 (3.8736) weight_decay: 0.0500 (0.0500) time: 1.6878 data: 1.0318 max mem: 21002 Epoch: [210] [ 20/312] eta: 0:05:25 lr: 0.000935 min_lr: 0.000935 loss: 3.7411 (3.8684) weight_decay: 0.0500 (0.0500) time: 0.5956 data: 0.0822 max mem: 21002 Epoch: [210] [ 30/312] eta: 0:03:59 lr: 0.000935 min_lr: 0.000935 loss: 3.8193 (3.7719) weight_decay: 0.0500 (0.0500) time: 0.3897 data: 0.0300 max mem: 21002 Epoch: [210] [ 40/312] eta: 0:03:14 lr: 0.000934 min_lr: 0.000934 loss: 3.8193 (3.7623) weight_decay: 0.0500 (0.0500) time: 0.2929 data: 0.0007 max mem: 21002 Epoch: [210] [ 50/312] eta: 0:03:07 lr: 0.000934 min_lr: 0.000934 loss: 4.0674 (3.7958) weight_decay: 0.0500 (0.0500) time: 0.5078 data: 0.0703 max mem: 21002 Epoch: [210] [ 60/312] eta: 0:02:42 lr: 0.000933 min_lr: 0.000933 loss: 3.9818 (3.7615) weight_decay: 0.0500 (0.0500) time: 0.5076 data: 0.0746 max mem: 21002 Epoch: [210] [ 70/312] eta: 0:02:38 lr: 0.000932 min_lr: 0.000932 loss: 3.8258 (3.7956) weight_decay: 0.0500 (0.0500) time: 0.5011 data: 0.0663 max mem: 21002 Epoch: [210] [ 80/312] eta: 0:02:23 lr: 0.000932 min_lr: 0.000932 loss: 3.8470 (3.7879) weight_decay: 0.0500 (0.0500) time: 0.5348 data: 0.0645 max mem: 21002 Epoch: [210] [ 90/312] eta: 0:02:16 lr: 0.000931 min_lr: 0.000931 loss: 3.8656 (3.7829) weight_decay: 0.0500 (0.0500) time: 0.4659 data: 0.0567 max mem: 21002 Epoch: [210] [100/312] eta: 0:02:11 lr: 0.000931 min_lr: 0.000931 loss: 3.9927 (3.7935) weight_decay: 0.0500 (0.0500) time: 0.6239 data: 0.1135 max mem: 21002 Epoch: [210] [110/312] eta: 0:01:59 lr: 0.000930 min_lr: 0.000930 loss: 3.8976 (3.8045) weight_decay: 0.0500 (0.0500) time: 0.4996 data: 0.0789 max mem: 21002 Epoch: [210] [120/312] eta: 0:01:54 lr: 0.000929 min_lr: 0.000929 loss: 3.6740 (3.7813) weight_decay: 0.0500 (0.0500) time: 0.4876 data: 0.0700 max mem: 21002 Epoch: [210] [130/312] eta: 0:01:49 lr: 0.000929 min_lr: 0.000929 loss: 3.6080 (3.7782) weight_decay: 0.0500 (0.0500) time: 0.6625 data: 0.1085 max mem: 21002 Epoch: [210] [140/312] eta: 0:01:41 lr: 0.000928 min_lr: 0.000928 loss: 3.9429 (3.7885) weight_decay: 0.0500 (0.0500) time: 0.5333 data: 0.1143 max mem: 21002 Epoch: [210] [150/312] eta: 0:01:35 lr: 0.000928 min_lr: 0.000928 loss: 3.9996 (3.7941) weight_decay: 0.0500 (0.0500) time: 0.5069 data: 0.0809 max mem: 21002 Epoch: [210] [160/312] eta: 0:01:28 lr: 0.000927 min_lr: 0.000927 loss: 3.9051 (3.7864) weight_decay: 0.0500 (0.0500) time: 0.5294 data: 0.0416 max mem: 21002 Epoch: [210] [170/312] eta: 0:01:22 lr: 0.000926 min_lr: 0.000926 loss: 3.5929 (3.7632) weight_decay: 0.0500 (0.0500) time: 0.4892 data: 0.0465 max mem: 21002 Epoch: [210] [180/312] eta: 0:01:16 lr: 0.000926 min_lr: 0.000926 loss: 3.4695 (3.7607) weight_decay: 0.0500 (0.0500) time: 0.5798 data: 0.0581 max mem: 21002 Epoch: [210] [190/312] eta: 0:01:09 lr: 0.000925 min_lr: 0.000925 loss: 3.5997 (3.7532) weight_decay: 0.0500 (0.0500) time: 0.4734 data: 0.0473 max mem: 21002 Epoch: [210] [200/312] eta: 0:01:04 lr: 0.000925 min_lr: 0.000925 loss: 3.5997 (3.7508) weight_decay: 0.0500 (0.0500) time: 0.5055 data: 0.0457 max mem: 21002 Epoch: [210] [210/312] eta: 0:00:58 lr: 0.000924 min_lr: 0.000924 loss: 3.5820 (3.7465) weight_decay: 0.0500 (0.0500) time: 0.6424 data: 0.0612 max mem: 21002 Epoch: [210] [220/312] eta: 0:00:52 lr: 0.000923 min_lr: 0.000923 loss: 3.7291 (3.7490) weight_decay: 0.0500 (0.0500) time: 0.4831 data: 0.0534 max mem: 21002 Epoch: [210] [230/312] eta: 0:00:46 lr: 0.000923 min_lr: 0.000923 loss: 3.5839 (3.7394) weight_decay: 0.0500 (0.0500) time: 0.5121 data: 0.0505 max mem: 21002 Epoch: [210] [240/312] eta: 0:00:40 lr: 0.000922 min_lr: 0.000922 loss: 3.8611 (3.7427) weight_decay: 0.0500 (0.0500) time: 0.5719 data: 0.0697 max mem: 21002 Epoch: [210] [250/312] eta: 0:00:35 lr: 0.000922 min_lr: 0.000922 loss: 3.9419 (3.7310) weight_decay: 0.0500 (0.0500) time: 0.5199 data: 0.0799 max mem: 21002 Epoch: [210] [260/312] eta: 0:00:29 lr: 0.000921 min_lr: 0.000921 loss: 3.8529 (3.7298) weight_decay: 0.0500 (0.0500) time: 0.6129 data: 0.0705 max mem: 21002 Epoch: [210] [270/312] eta: 0:00:23 lr: 0.000920 min_lr: 0.000920 loss: 4.0470 (3.7404) weight_decay: 0.0500 (0.0500) time: 0.5040 data: 0.0534 max mem: 21002 Epoch: [210] [280/312] eta: 0:00:18 lr: 0.000920 min_lr: 0.000920 loss: 4.1349 (3.7437) weight_decay: 0.0500 (0.0500) time: 0.5350 data: 0.0664 max mem: 21002 Epoch: [210] [290/312] eta: 0:00:12 lr: 0.000919 min_lr: 0.000919 loss: 4.0270 (3.7518) weight_decay: 0.0500 (0.0500) time: 0.6395 data: 0.0475 max mem: 21002 Epoch: [210] [300/312] eta: 0:00:06 lr: 0.000918 min_lr: 0.000918 loss: 3.9139 (3.7515) weight_decay: 0.0500 (0.0500) time: 0.4302 data: 0.0241 max mem: 21002 Epoch: [210] [310/312] eta: 0:00:01 lr: 0.000918 min_lr: 0.000918 loss: 3.7510 (3.7470) weight_decay: 0.0500 (0.0500) time: 0.3000 data: 0.0188 max mem: 21002 Epoch: [210] [311/312] eta: 0:00:00 lr: 0.000918 min_lr: 0.000918 loss: 3.7342 (3.7470) weight_decay: 0.0500 (0.0500) time: 0.2819 data: 0.0001 max mem: 21002 Epoch: [210] Total time: 0:02:52 (0.5522 s / it) Averaged stats: lr: 0.000918 min_lr: 0.000918 loss: 3.7342 (3.7707) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.4654 (1.4654) acc1: 74.6094 (74.6094) acc5: 92.0573 (92.0573) time: 8.6858 data: 8.5671 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7829 (1.6661) acc1: 66.2760 (66.7680) acc5: 86.7188 (87.7120) time: 1.0550 data: 0.9574 max mem: 21002 Test: Total time: 0:00:09 (1.0830 s / it) * Acc@1 66.274 Acc@5 87.874 loss 1.673 Accuracy of the model on the 50000 test images: 66.3% Max accuracy: 66.79% Epoch: [211] [ 0/312] eta: 0:59:53 lr: 0.000918 min_lr: 0.000918 loss: 4.0710 (4.0710) weight_decay: 0.0500 (0.0500) time: 11.5167 data: 9.4570 max mem: 21002 Epoch: [211] [ 10/312] eta: 0:08:29 lr: 0.000917 min_lr: 0.000917 loss: 3.9208 (3.7764) weight_decay: 0.0500 (0.0500) time: 1.6877 data: 1.0261 max mem: 21002 Epoch: [211] [ 20/312] eta: 0:05:04 lr: 0.000917 min_lr: 0.000917 loss: 3.7828 (3.7612) weight_decay: 0.0500 (0.0500) time: 0.5193 data: 0.0919 max mem: 21002 Epoch: [211] [ 30/312] eta: 0:03:45 lr: 0.000916 min_lr: 0.000916 loss: 3.6855 (3.7591) weight_decay: 0.0500 (0.0500) time: 0.3104 data: 0.0008 max mem: 21002 Epoch: [211] [ 40/312] eta: 0:03:08 lr: 0.000915 min_lr: 0.000915 loss: 3.7302 (3.7343) weight_decay: 0.0500 (0.0500) time: 0.3225 data: 0.0008 max mem: 21002 Epoch: [211] [ 50/312] eta: 0:02:57 lr: 0.000915 min_lr: 0.000915 loss: 3.5814 (3.7065) weight_decay: 0.0500 (0.0500) time: 0.4855 data: 0.0946 max mem: 21002 Epoch: [211] [ 60/312] eta: 0:02:35 lr: 0.000914 min_lr: 0.000914 loss: 3.8240 (3.7235) weight_decay: 0.0500 (0.0500) time: 0.4688 data: 0.1119 max mem: 21002 Epoch: [211] [ 70/312] eta: 0:02:31 lr: 0.000914 min_lr: 0.000914 loss: 3.7722 (3.7206) weight_decay: 0.0500 (0.0500) time: 0.4941 data: 0.1331 max mem: 21002 Epoch: [211] [ 80/312] eta: 0:02:25 lr: 0.000913 min_lr: 0.000913 loss: 3.9608 (3.7290) weight_decay: 0.0500 (0.0500) time: 0.6491 data: 0.1861 max mem: 21002 Epoch: [211] [ 90/312] eta: 0:02:11 lr: 0.000912 min_lr: 0.000912 loss: 3.8460 (3.7296) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.0909 max mem: 21002 Epoch: [211] [100/312] eta: 0:02:07 lr: 0.000912 min_lr: 0.000912 loss: 3.7772 (3.7309) weight_decay: 0.0500 (0.0500) time: 0.4925 data: 0.1000 max mem: 21002 Epoch: [211] [110/312] eta: 0:01:57 lr: 0.000911 min_lr: 0.000911 loss: 3.8182 (3.7319) weight_decay: 0.0500 (0.0500) time: 0.5131 data: 0.1114 max mem: 21002 Epoch: [211] [120/312] eta: 0:01:52 lr: 0.000911 min_lr: 0.000911 loss: 3.4100 (3.7059) weight_decay: 0.0500 (0.0500) time: 0.5097 data: 0.1009 max mem: 21002 Epoch: [211] [130/312] eta: 0:01:47 lr: 0.000910 min_lr: 0.000910 loss: 3.3585 (3.7035) weight_decay: 0.0500 (0.0500) time: 0.6508 data: 0.1847 max mem: 21002 Epoch: [211] [140/312] eta: 0:01:39 lr: 0.000909 min_lr: 0.000909 loss: 3.8320 (3.7131) weight_decay: 0.0500 (0.0500) time: 0.5230 data: 0.1157 max mem: 21002 Epoch: [211] [150/312] eta: 0:01:34 lr: 0.000909 min_lr: 0.000909 loss: 3.9531 (3.7255) weight_decay: 0.0500 (0.0500) time: 0.5481 data: 0.0560 max mem: 21002 Epoch: [211] [160/312] eta: 0:01:28 lr: 0.000908 min_lr: 0.000908 loss: 4.0654 (3.7442) weight_decay: 0.0500 (0.0500) time: 0.6380 data: 0.1049 max mem: 21002 Epoch: [211] [170/312] eta: 0:01:21 lr: 0.000908 min_lr: 0.000908 loss: 3.8987 (3.7461) weight_decay: 0.0500 (0.0500) time: 0.4657 data: 0.0497 max mem: 21002 Epoch: [211] [180/312] eta: 0:01:15 lr: 0.000907 min_lr: 0.000907 loss: 3.8362 (3.7559) weight_decay: 0.0500 (0.0500) time: 0.4929 data: 0.0610 max mem: 21002 Epoch: [211] [190/312] eta: 0:01:08 lr: 0.000906 min_lr: 0.000906 loss: 3.6810 (3.7471) weight_decay: 0.0500 (0.0500) time: 0.5159 data: 0.0717 max mem: 21002 Epoch: [211] [200/312] eta: 0:01:04 lr: 0.000906 min_lr: 0.000906 loss: 3.5517 (3.7423) weight_decay: 0.0500 (0.0500) time: 0.5549 data: 0.0463 max mem: 21002 Epoch: [211] [210/312] eta: 0:00:58 lr: 0.000905 min_lr: 0.000905 loss: 3.9115 (3.7449) weight_decay: 0.0500 (0.0500) time: 0.6298 data: 0.0799 max mem: 21002 Epoch: [211] [220/312] eta: 0:00:52 lr: 0.000905 min_lr: 0.000905 loss: 3.9115 (3.7409) weight_decay: 0.0500 (0.0500) time: 0.5184 data: 0.0869 max mem: 21002 Epoch: [211] [230/312] eta: 0:00:46 lr: 0.000904 min_lr: 0.000904 loss: 3.8833 (3.7452) weight_decay: 0.0500 (0.0500) time: 0.5449 data: 0.0485 max mem: 21002 Epoch: [211] [240/312] eta: 0:00:40 lr: 0.000903 min_lr: 0.000903 loss: 3.8243 (3.7386) weight_decay: 0.0500 (0.0500) time: 0.5884 data: 0.0814 max mem: 21002 Epoch: [211] [250/312] eta: 0:00:34 lr: 0.000903 min_lr: 0.000903 loss: 3.9176 (3.7511) weight_decay: 0.0500 (0.0500) time: 0.4934 data: 0.0965 max mem: 21002 Epoch: [211] [260/312] eta: 0:00:29 lr: 0.000902 min_lr: 0.000902 loss: 4.0691 (3.7478) weight_decay: 0.0500 (0.0500) time: 0.5012 data: 0.0825 max mem: 21002 Epoch: [211] [270/312] eta: 0:00:23 lr: 0.000902 min_lr: 0.000902 loss: 3.8756 (3.7514) weight_decay: 0.0500 (0.0500) time: 0.4970 data: 0.0819 max mem: 21002 Epoch: [211] [280/312] eta: 0:00:17 lr: 0.000901 min_lr: 0.000901 loss: 3.8756 (3.7580) weight_decay: 0.0500 (0.0500) time: 0.5227 data: 0.0723 max mem: 21002 Epoch: [211] [290/312] eta: 0:00:12 lr: 0.000900 min_lr: 0.000900 loss: 3.8273 (3.7594) weight_decay: 0.0500 (0.0500) time: 0.5727 data: 0.0975 max mem: 21002 Epoch: [211] [300/312] eta: 0:00:06 lr: 0.000900 min_lr: 0.000900 loss: 3.8107 (3.7613) weight_decay: 0.0500 (0.0500) time: 0.4273 data: 0.0657 max mem: 21002 Epoch: [211] [310/312] eta: 0:00:01 lr: 0.000899 min_lr: 0.000899 loss: 3.7749 (3.7532) weight_decay: 0.0500 (0.0500) time: 0.3157 data: 0.0197 max mem: 21002 Epoch: [211] [311/312] eta: 0:00:00 lr: 0.000899 min_lr: 0.000899 loss: 3.7749 (3.7545) weight_decay: 0.0500 (0.0500) time: 0.3154 data: 0.0197 max mem: 21002 Epoch: [211] Total time: 0:02:50 (0.5463 s / it) Averaged stats: lr: 0.000899 min_lr: 0.000899 loss: 3.7749 (3.7534) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 1.5193 (1.5193) acc1: 72.5260 (72.5260) acc5: 91.2760 (91.2760) time: 8.9941 data: 8.8754 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7705 (1.6917) acc1: 65.8854 (66.2720) acc5: 87.6302 (87.9360) time: 1.0841 data: 0.9862 max mem: 21002 Test: Total time: 0:00:09 (1.1060 s / it) * Acc@1 66.224 Acc@5 88.020 loss 1.703 Accuracy of the model on the 50000 test images: 66.2% Max accuracy: 66.79% Epoch: [212] [ 0/312] eta: 1:02:59 lr: 0.000899 min_lr: 0.000899 loss: 3.6665 (3.6665) weight_decay: 0.0500 (0.0500) time: 12.1146 data: 9.1492 max mem: 21002 Epoch: [212] [ 10/312] eta: 0:08:22 lr: 0.000898 min_lr: 0.000898 loss: 3.6665 (3.5729) weight_decay: 0.0500 (0.0500) time: 1.6647 data: 0.9689 max mem: 21002 Epoch: [212] [ 20/312] eta: 0:05:13 lr: 0.000898 min_lr: 0.000898 loss: 3.9667 (3.7769) weight_decay: 0.0500 (0.0500) time: 0.5224 data: 0.1272 max mem: 21002 Epoch: [212] [ 30/312] eta: 0:03:51 lr: 0.000897 min_lr: 0.000897 loss: 4.0377 (3.8279) weight_decay: 0.0500 (0.0500) time: 0.3585 data: 0.0520 max mem: 21002 Epoch: [212] [ 40/312] eta: 0:03:12 lr: 0.000897 min_lr: 0.000897 loss: 4.0360 (3.8307) weight_decay: 0.0500 (0.0500) time: 0.3191 data: 0.0015 max mem: 21002 Epoch: [212] [ 50/312] eta: 0:03:02 lr: 0.000896 min_lr: 0.000896 loss: 3.6150 (3.7763) weight_decay: 0.0500 (0.0500) time: 0.5010 data: 0.0901 max mem: 21002 Epoch: [212] [ 60/312] eta: 0:02:39 lr: 0.000895 min_lr: 0.000895 loss: 3.4830 (3.7423) weight_decay: 0.0500 (0.0500) time: 0.4855 data: 0.1036 max mem: 21002 Epoch: [212] [ 70/312] eta: 0:02:33 lr: 0.000895 min_lr: 0.000895 loss: 3.7115 (3.7781) weight_decay: 0.0500 (0.0500) time: 0.4825 data: 0.0765 max mem: 21002 Epoch: [212] [ 80/312] eta: 0:02:25 lr: 0.000894 min_lr: 0.000894 loss: 3.9926 (3.7878) weight_decay: 0.0500 (0.0500) time: 0.6157 data: 0.1327 max mem: 21002 Epoch: [212] [ 90/312] eta: 0:02:14 lr: 0.000894 min_lr: 0.000894 loss: 3.9358 (3.7936) weight_decay: 0.0500 (0.0500) time: 0.5072 data: 0.0845 max mem: 21002 Epoch: [212] [100/312] eta: 0:02:09 lr: 0.000893 min_lr: 0.000893 loss: 3.9041 (3.7877) weight_decay: 0.0500 (0.0500) time: 0.5408 data: 0.0747 max mem: 21002 Epoch: [212] [110/312] eta: 0:01:57 lr: 0.000892 min_lr: 0.000892 loss: 3.9396 (3.7937) weight_decay: 0.0500 (0.0500) time: 0.4748 data: 0.0703 max mem: 21002 Epoch: [212] [120/312] eta: 0:01:53 lr: 0.000892 min_lr: 0.000892 loss: 3.9396 (3.8029) weight_decay: 0.0500 (0.0500) time: 0.5005 data: 0.0701 max mem: 21002 Epoch: [212] [130/312] eta: 0:01:49 lr: 0.000891 min_lr: 0.000891 loss: 4.0072 (3.8167) weight_decay: 0.0500 (0.0500) time: 0.6900 data: 0.1109 max mem: 21002 Epoch: [212] [140/312] eta: 0:01:39 lr: 0.000891 min_lr: 0.000891 loss: 4.0287 (3.8150) weight_decay: 0.0500 (0.0500) time: 0.4823 data: 0.0503 max mem: 21002 Epoch: [212] [150/312] eta: 0:01:35 lr: 0.000890 min_lr: 0.000890 loss: 3.8581 (3.8121) weight_decay: 0.0500 (0.0500) time: 0.5019 data: 0.0682 max mem: 21002 Epoch: [212] [160/312] eta: 0:01:28 lr: 0.000889 min_lr: 0.000889 loss: 3.8117 (3.7921) weight_decay: 0.0500 (0.0500) time: 0.6146 data: 0.1116 max mem: 21002 Epoch: [212] [170/312] eta: 0:01:21 lr: 0.000889 min_lr: 0.000889 loss: 3.9758 (3.8090) weight_decay: 0.0500 (0.0500) time: 0.4700 data: 0.0442 max mem: 21002 Epoch: [212] [180/312] eta: 0:01:16 lr: 0.000888 min_lr: 0.000888 loss: 4.0641 (3.8154) weight_decay: 0.0500 (0.0500) time: 0.5743 data: 0.0531 max mem: 21002 Epoch: [212] [190/312] eta: 0:01:09 lr: 0.000888 min_lr: 0.000888 loss: 3.9891 (3.8117) weight_decay: 0.0500 (0.0500) time: 0.5067 data: 0.0591 max mem: 21002 Epoch: [212] [200/312] eta: 0:01:04 lr: 0.000887 min_lr: 0.000887 loss: 3.8884 (3.8198) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.0438 max mem: 21002 Epoch: [212] [210/312] eta: 0:00:59 lr: 0.000886 min_lr: 0.000886 loss: 3.8884 (3.8115) weight_decay: 0.0500 (0.0500) time: 0.7093 data: 0.0905 max mem: 21002 Epoch: [212] [220/312] eta: 0:00:52 lr: 0.000886 min_lr: 0.000886 loss: 3.8952 (3.8168) weight_decay: 0.0500 (0.0500) time: 0.5171 data: 0.0534 max mem: 21002 Epoch: [212] [230/312] eta: 0:00:46 lr: 0.000885 min_lr: 0.000885 loss: 3.8914 (3.8111) weight_decay: 0.0500 (0.0500) time: 0.4873 data: 0.0225 max mem: 21002 Epoch: [212] [240/312] eta: 0:00:40 lr: 0.000885 min_lr: 0.000885 loss: 3.5735 (3.7951) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0425 max mem: 21002 Epoch: [212] [250/312] eta: 0:00:35 lr: 0.000884 min_lr: 0.000884 loss: 3.6702 (3.7965) weight_decay: 0.0500 (0.0500) time: 0.4876 data: 0.0207 max mem: 21002 Epoch: [212] [260/312] eta: 0:00:29 lr: 0.000883 min_lr: 0.000883 loss: 3.6406 (3.7804) weight_decay: 0.0500 (0.0500) time: 0.6058 data: 0.0132 max mem: 21002 Epoch: [212] [270/312] eta: 0:00:23 lr: 0.000883 min_lr: 0.000883 loss: 3.5384 (3.7775) weight_decay: 0.0500 (0.0500) time: 0.5146 data: 0.0132 max mem: 21002 Epoch: [212] [280/312] eta: 0:00:18 lr: 0.000882 min_lr: 0.000882 loss: 3.9472 (3.7831) weight_decay: 0.0500 (0.0500) time: 0.4878 data: 0.0646 max mem: 21002 Epoch: [212] [290/312] eta: 0:00:12 lr: 0.000882 min_lr: 0.000882 loss: 3.7962 (3.7769) weight_decay: 0.0500 (0.0500) time: 0.6332 data: 0.0644 max mem: 21002 Epoch: [212] [300/312] eta: 0:00:06 lr: 0.000881 min_lr: 0.000881 loss: 3.7009 (3.7813) weight_decay: 0.0500 (0.0500) time: 0.4305 data: 0.0002 max mem: 21002 Epoch: [212] [310/312] eta: 0:00:01 lr: 0.000880 min_lr: 0.000880 loss: 3.9568 (3.7799) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [212] [311/312] eta: 0:00:00 lr: 0.000880 min_lr: 0.000880 loss: 3.9568 (3.7810) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [212] Total time: 0:02:51 (0.5506 s / it) Averaged stats: lr: 0.000880 min_lr: 0.000880 loss: 3.9568 (3.7628) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.3871 (1.3871) acc1: 75.7812 (75.7812) acc5: 93.3594 (93.3594) time: 8.5528 data: 8.4361 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.9009 (1.7149) acc1: 64.0625 (66.5120) acc5: 85.8073 (87.8240) time: 1.0420 data: 0.9374 max mem: 21002 Test: Total time: 0:00:09 (1.0699 s / it) * Acc@1 66.774 Acc@5 87.748 loss 1.723 Accuracy of the model on the 50000 test images: 66.8% Max accuracy: 66.79% Epoch: [213] [ 0/312] eta: 1:03:00 lr: 0.000880 min_lr: 0.000880 loss: 4.5409 (4.5409) weight_decay: 0.0500 (0.0500) time: 12.1185 data: 9.6218 max mem: 21002 Epoch: [213] [ 10/312] eta: 0:08:36 lr: 0.000880 min_lr: 0.000880 loss: 3.9741 (3.9194) weight_decay: 0.0500 (0.0500) time: 1.7104 data: 0.9722 max mem: 21002 Epoch: [213] [ 20/312] eta: 0:05:08 lr: 0.000879 min_lr: 0.000879 loss: 3.9673 (3.9266) weight_decay: 0.0500 (0.0500) time: 0.5020 data: 0.0771 max mem: 21002 Epoch: [213] [ 30/312] eta: 0:03:48 lr: 0.000879 min_lr: 0.000879 loss: 3.8710 (3.8975) weight_decay: 0.0500 (0.0500) time: 0.3125 data: 0.0238 max mem: 21002 Epoch: [213] [ 40/312] eta: 0:03:06 lr: 0.000878 min_lr: 0.000878 loss: 3.7361 (3.8776) weight_decay: 0.0500 (0.0500) time: 0.2952 data: 0.0006 max mem: 21002 Epoch: [213] [ 50/312] eta: 0:03:00 lr: 0.000877 min_lr: 0.000877 loss: 3.8990 (3.8603) weight_decay: 0.0500 (0.0500) time: 0.4998 data: 0.1072 max mem: 21002 Epoch: [213] [ 60/312] eta: 0:02:37 lr: 0.000877 min_lr: 0.000877 loss: 3.9042 (3.8615) weight_decay: 0.0500 (0.0500) time: 0.5012 data: 0.1127 max mem: 21002 Epoch: [213] [ 70/312] eta: 0:02:28 lr: 0.000876 min_lr: 0.000876 loss: 3.7601 (3.8219) weight_decay: 0.0500 (0.0500) time: 0.4262 data: 0.1049 max mem: 21002 Epoch: [213] [ 80/312] eta: 0:02:23 lr: 0.000876 min_lr: 0.000876 loss: 3.6264 (3.8163) weight_decay: 0.0500 (0.0500) time: 0.6076 data: 0.1962 max mem: 21002 Epoch: [213] [ 90/312] eta: 0:02:10 lr: 0.000875 min_lr: 0.000875 loss: 3.7626 (3.8143) weight_decay: 0.0500 (0.0500) time: 0.5034 data: 0.0977 max mem: 21002 Epoch: [213] [100/312] eta: 0:02:06 lr: 0.000874 min_lr: 0.000874 loss: 3.8718 (3.8105) weight_decay: 0.0500 (0.0500) time: 0.5113 data: 0.1058 max mem: 21002 Epoch: [213] [110/312] eta: 0:01:55 lr: 0.000874 min_lr: 0.000874 loss: 3.8752 (3.8226) weight_decay: 0.0500 (0.0500) time: 0.5013 data: 0.1103 max mem: 21002 Epoch: [213] [120/312] eta: 0:01:52 lr: 0.000873 min_lr: 0.000873 loss: 3.8490 (3.8112) weight_decay: 0.0500 (0.0500) time: 0.5063 data: 0.0881 max mem: 21002 Epoch: [213] [130/312] eta: 0:01:47 lr: 0.000873 min_lr: 0.000873 loss: 3.7133 (3.8002) weight_decay: 0.0500 (0.0500) time: 0.6796 data: 0.1549 max mem: 21002 Epoch: [213] [140/312] eta: 0:01:37 lr: 0.000872 min_lr: 0.000872 loss: 3.9578 (3.7979) weight_decay: 0.0500 (0.0500) time: 0.4781 data: 0.0722 max mem: 21002 Epoch: [213] [150/312] eta: 0:01:33 lr: 0.000871 min_lr: 0.000871 loss: 3.9234 (3.7943) weight_decay: 0.0500 (0.0500) time: 0.4982 data: 0.0710 max mem: 21002 Epoch: [213] [160/312] eta: 0:01:27 lr: 0.000871 min_lr: 0.000871 loss: 3.8472 (3.7833) weight_decay: 0.0500 (0.0500) time: 0.6458 data: 0.0830 max mem: 21002 Epoch: [213] [170/312] eta: 0:01:20 lr: 0.000870 min_lr: 0.000870 loss: 3.6226 (3.7697) weight_decay: 0.0500 (0.0500) time: 0.4804 data: 0.0443 max mem: 21002 Epoch: [213] [180/312] eta: 0:01:15 lr: 0.000870 min_lr: 0.000870 loss: 3.8536 (3.7800) weight_decay: 0.0500 (0.0500) time: 0.5140 data: 0.0891 max mem: 21002 Epoch: [213] [190/312] eta: 0:01:08 lr: 0.000869 min_lr: 0.000869 loss: 3.9197 (3.7834) weight_decay: 0.0500 (0.0500) time: 0.5043 data: 0.0574 max mem: 21002 Epoch: [213] [200/312] eta: 0:01:03 lr: 0.000868 min_lr: 0.000868 loss: 3.9197 (3.7918) weight_decay: 0.0500 (0.0500) time: 0.5432 data: 0.0556 max mem: 21002 Epoch: [213] [210/312] eta: 0:00:58 lr: 0.000868 min_lr: 0.000868 loss: 3.6943 (3.7822) weight_decay: 0.0500 (0.0500) time: 0.6623 data: 0.0616 max mem: 21002 Epoch: [213] [220/312] eta: 0:00:51 lr: 0.000867 min_lr: 0.000867 loss: 3.5360 (3.7803) weight_decay: 0.0500 (0.0500) time: 0.4554 data: 0.0070 max mem: 21002 Epoch: [213] [230/312] eta: 0:00:46 lr: 0.000867 min_lr: 0.000867 loss: 4.0626 (3.7990) weight_decay: 0.0500 (0.0500) time: 0.4750 data: 0.0322 max mem: 21002 Epoch: [213] [240/312] eta: 0:00:40 lr: 0.000866 min_lr: 0.000866 loss: 4.0163 (3.7917) weight_decay: 0.0500 (0.0500) time: 0.6068 data: 0.0528 max mem: 21002 Epoch: [213] [250/312] eta: 0:00:34 lr: 0.000865 min_lr: 0.000865 loss: 3.8647 (3.7949) weight_decay: 0.0500 (0.0500) time: 0.5114 data: 0.0500 max mem: 21002 Epoch: [213] [260/312] eta: 0:00:29 lr: 0.000865 min_lr: 0.000865 loss: 3.8226 (3.7908) weight_decay: 0.0500 (0.0500) time: 0.5507 data: 0.0482 max mem: 21002 Epoch: [213] [270/312] eta: 0:00:23 lr: 0.000864 min_lr: 0.000864 loss: 3.6531 (3.7749) weight_decay: 0.0500 (0.0500) time: 0.4887 data: 0.0260 max mem: 21002 Epoch: [213] [280/312] eta: 0:00:17 lr: 0.000864 min_lr: 0.000864 loss: 3.6531 (3.7689) weight_decay: 0.0500 (0.0500) time: 0.5083 data: 0.0314 max mem: 21002 Epoch: [213] [290/312] eta: 0:00:12 lr: 0.000863 min_lr: 0.000863 loss: 3.9331 (3.7742) weight_decay: 0.0500 (0.0500) time: 0.6713 data: 0.0396 max mem: 21002 Epoch: [213] [300/312] eta: 0:00:06 lr: 0.000863 min_lr: 0.000863 loss: 3.9741 (3.7760) weight_decay: 0.0500 (0.0500) time: 0.4739 data: 0.0147 max mem: 21002 Epoch: [213] [310/312] eta: 0:00:01 lr: 0.000862 min_lr: 0.000862 loss: 3.5489 (3.7693) weight_decay: 0.0500 (0.0500) time: 0.2907 data: 0.0001 max mem: 21002 Epoch: [213] [311/312] eta: 0:00:00 lr: 0.000862 min_lr: 0.000862 loss: 3.5282 (3.7661) weight_decay: 0.0500 (0.0500) time: 0.2905 data: 0.0001 max mem: 21002 Epoch: [213] Total time: 0:02:50 (0.5461 s / it) Averaged stats: lr: 0.000862 min_lr: 0.000862 loss: 3.5282 (3.7357) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.2928 (1.2928) acc1: 75.5208 (75.5208) acc5: 92.7083 (92.7083) time: 8.4596 data: 8.3414 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6940 (1.5702) acc1: 66.0156 (67.4080) acc5: 88.5417 (88.4000) time: 1.0460 data: 0.9401 max mem: 21002 Test: Total time: 0:00:09 (1.0556 s / it) * Acc@1 67.040 Acc@5 88.372 loss 1.584 Accuracy of the model on the 50000 test images: 67.0% Max accuracy: 67.04% Epoch: [214] [ 0/312] eta: 1:00:30 lr: 0.000862 min_lr: 0.000862 loss: 2.9992 (2.9992) weight_decay: 0.0500 (0.0500) time: 11.6353 data: 10.5535 max mem: 21002 Epoch: [214] [ 10/312] eta: 0:08:13 lr: 0.000861 min_lr: 0.000861 loss: 3.9036 (3.6623) weight_decay: 0.0500 (0.0500) time: 1.6337 data: 1.0273 max mem: 21002 Epoch: [214] [ 20/312] eta: 0:04:53 lr: 0.000861 min_lr: 0.000861 loss: 3.9036 (3.6438) weight_decay: 0.0500 (0.0500) time: 0.4738 data: 0.0376 max mem: 21002 Epoch: [214] [ 30/312] eta: 0:03:38 lr: 0.000860 min_lr: 0.000860 loss: 3.8411 (3.6953) weight_decay: 0.0500 (0.0500) time: 0.3027 data: 0.0015 max mem: 21002 Epoch: [214] [ 40/312] eta: 0:03:09 lr: 0.000859 min_lr: 0.000859 loss: 3.8411 (3.7187) weight_decay: 0.0500 (0.0500) time: 0.3737 data: 0.0232 max mem: 21002 Epoch: [214] [ 50/312] eta: 0:03:01 lr: 0.000859 min_lr: 0.000859 loss: 3.6983 (3.7198) weight_decay: 0.0500 (0.0500) time: 0.5649 data: 0.1196 max mem: 21002 Epoch: [214] [ 60/312] eta: 0:02:39 lr: 0.000858 min_lr: 0.000858 loss: 3.8678 (3.7569) weight_decay: 0.0500 (0.0500) time: 0.4989 data: 0.1102 max mem: 21002 Epoch: [214] [ 70/312] eta: 0:02:32 lr: 0.000858 min_lr: 0.000858 loss: 3.7664 (3.7051) weight_decay: 0.0500 (0.0500) time: 0.4769 data: 0.1184 max mem: 21002 Epoch: [214] [ 80/312] eta: 0:02:25 lr: 0.000857 min_lr: 0.000857 loss: 3.6624 (3.7065) weight_decay: 0.0500 (0.0500) time: 0.6105 data: 0.1784 max mem: 21002 Epoch: [214] [ 90/312] eta: 0:02:13 lr: 0.000857 min_lr: 0.000857 loss: 3.7020 (3.6995) weight_decay: 0.0500 (0.0500) time: 0.4886 data: 0.1228 max mem: 21002 Epoch: [214] [100/312] eta: 0:02:10 lr: 0.000856 min_lr: 0.000856 loss: 3.7254 (3.6970) weight_decay: 0.0500 (0.0500) time: 0.5706 data: 0.1731 max mem: 21002 Epoch: [214] [110/312] eta: 0:01:58 lr: 0.000855 min_lr: 0.000855 loss: 3.5735 (3.6833) weight_decay: 0.0500 (0.0500) time: 0.5226 data: 0.1233 max mem: 21002 Epoch: [214] [120/312] eta: 0:01:55 lr: 0.000855 min_lr: 0.000855 loss: 3.6916 (3.6969) weight_decay: 0.0500 (0.0500) time: 0.5178 data: 0.0899 max mem: 21002 Epoch: [214] [130/312] eta: 0:01:48 lr: 0.000854 min_lr: 0.000854 loss: 3.7806 (3.6808) weight_decay: 0.0500 (0.0500) time: 0.6456 data: 0.1207 max mem: 21002 Epoch: [214] [140/312] eta: 0:01:39 lr: 0.000854 min_lr: 0.000854 loss: 3.6883 (3.6824) weight_decay: 0.0500 (0.0500) time: 0.4435 data: 0.0594 max mem: 21002 Epoch: [214] [150/312] eta: 0:01:34 lr: 0.000853 min_lr: 0.000853 loss: 3.6883 (3.6894) weight_decay: 0.0500 (0.0500) time: 0.5181 data: 0.1273 max mem: 21002 Epoch: [214] [160/312] eta: 0:01:27 lr: 0.000852 min_lr: 0.000852 loss: 3.8261 (3.6879) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.1058 max mem: 21002 Epoch: [214] [170/312] eta: 0:01:21 lr: 0.000852 min_lr: 0.000852 loss: 4.0563 (3.7061) weight_decay: 0.0500 (0.0500) time: 0.4821 data: 0.0983 max mem: 21002 Epoch: [214] [180/312] eta: 0:01:16 lr: 0.000851 min_lr: 0.000851 loss: 4.0341 (3.7087) weight_decay: 0.0500 (0.0500) time: 0.5737 data: 0.1778 max mem: 21002 Epoch: [214] [190/312] eta: 0:01:08 lr: 0.000851 min_lr: 0.000851 loss: 3.6440 (3.7086) weight_decay: 0.0500 (0.0500) time: 0.4749 data: 0.0864 max mem: 21002 Epoch: [214] [200/312] eta: 0:01:03 lr: 0.000850 min_lr: 0.000850 loss: 3.6440 (3.6986) weight_decay: 0.0500 (0.0500) time: 0.4820 data: 0.0954 max mem: 21002 Epoch: [214] [210/312] eta: 0:00:58 lr: 0.000849 min_lr: 0.000849 loss: 3.7233 (3.7016) weight_decay: 0.0500 (0.0500) time: 0.6510 data: 0.1258 max mem: 21002 Epoch: [214] [220/312] eta: 0:00:51 lr: 0.000849 min_lr: 0.000849 loss: 3.9225 (3.7119) weight_decay: 0.0500 (0.0500) time: 0.5101 data: 0.0863 max mem: 21002 Epoch: [214] [230/312] eta: 0:00:46 lr: 0.000848 min_lr: 0.000848 loss: 4.0852 (3.7216) weight_decay: 0.0500 (0.0500) time: 0.5374 data: 0.1305 max mem: 21002 Epoch: [214] [240/312] eta: 0:00:40 lr: 0.000848 min_lr: 0.000848 loss: 3.8930 (3.7210) weight_decay: 0.0500 (0.0500) time: 0.5778 data: 0.0752 max mem: 21002 Epoch: [214] [250/312] eta: 0:00:34 lr: 0.000847 min_lr: 0.000847 loss: 3.7422 (3.7140) weight_decay: 0.0500 (0.0500) time: 0.4660 data: 0.0644 max mem: 21002 Epoch: [214] [260/312] eta: 0:00:29 lr: 0.000846 min_lr: 0.000846 loss: 3.9271 (3.7239) weight_decay: 0.0500 (0.0500) time: 0.5785 data: 0.1580 max mem: 21002 Epoch: [214] [270/312] eta: 0:00:23 lr: 0.000846 min_lr: 0.000846 loss: 3.8985 (3.7174) weight_decay: 0.0500 (0.0500) time: 0.5090 data: 0.0944 max mem: 21002 Epoch: [214] [280/312] eta: 0:00:17 lr: 0.000845 min_lr: 0.000845 loss: 3.6410 (3.7115) weight_decay: 0.0500 (0.0500) time: 0.4496 data: 0.0602 max mem: 21002 Epoch: [214] [290/312] eta: 0:00:12 lr: 0.000845 min_lr: 0.000845 loss: 3.6973 (3.7069) weight_decay: 0.0500 (0.0500) time: 0.6439 data: 0.1325 max mem: 21002 Epoch: [214] [300/312] eta: 0:00:06 lr: 0.000844 min_lr: 0.000844 loss: 3.4967 (3.7014) weight_decay: 0.0500 (0.0500) time: 0.4922 data: 0.0728 max mem: 21002 Epoch: [214] [310/312] eta: 0:00:01 lr: 0.000844 min_lr: 0.000844 loss: 3.7799 (3.7066) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [214] [311/312] eta: 0:00:00 lr: 0.000844 min_lr: 0.000844 loss: 3.7799 (3.7078) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [214] Total time: 0:02:50 (0.5469 s / it) Averaged stats: lr: 0.000844 min_lr: 0.000844 loss: 3.7799 (3.7328) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.3327 (1.3327) acc1: 73.3073 (73.3073) acc5: 91.2760 (91.2760) time: 8.6094 data: 8.4906 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8879 (1.7096) acc1: 62.3698 (64.7840) acc5: 86.7188 (86.6720) time: 1.0496 data: 0.9516 max mem: 21002 Test: Total time: 0:00:09 (1.0799 s / it) * Acc@1 64.976 Acc@5 87.044 loss 1.715 Accuracy of the model on the 50000 test images: 65.0% Max accuracy: 67.04% Epoch: [215] [ 0/312] eta: 1:02:03 lr: 0.000843 min_lr: 0.000843 loss: 4.4022 (4.4022) weight_decay: 0.0500 (0.0500) time: 11.9354 data: 8.7780 max mem: 21002 Epoch: [215] [ 10/312] eta: 0:08:03 lr: 0.000843 min_lr: 0.000843 loss: 4.0177 (3.9700) weight_decay: 0.0500 (0.0500) time: 1.6025 data: 0.9886 max mem: 21002 Epoch: [215] [ 20/312] eta: 0:05:20 lr: 0.000842 min_lr: 0.000842 loss: 3.8800 (3.8295) weight_decay: 0.0500 (0.0500) time: 0.5573 data: 0.1910 max mem: 21002 Epoch: [215] [ 30/312] eta: 0:03:56 lr: 0.000842 min_lr: 0.000842 loss: 3.8172 (3.8307) weight_decay: 0.0500 (0.0500) time: 0.4184 data: 0.0866 max mem: 21002 Epoch: [215] [ 40/312] eta: 0:03:19 lr: 0.000841 min_lr: 0.000841 loss: 3.8828 (3.8092) weight_decay: 0.0500 (0.0500) time: 0.3491 data: 0.0184 max mem: 21002 Epoch: [215] [ 50/312] eta: 0:03:08 lr: 0.000841 min_lr: 0.000841 loss: 4.0199 (3.8334) weight_decay: 0.0500 (0.0500) time: 0.5334 data: 0.1238 max mem: 21002 Epoch: [215] [ 60/312] eta: 0:02:43 lr: 0.000840 min_lr: 0.000840 loss: 4.1039 (3.8592) weight_decay: 0.0500 (0.0500) time: 0.4784 data: 0.1108 max mem: 21002 Epoch: [215] [ 70/312] eta: 0:02:39 lr: 0.000839 min_lr: 0.000839 loss: 4.0270 (3.8792) weight_decay: 0.0500 (0.0500) time: 0.5090 data: 0.1344 max mem: 21002 Epoch: [215] [ 80/312] eta: 0:02:28 lr: 0.000839 min_lr: 0.000839 loss: 4.0085 (3.8619) weight_decay: 0.0500 (0.0500) time: 0.6033 data: 0.1654 max mem: 21002 Epoch: [215] [ 90/312] eta: 0:02:17 lr: 0.000838 min_lr: 0.000838 loss: 3.8184 (3.8569) weight_decay: 0.0500 (0.0500) time: 0.4670 data: 0.1073 max mem: 21002 Epoch: [215] [100/312] eta: 0:02:12 lr: 0.000838 min_lr: 0.000838 loss: 3.7623 (3.8273) weight_decay: 0.0500 (0.0500) time: 0.5682 data: 0.1924 max mem: 21002 Epoch: [215] [110/312] eta: 0:01:59 lr: 0.000837 min_lr: 0.000837 loss: 3.8500 (3.8414) weight_decay: 0.0500 (0.0500) time: 0.4863 data: 0.1214 max mem: 21002 Epoch: [215] [120/312] eta: 0:01:55 lr: 0.000836 min_lr: 0.000836 loss: 3.9840 (3.8500) weight_decay: 0.0500 (0.0500) time: 0.4931 data: 0.0924 max mem: 21002 Epoch: [215] [130/312] eta: 0:01:49 lr: 0.000836 min_lr: 0.000836 loss: 3.9246 (3.8485) weight_decay: 0.0500 (0.0500) time: 0.6373 data: 0.1614 max mem: 21002 Epoch: [215] [140/312] eta: 0:01:39 lr: 0.000835 min_lr: 0.000835 loss: 3.8338 (3.8469) weight_decay: 0.0500 (0.0500) time: 0.4350 data: 0.0748 max mem: 21002 Epoch: [215] [150/312] eta: 0:01:34 lr: 0.000835 min_lr: 0.000835 loss: 3.3277 (3.8014) weight_decay: 0.0500 (0.0500) time: 0.4909 data: 0.1165 max mem: 21002 Epoch: [215] [160/312] eta: 0:01:28 lr: 0.000834 min_lr: 0.000834 loss: 3.2207 (3.7899) weight_decay: 0.0500 (0.0500) time: 0.6062 data: 0.1803 max mem: 21002 Epoch: [215] [170/312] eta: 0:01:21 lr: 0.000833 min_lr: 0.000833 loss: 3.8201 (3.7921) weight_decay: 0.0500 (0.0500) time: 0.4839 data: 0.1354 max mem: 21002 Epoch: [215] [180/312] eta: 0:01:16 lr: 0.000833 min_lr: 0.000833 loss: 3.9717 (3.8041) weight_decay: 0.0500 (0.0500) time: 0.5660 data: 0.2079 max mem: 21002 Epoch: [215] [190/312] eta: 0:01:08 lr: 0.000832 min_lr: 0.000832 loss: 3.8115 (3.7953) weight_decay: 0.0500 (0.0500) time: 0.4900 data: 0.1421 max mem: 21002 Epoch: [215] [200/312] eta: 0:01:04 lr: 0.000832 min_lr: 0.000832 loss: 3.6953 (3.7953) weight_decay: 0.0500 (0.0500) time: 0.4967 data: 0.1419 max mem: 21002 Epoch: [215] [210/312] eta: 0:00:58 lr: 0.000831 min_lr: 0.000831 loss: 3.9382 (3.7948) weight_decay: 0.0500 (0.0500) time: 0.7026 data: 0.2510 max mem: 21002 Epoch: [215] [220/312] eta: 0:00:51 lr: 0.000831 min_lr: 0.000831 loss: 3.6870 (3.7896) weight_decay: 0.0500 (0.0500) time: 0.4929 data: 0.1097 max mem: 21002 Epoch: [215] [230/312] eta: 0:00:46 lr: 0.000830 min_lr: 0.000830 loss: 3.8061 (3.7881) weight_decay: 0.0500 (0.0500) time: 0.4562 data: 0.0890 max mem: 21002 Epoch: [215] [240/312] eta: 0:00:40 lr: 0.000829 min_lr: 0.000829 loss: 3.8502 (3.7816) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.1593 max mem: 21002 Epoch: [215] [250/312] eta: 0:00:34 lr: 0.000829 min_lr: 0.000829 loss: 3.7038 (3.7811) weight_decay: 0.0500 (0.0500) time: 0.5039 data: 0.1218 max mem: 21002 Epoch: [215] [260/312] eta: 0:00:29 lr: 0.000828 min_lr: 0.000828 loss: 3.6518 (3.7708) weight_decay: 0.0500 (0.0500) time: 0.5633 data: 0.1570 max mem: 21002 Epoch: [215] [270/312] eta: 0:00:23 lr: 0.000828 min_lr: 0.000828 loss: 3.8151 (3.7784) weight_decay: 0.0500 (0.0500) time: 0.4714 data: 0.1063 max mem: 21002 Epoch: [215] [280/312] eta: 0:00:18 lr: 0.000827 min_lr: 0.000827 loss: 3.6238 (3.7636) weight_decay: 0.0500 (0.0500) time: 0.5162 data: 0.1196 max mem: 21002 Epoch: [215] [290/312] eta: 0:00:12 lr: 0.000827 min_lr: 0.000827 loss: 3.5888 (3.7647) weight_decay: 0.0500 (0.0500) time: 0.6474 data: 0.1939 max mem: 21002 Epoch: [215] [300/312] eta: 0:00:06 lr: 0.000826 min_lr: 0.000826 loss: 3.9188 (3.7684) weight_decay: 0.0500 (0.0500) time: 0.4140 data: 0.0746 max mem: 21002 Epoch: [215] [310/312] eta: 0:00:01 lr: 0.000825 min_lr: 0.000825 loss: 4.0217 (3.7759) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [215] [311/312] eta: 0:00:00 lr: 0.000825 min_lr: 0.000825 loss: 4.0217 (3.7771) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [215] Total time: 0:02:50 (0.5466 s / it) Averaged stats: lr: 0.000825 min_lr: 0.000825 loss: 4.0217 (3.7392) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.1616 (1.1616) acc1: 76.5625 (76.5625) acc5: 93.2292 (93.2292) time: 8.5033 data: 8.3853 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7692 (1.6521) acc1: 65.3646 (66.8640) acc5: 87.8906 (88.0640) time: 1.0298 data: 0.9318 max mem: 21002 Test: Total time: 0:00:09 (1.0395 s / it) * Acc@1 66.772 Acc@5 88.034 loss 1.657 Accuracy of the model on the 50000 test images: 66.8% Max accuracy: 67.04% Epoch: [216] [ 0/312] eta: 1:04:38 lr: 0.000825 min_lr: 0.000825 loss: 3.4630 (3.4630) weight_decay: 0.0500 (0.0500) time: 12.4325 data: 10.7914 max mem: 21002 Epoch: [216] [ 10/312] eta: 0:08:28 lr: 0.000825 min_lr: 0.000825 loss: 3.9654 (3.9361) weight_decay: 0.0500 (0.0500) time: 1.6825 data: 0.9955 max mem: 21002 Epoch: [216] [ 20/312] eta: 0:05:06 lr: 0.000824 min_lr: 0.000824 loss: 3.9172 (3.8655) weight_decay: 0.0500 (0.0500) time: 0.4810 data: 0.0407 max mem: 21002 Epoch: [216] [ 30/312] eta: 0:03:47 lr: 0.000823 min_lr: 0.000823 loss: 3.8876 (3.8935) weight_decay: 0.0500 (0.0500) time: 0.3237 data: 0.0330 max mem: 21002 Epoch: [216] [ 40/312] eta: 0:03:08 lr: 0.000823 min_lr: 0.000823 loss: 3.9175 (3.9092) weight_decay: 0.0500 (0.0500) time: 0.3163 data: 0.0229 max mem: 21002 Epoch: [216] [ 50/312] eta: 0:02:59 lr: 0.000822 min_lr: 0.000822 loss: 3.8893 (3.8304) weight_decay: 0.0500 (0.0500) time: 0.5012 data: 0.1936 max mem: 21002 Epoch: [216] [ 60/312] eta: 0:02:36 lr: 0.000822 min_lr: 0.000822 loss: 3.8266 (3.8425) weight_decay: 0.0500 (0.0500) time: 0.4750 data: 0.1722 max mem: 21002 Epoch: [216] [ 70/312] eta: 0:02:31 lr: 0.000821 min_lr: 0.000821 loss: 3.8796 (3.8201) weight_decay: 0.0500 (0.0500) time: 0.4790 data: 0.1743 max mem: 21002 Epoch: [216] [ 80/312] eta: 0:02:26 lr: 0.000821 min_lr: 0.000821 loss: 3.6541 (3.7916) weight_decay: 0.0500 (0.0500) time: 0.6718 data: 0.3284 max mem: 21002 Epoch: [216] [ 90/312] eta: 0:02:13 lr: 0.000820 min_lr: 0.000820 loss: 3.8050 (3.7895) weight_decay: 0.0500 (0.0500) time: 0.4972 data: 0.1731 max mem: 21002 Epoch: [216] [100/312] eta: 0:02:09 lr: 0.000819 min_lr: 0.000819 loss: 3.6522 (3.7574) weight_decay: 0.0500 (0.0500) time: 0.5227 data: 0.1743 max mem: 21002 Epoch: [216] [110/312] eta: 0:01:57 lr: 0.000819 min_lr: 0.000819 loss: 3.6196 (3.7535) weight_decay: 0.0500 (0.0500) time: 0.5060 data: 0.1568 max mem: 21002 Epoch: [216] [120/312] eta: 0:01:53 lr: 0.000818 min_lr: 0.000818 loss: 3.7999 (3.7633) weight_decay: 0.0500 (0.0500) time: 0.4913 data: 0.1309 max mem: 21002 Epoch: [216] [130/312] eta: 0:01:48 lr: 0.000818 min_lr: 0.000818 loss: 3.7889 (3.7596) weight_decay: 0.0500 (0.0500) time: 0.6765 data: 0.2310 max mem: 21002 Epoch: [216] [140/312] eta: 0:01:39 lr: 0.000817 min_lr: 0.000817 loss: 3.7598 (3.7493) weight_decay: 0.0500 (0.0500) time: 0.4797 data: 0.1070 max mem: 21002 Epoch: [216] [150/312] eta: 0:01:34 lr: 0.000817 min_lr: 0.000817 loss: 3.8187 (3.7515) weight_decay: 0.0500 (0.0500) time: 0.5056 data: 0.1273 max mem: 21002 Epoch: [216] [160/312] eta: 0:01:28 lr: 0.000816 min_lr: 0.000816 loss: 3.8677 (3.7493) weight_decay: 0.0500 (0.0500) time: 0.6408 data: 0.1485 max mem: 21002 Epoch: [216] [170/312] eta: 0:01:22 lr: 0.000815 min_lr: 0.000815 loss: 3.8143 (3.7481) weight_decay: 0.0500 (0.0500) time: 0.5261 data: 0.1261 max mem: 21002 Epoch: [216] [180/312] eta: 0:01:16 lr: 0.000815 min_lr: 0.000815 loss: 3.6266 (3.7339) weight_decay: 0.0500 (0.0500) time: 0.5562 data: 0.1745 max mem: 21002 Epoch: [216] [190/312] eta: 0:01:08 lr: 0.000814 min_lr: 0.000814 loss: 3.6870 (3.7302) weight_decay: 0.0500 (0.0500) time: 0.4592 data: 0.0764 max mem: 21002 Epoch: [216] [200/312] eta: 0:01:04 lr: 0.000814 min_lr: 0.000814 loss: 3.6760 (3.7262) weight_decay: 0.0500 (0.0500) time: 0.4931 data: 0.1114 max mem: 21002 Epoch: [216] [210/312] eta: 0:00:58 lr: 0.000813 min_lr: 0.000813 loss: 4.0298 (3.7351) weight_decay: 0.0500 (0.0500) time: 0.6783 data: 0.1674 max mem: 21002 Epoch: [216] [220/312] eta: 0:00:52 lr: 0.000812 min_lr: 0.000812 loss: 4.0298 (3.7402) weight_decay: 0.0500 (0.0500) time: 0.4992 data: 0.0846 max mem: 21002 Epoch: [216] [230/312] eta: 0:00:46 lr: 0.000812 min_lr: 0.000812 loss: 3.7659 (3.7273) weight_decay: 0.0500 (0.0500) time: 0.5143 data: 0.1016 max mem: 21002 Epoch: [216] [240/312] eta: 0:00:40 lr: 0.000811 min_lr: 0.000811 loss: 3.5625 (3.7324) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.0738 max mem: 21002 Epoch: [216] [250/312] eta: 0:00:34 lr: 0.000811 min_lr: 0.000811 loss: 4.0168 (3.7350) weight_decay: 0.0500 (0.0500) time: 0.4397 data: 0.0445 max mem: 21002 Epoch: [216] [260/312] eta: 0:00:29 lr: 0.000810 min_lr: 0.000810 loss: 4.0352 (3.7393) weight_decay: 0.0500 (0.0500) time: 0.5478 data: 0.1247 max mem: 21002 Epoch: [216] [270/312] eta: 0:00:23 lr: 0.000810 min_lr: 0.000810 loss: 3.8781 (3.7368) weight_decay: 0.0500 (0.0500) time: 0.5045 data: 0.0811 max mem: 21002 Epoch: [216] [280/312] eta: 0:00:17 lr: 0.000809 min_lr: 0.000809 loss: 3.7501 (3.7367) weight_decay: 0.0500 (0.0500) time: 0.4785 data: 0.0747 max mem: 21002 Epoch: [216] [290/312] eta: 0:00:12 lr: 0.000808 min_lr: 0.000808 loss: 3.6002 (3.7270) weight_decay: 0.0500 (0.0500) time: 0.6369 data: 0.1065 max mem: 21002 Epoch: [216] [300/312] eta: 0:00:06 lr: 0.000808 min_lr: 0.000808 loss: 3.5848 (3.7261) weight_decay: 0.0500 (0.0500) time: 0.4415 data: 0.0323 max mem: 21002 Epoch: [216] [310/312] eta: 0:00:01 lr: 0.000807 min_lr: 0.000807 loss: 3.8955 (3.7328) weight_decay: 0.0500 (0.0500) time: 0.2921 data: 0.0106 max mem: 21002 Epoch: [216] [311/312] eta: 0:00:00 lr: 0.000807 min_lr: 0.000807 loss: 3.8955 (3.7318) weight_decay: 0.0500 (0.0500) time: 0.2919 data: 0.0106 max mem: 21002 Epoch: [216] Total time: 0:02:50 (0.5472 s / it) Averaged stats: lr: 0.000807 min_lr: 0.000807 loss: 3.8955 (3.7334) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.3779 (1.3779) acc1: 75.2604 (75.2604) acc5: 92.5781 (92.5781) time: 8.5747 data: 8.4558 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8140 (1.6745) acc1: 64.4531 (67.0560) acc5: 86.9792 (87.8720) time: 1.0360 data: 0.9396 max mem: 21002 Test: Total time: 0:00:09 (1.0529 s / it) * Acc@1 67.160 Acc@5 88.188 loss 1.674 Accuracy of the model on the 50000 test images: 67.2% Max accuracy: 67.16% Epoch: [217] [ 0/312] eta: 0:57:33 lr: 0.000807 min_lr: 0.000807 loss: 3.2826 (3.2826) weight_decay: 0.0500 (0.0500) time: 11.0698 data: 8.6629 max mem: 21002 Epoch: [217] [ 10/312] eta: 0:07:20 lr: 0.000807 min_lr: 0.000807 loss: 3.6709 (3.5904) weight_decay: 0.0500 (0.0500) time: 1.4574 data: 0.9778 max mem: 21002 Epoch: [217] [ 20/312] eta: 0:05:28 lr: 0.000806 min_lr: 0.000806 loss: 3.7431 (3.6583) weight_decay: 0.0500 (0.0500) time: 0.6263 data: 0.2433 max mem: 21002 Epoch: [217] [ 30/312] eta: 0:04:01 lr: 0.000805 min_lr: 0.000805 loss: 3.7804 (3.7434) weight_decay: 0.0500 (0.0500) time: 0.5257 data: 0.1389 max mem: 21002 Epoch: [217] [ 40/312] eta: 0:03:32 lr: 0.000805 min_lr: 0.000805 loss: 3.8656 (3.7703) weight_decay: 0.0500 (0.0500) time: 0.4197 data: 0.0804 max mem: 21002 Epoch: [217] [ 50/312] eta: 0:03:15 lr: 0.000804 min_lr: 0.000804 loss: 4.0466 (3.8161) weight_decay: 0.0500 (0.0500) time: 0.5722 data: 0.1722 max mem: 21002 Epoch: [217] [ 60/312] eta: 0:02:48 lr: 0.000804 min_lr: 0.000804 loss: 4.0751 (3.8180) weight_decay: 0.0500 (0.0500) time: 0.4448 data: 0.0938 max mem: 21002 Epoch: [217] [ 70/312] eta: 0:02:46 lr: 0.000803 min_lr: 0.000803 loss: 3.8852 (3.7985) weight_decay: 0.0500 (0.0500) time: 0.5437 data: 0.1460 max mem: 21002 Epoch: [217] [ 80/312] eta: 0:02:32 lr: 0.000803 min_lr: 0.000803 loss: 3.9105 (3.8232) weight_decay: 0.0500 (0.0500) time: 0.6145 data: 0.1448 max mem: 21002 Epoch: [217] [ 90/312] eta: 0:02:21 lr: 0.000802 min_lr: 0.000802 loss: 3.9326 (3.8262) weight_decay: 0.0500 (0.0500) time: 0.4619 data: 0.1046 max mem: 21002 Epoch: [217] [100/312] eta: 0:02:15 lr: 0.000801 min_lr: 0.000801 loss: 3.8182 (3.8287) weight_decay: 0.0500 (0.0500) time: 0.5787 data: 0.2030 max mem: 21002 Epoch: [217] [110/312] eta: 0:02:03 lr: 0.000801 min_lr: 0.000801 loss: 3.8847 (3.8244) weight_decay: 0.0500 (0.0500) time: 0.4782 data: 0.0993 max mem: 21002 Epoch: [217] [120/312] eta: 0:01:58 lr: 0.000800 min_lr: 0.000800 loss: 3.8039 (3.8022) weight_decay: 0.0500 (0.0500) time: 0.4823 data: 0.1255 max mem: 21002 Epoch: [217] [130/312] eta: 0:01:53 lr: 0.000800 min_lr: 0.000800 loss: 3.7975 (3.8108) weight_decay: 0.0500 (0.0500) time: 0.6956 data: 0.2303 max mem: 21002 Epoch: [217] [140/312] eta: 0:01:43 lr: 0.000799 min_lr: 0.000799 loss: 3.8938 (3.8132) weight_decay: 0.0500 (0.0500) time: 0.5036 data: 0.1054 max mem: 21002 Epoch: [217] [150/312] eta: 0:01:37 lr: 0.000799 min_lr: 0.000799 loss: 3.8144 (3.8015) weight_decay: 0.0500 (0.0500) time: 0.4729 data: 0.0827 max mem: 21002 Epoch: [217] [160/312] eta: 0:01:30 lr: 0.000798 min_lr: 0.000798 loss: 3.6448 (3.7954) weight_decay: 0.0500 (0.0500) time: 0.5835 data: 0.0892 max mem: 21002 Epoch: [217] [170/312] eta: 0:01:23 lr: 0.000797 min_lr: 0.000797 loss: 3.7254 (3.7874) weight_decay: 0.0500 (0.0500) time: 0.4826 data: 0.0917 max mem: 21002 Epoch: [217] [180/312] eta: 0:01:18 lr: 0.000797 min_lr: 0.000797 loss: 3.5791 (3.7730) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.1960 max mem: 21002 Epoch: [217] [190/312] eta: 0:01:10 lr: 0.000796 min_lr: 0.000796 loss: 3.7884 (3.7861) weight_decay: 0.0500 (0.0500) time: 0.5128 data: 0.1117 max mem: 21002 Epoch: [217] [200/312] eta: 0:01:05 lr: 0.000796 min_lr: 0.000796 loss: 4.0049 (3.7845) weight_decay: 0.0500 (0.0500) time: 0.4361 data: 0.0732 max mem: 21002 Epoch: [217] [210/312] eta: 0:00:59 lr: 0.000795 min_lr: 0.000795 loss: 3.9975 (3.7857) weight_decay: 0.0500 (0.0500) time: 0.6465 data: 0.1961 max mem: 21002 Epoch: [217] [220/312] eta: 0:00:52 lr: 0.000794 min_lr: 0.000794 loss: 3.9541 (3.7936) weight_decay: 0.0500 (0.0500) time: 0.5047 data: 0.1239 max mem: 21002 Epoch: [217] [230/312] eta: 0:00:47 lr: 0.000794 min_lr: 0.000794 loss: 3.8023 (3.7869) weight_decay: 0.0500 (0.0500) time: 0.4828 data: 0.1213 max mem: 21002 Epoch: [217] [240/312] eta: 0:00:41 lr: 0.000793 min_lr: 0.000793 loss: 3.7442 (3.7805) weight_decay: 0.0500 (0.0500) time: 0.5649 data: 0.1270 max mem: 21002 Epoch: [217] [250/312] eta: 0:00:35 lr: 0.000793 min_lr: 0.000793 loss: 3.6509 (3.7709) weight_decay: 0.0500 (0.0500) time: 0.4766 data: 0.1138 max mem: 21002 Epoch: [217] [260/312] eta: 0:00:29 lr: 0.000792 min_lr: 0.000792 loss: 3.7515 (3.7699) weight_decay: 0.0500 (0.0500) time: 0.6152 data: 0.2293 max mem: 21002 Epoch: [217] [270/312] eta: 0:00:23 lr: 0.000792 min_lr: 0.000792 loss: 4.0591 (3.7869) weight_decay: 0.0500 (0.0500) time: 0.5099 data: 0.1222 max mem: 21002 Epoch: [217] [280/312] eta: 0:00:18 lr: 0.000791 min_lr: 0.000791 loss: 4.0591 (3.7952) weight_decay: 0.0500 (0.0500) time: 0.4756 data: 0.1113 max mem: 21002 Epoch: [217] [290/312] eta: 0:00:12 lr: 0.000790 min_lr: 0.000790 loss: 3.8697 (3.7828) weight_decay: 0.0500 (0.0500) time: 0.5472 data: 0.1688 max mem: 21002 Epoch: [217] [300/312] eta: 0:00:06 lr: 0.000790 min_lr: 0.000790 loss: 3.8362 (3.7872) weight_decay: 0.0500 (0.0500) time: 0.3552 data: 0.0580 max mem: 21002 Epoch: [217] [310/312] eta: 0:00:01 lr: 0.000789 min_lr: 0.000789 loss: 3.8362 (3.7830) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [217] [311/312] eta: 0:00:00 lr: 0.000789 min_lr: 0.000789 loss: 3.8534 (3.7857) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [217] Total time: 0:02:51 (0.5486 s / it) Averaged stats: lr: 0.000789 min_lr: 0.000789 loss: 3.8534 (3.7276) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.4821 (1.4821) acc1: 75.3906 (75.3906) acc5: 91.9271 (91.9271) time: 8.3526 data: 8.2336 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8963 (1.7883) acc1: 66.0156 (66.8320) acc5: 86.8490 (88.0000) time: 1.0282 data: 0.9267 max mem: 21002 Test: Total time: 0:00:09 (1.0382 s / it) * Acc@1 66.312 Acc@5 87.780 loss 1.799 Accuracy of the model on the 50000 test images: 66.3% Max accuracy: 67.16% Epoch: [218] [ 0/312] eta: 0:57:53 lr: 0.000789 min_lr: 0.000789 loss: 3.6217 (3.6217) weight_decay: 0.0500 (0.0500) time: 11.1325 data: 10.3290 max mem: 21002 Epoch: [218] [ 10/312] eta: 0:07:46 lr: 0.000789 min_lr: 0.000789 loss: 4.1683 (4.1024) weight_decay: 0.0500 (0.0500) time: 1.5458 data: 1.0141 max mem: 21002 Epoch: [218] [ 20/312] eta: 0:05:09 lr: 0.000788 min_lr: 0.000788 loss: 3.7536 (3.7573) weight_decay: 0.0500 (0.0500) time: 0.5559 data: 0.0913 max mem: 21002 Epoch: [218] [ 30/312] eta: 0:03:49 lr: 0.000788 min_lr: 0.000788 loss: 3.5562 (3.7324) weight_decay: 0.0500 (0.0500) time: 0.4113 data: 0.0543 max mem: 21002 Epoch: [218] [ 40/312] eta: 0:03:20 lr: 0.000787 min_lr: 0.000787 loss: 3.6544 (3.7473) weight_decay: 0.0500 (0.0500) time: 0.3972 data: 0.0936 max mem: 21002 Epoch: [218] [ 50/312] eta: 0:03:04 lr: 0.000786 min_lr: 0.000786 loss: 3.8696 (3.7904) weight_decay: 0.0500 (0.0500) time: 0.5311 data: 0.2261 max mem: 21002 Epoch: [218] [ 60/312] eta: 0:02:42 lr: 0.000786 min_lr: 0.000786 loss: 3.9543 (3.7946) weight_decay: 0.0500 (0.0500) time: 0.4578 data: 0.1589 max mem: 21002 Epoch: [218] [ 70/312] eta: 0:02:37 lr: 0.000785 min_lr: 0.000785 loss: 3.8594 (3.7889) weight_decay: 0.0500 (0.0500) time: 0.5197 data: 0.2132 max mem: 21002 Epoch: [218] [ 80/312] eta: 0:02:29 lr: 0.000785 min_lr: 0.000785 loss: 3.6603 (3.7482) weight_decay: 0.0500 (0.0500) time: 0.6477 data: 0.3082 max mem: 21002 Epoch: [218] [ 90/312] eta: 0:02:17 lr: 0.000784 min_lr: 0.000784 loss: 3.2657 (3.7240) weight_decay: 0.0500 (0.0500) time: 0.5076 data: 0.1804 max mem: 21002 Epoch: [218] [100/312] eta: 0:02:09 lr: 0.000784 min_lr: 0.000784 loss: 3.6686 (3.7345) weight_decay: 0.0500 (0.0500) time: 0.4709 data: 0.1418 max mem: 21002 Epoch: [218] [110/312] eta: 0:02:00 lr: 0.000783 min_lr: 0.000783 loss: 3.5995 (3.7017) weight_decay: 0.0500 (0.0500) time: 0.4919 data: 0.1499 max mem: 21002 Epoch: [218] [120/312] eta: 0:01:56 lr: 0.000782 min_lr: 0.000782 loss: 3.5995 (3.7178) weight_decay: 0.0500 (0.0500) time: 0.5707 data: 0.2013 max mem: 21002 Epoch: [218] [130/312] eta: 0:01:49 lr: 0.000782 min_lr: 0.000782 loss: 3.8391 (3.7115) weight_decay: 0.0500 (0.0500) time: 0.6385 data: 0.2682 max mem: 21002 Epoch: [218] [140/312] eta: 0:01:39 lr: 0.000781 min_lr: 0.000781 loss: 3.6658 (3.7061) weight_decay: 0.0500 (0.0500) time: 0.4412 data: 0.1446 max mem: 21002 Epoch: [218] [150/312] eta: 0:01:36 lr: 0.000781 min_lr: 0.000781 loss: 3.6658 (3.7034) weight_decay: 0.0500 (0.0500) time: 0.5703 data: 0.1941 max mem: 21002 Epoch: [218] [160/312] eta: 0:01:30 lr: 0.000780 min_lr: 0.000780 loss: 3.6343 (3.7056) weight_decay: 0.0500 (0.0500) time: 0.6784 data: 0.2994 max mem: 21002 Epoch: [218] [170/312] eta: 0:01:21 lr: 0.000780 min_lr: 0.000780 loss: 3.5797 (3.6976) weight_decay: 0.0500 (0.0500) time: 0.4039 data: 0.1119 max mem: 21002 Epoch: [218] [180/312] eta: 0:01:16 lr: 0.000779 min_lr: 0.000779 loss: 3.9611 (3.7113) weight_decay: 0.0500 (0.0500) time: 0.5003 data: 0.1649 max mem: 21002 Epoch: [218] [190/312] eta: 0:01:09 lr: 0.000778 min_lr: 0.000778 loss: 3.9888 (3.7122) weight_decay: 0.0500 (0.0500) time: 0.5108 data: 0.1824 max mem: 21002 Epoch: [218] [200/312] eta: 0:01:03 lr: 0.000778 min_lr: 0.000778 loss: 3.6755 (3.7039) weight_decay: 0.0500 (0.0500) time: 0.4636 data: 0.1801 max mem: 21002 Epoch: [218] [210/312] eta: 0:00:58 lr: 0.000777 min_lr: 0.000777 loss: 3.8145 (3.7149) weight_decay: 0.0500 (0.0500) time: 0.6340 data: 0.3491 max mem: 21002 Epoch: [218] [220/312] eta: 0:00:51 lr: 0.000777 min_lr: 0.000777 loss: 3.8411 (3.7177) weight_decay: 0.0500 (0.0500) time: 0.4722 data: 0.1870 max mem: 21002 Epoch: [218] [230/312] eta: 0:00:46 lr: 0.000776 min_lr: 0.000776 loss: 3.7786 (3.7141) weight_decay: 0.0500 (0.0500) time: 0.5189 data: 0.2339 max mem: 21002 Epoch: [218] [240/312] eta: 0:00:41 lr: 0.000776 min_lr: 0.000776 loss: 3.3324 (3.7156) weight_decay: 0.0500 (0.0500) time: 0.6988 data: 0.4113 max mem: 21002 Epoch: [218] [250/312] eta: 0:00:34 lr: 0.000775 min_lr: 0.000775 loss: 3.4315 (3.7102) weight_decay: 0.0500 (0.0500) time: 0.4702 data: 0.1829 max mem: 21002 Epoch: [218] [260/312] eta: 0:00:29 lr: 0.000774 min_lr: 0.000774 loss: 3.9417 (3.7154) weight_decay: 0.0500 (0.0500) time: 0.4859 data: 0.2013 max mem: 21002 Epoch: [218] [270/312] eta: 0:00:23 lr: 0.000774 min_lr: 0.000774 loss: 3.4087 (3.6965) weight_decay: 0.0500 (0.0500) time: 0.4814 data: 0.1964 max mem: 21002 Epoch: [218] [280/312] eta: 0:00:18 lr: 0.000773 min_lr: 0.000773 loss: 3.3332 (3.7000) weight_decay: 0.0500 (0.0500) time: 0.5078 data: 0.2205 max mem: 21002 Epoch: [218] [290/312] eta: 0:00:12 lr: 0.000773 min_lr: 0.000773 loss: 3.9417 (3.7031) weight_decay: 0.0500 (0.0500) time: 0.6225 data: 0.3273 max mem: 21002 Epoch: [218] [300/312] eta: 0:00:06 lr: 0.000772 min_lr: 0.000772 loss: 3.7260 (3.6937) weight_decay: 0.0500 (0.0500) time: 0.4063 data: 0.1177 max mem: 21002 Epoch: [218] [310/312] eta: 0:00:01 lr: 0.000772 min_lr: 0.000772 loss: 3.4911 (3.6916) weight_decay: 0.0500 (0.0500) time: 0.2875 data: 0.0106 max mem: 21002 Epoch: [218] [311/312] eta: 0:00:00 lr: 0.000772 min_lr: 0.000772 loss: 3.4911 (3.6930) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [218] Total time: 0:02:50 (0.5463 s / it) Averaged stats: lr: 0.000772 min_lr: 0.000772 loss: 3.4911 (3.7351) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.4196 (1.4196) acc1: 73.1771 (73.1771) acc5: 90.4948 (90.4948) time: 8.6461 data: 8.5271 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6504 (1.5911) acc1: 66.1458 (66.3520) acc5: 87.7604 (87.9200) time: 1.0552 data: 0.9475 max mem: 21002 Test: Total time: 0:00:09 (1.0891 s / it) * Acc@1 66.208 Acc@5 87.704 loss 1.596 Accuracy of the model on the 50000 test images: 66.2% Max accuracy: 67.16% Epoch: [219] [ 0/312] eta: 1:02:00 lr: 0.000771 min_lr: 0.000771 loss: 4.1134 (4.1134) weight_decay: 0.0500 (0.0500) time: 11.9253 data: 11.6397 max mem: 21002 Epoch: [219] [ 10/312] eta: 0:07:30 lr: 0.000771 min_lr: 0.000771 loss: 3.6651 (3.6879) weight_decay: 0.0500 (0.0500) time: 1.4915 data: 1.0587 max mem: 21002 Epoch: [219] [ 20/312] eta: 0:05:00 lr: 0.000770 min_lr: 0.000770 loss: 3.6651 (3.6695) weight_decay: 0.0500 (0.0500) time: 0.4832 data: 0.0765 max mem: 21002 Epoch: [219] [ 30/312] eta: 0:03:42 lr: 0.000770 min_lr: 0.000770 loss: 3.9580 (3.8177) weight_decay: 0.0500 (0.0500) time: 0.4016 data: 0.0766 max mem: 21002 Epoch: [219] [ 40/312] eta: 0:03:22 lr: 0.000769 min_lr: 0.000769 loss: 4.0327 (3.7583) weight_decay: 0.0500 (0.0500) time: 0.4437 data: 0.1564 max mem: 21002 Epoch: [219] [ 50/312] eta: 0:03:10 lr: 0.000769 min_lr: 0.000769 loss: 3.5465 (3.7120) weight_decay: 0.0500 (0.0500) time: 0.6329 data: 0.3064 max mem: 21002 Epoch: [219] [ 60/312] eta: 0:02:45 lr: 0.000768 min_lr: 0.000768 loss: 3.6241 (3.7207) weight_decay: 0.0500 (0.0500) time: 0.4746 data: 0.1508 max mem: 21002 Epoch: [219] [ 70/312] eta: 0:02:38 lr: 0.000767 min_lr: 0.000767 loss: 3.8712 (3.7205) weight_decay: 0.0500 (0.0500) time: 0.4641 data: 0.1340 max mem: 21002 Epoch: [219] [ 80/312] eta: 0:02:30 lr: 0.000767 min_lr: 0.000767 loss: 3.9148 (3.7603) weight_decay: 0.0500 (0.0500) time: 0.6328 data: 0.2742 max mem: 21002 Epoch: [219] [ 90/312] eta: 0:02:15 lr: 0.000766 min_lr: 0.000766 loss: 3.9103 (3.7600) weight_decay: 0.0500 (0.0500) time: 0.4597 data: 0.1411 max mem: 21002 Epoch: [219] [100/312] eta: 0:02:11 lr: 0.000766 min_lr: 0.000766 loss: 3.8744 (3.7620) weight_decay: 0.0500 (0.0500) time: 0.4935 data: 0.1469 max mem: 21002 Epoch: [219] [110/312] eta: 0:01:58 lr: 0.000765 min_lr: 0.000765 loss: 3.8761 (3.7505) weight_decay: 0.0500 (0.0500) time: 0.4894 data: 0.1469 max mem: 21002 Epoch: [219] [120/312] eta: 0:01:55 lr: 0.000765 min_lr: 0.000765 loss: 3.8691 (3.7426) weight_decay: 0.0500 (0.0500) time: 0.5085 data: 0.1095 max mem: 21002 Epoch: [219] [130/312] eta: 0:01:52 lr: 0.000764 min_lr: 0.000764 loss: 3.8870 (3.7555) weight_decay: 0.0500 (0.0500) time: 0.7671 data: 0.1908 max mem: 21002 Epoch: [219] [140/312] eta: 0:01:42 lr: 0.000764 min_lr: 0.000764 loss: 3.8870 (3.7566) weight_decay: 0.0500 (0.0500) time: 0.5610 data: 0.0974 max mem: 21002 Epoch: [219] [150/312] eta: 0:01:36 lr: 0.000763 min_lr: 0.000763 loss: 3.8484 (3.7605) weight_decay: 0.0500 (0.0500) time: 0.4534 data: 0.0620 max mem: 21002 Epoch: [219] [160/312] eta: 0:01:30 lr: 0.000762 min_lr: 0.000762 loss: 3.6927 (3.7530) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0466 max mem: 21002 Epoch: [219] [170/312] eta: 0:01:22 lr: 0.000762 min_lr: 0.000762 loss: 3.6671 (3.7443) weight_decay: 0.0500 (0.0500) time: 0.4909 data: 0.0278 max mem: 21002 Epoch: [219] [180/312] eta: 0:01:17 lr: 0.000761 min_lr: 0.000761 loss: 3.8125 (3.7502) weight_decay: 0.0500 (0.0500) time: 0.5323 data: 0.0819 max mem: 21002 Epoch: [219] [190/312] eta: 0:01:09 lr: 0.000761 min_lr: 0.000761 loss: 3.8066 (3.7307) weight_decay: 0.0500 (0.0500) time: 0.4755 data: 0.0549 max mem: 21002 Epoch: [219] [200/312] eta: 0:01:04 lr: 0.000760 min_lr: 0.000760 loss: 3.8055 (3.7307) weight_decay: 0.0500 (0.0500) time: 0.4522 data: 0.0339 max mem: 21002 Epoch: [219] [210/312] eta: 0:00:59 lr: 0.000760 min_lr: 0.000760 loss: 3.8633 (3.7338) weight_decay: 0.0500 (0.0500) time: 0.6708 data: 0.1031 max mem: 21002 Epoch: [219] [220/312] eta: 0:00:52 lr: 0.000759 min_lr: 0.000759 loss: 3.7135 (3.7223) weight_decay: 0.0500 (0.0500) time: 0.5104 data: 0.0701 max mem: 21002 Epoch: [219] [230/312] eta: 0:00:46 lr: 0.000758 min_lr: 0.000758 loss: 3.7852 (3.7287) weight_decay: 0.0500 (0.0500) time: 0.4407 data: 0.0613 max mem: 21002 Epoch: [219] [240/312] eta: 0:00:40 lr: 0.000758 min_lr: 0.000758 loss: 4.0046 (3.7313) weight_decay: 0.0500 (0.0500) time: 0.5771 data: 0.1512 max mem: 21002 Epoch: [219] [250/312] eta: 0:00:34 lr: 0.000757 min_lr: 0.000757 loss: 3.8944 (3.7349) weight_decay: 0.0500 (0.0500) time: 0.4694 data: 0.0909 max mem: 21002 Epoch: [219] [260/312] eta: 0:00:29 lr: 0.000757 min_lr: 0.000757 loss: 3.8829 (3.7406) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0616 max mem: 21002 Epoch: [219] [270/312] eta: 0:00:23 lr: 0.000756 min_lr: 0.000756 loss: 3.9442 (3.7424) weight_decay: 0.0500 (0.0500) time: 0.5471 data: 0.0615 max mem: 21002 Epoch: [219] [280/312] eta: 0:00:18 lr: 0.000756 min_lr: 0.000756 loss: 3.8810 (3.7412) weight_decay: 0.0500 (0.0500) time: 0.4977 data: 0.0111 max mem: 21002 Epoch: [219] [290/312] eta: 0:00:12 lr: 0.000755 min_lr: 0.000755 loss: 3.8542 (3.7358) weight_decay: 0.0500 (0.0500) time: 0.5735 data: 0.0107 max mem: 21002 Epoch: [219] [300/312] eta: 0:00:06 lr: 0.000755 min_lr: 0.000755 loss: 3.7603 (3.7381) weight_decay: 0.0500 (0.0500) time: 0.3670 data: 0.0002 max mem: 21002 Epoch: [219] [310/312] eta: 0:00:01 lr: 0.000754 min_lr: 0.000754 loss: 3.8155 (3.7311) weight_decay: 0.0500 (0.0500) time: 0.2832 data: 0.0001 max mem: 21002 Epoch: [219] [311/312] eta: 0:00:00 lr: 0.000754 min_lr: 0.000754 loss: 3.8155 (3.7288) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [219] Total time: 0:02:50 (0.5456 s / it) Averaged stats: lr: 0.000754 min_lr: 0.000754 loss: 3.8155 (3.7471) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.2494 (1.2494) acc1: 76.1719 (76.1719) acc5: 93.3594 (93.3594) time: 8.3720 data: 8.2544 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5783 (1.5342) acc1: 67.3177 (68.6880) acc5: 89.7135 (89.2640) time: 1.0251 data: 0.9173 max mem: 21002 Test: Total time: 0:00:09 (1.0413 s / it) * Acc@1 68.388 Acc@5 89.290 loss 1.541 Accuracy of the model on the 50000 test images: 68.4% Max accuracy: 68.39% Epoch: [220] [ 0/312] eta: 1:02:01 lr: 0.000754 min_lr: 0.000754 loss: 3.0950 (3.0950) weight_decay: 0.0500 (0.0500) time: 11.9286 data: 10.8271 max mem: 21002 Epoch: [220] [ 10/312] eta: 0:07:16 lr: 0.000753 min_lr: 0.000753 loss: 3.4853 (3.5469) weight_decay: 0.0500 (0.0500) time: 1.4450 data: 1.0805 max mem: 21002 Epoch: [220] [ 20/312] eta: 0:04:56 lr: 0.000753 min_lr: 0.000753 loss: 3.5567 (3.5812) weight_decay: 0.0500 (0.0500) time: 0.4698 data: 0.1648 max mem: 21002 Epoch: [220] [ 30/312] eta: 0:03:39 lr: 0.000752 min_lr: 0.000752 loss: 3.8175 (3.7153) weight_decay: 0.0500 (0.0500) time: 0.4141 data: 0.1122 max mem: 21002 Epoch: [220] [ 40/312] eta: 0:03:24 lr: 0.000752 min_lr: 0.000752 loss: 3.8519 (3.7030) weight_decay: 0.0500 (0.0500) time: 0.4725 data: 0.1864 max mem: 21002 Epoch: [220] [ 50/312] eta: 0:03:12 lr: 0.000751 min_lr: 0.000751 loss: 3.8519 (3.7315) weight_decay: 0.0500 (0.0500) time: 0.6631 data: 0.3722 max mem: 21002 Epoch: [220] [ 60/312] eta: 0:02:46 lr: 0.000750 min_lr: 0.000750 loss: 3.9428 (3.7597) weight_decay: 0.0500 (0.0500) time: 0.4748 data: 0.1864 max mem: 21002 Epoch: [220] [ 70/312] eta: 0:02:40 lr: 0.000750 min_lr: 0.000750 loss: 3.8298 (3.7223) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.1976 max mem: 21002 Epoch: [220] [ 80/312] eta: 0:02:32 lr: 0.000749 min_lr: 0.000749 loss: 3.8114 (3.7366) weight_decay: 0.0500 (0.0500) time: 0.6538 data: 0.3356 max mem: 21002 Epoch: [220] [ 90/312] eta: 0:02:18 lr: 0.000749 min_lr: 0.000749 loss: 3.9461 (3.7620) weight_decay: 0.0500 (0.0500) time: 0.4729 data: 0.1640 max mem: 21002 Epoch: [220] [100/312] eta: 0:02:13 lr: 0.000748 min_lr: 0.000748 loss: 3.9623 (3.7797) weight_decay: 0.0500 (0.0500) time: 0.5042 data: 0.1814 max mem: 21002 Epoch: [220] [110/312] eta: 0:02:00 lr: 0.000748 min_lr: 0.000748 loss: 3.8097 (3.7791) weight_decay: 0.0500 (0.0500) time: 0.4821 data: 0.1561 max mem: 21002 Epoch: [220] [120/312] eta: 0:01:54 lr: 0.000747 min_lr: 0.000747 loss: 3.7262 (3.7655) weight_decay: 0.0500 (0.0500) time: 0.4455 data: 0.1457 max mem: 21002 Epoch: [220] [130/312] eta: 0:01:49 lr: 0.000747 min_lr: 0.000747 loss: 3.8027 (3.7739) weight_decay: 0.0500 (0.0500) time: 0.6365 data: 0.3305 max mem: 21002 Epoch: [220] [140/312] eta: 0:01:39 lr: 0.000746 min_lr: 0.000746 loss: 3.8065 (3.7648) weight_decay: 0.0500 (0.0500) time: 0.4797 data: 0.1854 max mem: 21002 Epoch: [220] [150/312] eta: 0:01:35 lr: 0.000745 min_lr: 0.000745 loss: 4.0072 (3.7763) weight_decay: 0.0500 (0.0500) time: 0.4909 data: 0.1787 max mem: 21002 Epoch: [220] [160/312] eta: 0:01:29 lr: 0.000745 min_lr: 0.000745 loss: 3.9642 (3.7660) weight_decay: 0.0500 (0.0500) time: 0.6592 data: 0.3308 max mem: 21002 Epoch: [220] [170/312] eta: 0:01:21 lr: 0.000744 min_lr: 0.000744 loss: 3.5211 (3.7609) weight_decay: 0.0500 (0.0500) time: 0.4585 data: 0.1574 max mem: 21002 Epoch: [220] [180/312] eta: 0:01:16 lr: 0.000744 min_lr: 0.000744 loss: 3.9256 (3.7646) weight_decay: 0.0500 (0.0500) time: 0.5016 data: 0.1843 max mem: 21002 Epoch: [220] [190/312] eta: 0:01:08 lr: 0.000743 min_lr: 0.000743 loss: 3.7528 (3.7548) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.1796 max mem: 21002 Epoch: [220] [200/312] eta: 0:01:03 lr: 0.000743 min_lr: 0.000743 loss: 3.7102 (3.7477) weight_decay: 0.0500 (0.0500) time: 0.4722 data: 0.1660 max mem: 21002 Epoch: [220] [210/312] eta: 0:00:58 lr: 0.000742 min_lr: 0.000742 loss: 3.7972 (3.7579) weight_decay: 0.0500 (0.0500) time: 0.6512 data: 0.3446 max mem: 21002 Epoch: [220] [220/312] eta: 0:00:51 lr: 0.000742 min_lr: 0.000742 loss: 3.8447 (3.7539) weight_decay: 0.0500 (0.0500) time: 0.4868 data: 0.1795 max mem: 21002 Epoch: [220] [230/312] eta: 0:00:46 lr: 0.000741 min_lr: 0.000741 loss: 3.8767 (3.7651) weight_decay: 0.0500 (0.0500) time: 0.5090 data: 0.1791 max mem: 21002 Epoch: [220] [240/312] eta: 0:00:41 lr: 0.000740 min_lr: 0.000740 loss: 3.7989 (3.7613) weight_decay: 0.0500 (0.0500) time: 0.6609 data: 0.3501 max mem: 21002 Epoch: [220] [250/312] eta: 0:00:34 lr: 0.000740 min_lr: 0.000740 loss: 3.6708 (3.7586) weight_decay: 0.0500 (0.0500) time: 0.4957 data: 0.1718 max mem: 21002 Epoch: [220] [260/312] eta: 0:00:29 lr: 0.000739 min_lr: 0.000739 loss: 3.6755 (3.7551) weight_decay: 0.0500 (0.0500) time: 0.5104 data: 0.1751 max mem: 21002 Epoch: [220] [270/312] eta: 0:00:23 lr: 0.000739 min_lr: 0.000739 loss: 3.6755 (3.7464) weight_decay: 0.0500 (0.0500) time: 0.4749 data: 0.1752 max mem: 21002 Epoch: [220] [280/312] eta: 0:00:17 lr: 0.000738 min_lr: 0.000738 loss: 3.8109 (3.7492) weight_decay: 0.0500 (0.0500) time: 0.4839 data: 0.1622 max mem: 21002 Epoch: [220] [290/312] eta: 0:00:12 lr: 0.000738 min_lr: 0.000738 loss: 3.9916 (3.7421) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.2592 max mem: 21002 Epoch: [220] [300/312] eta: 0:00:06 lr: 0.000737 min_lr: 0.000737 loss: 3.7821 (3.7448) weight_decay: 0.0500 (0.0500) time: 0.3972 data: 0.0975 max mem: 21002 Epoch: [220] [310/312] eta: 0:00:01 lr: 0.000736 min_lr: 0.000736 loss: 4.1349 (3.7502) weight_decay: 0.0500 (0.0500) time: 0.2868 data: 0.0001 max mem: 21002 Epoch: [220] [311/312] eta: 0:00:00 lr: 0.000736 min_lr: 0.000736 loss: 4.1349 (3.7518) weight_decay: 0.0500 (0.0500) time: 0.2791 data: 0.0001 max mem: 21002 Epoch: [220] Total time: 0:02:49 (0.5436 s / it) Averaged stats: lr: 0.000736 min_lr: 0.000736 loss: 4.1349 (3.7070) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:12 loss: 1.4318 (1.4318) acc1: 75.1302 (75.1302) acc5: 92.5781 (92.5781) time: 8.0704 data: 7.9522 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.7769 (1.6535) acc1: 67.3177 (68.1920) acc5: 88.0208 (88.7040) time: 1.0107 data: 0.9134 max mem: 21002 Test: Total time: 0:00:09 (1.0250 s / it) * Acc@1 67.432 Acc@5 88.486 loss 1.663 Accuracy of the model on the 50000 test images: 67.4% Max accuracy: 68.39% Epoch: [221] [ 0/312] eta: 1:01:13 lr: 0.000736 min_lr: 0.000736 loss: 4.0058 (4.0058) weight_decay: 0.0500 (0.0500) time: 11.7742 data: 9.9685 max mem: 21002 Epoch: [221] [ 10/312] eta: 0:07:42 lr: 0.000736 min_lr: 0.000736 loss: 3.7865 (3.5934) weight_decay: 0.0500 (0.0500) time: 1.5303 data: 1.0057 max mem: 21002 Epoch: [221] [ 20/312] eta: 0:05:13 lr: 0.000735 min_lr: 0.000735 loss: 3.5111 (3.5902) weight_decay: 0.0500 (0.0500) time: 0.5383 data: 0.0980 max mem: 21002 Epoch: [221] [ 30/312] eta: 0:03:51 lr: 0.000735 min_lr: 0.000735 loss: 3.6728 (3.5918) weight_decay: 0.0500 (0.0500) time: 0.4300 data: 0.0436 max mem: 21002 Epoch: [221] [ 40/312] eta: 0:03:30 lr: 0.000734 min_lr: 0.000734 loss: 3.6845 (3.6271) weight_decay: 0.0500 (0.0500) time: 0.4572 data: 0.0720 max mem: 21002 Epoch: [221] [ 50/312] eta: 0:03:19 lr: 0.000734 min_lr: 0.000734 loss: 3.6845 (3.5901) weight_decay: 0.0500 (0.0500) time: 0.6695 data: 0.1727 max mem: 21002 Epoch: [221] [ 60/312] eta: 0:02:52 lr: 0.000733 min_lr: 0.000733 loss: 3.7453 (3.6435) weight_decay: 0.0500 (0.0500) time: 0.5018 data: 0.1013 max mem: 21002 Epoch: [221] [ 70/312] eta: 0:02:48 lr: 0.000732 min_lr: 0.000732 loss: 3.9496 (3.6724) weight_decay: 0.0500 (0.0500) time: 0.5296 data: 0.0390 max mem: 21002 Epoch: [221] [ 80/312] eta: 0:02:34 lr: 0.000732 min_lr: 0.000732 loss: 3.8604 (3.6799) weight_decay: 0.0500 (0.0500) time: 0.6149 data: 0.0390 max mem: 21002 Epoch: [221] [ 90/312] eta: 0:02:20 lr: 0.000731 min_lr: 0.000731 loss: 3.8604 (3.6862) weight_decay: 0.0500 (0.0500) time: 0.4088 data: 0.0163 max mem: 21002 Epoch: [221] [100/312] eta: 0:02:17 lr: 0.000731 min_lr: 0.000731 loss: 3.8143 (3.6835) weight_decay: 0.0500 (0.0500) time: 0.5719 data: 0.0502 max mem: 21002 Epoch: [221] [110/312] eta: 0:02:04 lr: 0.000730 min_lr: 0.000730 loss: 3.8779 (3.7108) weight_decay: 0.0500 (0.0500) time: 0.5407 data: 0.0346 max mem: 21002 Epoch: [221] [120/312] eta: 0:01:59 lr: 0.000730 min_lr: 0.000730 loss: 3.9407 (3.7254) weight_decay: 0.0500 (0.0500) time: 0.4824 data: 0.0008 max mem: 21002 Epoch: [221] [130/312] eta: 0:01:54 lr: 0.000729 min_lr: 0.000729 loss: 3.8823 (3.7212) weight_decay: 0.0500 (0.0500) time: 0.6829 data: 0.0268 max mem: 21002 Epoch: [221] [140/312] eta: 0:01:43 lr: 0.000729 min_lr: 0.000729 loss: 3.8823 (3.7298) weight_decay: 0.0500 (0.0500) time: 0.4947 data: 0.0275 max mem: 21002 Epoch: [221] [150/312] eta: 0:01:38 lr: 0.000728 min_lr: 0.000728 loss: 3.8476 (3.7286) weight_decay: 0.0500 (0.0500) time: 0.4954 data: 0.0289 max mem: 21002 Epoch: [221] [160/312] eta: 0:01:32 lr: 0.000727 min_lr: 0.000727 loss: 3.7387 (3.7221) weight_decay: 0.0500 (0.0500) time: 0.6618 data: 0.0298 max mem: 21002 Epoch: [221] [170/312] eta: 0:01:24 lr: 0.000727 min_lr: 0.000727 loss: 3.7218 (3.7189) weight_decay: 0.0500 (0.0500) time: 0.4894 data: 0.0331 max mem: 21002 Epoch: [221] [180/312] eta: 0:01:19 lr: 0.000726 min_lr: 0.000726 loss: 3.7218 (3.7143) weight_decay: 0.0500 (0.0500) time: 0.5560 data: 0.0734 max mem: 21002 Epoch: [221] [190/312] eta: 0:01:11 lr: 0.000726 min_lr: 0.000726 loss: 4.0159 (3.7312) weight_decay: 0.0500 (0.0500) time: 0.5223 data: 0.0427 max mem: 21002 Epoch: [221] [200/312] eta: 0:01:05 lr: 0.000725 min_lr: 0.000725 loss: 4.0170 (3.7323) weight_decay: 0.0500 (0.0500) time: 0.4180 data: 0.0007 max mem: 21002 Epoch: [221] [210/312] eta: 0:01:00 lr: 0.000725 min_lr: 0.000725 loss: 3.5489 (3.7300) weight_decay: 0.0500 (0.0500) time: 0.6191 data: 0.0017 max mem: 21002 Epoch: [221] [220/312] eta: 0:00:53 lr: 0.000724 min_lr: 0.000724 loss: 3.8173 (3.7339) weight_decay: 0.0500 (0.0500) time: 0.4913 data: 0.0017 max mem: 21002 Epoch: [221] [230/312] eta: 0:00:47 lr: 0.000724 min_lr: 0.000724 loss: 3.9714 (3.7435) weight_decay: 0.0500 (0.0500) time: 0.4930 data: 0.0010 max mem: 21002 Epoch: [221] [240/312] eta: 0:00:42 lr: 0.000723 min_lr: 0.000723 loss: 3.9714 (3.7422) weight_decay: 0.0500 (0.0500) time: 0.6938 data: 0.0081 max mem: 21002 Epoch: [221] [250/312] eta: 0:00:35 lr: 0.000722 min_lr: 0.000722 loss: 3.9401 (3.7455) weight_decay: 0.0500 (0.0500) time: 0.5261 data: 0.0430 max mem: 21002 Epoch: [221] [260/312] eta: 0:00:30 lr: 0.000722 min_lr: 0.000722 loss: 3.9865 (3.7528) weight_decay: 0.0500 (0.0500) time: 0.5499 data: 0.1149 max mem: 21002 Epoch: [221] [270/312] eta: 0:00:24 lr: 0.000721 min_lr: 0.000721 loss: 4.0041 (3.7676) weight_decay: 0.0500 (0.0500) time: 0.5141 data: 0.0798 max mem: 21002 Epoch: [221] [280/312] eta: 0:00:18 lr: 0.000721 min_lr: 0.000721 loss: 4.0051 (3.7748) weight_decay: 0.0500 (0.0500) time: 0.3945 data: 0.0052 max mem: 21002 Epoch: [221] [290/312] eta: 0:00:12 lr: 0.000720 min_lr: 0.000720 loss: 3.9082 (3.7659) weight_decay: 0.0500 (0.0500) time: 0.5278 data: 0.0204 max mem: 21002 Epoch: [221] [300/312] eta: 0:00:06 lr: 0.000720 min_lr: 0.000720 loss: 3.6281 (3.7602) weight_decay: 0.0500 (0.0500) time: 0.4137 data: 0.0155 max mem: 21002 Epoch: [221] [310/312] eta: 0:00:01 lr: 0.000719 min_lr: 0.000719 loss: 3.8141 (3.7627) weight_decay: 0.0500 (0.0500) time: 0.2768 data: 0.0001 max mem: 21002 Epoch: [221] [311/312] eta: 0:00:00 lr: 0.000719 min_lr: 0.000719 loss: 3.6823 (3.7612) weight_decay: 0.0500 (0.0500) time: 0.2768 data: 0.0001 max mem: 21002 Epoch: [221] Total time: 0:02:52 (0.5538 s / it) Averaged stats: lr: 0.000719 min_lr: 0.000719 loss: 3.6823 (3.7550) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.4117 (1.4117) acc1: 74.3490 (74.3490) acc5: 92.7083 (92.7083) time: 8.6857 data: 8.5677 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.8062 (1.6746) acc1: 65.3646 (66.5920) acc5: 87.7604 (87.9200) time: 1.0483 data: 0.9520 max mem: 21002 Test: Total time: 0:00:09 (1.0647 s / it) * Acc@1 65.820 Acc@5 87.652 loss 1.690 Accuracy of the model on the 50000 test images: 65.8% Max accuracy: 68.39% Epoch: [222] [ 0/312] eta: 0:59:07 lr: 0.000719 min_lr: 0.000719 loss: 4.5315 (4.5315) weight_decay: 0.0500 (0.0500) time: 11.3695 data: 11.0696 max mem: 21002 Epoch: [222] [ 10/312] eta: 0:08:50 lr: 0.000719 min_lr: 0.000719 loss: 3.9997 (3.9675) weight_decay: 0.0500 (0.0500) time: 1.7570 data: 1.0070 max mem: 21002 Epoch: [222] [ 20/312] eta: 0:05:09 lr: 0.000718 min_lr: 0.000718 loss: 3.8269 (3.7557) weight_decay: 0.0500 (0.0500) time: 0.5429 data: 0.0007 max mem: 21002 Epoch: [222] [ 30/312] eta: 0:03:48 lr: 0.000717 min_lr: 0.000717 loss: 3.5046 (3.7285) weight_decay: 0.0500 (0.0500) time: 0.2898 data: 0.0016 max mem: 21002 Epoch: [222] [ 40/312] eta: 0:03:05 lr: 0.000717 min_lr: 0.000717 loss: 3.5046 (3.6563) weight_decay: 0.0500 (0.0500) time: 0.2895 data: 0.0023 max mem: 21002 Epoch: [222] [ 50/312] eta: 0:02:49 lr: 0.000716 min_lr: 0.000716 loss: 3.5675 (3.6400) weight_decay: 0.0500 (0.0500) time: 0.3923 data: 0.0494 max mem: 21002 Epoch: [222] [ 60/312] eta: 0:02:37 lr: 0.000716 min_lr: 0.000716 loss: 3.7577 (3.6669) weight_decay: 0.0500 (0.0500) time: 0.5076 data: 0.1205 max mem: 21002 Epoch: [222] [ 70/312] eta: 0:02:29 lr: 0.000715 min_lr: 0.000715 loss: 3.8351 (3.6687) weight_decay: 0.0500 (0.0500) time: 0.5386 data: 0.1208 max mem: 21002 Epoch: [222] [ 80/312] eta: 0:02:19 lr: 0.000715 min_lr: 0.000715 loss: 3.5722 (3.6418) weight_decay: 0.0500 (0.0500) time: 0.5348 data: 0.0789 max mem: 21002 Epoch: [222] [ 90/312] eta: 0:02:11 lr: 0.000714 min_lr: 0.000714 loss: 3.5873 (3.6493) weight_decay: 0.0500 (0.0500) time: 0.5145 data: 0.0675 max mem: 21002 Epoch: [222] [100/312] eta: 0:02:03 lr: 0.000714 min_lr: 0.000714 loss: 3.7680 (3.6587) weight_decay: 0.0500 (0.0500) time: 0.5099 data: 0.0654 max mem: 21002 Epoch: [222] [110/312] eta: 0:01:56 lr: 0.000713 min_lr: 0.000713 loss: 3.9196 (3.6596) weight_decay: 0.0500 (0.0500) time: 0.5018 data: 0.0754 max mem: 21002 Epoch: [222] [120/312] eta: 0:01:51 lr: 0.000712 min_lr: 0.000712 loss: 4.0708 (3.6926) weight_decay: 0.0500 (0.0500) time: 0.5543 data: 0.0876 max mem: 21002 Epoch: [222] [130/312] eta: 0:01:44 lr: 0.000712 min_lr: 0.000712 loss: 3.5677 (3.6774) weight_decay: 0.0500 (0.0500) time: 0.5643 data: 0.0411 max mem: 21002 Epoch: [222] [140/312] eta: 0:01:38 lr: 0.000711 min_lr: 0.000711 loss: 3.4761 (3.6795) weight_decay: 0.0500 (0.0500) time: 0.5448 data: 0.0641 max mem: 21002 Epoch: [222] [150/312] eta: 0:01:32 lr: 0.000711 min_lr: 0.000711 loss: 3.8987 (3.6903) weight_decay: 0.0500 (0.0500) time: 0.5435 data: 0.0899 max mem: 21002 Epoch: [222] [160/312] eta: 0:01:26 lr: 0.000710 min_lr: 0.000710 loss: 3.7952 (3.6933) weight_decay: 0.0500 (0.0500) time: 0.5306 data: 0.0513 max mem: 21002 Epoch: [222] [170/312] eta: 0:01:19 lr: 0.000710 min_lr: 0.000710 loss: 3.8447 (3.6963) weight_decay: 0.0500 (0.0500) time: 0.5058 data: 0.0586 max mem: 21002 Epoch: [222] [180/312] eta: 0:01:14 lr: 0.000709 min_lr: 0.000709 loss: 3.4913 (3.6764) weight_decay: 0.0500 (0.0500) time: 0.5277 data: 0.0530 max mem: 21002 Epoch: [222] [190/312] eta: 0:01:08 lr: 0.000709 min_lr: 0.000709 loss: 3.4913 (3.6748) weight_decay: 0.0500 (0.0500) time: 0.5516 data: 0.0446 max mem: 21002 Epoch: [222] [200/312] eta: 0:01:02 lr: 0.000708 min_lr: 0.000708 loss: 3.7202 (3.6796) weight_decay: 0.0500 (0.0500) time: 0.5246 data: 0.0459 max mem: 21002 Epoch: [222] [210/312] eta: 0:00:57 lr: 0.000708 min_lr: 0.000708 loss: 3.6527 (3.6683) weight_decay: 0.0500 (0.0500) time: 0.5656 data: 0.0343 max mem: 21002 Epoch: [222] [220/312] eta: 0:00:51 lr: 0.000707 min_lr: 0.000707 loss: 3.7574 (3.6756) weight_decay: 0.0500 (0.0500) time: 0.5437 data: 0.0439 max mem: 21002 Epoch: [222] [230/312] eta: 0:00:45 lr: 0.000706 min_lr: 0.000706 loss: 3.8788 (3.6748) weight_decay: 0.0500 (0.0500) time: 0.5158 data: 0.0469 max mem: 21002 Epoch: [222] [240/312] eta: 0:00:40 lr: 0.000706 min_lr: 0.000706 loss: 3.9114 (3.6823) weight_decay: 0.0500 (0.0500) time: 0.5530 data: 0.0621 max mem: 21002 Epoch: [222] [250/312] eta: 0:00:34 lr: 0.000705 min_lr: 0.000705 loss: 3.7291 (3.6763) weight_decay: 0.0500 (0.0500) time: 0.5385 data: 0.0635 max mem: 21002 Epoch: [222] [260/312] eta: 0:00:28 lr: 0.000705 min_lr: 0.000705 loss: 3.6648 (3.6807) weight_decay: 0.0500 (0.0500) time: 0.5078 data: 0.0330 max mem: 21002 Epoch: [222] [270/312] eta: 0:00:23 lr: 0.000704 min_lr: 0.000704 loss: 3.7965 (3.6904) weight_decay: 0.0500 (0.0500) time: 0.5362 data: 0.0472 max mem: 21002 Epoch: [222] [280/312] eta: 0:00:17 lr: 0.000704 min_lr: 0.000704 loss: 3.9510 (3.6911) weight_decay: 0.0500 (0.0500) time: 0.5584 data: 0.0910 max mem: 21002 Epoch: [222] [290/312] eta: 0:00:12 lr: 0.000703 min_lr: 0.000703 loss: 3.7510 (3.6877) weight_decay: 0.0500 (0.0500) time: 0.5499 data: 0.0635 max mem: 21002 Epoch: [222] [300/312] eta: 0:00:06 lr: 0.000703 min_lr: 0.000703 loss: 3.7848 (3.6896) weight_decay: 0.0500 (0.0500) time: 0.4856 data: 0.0421 max mem: 21002 Epoch: [222] [310/312] eta: 0:00:01 lr: 0.000702 min_lr: 0.000702 loss: 3.8768 (3.6984) weight_decay: 0.0500 (0.0500) time: 0.3578 data: 0.0372 max mem: 21002 Epoch: [222] [311/312] eta: 0:00:00 lr: 0.000702 min_lr: 0.000702 loss: 3.8245 (3.6955) weight_decay: 0.0500 (0.0500) time: 0.3574 data: 0.0372 max mem: 21002 Epoch: [222] Total time: 0:02:50 (0.5451 s / it) Averaged stats: lr: 0.000702 min_lr: 0.000702 loss: 3.8245 (3.7327) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:13 loss: 1.3395 (1.3395) acc1: 75.7812 (75.7812) acc5: 92.9688 (92.9688) time: 8.1930 data: 8.0742 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6346 (1.5199) acc1: 66.7969 (68.3040) acc5: 88.4115 (88.5280) time: 1.0050 data: 0.9072 max mem: 21002 Test: Total time: 0:00:09 (1.0143 s / it) * Acc@1 67.954 Acc@5 88.550 loss 1.527 Accuracy of the model on the 50000 test images: 68.0% Max accuracy: 68.39% Epoch: [223] [ 0/312] eta: 1:01:03 lr: 0.000702 min_lr: 0.000702 loss: 2.9327 (2.9327) weight_decay: 0.0500 (0.0500) time: 11.7422 data: 8.7269 max mem: 21002 Epoch: [223] [ 10/312] eta: 0:07:53 lr: 0.000701 min_lr: 0.000701 loss: 3.6464 (3.5077) weight_decay: 0.0500 (0.0500) time: 1.5670 data: 0.9524 max mem: 21002 Epoch: [223] [ 20/312] eta: 0:05:11 lr: 0.000701 min_lr: 0.000701 loss: 3.7731 (3.6127) weight_decay: 0.0500 (0.0500) time: 0.5323 data: 0.0877 max mem: 21002 Epoch: [223] [ 30/312] eta: 0:03:50 lr: 0.000700 min_lr: 0.000700 loss: 3.7731 (3.5891) weight_decay: 0.0500 (0.0500) time: 0.4031 data: 0.0006 max mem: 21002 Epoch: [223] [ 40/312] eta: 0:03:17 lr: 0.000700 min_lr: 0.000700 loss: 3.9279 (3.6418) weight_decay: 0.0500 (0.0500) time: 0.3717 data: 0.0176 max mem: 21002 Epoch: [223] [ 50/312] eta: 0:03:02 lr: 0.000699 min_lr: 0.000699 loss: 3.8880 (3.6841) weight_decay: 0.0500 (0.0500) time: 0.5157 data: 0.1307 max mem: 21002 Epoch: [223] [ 60/312] eta: 0:02:39 lr: 0.000699 min_lr: 0.000699 loss: 3.8201 (3.6840) weight_decay: 0.0500 (0.0500) time: 0.4343 data: 0.1137 max mem: 21002 Epoch: [223] [ 70/312] eta: 0:02:36 lr: 0.000698 min_lr: 0.000698 loss: 3.8638 (3.7100) weight_decay: 0.0500 (0.0500) time: 0.5206 data: 0.1394 max mem: 21002 Epoch: [223] [ 80/312] eta: 0:02:26 lr: 0.000698 min_lr: 0.000698 loss: 3.6016 (3.6804) weight_decay: 0.0500 (0.0500) time: 0.6351 data: 0.1979 max mem: 21002 Epoch: [223] [ 90/312] eta: 0:02:15 lr: 0.000697 min_lr: 0.000697 loss: 3.8170 (3.6950) weight_decay: 0.0500 (0.0500) time: 0.4690 data: 0.0906 max mem: 21002 Epoch: [223] [100/312] eta: 0:02:12 lr: 0.000696 min_lr: 0.000696 loss: 3.8721 (3.6899) weight_decay: 0.0500 (0.0500) time: 0.6046 data: 0.1278 max mem: 21002 Epoch: [223] [110/312] eta: 0:02:00 lr: 0.000696 min_lr: 0.000696 loss: 3.8721 (3.6978) weight_decay: 0.0500 (0.0500) time: 0.5514 data: 0.1078 max mem: 21002 Epoch: [223] [120/312] eta: 0:01:54 lr: 0.000695 min_lr: 0.000695 loss: 3.8067 (3.6992) weight_decay: 0.0500 (0.0500) time: 0.4534 data: 0.0813 max mem: 21002 Epoch: [223] [130/312] eta: 0:01:47 lr: 0.000695 min_lr: 0.000695 loss: 3.5614 (3.6813) weight_decay: 0.0500 (0.0500) time: 0.5635 data: 0.1502 max mem: 21002 Epoch: [223] [140/312] eta: 0:01:39 lr: 0.000694 min_lr: 0.000694 loss: 3.5614 (3.6801) weight_decay: 0.0500 (0.0500) time: 0.4768 data: 0.1091 max mem: 21002 Epoch: [223] [150/312] eta: 0:01:35 lr: 0.000694 min_lr: 0.000694 loss: 3.9844 (3.6944) weight_decay: 0.0500 (0.0500) time: 0.5528 data: 0.1554 max mem: 21002 Epoch: [223] [160/312] eta: 0:01:28 lr: 0.000693 min_lr: 0.000693 loss: 3.9563 (3.6983) weight_decay: 0.0500 (0.0500) time: 0.5764 data: 0.1807 max mem: 21002 Epoch: [223] [170/312] eta: 0:01:21 lr: 0.000693 min_lr: 0.000693 loss: 3.5830 (3.6936) weight_decay: 0.0500 (0.0500) time: 0.4815 data: 0.1266 max mem: 21002 Epoch: [223] [180/312] eta: 0:01:16 lr: 0.000692 min_lr: 0.000692 loss: 3.7949 (3.6977) weight_decay: 0.0500 (0.0500) time: 0.5628 data: 0.1922 max mem: 21002 Epoch: [223] [190/312] eta: 0:01:08 lr: 0.000692 min_lr: 0.000692 loss: 3.7949 (3.7021) weight_decay: 0.0500 (0.0500) time: 0.4585 data: 0.1196 max mem: 21002 Epoch: [223] [200/312] eta: 0:01:03 lr: 0.000691 min_lr: 0.000691 loss: 3.8879 (3.7097) weight_decay: 0.0500 (0.0500) time: 0.4909 data: 0.1884 max mem: 21002 Epoch: [223] [210/312] eta: 0:00:57 lr: 0.000690 min_lr: 0.000690 loss: 3.8879 (3.7116) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.2838 max mem: 21002 Epoch: [223] [220/312] eta: 0:00:51 lr: 0.000690 min_lr: 0.000690 loss: 3.9596 (3.7234) weight_decay: 0.0500 (0.0500) time: 0.5164 data: 0.1803 max mem: 21002 Epoch: [223] [230/312] eta: 0:00:46 lr: 0.000689 min_lr: 0.000689 loss: 3.8643 (3.7184) weight_decay: 0.0500 (0.0500) time: 0.6035 data: 0.2486 max mem: 21002 Epoch: [223] [240/312] eta: 0:00:40 lr: 0.000689 min_lr: 0.000689 loss: 3.5398 (3.7038) weight_decay: 0.0500 (0.0500) time: 0.6182 data: 0.2283 max mem: 21002 Epoch: [223] [250/312] eta: 0:00:35 lr: 0.000688 min_lr: 0.000688 loss: 3.3931 (3.6985) weight_decay: 0.0500 (0.0500) time: 0.5497 data: 0.1680 max mem: 21002 Epoch: [223] [260/312] eta: 0:00:29 lr: 0.000688 min_lr: 0.000688 loss: 3.6589 (3.7015) weight_decay: 0.0500 (0.0500) time: 0.6114 data: 0.2278 max mem: 21002 Epoch: [223] [270/312] eta: 0:00:23 lr: 0.000687 min_lr: 0.000687 loss: 3.6640 (3.6951) weight_decay: 0.0500 (0.0500) time: 0.4903 data: 0.1255 max mem: 21002 Epoch: [223] [280/312] eta: 0:00:18 lr: 0.000687 min_lr: 0.000687 loss: 3.5531 (3.6842) weight_decay: 0.0500 (0.0500) time: 0.4779 data: 0.0979 max mem: 21002 Epoch: [223] [290/312] eta: 0:00:12 lr: 0.000686 min_lr: 0.000686 loss: 3.5531 (3.6833) weight_decay: 0.0500 (0.0500) time: 0.5550 data: 0.0963 max mem: 21002 Epoch: [223] [300/312] eta: 0:00:06 lr: 0.000686 min_lr: 0.000686 loss: 3.5585 (3.6805) weight_decay: 0.0500 (0.0500) time: 0.4042 data: 0.0429 max mem: 21002 Epoch: [223] [310/312] eta: 0:00:01 lr: 0.000685 min_lr: 0.000685 loss: 3.8189 (3.6844) weight_decay: 0.0500 (0.0500) time: 0.3191 data: 0.0428 max mem: 21002 Epoch: [223] [311/312] eta: 0:00:00 lr: 0.000685 min_lr: 0.000685 loss: 3.8189 (3.6842) weight_decay: 0.0500 (0.0500) time: 0.3191 data: 0.0428 max mem: 21002 Epoch: [223] Total time: 0:02:51 (0.5487 s / it) Averaged stats: lr: 0.000685 min_lr: 0.000685 loss: 3.8189 (3.7194) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:12 loss: 1.2609 (1.2609) acc1: 75.9115 (75.9115) acc5: 94.1406 (94.1406) time: 8.0601 data: 7.9420 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5423 (1.4697) acc1: 67.8385 (68.5920) acc5: 89.4531 (89.6480) time: 1.0306 data: 0.9328 max mem: 21002 Test: Total time: 0:00:09 (1.0439 s / it) * Acc@1 68.976 Acc@5 89.148 loss 1.479 Accuracy of the model on the 50000 test images: 69.0% Max accuracy: 68.98% Epoch: [224] [ 0/312] eta: 1:00:52 lr: 0.000685 min_lr: 0.000685 loss: 4.0429 (4.0429) weight_decay: 0.0500 (0.0500) time: 11.7079 data: 10.1437 max mem: 21002 Epoch: [224] [ 10/312] eta: 0:08:14 lr: 0.000684 min_lr: 0.000684 loss: 3.3992 (3.5829) weight_decay: 0.0500 (0.0500) time: 1.6390 data: 1.0362 max mem: 21002 Epoch: [224] [ 20/312] eta: 0:05:12 lr: 0.000684 min_lr: 0.000684 loss: 3.3992 (3.5144) weight_decay: 0.0500 (0.0500) time: 0.5385 data: 0.1033 max mem: 21002 Epoch: [224] [ 30/312] eta: 0:03:50 lr: 0.000683 min_lr: 0.000683 loss: 3.4941 (3.5116) weight_decay: 0.0500 (0.0500) time: 0.3670 data: 0.0409 max mem: 21002 Epoch: [224] [ 40/312] eta: 0:03:12 lr: 0.000683 min_lr: 0.000683 loss: 3.4079 (3.5111) weight_decay: 0.0500 (0.0500) time: 0.3255 data: 0.0014 max mem: 21002 Epoch: [224] [ 50/312] eta: 0:03:05 lr: 0.000682 min_lr: 0.000682 loss: 3.6080 (3.5442) weight_decay: 0.0500 (0.0500) time: 0.5390 data: 0.1073 max mem: 21002 Epoch: [224] [ 60/312] eta: 0:02:41 lr: 0.000682 min_lr: 0.000682 loss: 3.6591 (3.5640) weight_decay: 0.0500 (0.0500) time: 0.5102 data: 0.1165 max mem: 21002 Epoch: [224] [ 70/312] eta: 0:02:32 lr: 0.000681 min_lr: 0.000681 loss: 3.7793 (3.5874) weight_decay: 0.0500 (0.0500) time: 0.4278 data: 0.1274 max mem: 21002 Epoch: [224] [ 80/312] eta: 0:02:27 lr: 0.000681 min_lr: 0.000681 loss: 3.9200 (3.6058) weight_decay: 0.0500 (0.0500) time: 0.6237 data: 0.2897 max mem: 21002 Epoch: [224] [ 90/312] eta: 0:02:13 lr: 0.000680 min_lr: 0.000680 loss: 3.8083 (3.6321) weight_decay: 0.0500 (0.0500) time: 0.4949 data: 0.1741 max mem: 21002 Epoch: [224] [100/312] eta: 0:02:07 lr: 0.000680 min_lr: 0.000680 loss: 3.9697 (3.6550) weight_decay: 0.0500 (0.0500) time: 0.4546 data: 0.1391 max mem: 21002 Epoch: [224] [110/312] eta: 0:01:55 lr: 0.000679 min_lr: 0.000679 loss: 3.7250 (3.6298) weight_decay: 0.0500 (0.0500) time: 0.4518 data: 0.1379 max mem: 21002 Epoch: [224] [120/312] eta: 0:01:52 lr: 0.000678 min_lr: 0.000678 loss: 3.7250 (3.6564) weight_decay: 0.0500 (0.0500) time: 0.4924 data: 0.1530 max mem: 21002 Epoch: [224] [130/312] eta: 0:01:47 lr: 0.000678 min_lr: 0.000678 loss: 3.9385 (3.6687) weight_decay: 0.0500 (0.0500) time: 0.6780 data: 0.2892 max mem: 21002 Epoch: [224] [140/312] eta: 0:01:37 lr: 0.000677 min_lr: 0.000677 loss: 3.8387 (3.6720) weight_decay: 0.0500 (0.0500) time: 0.4731 data: 0.1368 max mem: 21002 Epoch: [224] [150/312] eta: 0:01:33 lr: 0.000677 min_lr: 0.000677 loss: 3.7220 (3.6765) weight_decay: 0.0500 (0.0500) time: 0.4788 data: 0.1357 max mem: 21002 Epoch: [224] [160/312] eta: 0:01:27 lr: 0.000676 min_lr: 0.000676 loss: 3.9610 (3.6867) weight_decay: 0.0500 (0.0500) time: 0.6491 data: 0.2518 max mem: 21002 Epoch: [224] [170/312] eta: 0:01:20 lr: 0.000676 min_lr: 0.000676 loss: 3.8976 (3.6802) weight_decay: 0.0500 (0.0500) time: 0.5162 data: 0.1486 max mem: 21002 Epoch: [224] [180/312] eta: 0:01:16 lr: 0.000675 min_lr: 0.000675 loss: 3.8976 (3.6890) weight_decay: 0.0500 (0.0500) time: 0.5615 data: 0.1388 max mem: 21002 Epoch: [224] [190/312] eta: 0:01:08 lr: 0.000675 min_lr: 0.000675 loss: 3.8401 (3.6849) weight_decay: 0.0500 (0.0500) time: 0.5013 data: 0.1068 max mem: 21002 Epoch: [224] [200/312] eta: 0:01:03 lr: 0.000674 min_lr: 0.000674 loss: 3.4365 (3.6669) weight_decay: 0.0500 (0.0500) time: 0.4551 data: 0.0784 max mem: 21002 Epoch: [224] [210/312] eta: 0:00:58 lr: 0.000674 min_lr: 0.000674 loss: 3.4181 (3.6623) weight_decay: 0.0500 (0.0500) time: 0.6550 data: 0.1721 max mem: 21002 Epoch: [224] [220/312] eta: 0:00:51 lr: 0.000673 min_lr: 0.000673 loss: 3.8659 (3.6660) weight_decay: 0.0500 (0.0500) time: 0.4863 data: 0.0943 max mem: 21002 Epoch: [224] [230/312] eta: 0:00:46 lr: 0.000673 min_lr: 0.000673 loss: 3.7217 (3.6580) weight_decay: 0.0500 (0.0500) time: 0.4927 data: 0.1045 max mem: 21002 Epoch: [224] [240/312] eta: 0:00:40 lr: 0.000672 min_lr: 0.000672 loss: 3.6973 (3.6634) weight_decay: 0.0500 (0.0500) time: 0.6448 data: 0.2066 max mem: 21002 Epoch: [224] [250/312] eta: 0:00:34 lr: 0.000671 min_lr: 0.000671 loss: 3.6064 (3.6546) weight_decay: 0.0500 (0.0500) time: 0.4452 data: 0.1029 max mem: 21002 Epoch: [224] [260/312] eta: 0:00:29 lr: 0.000671 min_lr: 0.000671 loss: 3.6777 (3.6571) weight_decay: 0.0500 (0.0500) time: 0.4983 data: 0.1285 max mem: 21002 Epoch: [224] [270/312] eta: 0:00:23 lr: 0.000670 min_lr: 0.000670 loss: 3.5504 (3.6468) weight_decay: 0.0500 (0.0500) time: 0.4923 data: 0.1284 max mem: 21002 Epoch: [224] [280/312] eta: 0:00:17 lr: 0.000670 min_lr: 0.000670 loss: 3.6025 (3.6494) weight_decay: 0.0500 (0.0500) time: 0.4913 data: 0.1191 max mem: 21002 Epoch: [224] [290/312] eta: 0:00:12 lr: 0.000669 min_lr: 0.000669 loss: 3.7203 (3.6438) weight_decay: 0.0500 (0.0500) time: 0.6882 data: 0.2210 max mem: 21002 Epoch: [224] [300/312] eta: 0:00:06 lr: 0.000669 min_lr: 0.000669 loss: 3.8423 (3.6516) weight_decay: 0.0500 (0.0500) time: 0.4789 data: 0.1023 max mem: 21002 Epoch: [224] [310/312] eta: 0:00:01 lr: 0.000668 min_lr: 0.000668 loss: 3.7531 (3.6529) weight_decay: 0.0500 (0.0500) time: 0.2795 data: 0.0001 max mem: 21002 Epoch: [224] [311/312] eta: 0:00:00 lr: 0.000668 min_lr: 0.000668 loss: 3.7896 (3.6534) weight_decay: 0.0500 (0.0500) time: 0.2794 data: 0.0001 max mem: 21002 Epoch: [224] Total time: 0:02:49 (0.5435 s / it) Averaged stats: lr: 0.000668 min_lr: 0.000668 loss: 3.7896 (3.7116) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.3300 (1.3300) acc1: 76.0417 (76.0417) acc5: 92.5781 (92.5781) time: 8.7747 data: 8.6565 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5581 (1.5165) acc1: 67.7083 (68.5440) acc5: 88.8021 (89.0400) time: 1.0605 data: 0.9619 max mem: 21002 Test: Total time: 0:00:09 (1.0766 s / it) * Acc@1 68.956 Acc@5 89.320 loss 1.522 Accuracy of the model on the 50000 test images: 69.0% Max accuracy: 68.98% Epoch: [225] [ 0/312] eta: 1:04:03 lr: 0.000668 min_lr: 0.000668 loss: 3.3149 (3.3149) weight_decay: 0.0500 (0.0500) time: 12.3192 data: 8.6555 max mem: 21002 Epoch: [225] [ 10/312] eta: 0:08:23 lr: 0.000668 min_lr: 0.000668 loss: 3.7891 (3.5525) weight_decay: 0.0500 (0.0500) time: 1.6685 data: 1.0796 max mem: 21002 Epoch: [225] [ 20/312] eta: 0:05:05 lr: 0.000667 min_lr: 0.000667 loss: 3.8153 (3.6562) weight_decay: 0.0500 (0.0500) time: 0.4836 data: 0.1796 max mem: 21002 Epoch: [225] [ 30/312] eta: 0:03:46 lr: 0.000667 min_lr: 0.000667 loss: 3.8669 (3.7088) weight_decay: 0.0500 (0.0500) time: 0.3263 data: 0.0194 max mem: 21002 Epoch: [225] [ 40/312] eta: 0:03:11 lr: 0.000666 min_lr: 0.000666 loss: 3.9443 (3.7660) weight_decay: 0.0500 (0.0500) time: 0.3475 data: 0.0082 max mem: 21002 Epoch: [225] [ 50/312] eta: 0:03:06 lr: 0.000665 min_lr: 0.000665 loss: 3.9443 (3.7730) weight_decay: 0.0500 (0.0500) time: 0.5755 data: 0.1161 max mem: 21002 Epoch: [225] [ 60/312] eta: 0:02:42 lr: 0.000665 min_lr: 0.000665 loss: 3.8029 (3.7493) weight_decay: 0.0500 (0.0500) time: 0.5167 data: 0.1092 max mem: 21002 Epoch: [225] [ 70/312] eta: 0:02:32 lr: 0.000664 min_lr: 0.000664 loss: 3.6577 (3.7474) weight_decay: 0.0500 (0.0500) time: 0.4231 data: 0.0698 max mem: 21002 Epoch: [225] [ 80/312] eta: 0:02:24 lr: 0.000664 min_lr: 0.000664 loss: 3.7811 (3.7265) weight_decay: 0.0500 (0.0500) time: 0.5624 data: 0.1102 max mem: 21002 Epoch: [225] [ 90/312] eta: 0:02:15 lr: 0.000663 min_lr: 0.000663 loss: 3.5194 (3.7105) weight_decay: 0.0500 (0.0500) time: 0.5250 data: 0.1371 max mem: 21002 Epoch: [225] [100/312] eta: 0:02:09 lr: 0.000663 min_lr: 0.000663 loss: 3.8608 (3.7284) weight_decay: 0.0500 (0.0500) time: 0.5508 data: 0.1801 max mem: 21002 Epoch: [225] [110/312] eta: 0:01:57 lr: 0.000662 min_lr: 0.000662 loss: 3.9903 (3.7470) weight_decay: 0.0500 (0.0500) time: 0.4543 data: 0.0844 max mem: 21002 Epoch: [225] [120/312] eta: 0:01:53 lr: 0.000662 min_lr: 0.000662 loss: 3.7554 (3.7395) weight_decay: 0.0500 (0.0500) time: 0.4852 data: 0.1083 max mem: 21002 Epoch: [225] [130/312] eta: 0:01:49 lr: 0.000661 min_lr: 0.000661 loss: 3.5651 (3.7265) weight_decay: 0.0500 (0.0500) time: 0.7169 data: 0.2272 max mem: 21002 Epoch: [225] [140/312] eta: 0:01:39 lr: 0.000661 min_lr: 0.000661 loss: 3.9720 (3.7268) weight_decay: 0.0500 (0.0500) time: 0.5218 data: 0.1201 max mem: 21002 Epoch: [225] [150/312] eta: 0:01:34 lr: 0.000660 min_lr: 0.000660 loss: 3.9720 (3.7289) weight_decay: 0.0500 (0.0500) time: 0.4783 data: 0.1066 max mem: 21002 Epoch: [225] [160/312] eta: 0:01:27 lr: 0.000660 min_lr: 0.000660 loss: 3.7309 (3.7225) weight_decay: 0.0500 (0.0500) time: 0.5672 data: 0.1065 max mem: 21002 Epoch: [225] [170/312] eta: 0:01:21 lr: 0.000659 min_lr: 0.000659 loss: 3.8547 (3.7252) weight_decay: 0.0500 (0.0500) time: 0.4872 data: 0.1099 max mem: 21002 Epoch: [225] [180/312] eta: 0:01:16 lr: 0.000659 min_lr: 0.000659 loss: 3.9133 (3.7312) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.1854 max mem: 21002 Epoch: [225] [190/312] eta: 0:01:08 lr: 0.000658 min_lr: 0.000658 loss: 3.9272 (3.7346) weight_decay: 0.0500 (0.0500) time: 0.4775 data: 0.0763 max mem: 21002 Epoch: [225] [200/312] eta: 0:01:04 lr: 0.000657 min_lr: 0.000657 loss: 3.9950 (3.7444) weight_decay: 0.0500 (0.0500) time: 0.5172 data: 0.0766 max mem: 21002 Epoch: [225] [210/312] eta: 0:00:58 lr: 0.000657 min_lr: 0.000657 loss: 3.8446 (3.7439) weight_decay: 0.0500 (0.0500) time: 0.6725 data: 0.1189 max mem: 21002 Epoch: [225] [220/312] eta: 0:00:51 lr: 0.000656 min_lr: 0.000656 loss: 3.8446 (3.7479) weight_decay: 0.0500 (0.0500) time: 0.4465 data: 0.0432 max mem: 21002 Epoch: [225] [230/312] eta: 0:00:46 lr: 0.000656 min_lr: 0.000656 loss: 3.9291 (3.7437) weight_decay: 0.0500 (0.0500) time: 0.4953 data: 0.0725 max mem: 21002 Epoch: [225] [240/312] eta: 0:00:40 lr: 0.000655 min_lr: 0.000655 loss: 3.9291 (3.7521) weight_decay: 0.0500 (0.0500) time: 0.5410 data: 0.0727 max mem: 21002 Epoch: [225] [250/312] eta: 0:00:34 lr: 0.000655 min_lr: 0.000655 loss: 3.9655 (3.7537) weight_decay: 0.0500 (0.0500) time: 0.4815 data: 0.0575 max mem: 21002 Epoch: [225] [260/312] eta: 0:00:29 lr: 0.000654 min_lr: 0.000654 loss: 3.8919 (3.7546) weight_decay: 0.0500 (0.0500) time: 0.6436 data: 0.0987 max mem: 21002 Epoch: [225] [270/312] eta: 0:00:23 lr: 0.000654 min_lr: 0.000654 loss: 3.3630 (3.7398) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.0421 max mem: 21002 Epoch: [225] [280/312] eta: 0:00:17 lr: 0.000653 min_lr: 0.000653 loss: 3.4857 (3.7403) weight_decay: 0.0500 (0.0500) time: 0.4863 data: 0.0393 max mem: 21002 Epoch: [225] [290/312] eta: 0:00:12 lr: 0.000653 min_lr: 0.000653 loss: 3.8673 (3.7504) weight_decay: 0.0500 (0.0500) time: 0.6444 data: 0.0551 max mem: 21002 Epoch: [225] [300/312] eta: 0:00:06 lr: 0.000652 min_lr: 0.000652 loss: 3.8185 (3.7408) weight_decay: 0.0500 (0.0500) time: 0.4436 data: 0.0198 max mem: 21002 Epoch: [225] [310/312] eta: 0:00:01 lr: 0.000652 min_lr: 0.000652 loss: 3.5028 (3.7344) weight_decay: 0.0500 (0.0500) time: 0.2806 data: 0.0036 max mem: 21002 Epoch: [225] [311/312] eta: 0:00:00 lr: 0.000652 min_lr: 0.000652 loss: 3.5028 (3.7345) weight_decay: 0.0500 (0.0500) time: 0.2804 data: 0.0036 max mem: 21002 Epoch: [225] Total time: 0:02:50 (0.5465 s / it) Averaged stats: lr: 0.000652 min_lr: 0.000652 loss: 3.5028 (3.7252) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:14 loss: 1.2245 (1.2245) acc1: 77.0833 (77.0833) acc5: 92.7083 (92.7083) time: 8.3065 data: 8.1882 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6032 (1.4717) acc1: 68.0990 (69.5200) acc5: 89.4531 (89.3760) time: 1.0391 data: 0.9376 max mem: 21002 Test: Total time: 0:00:09 (1.0501 s / it) * Acc@1 69.258 Acc@5 89.448 loss 1.482 Accuracy of the model on the 50000 test images: 69.3% Max accuracy: 69.26% Epoch: [226] [ 0/312] eta: 1:00:36 lr: 0.000651 min_lr: 0.000651 loss: 3.0311 (3.0311) weight_decay: 0.0500 (0.0500) time: 11.6540 data: 11.3159 max mem: 21002 Epoch: [226] [ 10/312] eta: 0:07:42 lr: 0.000651 min_lr: 0.000651 loss: 3.0311 (3.4373) weight_decay: 0.0500 (0.0500) time: 1.5318 data: 1.0293 max mem: 21002 Epoch: [226] [ 20/312] eta: 0:04:57 lr: 0.000650 min_lr: 0.000650 loss: 3.8635 (3.6448) weight_decay: 0.0500 (0.0500) time: 0.4877 data: 0.0681 max mem: 21002 Epoch: [226] [ 30/312] eta: 0:03:40 lr: 0.000650 min_lr: 0.000650 loss: 3.9393 (3.6510) weight_decay: 0.0500 (0.0500) time: 0.3717 data: 0.0682 max mem: 21002 Epoch: [226] [ 40/312] eta: 0:03:18 lr: 0.000649 min_lr: 0.000649 loss: 3.9060 (3.7219) weight_decay: 0.0500 (0.0500) time: 0.4229 data: 0.1183 max mem: 21002 Epoch: [226] [ 50/312] eta: 0:03:15 lr: 0.000649 min_lr: 0.000649 loss: 3.8500 (3.6858) weight_decay: 0.0500 (0.0500) time: 0.6850 data: 0.2592 max mem: 21002 Epoch: [226] [ 60/312] eta: 0:02:49 lr: 0.000648 min_lr: 0.000648 loss: 3.6779 (3.7224) weight_decay: 0.0500 (0.0500) time: 0.5569 data: 0.1497 max mem: 21002 Epoch: [226] [ 70/312] eta: 0:02:37 lr: 0.000648 min_lr: 0.000648 loss: 3.5938 (3.6726) weight_decay: 0.0500 (0.0500) time: 0.4159 data: 0.1013 max mem: 21002 Epoch: [226] [ 80/312] eta: 0:02:30 lr: 0.000647 min_lr: 0.000647 loss: 3.7147 (3.7005) weight_decay: 0.0500 (0.0500) time: 0.5843 data: 0.2165 max mem: 21002 Epoch: [226] [ 90/312] eta: 0:02:16 lr: 0.000647 min_lr: 0.000647 loss: 3.8109 (3.7039) weight_decay: 0.0500 (0.0500) time: 0.4885 data: 0.1515 max mem: 21002 Epoch: [226] [100/312] eta: 0:02:13 lr: 0.000646 min_lr: 0.000646 loss: 3.9307 (3.7071) weight_decay: 0.0500 (0.0500) time: 0.5529 data: 0.1597 max mem: 21002 Epoch: [226] [110/312] eta: 0:02:01 lr: 0.000646 min_lr: 0.000646 loss: 3.9988 (3.7253) weight_decay: 0.0500 (0.0500) time: 0.5281 data: 0.1323 max mem: 21002 Epoch: [226] [120/312] eta: 0:01:57 lr: 0.000645 min_lr: 0.000645 loss: 4.0087 (3.7394) weight_decay: 0.0500 (0.0500) time: 0.5026 data: 0.0908 max mem: 21002 Epoch: [226] [130/312] eta: 0:01:52 lr: 0.000645 min_lr: 0.000645 loss: 3.9204 (3.7575) weight_decay: 0.0500 (0.0500) time: 0.7002 data: 0.1606 max mem: 21002 Epoch: [226] [140/312] eta: 0:01:41 lr: 0.000644 min_lr: 0.000644 loss: 3.8079 (3.7381) weight_decay: 0.0500 (0.0500) time: 0.4906 data: 0.0750 max mem: 21002 Epoch: [226] [150/312] eta: 0:01:35 lr: 0.000644 min_lr: 0.000644 loss: 3.5694 (3.7403) weight_decay: 0.0500 (0.0500) time: 0.4370 data: 0.0562 max mem: 21002 Epoch: [226] [160/312] eta: 0:01:30 lr: 0.000643 min_lr: 0.000643 loss: 3.8783 (3.7358) weight_decay: 0.0500 (0.0500) time: 0.5871 data: 0.0525 max mem: 21002 Epoch: [226] [170/312] eta: 0:01:23 lr: 0.000642 min_lr: 0.000642 loss: 3.8418 (3.7446) weight_decay: 0.0500 (0.0500) time: 0.5370 data: 0.0962 max mem: 21002 Epoch: [226] [180/312] eta: 0:01:17 lr: 0.000642 min_lr: 0.000642 loss: 3.8418 (3.7427) weight_decay: 0.0500 (0.0500) time: 0.5203 data: 0.1442 max mem: 21002 Epoch: [226] [190/312] eta: 0:01:09 lr: 0.000641 min_lr: 0.000641 loss: 3.7749 (3.7288) weight_decay: 0.0500 (0.0500) time: 0.4262 data: 0.0498 max mem: 21002 Epoch: [226] [200/312] eta: 0:01:04 lr: 0.000641 min_lr: 0.000641 loss: 3.6451 (3.7210) weight_decay: 0.0500 (0.0500) time: 0.5250 data: 0.1180 max mem: 21002 Epoch: [226] [210/312] eta: 0:00:59 lr: 0.000640 min_lr: 0.000640 loss: 3.6451 (3.7092) weight_decay: 0.0500 (0.0500) time: 0.6857 data: 0.1968 max mem: 21002 Epoch: [226] [220/312] eta: 0:00:52 lr: 0.000640 min_lr: 0.000640 loss: 3.9112 (3.7182) weight_decay: 0.0500 (0.0500) time: 0.4483 data: 0.0799 max mem: 21002 Epoch: [226] [230/312] eta: 0:00:46 lr: 0.000639 min_lr: 0.000639 loss: 4.1432 (3.7390) weight_decay: 0.0500 (0.0500) time: 0.4945 data: 0.0763 max mem: 21002 Epoch: [226] [240/312] eta: 0:00:41 lr: 0.000639 min_lr: 0.000639 loss: 3.9750 (3.7377) weight_decay: 0.0500 (0.0500) time: 0.6260 data: 0.1160 max mem: 21002 Epoch: [226] [250/312] eta: 0:00:34 lr: 0.000638 min_lr: 0.000638 loss: 3.9150 (3.7394) weight_decay: 0.0500 (0.0500) time: 0.4685 data: 0.0917 max mem: 21002 Epoch: [226] [260/312] eta: 0:00:29 lr: 0.000638 min_lr: 0.000638 loss: 3.9226 (3.7456) weight_decay: 0.0500 (0.0500) time: 0.5401 data: 0.1611 max mem: 21002 Epoch: [226] [270/312] eta: 0:00:23 lr: 0.000637 min_lr: 0.000637 loss: 3.8004 (3.7418) weight_decay: 0.0500 (0.0500) time: 0.4890 data: 0.1099 max mem: 21002 Epoch: [226] [280/312] eta: 0:00:18 lr: 0.000637 min_lr: 0.000637 loss: 3.3838 (3.7319) weight_decay: 0.0500 (0.0500) time: 0.4834 data: 0.0976 max mem: 21002 Epoch: [226] [290/312] eta: 0:00:12 lr: 0.000636 min_lr: 0.000636 loss: 3.6085 (3.7274) weight_decay: 0.0500 (0.0500) time: 0.6120 data: 0.1588 max mem: 21002 Epoch: [226] [300/312] eta: 0:00:06 lr: 0.000636 min_lr: 0.000636 loss: 3.9791 (3.7330) weight_decay: 0.0500 (0.0500) time: 0.4103 data: 0.0616 max mem: 21002 Epoch: [226] [310/312] eta: 0:00:01 lr: 0.000635 min_lr: 0.000635 loss: 4.0246 (3.7385) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [226] [311/312] eta: 0:00:00 lr: 0.000635 min_lr: 0.000635 loss: 4.0246 (3.7403) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [226] Total time: 0:02:50 (0.5471 s / it) Averaged stats: lr: 0.000635 min_lr: 0.000635 loss: 4.0246 (3.7006) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.2947 (1.2947) acc1: 77.0833 (77.0833) acc5: 93.8802 (93.8802) time: 8.8852 data: 8.7666 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6844 (1.5903) acc1: 67.3177 (68.4000) acc5: 88.2812 (89.0240) time: 1.0723 data: 0.9741 max mem: 21002 Test: Total time: 0:00:09 (1.0849 s / it) * Acc@1 68.402 Acc@5 89.106 loss 1.599 Accuracy of the model on the 50000 test images: 68.4% Max accuracy: 69.26% Epoch: [227] [ 0/312] eta: 1:03:14 lr: 0.000635 min_lr: 0.000635 loss: 4.6478 (4.6478) weight_decay: 0.0500 (0.0500) time: 12.1626 data: 11.8578 max mem: 21002 Epoch: [227] [ 10/312] eta: 0:07:59 lr: 0.000634 min_lr: 0.000634 loss: 3.5541 (3.5816) weight_decay: 0.0500 (0.0500) time: 1.5885 data: 1.0785 max mem: 21002 Epoch: [227] [ 20/312] eta: 0:05:06 lr: 0.000634 min_lr: 0.000634 loss: 3.5541 (3.6714) weight_decay: 0.0500 (0.0500) time: 0.4942 data: 0.0876 max mem: 21002 Epoch: [227] [ 30/312] eta: 0:03:47 lr: 0.000633 min_lr: 0.000633 loss: 3.7922 (3.6571) weight_decay: 0.0500 (0.0500) time: 0.3742 data: 0.0877 max mem: 21002 Epoch: [227] [ 40/312] eta: 0:03:14 lr: 0.000633 min_lr: 0.000633 loss: 3.8779 (3.6829) weight_decay: 0.0500 (0.0500) time: 0.3631 data: 0.0739 max mem: 21002 Epoch: [227] [ 50/312] eta: 0:03:04 lr: 0.000632 min_lr: 0.000632 loss: 4.0170 (3.7642) weight_decay: 0.0500 (0.0500) time: 0.5469 data: 0.2606 max mem: 21002 Epoch: [227] [ 60/312] eta: 0:02:40 lr: 0.000632 min_lr: 0.000632 loss: 3.9220 (3.7104) weight_decay: 0.0500 (0.0500) time: 0.4727 data: 0.1874 max mem: 21002 Epoch: [227] [ 70/312] eta: 0:02:35 lr: 0.000631 min_lr: 0.000631 loss: 3.5198 (3.7024) weight_decay: 0.0500 (0.0500) time: 0.4919 data: 0.1607 max mem: 21002 Epoch: [227] [ 80/312] eta: 0:02:28 lr: 0.000631 min_lr: 0.000631 loss: 3.9061 (3.7291) weight_decay: 0.0500 (0.0500) time: 0.6606 data: 0.3153 max mem: 21002 Epoch: [227] [ 90/312] eta: 0:02:14 lr: 0.000630 min_lr: 0.000630 loss: 3.5964 (3.6977) weight_decay: 0.0500 (0.0500) time: 0.4775 data: 0.1552 max mem: 21002 Epoch: [227] [100/312] eta: 0:02:10 lr: 0.000630 min_lr: 0.000630 loss: 3.5056 (3.6908) weight_decay: 0.0500 (0.0500) time: 0.4981 data: 0.1503 max mem: 21002 Epoch: [227] [110/312] eta: 0:01:57 lr: 0.000629 min_lr: 0.000629 loss: 3.9538 (3.7311) weight_decay: 0.0500 (0.0500) time: 0.4771 data: 0.1508 max mem: 21002 Epoch: [227] [120/312] eta: 0:01:54 lr: 0.000629 min_lr: 0.000629 loss: 3.9907 (3.7342) weight_decay: 0.0500 (0.0500) time: 0.5049 data: 0.1461 max mem: 21002 Epoch: [227] [130/312] eta: 0:01:47 lr: 0.000628 min_lr: 0.000628 loss: 3.6244 (3.7358) weight_decay: 0.0500 (0.0500) time: 0.6405 data: 0.2653 max mem: 21002 Epoch: [227] [140/312] eta: 0:01:39 lr: 0.000628 min_lr: 0.000628 loss: 3.5104 (3.7080) weight_decay: 0.0500 (0.0500) time: 0.4918 data: 0.1213 max mem: 21002 Epoch: [227] [150/312] eta: 0:01:34 lr: 0.000627 min_lr: 0.000627 loss: 3.5977 (3.7093) weight_decay: 0.0500 (0.0500) time: 0.5445 data: 0.1102 max mem: 21002 Epoch: [227] [160/312] eta: 0:01:28 lr: 0.000627 min_lr: 0.000627 loss: 3.7820 (3.7109) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.2102 max mem: 21002 Epoch: [227] [170/312] eta: 0:01:21 lr: 0.000626 min_lr: 0.000626 loss: 3.5715 (3.6950) weight_decay: 0.0500 (0.0500) time: 0.4993 data: 0.1015 max mem: 21002 Epoch: [227] [180/312] eta: 0:01:16 lr: 0.000626 min_lr: 0.000626 loss: 3.5278 (3.6952) weight_decay: 0.0500 (0.0500) time: 0.5840 data: 0.0516 max mem: 21002 Epoch: [227] [190/312] eta: 0:01:09 lr: 0.000625 min_lr: 0.000625 loss: 3.8884 (3.7058) weight_decay: 0.0500 (0.0500) time: 0.4851 data: 0.0516 max mem: 21002 Epoch: [227] [200/312] eta: 0:01:03 lr: 0.000625 min_lr: 0.000625 loss: 3.7695 (3.6923) weight_decay: 0.0500 (0.0500) time: 0.4758 data: 0.0500 max mem: 21002 Epoch: [227] [210/312] eta: 0:00:58 lr: 0.000624 min_lr: 0.000624 loss: 3.4898 (3.6807) weight_decay: 0.0500 (0.0500) time: 0.6514 data: 0.0835 max mem: 21002 Epoch: [227] [220/312] eta: 0:00:51 lr: 0.000624 min_lr: 0.000624 loss: 3.6654 (3.6816) weight_decay: 0.0500 (0.0500) time: 0.4961 data: 0.0538 max mem: 21002 Epoch: [227] [230/312] eta: 0:00:46 lr: 0.000623 min_lr: 0.000623 loss: 3.7105 (3.6790) weight_decay: 0.0500 (0.0500) time: 0.5178 data: 0.0536 max mem: 21002 Epoch: [227] [240/312] eta: 0:00:40 lr: 0.000622 min_lr: 0.000622 loss: 3.7105 (3.6822) weight_decay: 0.0500 (0.0500) time: 0.5738 data: 0.0693 max mem: 21002 Epoch: [227] [250/312] eta: 0:00:35 lr: 0.000622 min_lr: 0.000622 loss: 3.9071 (3.6856) weight_decay: 0.0500 (0.0500) time: 0.5053 data: 0.0457 max mem: 21002 Epoch: [227] [260/312] eta: 0:00:29 lr: 0.000621 min_lr: 0.000621 loss: 3.8726 (3.6870) weight_decay: 0.0500 (0.0500) time: 0.6099 data: 0.0163 max mem: 21002 Epoch: [227] [270/312] eta: 0:00:23 lr: 0.000621 min_lr: 0.000621 loss: 3.9999 (3.6995) weight_decay: 0.0500 (0.0500) time: 0.5053 data: 0.0324 max mem: 21002 Epoch: [227] [280/312] eta: 0:00:18 lr: 0.000620 min_lr: 0.000620 loss: 4.0236 (3.7049) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.0311 max mem: 21002 Epoch: [227] [290/312] eta: 0:00:12 lr: 0.000620 min_lr: 0.000620 loss: 3.7497 (3.6968) weight_decay: 0.0500 (0.0500) time: 0.5444 data: 0.0141 max mem: 21002 Epoch: [227] [300/312] eta: 0:00:06 lr: 0.000619 min_lr: 0.000619 loss: 3.0951 (3.6797) weight_decay: 0.0500 (0.0500) time: 0.3959 data: 0.0185 max mem: 21002 Epoch: [227] [310/312] eta: 0:00:01 lr: 0.000619 min_lr: 0.000619 loss: 3.1983 (3.6790) weight_decay: 0.0500 (0.0500) time: 0.3184 data: 0.0095 max mem: 21002 Epoch: [227] [311/312] eta: 0:00:00 lr: 0.000619 min_lr: 0.000619 loss: 3.2496 (3.6782) weight_decay: 0.0500 (0.0500) time: 0.3183 data: 0.0095 max mem: 21002 Epoch: [227] Total time: 0:02:50 (0.5463 s / it) Averaged stats: lr: 0.000619 min_lr: 0.000619 loss: 3.2496 (3.7229) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.2306 (1.2306) acc1: 76.6927 (76.6927) acc5: 93.7500 (93.7500) time: 8.7157 data: 8.5970 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6280 (1.4882) acc1: 67.4479 (69.2480) acc5: 87.6302 (89.1200) time: 1.0538 data: 0.9553 max mem: 21002 Test: Total time: 0:00:09 (1.0693 s / it) * Acc@1 68.862 Acc@5 89.098 loss 1.503 Accuracy of the model on the 50000 test images: 68.9% Max accuracy: 69.26% Epoch: [228] [ 0/312] eta: 0:59:04 lr: 0.000619 min_lr: 0.000619 loss: 3.9566 (3.9566) weight_decay: 0.0500 (0.0500) time: 11.3613 data: 8.8577 max mem: 21002 Epoch: [228] [ 10/312] eta: 0:07:45 lr: 0.000618 min_lr: 0.000618 loss: 3.6850 (3.5527) weight_decay: 0.0500 (0.0500) time: 1.5413 data: 1.0508 max mem: 21002 Epoch: [228] [ 20/312] eta: 0:05:22 lr: 0.000618 min_lr: 0.000618 loss: 3.9731 (3.8502) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.2169 max mem: 21002 Epoch: [228] [ 30/312] eta: 0:03:57 lr: 0.000617 min_lr: 0.000617 loss: 4.0349 (3.8674) weight_decay: 0.0500 (0.0500) time: 0.4598 data: 0.0821 max mem: 21002 Epoch: [228] [ 40/312] eta: 0:03:26 lr: 0.000617 min_lr: 0.000617 loss: 3.8137 (3.8130) weight_decay: 0.0500 (0.0500) time: 0.3968 data: 0.0173 max mem: 21002 Epoch: [228] [ 50/312] eta: 0:03:15 lr: 0.000616 min_lr: 0.000616 loss: 3.9277 (3.8239) weight_decay: 0.0500 (0.0500) time: 0.5975 data: 0.0777 max mem: 21002 Epoch: [228] [ 60/312] eta: 0:02:49 lr: 0.000616 min_lr: 0.000616 loss: 3.9202 (3.7818) weight_decay: 0.0500 (0.0500) time: 0.4919 data: 0.0613 max mem: 21002 Epoch: [228] [ 70/312] eta: 0:02:45 lr: 0.000615 min_lr: 0.000615 loss: 3.4573 (3.7435) weight_decay: 0.0500 (0.0500) time: 0.5192 data: 0.0856 max mem: 21002 Epoch: [228] [ 80/312] eta: 0:02:33 lr: 0.000615 min_lr: 0.000615 loss: 3.8068 (3.7565) weight_decay: 0.0500 (0.0500) time: 0.6261 data: 0.0853 max mem: 21002 Epoch: [228] [ 90/312] eta: 0:02:19 lr: 0.000614 min_lr: 0.000614 loss: 3.9611 (3.7558) weight_decay: 0.0500 (0.0500) time: 0.4308 data: 0.0381 max mem: 21002 Epoch: [228] [100/312] eta: 0:02:15 lr: 0.000614 min_lr: 0.000614 loss: 3.7540 (3.7440) weight_decay: 0.0500 (0.0500) time: 0.5506 data: 0.1262 max mem: 21002 Epoch: [228] [110/312] eta: 0:02:02 lr: 0.000613 min_lr: 0.000613 loss: 3.3909 (3.6882) weight_decay: 0.0500 (0.0500) time: 0.5166 data: 0.0896 max mem: 21002 Epoch: [228] [120/312] eta: 0:01:57 lr: 0.000612 min_lr: 0.000612 loss: 3.5045 (3.6967) weight_decay: 0.0500 (0.0500) time: 0.4667 data: 0.0682 max mem: 21002 Epoch: [228] [130/312] eta: 0:01:53 lr: 0.000612 min_lr: 0.000612 loss: 3.6595 (3.6854) weight_decay: 0.0500 (0.0500) time: 0.7076 data: 0.1583 max mem: 21002 Epoch: [228] [140/312] eta: 0:01:43 lr: 0.000611 min_lr: 0.000611 loss: 3.6802 (3.6882) weight_decay: 0.0500 (0.0500) time: 0.5324 data: 0.0916 max mem: 21002 Epoch: [228] [150/312] eta: 0:01:36 lr: 0.000611 min_lr: 0.000611 loss: 3.6802 (3.6760) weight_decay: 0.0500 (0.0500) time: 0.4187 data: 0.0312 max mem: 21002 Epoch: [228] [160/312] eta: 0:01:30 lr: 0.000610 min_lr: 0.000610 loss: 3.6244 (3.6780) weight_decay: 0.0500 (0.0500) time: 0.5523 data: 0.0312 max mem: 21002 Epoch: [228] [170/312] eta: 0:01:23 lr: 0.000610 min_lr: 0.000610 loss: 3.7139 (3.6776) weight_decay: 0.0500 (0.0500) time: 0.5286 data: 0.1030 max mem: 21002 Epoch: [228] [180/312] eta: 0:01:17 lr: 0.000609 min_lr: 0.000609 loss: 3.5511 (3.6683) weight_decay: 0.0500 (0.0500) time: 0.5600 data: 0.1761 max mem: 21002 Epoch: [228] [190/312] eta: 0:01:10 lr: 0.000609 min_lr: 0.000609 loss: 3.4271 (3.6640) weight_decay: 0.0500 (0.0500) time: 0.4569 data: 0.0746 max mem: 21002 Epoch: [228] [200/312] eta: 0:01:05 lr: 0.000608 min_lr: 0.000608 loss: 3.9537 (3.6671) weight_decay: 0.0500 (0.0500) time: 0.5288 data: 0.1152 max mem: 21002 Epoch: [228] [210/312] eta: 0:00:59 lr: 0.000608 min_lr: 0.000608 loss: 3.9297 (3.6708) weight_decay: 0.0500 (0.0500) time: 0.6824 data: 0.1890 max mem: 21002 Epoch: [228] [220/312] eta: 0:00:52 lr: 0.000607 min_lr: 0.000607 loss: 3.8264 (3.6683) weight_decay: 0.0500 (0.0500) time: 0.4426 data: 0.0753 max mem: 21002 Epoch: [228] [230/312] eta: 0:00:47 lr: 0.000607 min_lr: 0.000607 loss: 3.3418 (3.6515) weight_decay: 0.0500 (0.0500) time: 0.4997 data: 0.0990 max mem: 21002 Epoch: [228] [240/312] eta: 0:00:41 lr: 0.000606 min_lr: 0.000606 loss: 3.1415 (3.6345) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0990 max mem: 21002 Epoch: [228] [250/312] eta: 0:00:35 lr: 0.000606 min_lr: 0.000606 loss: 3.6019 (3.6389) weight_decay: 0.0500 (0.0500) time: 0.4564 data: 0.0889 max mem: 21002 Epoch: [228] [260/312] eta: 0:00:29 lr: 0.000605 min_lr: 0.000605 loss: 3.9246 (3.6459) weight_decay: 0.0500 (0.0500) time: 0.6081 data: 0.2032 max mem: 21002 Epoch: [228] [270/312] eta: 0:00:23 lr: 0.000605 min_lr: 0.000605 loss: 3.9749 (3.6500) weight_decay: 0.0500 (0.0500) time: 0.5234 data: 0.1166 max mem: 21002 Epoch: [228] [280/312] eta: 0:00:18 lr: 0.000604 min_lr: 0.000604 loss: 3.9804 (3.6502) weight_decay: 0.0500 (0.0500) time: 0.4547 data: 0.0850 max mem: 21002 Epoch: [228] [290/312] eta: 0:00:12 lr: 0.000604 min_lr: 0.000604 loss: 3.5768 (3.6451) weight_decay: 0.0500 (0.0500) time: 0.5672 data: 0.1381 max mem: 21002 Epoch: [228] [300/312] eta: 0:00:06 lr: 0.000603 min_lr: 0.000603 loss: 3.5768 (3.6527) weight_decay: 0.0500 (0.0500) time: 0.3965 data: 0.0552 max mem: 21002 Epoch: [228] [310/312] eta: 0:00:01 lr: 0.000603 min_lr: 0.000603 loss: 3.8325 (3.6565) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [228] [311/312] eta: 0:00:00 lr: 0.000603 min_lr: 0.000603 loss: 3.7369 (3.6533) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [228] Total time: 0:02:51 (0.5488 s / it) Averaged stats: lr: 0.000603 min_lr: 0.000603 loss: 3.7369 (3.6957) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:14 loss: 1.3006 (1.3006) acc1: 77.7344 (77.7344) acc5: 93.0990 (93.0990) time: 8.3043 data: 8.1858 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6252 (1.5357) acc1: 66.6667 (68.6080) acc5: 88.6719 (89.1360) time: 1.0241 data: 0.9218 max mem: 21002 Test: Total time: 0:00:09 (1.0381 s / it) * Acc@1 69.098 Acc@5 89.410 loss 1.538 Accuracy of the model on the 50000 test images: 69.1% Max accuracy: 69.26% Epoch: [229] [ 0/312] eta: 1:02:02 lr: 0.000603 min_lr: 0.000603 loss: 3.8982 (3.8982) weight_decay: 0.0500 (0.0500) time: 11.9320 data: 11.0703 max mem: 21002 Epoch: [229] [ 10/312] eta: 0:08:20 lr: 0.000602 min_lr: 0.000602 loss: 3.6443 (3.6174) weight_decay: 0.0500 (0.0500) time: 1.6570 data: 1.0989 max mem: 21002 Epoch: [229] [ 20/312] eta: 0:05:04 lr: 0.000602 min_lr: 0.000602 loss: 3.7695 (3.7098) weight_decay: 0.0500 (0.0500) time: 0.4967 data: 0.0828 max mem: 21002 Epoch: [229] [ 30/312] eta: 0:03:45 lr: 0.000601 min_lr: 0.000601 loss: 3.7855 (3.7474) weight_decay: 0.0500 (0.0500) time: 0.3257 data: 0.0322 max mem: 21002 Epoch: [229] [ 40/312] eta: 0:03:11 lr: 0.000601 min_lr: 0.000601 loss: 3.7063 (3.7586) weight_decay: 0.0500 (0.0500) time: 0.3485 data: 0.0318 max mem: 21002 Epoch: [229] [ 50/312] eta: 0:03:01 lr: 0.000600 min_lr: 0.000600 loss: 4.0220 (3.8166) weight_decay: 0.0500 (0.0500) time: 0.5261 data: 0.1806 max mem: 21002 Epoch: [229] [ 60/312] eta: 0:02:37 lr: 0.000600 min_lr: 0.000600 loss: 4.0220 (3.7985) weight_decay: 0.0500 (0.0500) time: 0.4673 data: 0.1494 max mem: 21002 Epoch: [229] [ 70/312] eta: 0:02:31 lr: 0.000599 min_lr: 0.000599 loss: 3.6869 (3.7934) weight_decay: 0.0500 (0.0500) time: 0.4635 data: 0.1622 max mem: 21002 Epoch: [229] [ 80/312] eta: 0:02:27 lr: 0.000598 min_lr: 0.000598 loss: 3.5308 (3.7452) weight_decay: 0.0500 (0.0500) time: 0.6641 data: 0.3014 max mem: 21002 Epoch: [229] [ 90/312] eta: 0:02:12 lr: 0.000598 min_lr: 0.000598 loss: 3.5308 (3.7255) weight_decay: 0.0500 (0.0500) time: 0.4939 data: 0.1419 max mem: 21002 Epoch: [229] [100/312] eta: 0:02:08 lr: 0.000597 min_lr: 0.000597 loss: 3.6471 (3.7104) weight_decay: 0.0500 (0.0500) time: 0.4931 data: 0.1316 max mem: 21002 Epoch: [229] [110/312] eta: 0:01:57 lr: 0.000597 min_lr: 0.000597 loss: 3.6964 (3.7106) weight_decay: 0.0500 (0.0500) time: 0.5012 data: 0.1297 max mem: 21002 Epoch: [229] [120/312] eta: 0:01:53 lr: 0.000596 min_lr: 0.000596 loss: 3.8234 (3.7130) weight_decay: 0.0500 (0.0500) time: 0.5173 data: 0.1405 max mem: 21002 Epoch: [229] [130/312] eta: 0:01:48 lr: 0.000596 min_lr: 0.000596 loss: 3.8571 (3.7326) weight_decay: 0.0500 (0.0500) time: 0.6696 data: 0.2484 max mem: 21002 Epoch: [229] [140/312] eta: 0:01:38 lr: 0.000595 min_lr: 0.000595 loss: 3.8571 (3.7320) weight_decay: 0.0500 (0.0500) time: 0.4537 data: 0.1087 max mem: 21002 Epoch: [229] [150/312] eta: 0:01:33 lr: 0.000595 min_lr: 0.000595 loss: 3.7220 (3.7255) weight_decay: 0.0500 (0.0500) time: 0.4801 data: 0.1321 max mem: 21002 Epoch: [229] [160/312] eta: 0:01:28 lr: 0.000594 min_lr: 0.000594 loss: 3.7545 (3.7114) weight_decay: 0.0500 (0.0500) time: 0.6649 data: 0.2328 max mem: 21002 Epoch: [229] [170/312] eta: 0:01:21 lr: 0.000594 min_lr: 0.000594 loss: 3.8826 (3.7228) weight_decay: 0.0500 (0.0500) time: 0.5160 data: 0.1426 max mem: 21002 Epoch: [229] [180/312] eta: 0:01:16 lr: 0.000593 min_lr: 0.000593 loss: 3.8554 (3.7152) weight_decay: 0.0500 (0.0500) time: 0.5099 data: 0.1219 max mem: 21002 Epoch: [229] [190/312] eta: 0:01:08 lr: 0.000593 min_lr: 0.000593 loss: 3.6837 (3.7109) weight_decay: 0.0500 (0.0500) time: 0.4756 data: 0.0885 max mem: 21002 Epoch: [229] [200/312] eta: 0:01:03 lr: 0.000592 min_lr: 0.000592 loss: 3.6837 (3.7038) weight_decay: 0.0500 (0.0500) time: 0.5021 data: 0.1265 max mem: 21002 Epoch: [229] [210/312] eta: 0:00:58 lr: 0.000592 min_lr: 0.000592 loss: 3.7506 (3.6998) weight_decay: 0.0500 (0.0500) time: 0.6688 data: 0.1873 max mem: 21002 Epoch: [229] [220/312] eta: 0:00:51 lr: 0.000591 min_lr: 0.000591 loss: 3.7818 (3.6920) weight_decay: 0.0500 (0.0500) time: 0.4662 data: 0.0738 max mem: 21002 Epoch: [229] [230/312] eta: 0:00:46 lr: 0.000591 min_lr: 0.000591 loss: 3.7311 (3.6847) weight_decay: 0.0500 (0.0500) time: 0.4838 data: 0.0732 max mem: 21002 Epoch: [229] [240/312] eta: 0:00:40 lr: 0.000590 min_lr: 0.000590 loss: 3.7322 (3.6865) weight_decay: 0.0500 (0.0500) time: 0.6422 data: 0.1011 max mem: 21002 Epoch: [229] [250/312] eta: 0:00:34 lr: 0.000590 min_lr: 0.000590 loss: 3.7175 (3.6786) weight_decay: 0.0500 (0.0500) time: 0.5058 data: 0.0881 max mem: 21002 Epoch: [229] [260/312] eta: 0:00:29 lr: 0.000589 min_lr: 0.000589 loss: 3.8215 (3.6863) weight_decay: 0.0500 (0.0500) time: 0.5174 data: 0.1241 max mem: 21002 Epoch: [229] [270/312] eta: 0:00:23 lr: 0.000589 min_lr: 0.000589 loss: 3.9536 (3.7017) weight_decay: 0.0500 (0.0500) time: 0.4798 data: 0.0856 max mem: 21002 Epoch: [229] [280/312] eta: 0:00:17 lr: 0.000588 min_lr: 0.000588 loss: 4.0088 (3.7045) weight_decay: 0.0500 (0.0500) time: 0.4813 data: 0.0733 max mem: 21002 Epoch: [229] [290/312] eta: 0:00:12 lr: 0.000588 min_lr: 0.000588 loss: 3.7961 (3.7091) weight_decay: 0.0500 (0.0500) time: 0.6334 data: 0.1238 max mem: 21002 Epoch: [229] [300/312] eta: 0:00:06 lr: 0.000587 min_lr: 0.000587 loss: 3.7642 (3.7118) weight_decay: 0.0500 (0.0500) time: 0.4600 data: 0.0674 max mem: 21002 Epoch: [229] [310/312] eta: 0:00:01 lr: 0.000587 min_lr: 0.000587 loss: 3.7546 (3.7110) weight_decay: 0.0500 (0.0500) time: 0.2915 data: 0.0001 max mem: 21002 Epoch: [229] [311/312] eta: 0:00:00 lr: 0.000587 min_lr: 0.000587 loss: 3.7692 (3.7122) weight_decay: 0.0500 (0.0500) time: 0.2914 data: 0.0001 max mem: 21002 Epoch: [229] Total time: 0:02:49 (0.5447 s / it) Averaged stats: lr: 0.000587 min_lr: 0.000587 loss: 3.7692 (3.6838) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.3930 (1.3930) acc1: 76.4323 (76.4323) acc5: 93.2292 (93.2292) time: 8.8576 data: 8.7389 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6518 (1.5752) acc1: 67.4479 (69.3760) acc5: 89.7135 (89.3440) time: 1.0704 data: 0.9711 max mem: 21002 Test: Total time: 0:00:09 (1.1058 s / it) * Acc@1 69.004 Acc@5 89.432 loss 1.579 Accuracy of the model on the 50000 test images: 69.0% Max accuracy: 69.26% Epoch: [230] [ 0/312] eta: 1:02:28 lr: 0.000587 min_lr: 0.000587 loss: 2.8009 (2.8009) weight_decay: 0.0500 (0.0500) time: 12.0141 data: 10.3898 max mem: 21002 Epoch: [230] [ 10/312] eta: 0:07:23 lr: 0.000586 min_lr: 0.000586 loss: 3.7219 (3.7025) weight_decay: 0.0500 (0.0500) time: 1.4683 data: 1.0609 max mem: 21002 Epoch: [230] [ 20/312] eta: 0:05:08 lr: 0.000586 min_lr: 0.000586 loss: 3.6408 (3.6501) weight_decay: 0.0500 (0.0500) time: 0.5069 data: 0.1772 max mem: 21002 Epoch: [230] [ 30/312] eta: 0:03:47 lr: 0.000585 min_lr: 0.000585 loss: 3.8080 (3.7654) weight_decay: 0.0500 (0.0500) time: 0.4451 data: 0.1140 max mem: 21002 Epoch: [230] [ 40/312] eta: 0:03:27 lr: 0.000585 min_lr: 0.000585 loss: 3.8968 (3.7824) weight_decay: 0.0500 (0.0500) time: 0.4548 data: 0.1357 max mem: 21002 Epoch: [230] [ 50/312] eta: 0:03:12 lr: 0.000584 min_lr: 0.000584 loss: 3.8068 (3.8183) weight_decay: 0.0500 (0.0500) time: 0.6176 data: 0.2833 max mem: 21002 Epoch: [230] [ 60/312] eta: 0:02:47 lr: 0.000584 min_lr: 0.000584 loss: 3.7870 (3.7925) weight_decay: 0.0500 (0.0500) time: 0.4604 data: 0.1486 max mem: 21002 Epoch: [230] [ 70/312] eta: 0:02:41 lr: 0.000583 min_lr: 0.000583 loss: 3.4269 (3.7547) weight_decay: 0.0500 (0.0500) time: 0.5059 data: 0.1254 max mem: 21002 Epoch: [230] [ 80/312] eta: 0:02:34 lr: 0.000583 min_lr: 0.000583 loss: 3.4108 (3.7335) weight_decay: 0.0500 (0.0500) time: 0.6671 data: 0.2023 max mem: 21002 Epoch: [230] [ 90/312] eta: 0:02:21 lr: 0.000582 min_lr: 0.000582 loss: 3.7671 (3.7387) weight_decay: 0.0500 (0.0500) time: 0.5256 data: 0.1274 max mem: 21002 Epoch: [230] [100/312] eta: 0:02:16 lr: 0.000582 min_lr: 0.000582 loss: 3.8533 (3.7370) weight_decay: 0.0500 (0.0500) time: 0.5523 data: 0.1139 max mem: 21002 Epoch: [230] [110/312] eta: 0:02:03 lr: 0.000581 min_lr: 0.000581 loss: 3.9597 (3.7505) weight_decay: 0.0500 (0.0500) time: 0.4918 data: 0.0682 max mem: 21002 Epoch: [230] [120/312] eta: 0:01:56 lr: 0.000581 min_lr: 0.000581 loss: 3.9084 (3.7442) weight_decay: 0.0500 (0.0500) time: 0.4247 data: 0.0550 max mem: 21002 Epoch: [230] [130/312] eta: 0:01:51 lr: 0.000580 min_lr: 0.000580 loss: 3.8248 (3.7282) weight_decay: 0.0500 (0.0500) time: 0.6041 data: 0.1408 max mem: 21002 Epoch: [230] [140/312] eta: 0:01:41 lr: 0.000580 min_lr: 0.000580 loss: 3.5231 (3.7165) weight_decay: 0.0500 (0.0500) time: 0.5108 data: 0.1284 max mem: 21002 Epoch: [230] [150/312] eta: 0:01:37 lr: 0.000579 min_lr: 0.000579 loss: 3.9000 (3.7363) weight_decay: 0.0500 (0.0500) time: 0.5583 data: 0.1535 max mem: 21002 Epoch: [230] [160/312] eta: 0:01:30 lr: 0.000579 min_lr: 0.000579 loss: 3.9513 (3.7484) weight_decay: 0.0500 (0.0500) time: 0.5981 data: 0.1337 max mem: 21002 Epoch: [230] [170/312] eta: 0:01:24 lr: 0.000578 min_lr: 0.000578 loss: 3.6435 (3.7167) weight_decay: 0.0500 (0.0500) time: 0.5036 data: 0.0888 max mem: 21002 Epoch: [230] [180/312] eta: 0:01:17 lr: 0.000578 min_lr: 0.000578 loss: 3.2923 (3.7080) weight_decay: 0.0500 (0.0500) time: 0.5446 data: 0.1044 max mem: 21002 Epoch: [230] [190/312] eta: 0:01:09 lr: 0.000577 min_lr: 0.000577 loss: 3.6902 (3.6989) weight_decay: 0.0500 (0.0500) time: 0.4078 data: 0.0388 max mem: 21002 Epoch: [230] [200/312] eta: 0:01:05 lr: 0.000577 min_lr: 0.000577 loss: 3.7631 (3.7063) weight_decay: 0.0500 (0.0500) time: 0.5304 data: 0.0906 max mem: 21002 Epoch: [230] [210/312] eta: 0:00:59 lr: 0.000576 min_lr: 0.000576 loss: 3.8131 (3.7129) weight_decay: 0.0500 (0.0500) time: 0.7087 data: 0.1088 max mem: 21002 Epoch: [230] [220/312] eta: 0:00:53 lr: 0.000575 min_lr: 0.000575 loss: 3.7396 (3.7104) weight_decay: 0.0500 (0.0500) time: 0.5204 data: 0.0733 max mem: 21002 Epoch: [230] [230/312] eta: 0:00:47 lr: 0.000575 min_lr: 0.000575 loss: 3.6010 (3.7055) weight_decay: 0.0500 (0.0500) time: 0.4791 data: 0.1066 max mem: 21002 Epoch: [230] [240/312] eta: 0:00:41 lr: 0.000574 min_lr: 0.000574 loss: 3.4284 (3.7002) weight_decay: 0.0500 (0.0500) time: 0.5062 data: 0.0624 max mem: 21002 Epoch: [230] [250/312] eta: 0:00:35 lr: 0.000574 min_lr: 0.000574 loss: 3.5866 (3.7059) weight_decay: 0.0500 (0.0500) time: 0.5149 data: 0.1107 max mem: 21002 Epoch: [230] [260/312] eta: 0:00:29 lr: 0.000573 min_lr: 0.000573 loss: 3.7604 (3.7053) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.1769 max mem: 21002 Epoch: [230] [270/312] eta: 0:00:23 lr: 0.000573 min_lr: 0.000573 loss: 3.5200 (3.6939) weight_decay: 0.0500 (0.0500) time: 0.4448 data: 0.0723 max mem: 21002 Epoch: [230] [280/312] eta: 0:00:18 lr: 0.000572 min_lr: 0.000572 loss: 3.6457 (3.6980) weight_decay: 0.0500 (0.0500) time: 0.5397 data: 0.0854 max mem: 21002 Epoch: [230] [290/312] eta: 0:00:12 lr: 0.000572 min_lr: 0.000572 loss: 3.8196 (3.6998) weight_decay: 0.0500 (0.0500) time: 0.6202 data: 0.1067 max mem: 21002 Epoch: [230] [300/312] eta: 0:00:06 lr: 0.000571 min_lr: 0.000571 loss: 3.7950 (3.6999) weight_decay: 0.0500 (0.0500) time: 0.3736 data: 0.0297 max mem: 21002 Epoch: [230] [310/312] eta: 0:00:01 lr: 0.000571 min_lr: 0.000571 loss: 3.7950 (3.7028) weight_decay: 0.0500 (0.0500) time: 0.2845 data: 0.0080 max mem: 21002 Epoch: [230] [311/312] eta: 0:00:00 lr: 0.000571 min_lr: 0.000571 loss: 3.7950 (3.7043) weight_decay: 0.0500 (0.0500) time: 0.2844 data: 0.0080 max mem: 21002 Epoch: [230] Total time: 0:02:51 (0.5505 s / it) Averaged stats: lr: 0.000571 min_lr: 0.000571 loss: 3.7950 (3.7020) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.3655 (1.3655) acc1: 75.7812 (75.7812) acc5: 94.0104 (94.0104) time: 8.7133 data: 8.5946 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6867 (1.6067) acc1: 69.4010 (69.2800) acc5: 89.4531 (89.3760) time: 1.0530 data: 0.9550 max mem: 21002 Test: Total time: 0:00:09 (1.0742 s / it) * Acc@1 69.002 Acc@5 89.282 loss 1.613 Accuracy of the model on the 50000 test images: 69.0% Max accuracy: 69.26% Epoch: [231] [ 0/312] eta: 1:01:22 lr: 0.000571 min_lr: 0.000571 loss: 3.6878 (3.6878) weight_decay: 0.0500 (0.0500) time: 11.8038 data: 9.5010 max mem: 21002 Epoch: [231] [ 10/312] eta: 0:08:32 lr: 0.000570 min_lr: 0.000570 loss: 3.9084 (3.6213) weight_decay: 0.0500 (0.0500) time: 1.6981 data: 1.0794 max mem: 21002 Epoch: [231] [ 20/312] eta: 0:04:59 lr: 0.000570 min_lr: 0.000570 loss: 3.9084 (3.6844) weight_decay: 0.0500 (0.0500) time: 0.4879 data: 0.1189 max mem: 21002 Epoch: [231] [ 30/312] eta: 0:03:42 lr: 0.000569 min_lr: 0.000569 loss: 3.9325 (3.7336) weight_decay: 0.0500 (0.0500) time: 0.2886 data: 0.0013 max mem: 21002 Epoch: [231] [ 40/312] eta: 0:03:06 lr: 0.000569 min_lr: 0.000569 loss: 3.9247 (3.7357) weight_decay: 0.0500 (0.0500) time: 0.3299 data: 0.0074 max mem: 21002 Epoch: [231] [ 50/312] eta: 0:03:02 lr: 0.000568 min_lr: 0.000568 loss: 3.8507 (3.7894) weight_decay: 0.0500 (0.0500) time: 0.5504 data: 0.1627 max mem: 21002 Epoch: [231] [ 60/312] eta: 0:02:38 lr: 0.000568 min_lr: 0.000568 loss: 3.8744 (3.7887) weight_decay: 0.0500 (0.0500) time: 0.5126 data: 0.1571 max mem: 21002 Epoch: [231] [ 70/312] eta: 0:02:32 lr: 0.000567 min_lr: 0.000567 loss: 3.8942 (3.7772) weight_decay: 0.0500 (0.0500) time: 0.4661 data: 0.1260 max mem: 21002 Epoch: [231] [ 80/312] eta: 0:02:25 lr: 0.000567 min_lr: 0.000567 loss: 3.8942 (3.7939) weight_decay: 0.0500 (0.0500) time: 0.6136 data: 0.2377 max mem: 21002 Epoch: [231] [ 90/312] eta: 0:02:12 lr: 0.000566 min_lr: 0.000566 loss: 3.8595 (3.7780) weight_decay: 0.0500 (0.0500) time: 0.4677 data: 0.1300 max mem: 21002 Epoch: [231] [100/312] eta: 0:02:07 lr: 0.000566 min_lr: 0.000566 loss: 3.8616 (3.7865) weight_decay: 0.0500 (0.0500) time: 0.4991 data: 0.1709 max mem: 21002 Epoch: [231] [110/312] eta: 0:01:56 lr: 0.000565 min_lr: 0.000565 loss: 3.8912 (3.7859) weight_decay: 0.0500 (0.0500) time: 0.4970 data: 0.1637 max mem: 21002 Epoch: [231] [120/312] eta: 0:01:50 lr: 0.000565 min_lr: 0.000565 loss: 4.0333 (3.8144) weight_decay: 0.0500 (0.0500) time: 0.4586 data: 0.1445 max mem: 21002 Epoch: [231] [130/312] eta: 0:01:46 lr: 0.000564 min_lr: 0.000564 loss: 3.9317 (3.8053) weight_decay: 0.0500 (0.0500) time: 0.6442 data: 0.2915 max mem: 21002 Epoch: [231] [140/312] eta: 0:01:38 lr: 0.000564 min_lr: 0.000564 loss: 3.9069 (3.8188) weight_decay: 0.0500 (0.0500) time: 0.5335 data: 0.1762 max mem: 21002 Epoch: [231] [150/312] eta: 0:01:33 lr: 0.000563 min_lr: 0.000563 loss: 3.7937 (3.8056) weight_decay: 0.0500 (0.0500) time: 0.5185 data: 0.1502 max mem: 21002 Epoch: [231] [160/312] eta: 0:01:27 lr: 0.000563 min_lr: 0.000563 loss: 3.7754 (3.7998) weight_decay: 0.0500 (0.0500) time: 0.6197 data: 0.2659 max mem: 21002 Epoch: [231] [170/312] eta: 0:01:20 lr: 0.000562 min_lr: 0.000562 loss: 3.8057 (3.8063) weight_decay: 0.0500 (0.0500) time: 0.4662 data: 0.1676 max mem: 21002 Epoch: [231] [180/312] eta: 0:01:15 lr: 0.000562 min_lr: 0.000562 loss: 3.9822 (3.8043) weight_decay: 0.0500 (0.0500) time: 0.5225 data: 0.1371 max mem: 21002 Epoch: [231] [190/312] eta: 0:01:08 lr: 0.000561 min_lr: 0.000561 loss: 3.9688 (3.8001) weight_decay: 0.0500 (0.0500) time: 0.5397 data: 0.1399 max mem: 21002 Epoch: [231] [200/312] eta: 0:01:02 lr: 0.000561 min_lr: 0.000561 loss: 3.9233 (3.7995) weight_decay: 0.0500 (0.0500) time: 0.4789 data: 0.1271 max mem: 21002 Epoch: [231] [210/312] eta: 0:00:57 lr: 0.000560 min_lr: 0.000560 loss: 3.7342 (3.7915) weight_decay: 0.0500 (0.0500) time: 0.6000 data: 0.1797 max mem: 21002 Epoch: [231] [220/312] eta: 0:00:51 lr: 0.000560 min_lr: 0.000560 loss: 3.7995 (3.7878) weight_decay: 0.0500 (0.0500) time: 0.5003 data: 0.0990 max mem: 21002 Epoch: [231] [230/312] eta: 0:00:46 lr: 0.000559 min_lr: 0.000559 loss: 3.8352 (3.7874) weight_decay: 0.0500 (0.0500) time: 0.5176 data: 0.0631 max mem: 21002 Epoch: [231] [240/312] eta: 0:00:40 lr: 0.000559 min_lr: 0.000559 loss: 3.8165 (3.7849) weight_decay: 0.0500 (0.0500) time: 0.6194 data: 0.0883 max mem: 21002 Epoch: [231] [250/312] eta: 0:00:34 lr: 0.000558 min_lr: 0.000558 loss: 3.9786 (3.7889) weight_decay: 0.0500 (0.0500) time: 0.5298 data: 0.1043 max mem: 21002 Epoch: [231] [260/312] eta: 0:00:29 lr: 0.000558 min_lr: 0.000558 loss: 4.0307 (3.7992) weight_decay: 0.0500 (0.0500) time: 0.5490 data: 0.1053 max mem: 21002 Epoch: [231] [270/312] eta: 0:00:23 lr: 0.000557 min_lr: 0.000557 loss: 3.9892 (3.7968) weight_decay: 0.0500 (0.0500) time: 0.5411 data: 0.0800 max mem: 21002 Epoch: [231] [280/312] eta: 0:00:17 lr: 0.000557 min_lr: 0.000557 loss: 3.8255 (3.7927) weight_decay: 0.0500 (0.0500) time: 0.5458 data: 0.0940 max mem: 21002 Epoch: [231] [290/312] eta: 0:00:12 lr: 0.000556 min_lr: 0.000556 loss: 3.8045 (3.7844) weight_decay: 0.0500 (0.0500) time: 0.5555 data: 0.1074 max mem: 21002 Epoch: [231] [300/312] eta: 0:00:06 lr: 0.000556 min_lr: 0.000556 loss: 3.7691 (3.7759) weight_decay: 0.0500 (0.0500) time: 0.4635 data: 0.0714 max mem: 21002 Epoch: [231] [310/312] eta: 0:00:01 lr: 0.000555 min_lr: 0.000555 loss: 3.7691 (3.7737) weight_decay: 0.0500 (0.0500) time: 0.3466 data: 0.0149 max mem: 21002 Epoch: [231] [311/312] eta: 0:00:00 lr: 0.000555 min_lr: 0.000555 loss: 3.8172 (3.7744) weight_decay: 0.0500 (0.0500) time: 0.3093 data: 0.0148 max mem: 21002 Epoch: [231] Total time: 0:02:50 (0.5470 s / it) Averaged stats: lr: 0.000555 min_lr: 0.000555 loss: 3.8172 (3.6936) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.2347 (1.2347) acc1: 77.8646 (77.8646) acc5: 93.6198 (93.6198) time: 8.8280 data: 8.7094 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6704 (1.5536) acc1: 68.4896 (69.9200) acc5: 89.7135 (89.6960) time: 1.0662 data: 0.9678 max mem: 21002 Test: Total time: 0:00:09 (1.0831 s / it) * Acc@1 69.458 Acc@5 89.760 loss 1.552 Accuracy of the model on the 50000 test images: 69.5% Max accuracy: 69.46% Epoch: [232] [ 0/312] eta: 1:02:15 lr: 0.000555 min_lr: 0.000555 loss: 4.0054 (4.0054) weight_decay: 0.0500 (0.0500) time: 11.9730 data: 8.8616 max mem: 21002 Epoch: [232] [ 10/312] eta: 0:08:02 lr: 0.000555 min_lr: 0.000555 loss: 3.8400 (3.6414) weight_decay: 0.0500 (0.0500) time: 1.5979 data: 0.9883 max mem: 21002 Epoch: [232] [ 20/312] eta: 0:05:11 lr: 0.000554 min_lr: 0.000554 loss: 3.7015 (3.5489) weight_decay: 0.0500 (0.0500) time: 0.5217 data: 0.1619 max mem: 21002 Epoch: [232] [ 30/312] eta: 0:03:50 lr: 0.000554 min_lr: 0.000554 loss: 3.2429 (3.4229) weight_decay: 0.0500 (0.0500) time: 0.3881 data: 0.0624 max mem: 21002 Epoch: [232] [ 40/312] eta: 0:03:13 lr: 0.000553 min_lr: 0.000553 loss: 3.3785 (3.4457) weight_decay: 0.0500 (0.0500) time: 0.3400 data: 0.0482 max mem: 21002 Epoch: [232] [ 50/312] eta: 0:03:00 lr: 0.000553 min_lr: 0.000553 loss: 3.7502 (3.4926) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.1891 max mem: 21002 Epoch: [232] [ 60/312] eta: 0:02:39 lr: 0.000552 min_lr: 0.000552 loss: 3.8099 (3.5067) weight_decay: 0.0500 (0.0500) time: 0.4644 data: 0.1422 max mem: 21002 Epoch: [232] [ 70/312] eta: 0:02:37 lr: 0.000552 min_lr: 0.000552 loss: 3.7772 (3.5221) weight_decay: 0.0500 (0.0500) time: 0.5521 data: 0.1793 max mem: 21002 Epoch: [232] [ 80/312] eta: 0:02:27 lr: 0.000551 min_lr: 0.000551 loss: 3.7772 (3.5412) weight_decay: 0.0500 (0.0500) time: 0.6518 data: 0.2810 max mem: 21002 Epoch: [232] [ 90/312] eta: 0:02:15 lr: 0.000551 min_lr: 0.000551 loss: 3.6865 (3.5602) weight_decay: 0.0500 (0.0500) time: 0.4609 data: 0.1359 max mem: 21002 Epoch: [232] [100/312] eta: 0:02:11 lr: 0.000550 min_lr: 0.000550 loss: 3.8601 (3.5604) weight_decay: 0.0500 (0.0500) time: 0.5537 data: 0.1966 max mem: 21002 Epoch: [232] [110/312] eta: 0:01:59 lr: 0.000550 min_lr: 0.000550 loss: 3.2851 (3.5499) weight_decay: 0.0500 (0.0500) time: 0.5056 data: 0.1631 max mem: 21002 Epoch: [232] [120/312] eta: 0:01:54 lr: 0.000549 min_lr: 0.000549 loss: 3.5059 (3.5496) weight_decay: 0.0500 (0.0500) time: 0.4915 data: 0.1315 max mem: 21002 Epoch: [232] [130/312] eta: 0:01:49 lr: 0.000549 min_lr: 0.000549 loss: 3.6853 (3.5776) weight_decay: 0.0500 (0.0500) time: 0.6659 data: 0.2585 max mem: 21002 Epoch: [232] [140/312] eta: 0:01:40 lr: 0.000548 min_lr: 0.000548 loss: 3.8423 (3.5930) weight_decay: 0.0500 (0.0500) time: 0.4773 data: 0.1277 max mem: 21002 Epoch: [232] [150/312] eta: 0:01:35 lr: 0.000548 min_lr: 0.000548 loss: 3.7516 (3.5970) weight_decay: 0.0500 (0.0500) time: 0.5202 data: 0.1368 max mem: 21002 Epoch: [232] [160/312] eta: 0:01:29 lr: 0.000547 min_lr: 0.000547 loss: 3.3974 (3.5781) weight_decay: 0.0500 (0.0500) time: 0.6385 data: 0.2220 max mem: 21002 Epoch: [232] [170/312] eta: 0:01:21 lr: 0.000547 min_lr: 0.000547 loss: 3.6345 (3.5985) weight_decay: 0.0500 (0.0500) time: 0.4636 data: 0.1310 max mem: 21002 Epoch: [232] [180/312] eta: 0:01:16 lr: 0.000546 min_lr: 0.000546 loss: 3.9508 (3.6094) weight_decay: 0.0500 (0.0500) time: 0.4825 data: 0.1787 max mem: 21002 Epoch: [232] [190/312] eta: 0:01:08 lr: 0.000546 min_lr: 0.000546 loss: 3.8619 (3.6139) weight_decay: 0.0500 (0.0500) time: 0.4554 data: 0.1336 max mem: 21002 Epoch: [232] [200/312] eta: 0:01:03 lr: 0.000545 min_lr: 0.000545 loss: 3.7877 (3.6214) weight_decay: 0.0500 (0.0500) time: 0.5003 data: 0.1769 max mem: 21002 Epoch: [232] [210/312] eta: 0:00:58 lr: 0.000545 min_lr: 0.000545 loss: 3.8046 (3.6242) weight_decay: 0.0500 (0.0500) time: 0.6602 data: 0.3480 max mem: 21002 Epoch: [232] [220/312] eta: 0:00:51 lr: 0.000544 min_lr: 0.000544 loss: 3.8046 (3.6271) weight_decay: 0.0500 (0.0500) time: 0.5002 data: 0.1718 max mem: 21002 Epoch: [232] [230/312] eta: 0:00:46 lr: 0.000544 min_lr: 0.000544 loss: 3.7465 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.5348 data: 0.1584 max mem: 21002 Epoch: [232] [240/312] eta: 0:00:40 lr: 0.000543 min_lr: 0.000543 loss: 3.7465 (3.6274) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.2499 max mem: 21002 Epoch: [232] [250/312] eta: 0:00:34 lr: 0.000543 min_lr: 0.000543 loss: 3.9637 (3.6390) weight_decay: 0.0500 (0.0500) time: 0.4966 data: 0.1369 max mem: 21002 Epoch: [232] [260/312] eta: 0:00:29 lr: 0.000542 min_lr: 0.000542 loss: 3.9693 (3.6413) weight_decay: 0.0500 (0.0500) time: 0.5772 data: 0.1778 max mem: 21002 Epoch: [232] [270/312] eta: 0:00:23 lr: 0.000542 min_lr: 0.000542 loss: 3.6191 (3.6329) weight_decay: 0.0500 (0.0500) time: 0.4958 data: 0.1333 max mem: 21002 Epoch: [232] [280/312] eta: 0:00:18 lr: 0.000541 min_lr: 0.000541 loss: 3.6191 (3.6361) weight_decay: 0.0500 (0.0500) time: 0.5277 data: 0.0927 max mem: 21002 Epoch: [232] [290/312] eta: 0:00:12 lr: 0.000541 min_lr: 0.000541 loss: 3.7801 (3.6413) weight_decay: 0.0500 (0.0500) time: 0.5749 data: 0.1462 max mem: 21002 Epoch: [232] [300/312] eta: 0:00:06 lr: 0.000540 min_lr: 0.000540 loss: 3.7801 (3.6450) weight_decay: 0.0500 (0.0500) time: 0.4028 data: 0.0541 max mem: 21002 Epoch: [232] [310/312] eta: 0:00:01 lr: 0.000540 min_lr: 0.000540 loss: 3.5105 (3.6376) weight_decay: 0.0500 (0.0500) time: 0.3142 data: 0.0001 max mem: 21002 Epoch: [232] [311/312] eta: 0:00:00 lr: 0.000540 min_lr: 0.000540 loss: 3.5105 (3.6400) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [232] Total time: 0:02:50 (0.5466 s / it) Averaged stats: lr: 0.000540 min_lr: 0.000540 loss: 3.5105 (3.6791) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 1.2230 (1.2230) acc1: 76.5625 (76.5625) acc5: 93.6198 (93.6198) time: 8.9323 data: 8.8137 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5813 (1.4709) acc1: 68.4896 (69.9840) acc5: 89.1927 (89.4400) time: 1.0775 data: 0.9794 max mem: 21002 Test: Total time: 0:00:09 (1.1079 s / it) * Acc@1 69.570 Acc@5 89.614 loss 1.477 Accuracy of the model on the 50000 test images: 69.6% Max accuracy: 69.57% Epoch: [233] [ 0/312] eta: 1:01:06 lr: 0.000540 min_lr: 0.000540 loss: 3.4559 (3.4559) weight_decay: 0.0500 (0.0500) time: 11.7509 data: 10.5196 max mem: 21002 Epoch: [233] [ 10/312] eta: 0:08:09 lr: 0.000539 min_lr: 0.000539 loss: 3.9633 (3.7126) weight_decay: 0.0500 (0.0500) time: 1.6199 data: 1.0128 max mem: 21002 Epoch: [233] [ 20/312] eta: 0:05:10 lr: 0.000539 min_lr: 0.000539 loss: 3.9633 (3.7062) weight_decay: 0.0500 (0.0500) time: 0.5307 data: 0.1160 max mem: 21002 Epoch: [233] [ 30/312] eta: 0:03:50 lr: 0.000538 min_lr: 0.000538 loss: 3.9973 (3.7937) weight_decay: 0.0500 (0.0500) time: 0.3765 data: 0.0856 max mem: 21002 Epoch: [233] [ 40/312] eta: 0:03:20 lr: 0.000538 min_lr: 0.000538 loss: 3.9705 (3.7565) weight_decay: 0.0500 (0.0500) time: 0.3917 data: 0.0985 max mem: 21002 Epoch: [233] [ 50/312] eta: 0:03:06 lr: 0.000537 min_lr: 0.000537 loss: 3.6824 (3.7243) weight_decay: 0.0500 (0.0500) time: 0.5452 data: 0.2576 max mem: 21002 Epoch: [233] [ 60/312] eta: 0:02:41 lr: 0.000537 min_lr: 0.000537 loss: 3.7488 (3.7208) weight_decay: 0.0500 (0.0500) time: 0.4468 data: 0.1600 max mem: 21002 Epoch: [233] [ 70/312] eta: 0:02:36 lr: 0.000536 min_lr: 0.000536 loss: 3.7515 (3.7003) weight_decay: 0.0500 (0.0500) time: 0.4833 data: 0.1929 max mem: 21002 Epoch: [233] [ 80/312] eta: 0:02:30 lr: 0.000536 min_lr: 0.000536 loss: 3.5905 (3.6831) weight_decay: 0.0500 (0.0500) time: 0.6666 data: 0.3782 max mem: 21002 Epoch: [233] [ 90/312] eta: 0:02:15 lr: 0.000535 min_lr: 0.000535 loss: 3.5777 (3.6455) weight_decay: 0.0500 (0.0500) time: 0.4742 data: 0.1859 max mem: 21002 Epoch: [233] [100/312] eta: 0:02:10 lr: 0.000535 min_lr: 0.000535 loss: 3.7167 (3.6616) weight_decay: 0.0500 (0.0500) time: 0.4833 data: 0.1932 max mem: 21002 Epoch: [233] [110/312] eta: 0:01:58 lr: 0.000534 min_lr: 0.000534 loss: 3.8558 (3.6567) weight_decay: 0.0500 (0.0500) time: 0.4798 data: 0.1933 max mem: 21002 Epoch: [233] [120/312] eta: 0:01:53 lr: 0.000534 min_lr: 0.000534 loss: 3.8972 (3.6810) weight_decay: 0.0500 (0.0500) time: 0.4790 data: 0.1930 max mem: 21002 Epoch: [233] [130/312] eta: 0:01:49 lr: 0.000533 min_lr: 0.000533 loss: 3.9283 (3.6614) weight_decay: 0.0500 (0.0500) time: 0.6929 data: 0.3976 max mem: 21002 Epoch: [233] [140/312] eta: 0:01:39 lr: 0.000533 min_lr: 0.000533 loss: 3.8927 (3.6849) weight_decay: 0.0500 (0.0500) time: 0.5009 data: 0.2052 max mem: 21002 Epoch: [233] [150/312] eta: 0:01:35 lr: 0.000533 min_lr: 0.000533 loss: 3.8927 (3.6854) weight_decay: 0.0500 (0.0500) time: 0.4982 data: 0.1865 max mem: 21002 Epoch: [233] [160/312] eta: 0:01:29 lr: 0.000532 min_lr: 0.000532 loss: 3.6082 (3.6722) weight_decay: 0.0500 (0.0500) time: 0.6778 data: 0.3598 max mem: 21002 Epoch: [233] [170/312] eta: 0:01:21 lr: 0.000532 min_lr: 0.000532 loss: 3.6969 (3.6859) weight_decay: 0.0500 (0.0500) time: 0.4669 data: 0.1740 max mem: 21002 Epoch: [233] [180/312] eta: 0:01:16 lr: 0.000531 min_lr: 0.000531 loss: 3.8684 (3.6959) weight_decay: 0.0500 (0.0500) time: 0.5015 data: 0.1999 max mem: 21002 Epoch: [233] [190/312] eta: 0:01:09 lr: 0.000531 min_lr: 0.000531 loss: 3.8273 (3.6978) weight_decay: 0.0500 (0.0500) time: 0.5029 data: 0.2003 max mem: 21002 Epoch: [233] [200/312] eta: 0:01:04 lr: 0.000530 min_lr: 0.000530 loss: 3.7291 (3.6993) weight_decay: 0.0500 (0.0500) time: 0.4871 data: 0.1811 max mem: 21002 Epoch: [233] [210/312] eta: 0:00:58 lr: 0.000530 min_lr: 0.000530 loss: 3.8555 (3.7067) weight_decay: 0.0500 (0.0500) time: 0.6822 data: 0.3737 max mem: 21002 Epoch: [233] [220/312] eta: 0:00:51 lr: 0.000529 min_lr: 0.000529 loss: 3.8555 (3.7001) weight_decay: 0.0500 (0.0500) time: 0.4828 data: 0.1946 max mem: 21002 Epoch: [233] [230/312] eta: 0:00:46 lr: 0.000529 min_lr: 0.000529 loss: 3.8118 (3.6990) weight_decay: 0.0500 (0.0500) time: 0.5056 data: 0.2042 max mem: 21002 Epoch: [233] [240/312] eta: 0:00:41 lr: 0.000528 min_lr: 0.000528 loss: 3.8118 (3.6964) weight_decay: 0.0500 (0.0500) time: 0.7036 data: 0.3978 max mem: 21002 Epoch: [233] [250/312] eta: 0:00:35 lr: 0.000528 min_lr: 0.000528 loss: 3.8587 (3.7011) weight_decay: 0.0500 (0.0500) time: 0.5030 data: 0.1950 max mem: 21002 Epoch: [233] [260/312] eta: 0:00:29 lr: 0.000527 min_lr: 0.000527 loss: 3.8587 (3.7006) weight_decay: 0.0500 (0.0500) time: 0.4863 data: 0.1795 max mem: 21002 Epoch: [233] [270/312] eta: 0:00:23 lr: 0.000527 min_lr: 0.000527 loss: 3.8304 (3.7026) weight_decay: 0.0500 (0.0500) time: 0.5070 data: 0.1796 max mem: 21002 Epoch: [233] [280/312] eta: 0:00:18 lr: 0.000526 min_lr: 0.000526 loss: 3.8304 (3.7015) weight_decay: 0.0500 (0.0500) time: 0.4867 data: 0.1621 max mem: 21002 Epoch: [233] [290/312] eta: 0:00:12 lr: 0.000526 min_lr: 0.000526 loss: 3.6125 (3.7007) weight_decay: 0.0500 (0.0500) time: 0.5966 data: 0.2704 max mem: 21002 Epoch: [233] [300/312] eta: 0:00:06 lr: 0.000525 min_lr: 0.000525 loss: 3.9008 (3.7073) weight_decay: 0.0500 (0.0500) time: 0.4363 data: 0.1087 max mem: 21002 Epoch: [233] [310/312] eta: 0:00:01 lr: 0.000525 min_lr: 0.000525 loss: 3.9692 (3.7063) weight_decay: 0.0500 (0.0500) time: 0.2878 data: 0.0065 max mem: 21002 Epoch: [233] [311/312] eta: 0:00:00 lr: 0.000525 min_lr: 0.000525 loss: 3.9008 (3.7054) weight_decay: 0.0500 (0.0500) time: 0.2877 data: 0.0065 max mem: 21002 Epoch: [233] Total time: 0:02:51 (0.5489 s / it) Averaged stats: lr: 0.000525 min_lr: 0.000525 loss: 3.9008 (3.6981) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:14 loss: 1.1907 (1.1907) acc1: 77.9948 (77.9948) acc5: 93.3594 (93.3594) time: 8.3196 data: 8.2015 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5492 (1.4786) acc1: 69.2708 (69.8240) acc5: 89.4531 (89.7440) time: 1.0077 data: 0.9114 max mem: 21002 Test: Total time: 0:00:09 (1.0199 s / it) * Acc@1 69.872 Acc@5 89.726 loss 1.480 Accuracy of the model on the 50000 test images: 69.9% Max accuracy: 69.87% Epoch: [234] [ 0/312] eta: 0:59:06 lr: 0.000525 min_lr: 0.000525 loss: 4.1573 (4.1573) weight_decay: 0.0500 (0.0500) time: 11.3659 data: 8.6691 max mem: 21002 Epoch: [234] [ 10/312] eta: 0:07:54 lr: 0.000524 min_lr: 0.000524 loss: 3.8661 (3.8917) weight_decay: 0.0500 (0.0500) time: 1.5711 data: 0.9958 max mem: 21002 Epoch: [234] [ 20/312] eta: 0:05:17 lr: 0.000524 min_lr: 0.000524 loss: 3.8104 (3.6277) weight_decay: 0.0500 (0.0500) time: 0.5726 data: 0.1649 max mem: 21002 Epoch: [234] [ 30/312] eta: 0:03:54 lr: 0.000523 min_lr: 0.000523 loss: 3.6748 (3.6522) weight_decay: 0.0500 (0.0500) time: 0.4265 data: 0.0511 max mem: 21002 Epoch: [234] [ 40/312] eta: 0:03:30 lr: 0.000523 min_lr: 0.000523 loss: 3.7312 (3.5968) weight_decay: 0.0500 (0.0500) time: 0.4421 data: 0.0109 max mem: 21002 Epoch: [234] [ 50/312] eta: 0:03:15 lr: 0.000522 min_lr: 0.000522 loss: 3.5590 (3.5982) weight_decay: 0.0500 (0.0500) time: 0.6163 data: 0.0362 max mem: 21002 Epoch: [234] [ 60/312] eta: 0:02:51 lr: 0.000522 min_lr: 0.000522 loss: 3.4892 (3.5871) weight_decay: 0.0500 (0.0500) time: 0.4885 data: 0.0260 max mem: 21002 Epoch: [234] [ 70/312] eta: 0:02:42 lr: 0.000521 min_lr: 0.000521 loss: 3.9514 (3.6526) weight_decay: 0.0500 (0.0500) time: 0.4833 data: 0.0264 max mem: 21002 Epoch: [234] [ 80/312] eta: 0:02:33 lr: 0.000521 min_lr: 0.000521 loss: 3.9752 (3.6704) weight_decay: 0.0500 (0.0500) time: 0.6014 data: 0.0449 max mem: 21002 Epoch: [234] [ 90/312] eta: 0:02:20 lr: 0.000520 min_lr: 0.000520 loss: 3.8545 (3.6945) weight_decay: 0.0500 (0.0500) time: 0.4828 data: 0.0193 max mem: 21002 Epoch: [234] [100/312] eta: 0:02:15 lr: 0.000520 min_lr: 0.000520 loss: 3.8104 (3.6916) weight_decay: 0.0500 (0.0500) time: 0.5483 data: 0.0458 max mem: 21002 Epoch: [234] [110/312] eta: 0:02:02 lr: 0.000519 min_lr: 0.000519 loss: 3.5067 (3.6621) weight_decay: 0.0500 (0.0500) time: 0.4936 data: 0.0457 max mem: 21002 Epoch: [234] [120/312] eta: 0:01:56 lr: 0.000519 min_lr: 0.000519 loss: 3.3832 (3.6611) weight_decay: 0.0500 (0.0500) time: 0.4494 data: 0.0592 max mem: 21002 Epoch: [234] [130/312] eta: 0:01:50 lr: 0.000518 min_lr: 0.000518 loss: 3.7493 (3.6759) weight_decay: 0.0500 (0.0500) time: 0.6103 data: 0.1597 max mem: 21002 Epoch: [234] [140/312] eta: 0:01:41 lr: 0.000518 min_lr: 0.000518 loss: 3.7493 (3.6626) weight_decay: 0.0500 (0.0500) time: 0.4741 data: 0.1011 max mem: 21002 Epoch: [234] [150/312] eta: 0:01:36 lr: 0.000517 min_lr: 0.000517 loss: 3.4078 (3.6623) weight_decay: 0.0500 (0.0500) time: 0.5225 data: 0.0943 max mem: 21002 Epoch: [234] [160/312] eta: 0:01:30 lr: 0.000517 min_lr: 0.000517 loss: 3.8192 (3.6704) weight_decay: 0.0500 (0.0500) time: 0.6444 data: 0.1540 max mem: 21002 Epoch: [234] [170/312] eta: 0:01:23 lr: 0.000516 min_lr: 0.000516 loss: 3.8192 (3.6588) weight_decay: 0.0500 (0.0500) time: 0.5040 data: 0.0665 max mem: 21002 Epoch: [234] [180/312] eta: 0:01:18 lr: 0.000516 min_lr: 0.000516 loss: 3.5389 (3.6520) weight_decay: 0.0500 (0.0500) time: 0.5621 data: 0.0809 max mem: 21002 Epoch: [234] [190/312] eta: 0:01:10 lr: 0.000515 min_lr: 0.000515 loss: 3.5389 (3.6437) weight_decay: 0.0500 (0.0500) time: 0.4929 data: 0.0747 max mem: 21002 Epoch: [234] [200/312] eta: 0:01:05 lr: 0.000515 min_lr: 0.000515 loss: 3.6351 (3.6550) weight_decay: 0.0500 (0.0500) time: 0.4989 data: 0.0823 max mem: 21002 Epoch: [234] [210/312] eta: 0:00:59 lr: 0.000514 min_lr: 0.000514 loss: 3.9881 (3.6689) weight_decay: 0.0500 (0.0500) time: 0.6841 data: 0.1526 max mem: 21002 Epoch: [234] [220/312] eta: 0:00:52 lr: 0.000514 min_lr: 0.000514 loss: 3.8968 (3.6637) weight_decay: 0.0500 (0.0500) time: 0.4999 data: 0.0716 max mem: 21002 Epoch: [234] [230/312] eta: 0:00:47 lr: 0.000514 min_lr: 0.000514 loss: 3.7986 (3.6688) weight_decay: 0.0500 (0.0500) time: 0.5195 data: 0.0737 max mem: 21002 Epoch: [234] [240/312] eta: 0:00:41 lr: 0.000513 min_lr: 0.000513 loss: 3.7986 (3.6747) weight_decay: 0.0500 (0.0500) time: 0.5831 data: 0.0739 max mem: 21002 Epoch: [234] [250/312] eta: 0:00:35 lr: 0.000513 min_lr: 0.000513 loss: 3.7508 (3.6709) weight_decay: 0.0500 (0.0500) time: 0.4594 data: 0.0427 max mem: 21002 Epoch: [234] [260/312] eta: 0:00:29 lr: 0.000512 min_lr: 0.000512 loss: 3.8110 (3.6764) weight_decay: 0.0500 (0.0500) time: 0.5835 data: 0.0727 max mem: 21002 Epoch: [234] [270/312] eta: 0:00:23 lr: 0.000512 min_lr: 0.000512 loss: 3.7862 (3.6739) weight_decay: 0.0500 (0.0500) time: 0.5049 data: 0.0315 max mem: 21002 Epoch: [234] [280/312] eta: 0:00:18 lr: 0.000511 min_lr: 0.000511 loss: 3.8567 (3.6848) weight_decay: 0.0500 (0.0500) time: 0.4992 data: 0.0360 max mem: 21002 Epoch: [234] [290/312] eta: 0:00:12 lr: 0.000511 min_lr: 0.000511 loss: 3.9874 (3.6919) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0357 max mem: 21002 Epoch: [234] [300/312] eta: 0:00:06 lr: 0.000510 min_lr: 0.000510 loss: 3.9042 (3.6836) weight_decay: 0.0500 (0.0500) time: 0.3905 data: 0.0002 max mem: 21002 Epoch: [234] [310/312] eta: 0:00:01 lr: 0.000510 min_lr: 0.000510 loss: 3.3474 (3.6768) weight_decay: 0.0500 (0.0500) time: 0.2978 data: 0.0001 max mem: 21002 Epoch: [234] [311/312] eta: 0:00:00 lr: 0.000510 min_lr: 0.000510 loss: 3.4600 (3.6777) weight_decay: 0.0500 (0.0500) time: 0.2839 data: 0.0001 max mem: 21002 Epoch: [234] Total time: 0:02:52 (0.5525 s / it) Averaged stats: lr: 0.000510 min_lr: 0.000510 loss: 3.4600 (3.6872) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.2347 (1.2347) acc1: 76.1719 (76.1719) acc5: 93.0990 (93.0990) time: 8.4538 data: 8.3354 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5250 (1.4604) acc1: 69.0104 (69.4240) acc5: 89.4531 (89.5520) time: 1.0236 data: 0.9262 max mem: 21002 Test: Total time: 0:00:09 (1.0334 s / it) * Acc@1 69.228 Acc@5 89.618 loss 1.465 Accuracy of the model on the 50000 test images: 69.2% Max accuracy: 69.87% Epoch: [235] [ 0/312] eta: 1:08:13 lr: 0.000510 min_lr: 0.000510 loss: 3.5407 (3.5407) weight_decay: 0.0500 (0.0500) time: 13.1213 data: 10.8381 max mem: 21002 Epoch: [235] [ 10/312] eta: 0:09:25 lr: 0.000509 min_lr: 0.000509 loss: 3.8565 (3.6839) weight_decay: 0.0500 (0.0500) time: 1.8725 data: 0.9932 max mem: 21002 Epoch: [235] [ 20/312] eta: 0:05:35 lr: 0.000509 min_lr: 0.000509 loss: 3.8990 (3.7927) weight_decay: 0.0500 (0.0500) time: 0.5514 data: 0.0408 max mem: 21002 Epoch: [235] [ 30/312] eta: 0:04:05 lr: 0.000508 min_lr: 0.000508 loss: 4.0882 (3.8345) weight_decay: 0.0500 (0.0500) time: 0.3218 data: 0.0370 max mem: 21002 Epoch: [235] [ 40/312] eta: 0:03:19 lr: 0.000508 min_lr: 0.000508 loss: 3.6856 (3.7893) weight_decay: 0.0500 (0.0500) time: 0.2938 data: 0.0009 max mem: 21002 Epoch: [235] [ 50/312] eta: 0:02:53 lr: 0.000507 min_lr: 0.000507 loss: 3.3520 (3.7089) weight_decay: 0.0500 (0.0500) time: 0.3362 data: 0.0011 max mem: 21002 Epoch: [235] [ 60/312] eta: 0:02:42 lr: 0.000507 min_lr: 0.000507 loss: 3.6570 (3.7333) weight_decay: 0.0500 (0.0500) time: 0.4714 data: 0.0809 max mem: 21002 Epoch: [235] [ 70/312] eta: 0:02:34 lr: 0.000506 min_lr: 0.000506 loss: 3.8602 (3.7273) weight_decay: 0.0500 (0.0500) time: 0.5747 data: 0.1272 max mem: 21002 Epoch: [235] [ 80/312] eta: 0:02:20 lr: 0.000506 min_lr: 0.000506 loss: 3.8073 (3.7207) weight_decay: 0.0500 (0.0500) time: 0.4799 data: 0.0751 max mem: 21002 Epoch: [235] [ 90/312] eta: 0:02:15 lr: 0.000505 min_lr: 0.000505 loss: 3.4599 (3.6910) weight_decay: 0.0500 (0.0500) time: 0.5228 data: 0.0984 max mem: 21002 Epoch: [235] [100/312] eta: 0:02:09 lr: 0.000505 min_lr: 0.000505 loss: 3.8647 (3.6958) weight_decay: 0.0500 (0.0500) time: 0.6286 data: 0.0918 max mem: 21002 Epoch: [235] [110/312] eta: 0:02:00 lr: 0.000504 min_lr: 0.000504 loss: 3.8730 (3.7012) weight_decay: 0.0500 (0.0500) time: 0.5310 data: 0.0882 max mem: 21002 Epoch: [235] [120/312] eta: 0:01:56 lr: 0.000504 min_lr: 0.000504 loss: 3.8446 (3.7100) weight_decay: 0.0500 (0.0500) time: 0.5959 data: 0.1617 max mem: 21002 Epoch: [235] [130/312] eta: 0:01:46 lr: 0.000503 min_lr: 0.000503 loss: 3.6292 (3.6900) weight_decay: 0.0500 (0.0500) time: 0.5083 data: 0.0954 max mem: 21002 Epoch: [235] [140/312] eta: 0:01:41 lr: 0.000503 min_lr: 0.000503 loss: 3.7259 (3.6902) weight_decay: 0.0500 (0.0500) time: 0.4731 data: 0.0965 max mem: 21002 Epoch: [235] [150/312] eta: 0:01:35 lr: 0.000502 min_lr: 0.000502 loss: 3.8506 (3.7018) weight_decay: 0.0500 (0.0500) time: 0.6083 data: 0.1217 max mem: 21002 Epoch: [235] [160/312] eta: 0:01:27 lr: 0.000502 min_lr: 0.000502 loss: 3.8584 (3.7061) weight_decay: 0.0500 (0.0500) time: 0.5024 data: 0.1023 max mem: 21002 Epoch: [235] [170/312] eta: 0:01:22 lr: 0.000501 min_lr: 0.000501 loss: 3.8584 (3.6998) weight_decay: 0.0500 (0.0500) time: 0.5551 data: 0.1777 max mem: 21002 Epoch: [235] [180/312] eta: 0:01:16 lr: 0.000501 min_lr: 0.000501 loss: 3.6679 (3.6911) weight_decay: 0.0500 (0.0500) time: 0.6257 data: 0.1454 max mem: 21002 Epoch: [235] [190/312] eta: 0:01:10 lr: 0.000501 min_lr: 0.000501 loss: 3.5773 (3.6872) weight_decay: 0.0500 (0.0500) time: 0.5039 data: 0.1169 max mem: 21002 Epoch: [235] [200/312] eta: 0:01:05 lr: 0.000500 min_lr: 0.000500 loss: 3.8780 (3.6965) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.1758 max mem: 21002 Epoch: [235] [210/312] eta: 0:00:58 lr: 0.000500 min_lr: 0.000500 loss: 3.7812 (3.6928) weight_decay: 0.0500 (0.0500) time: 0.5179 data: 0.1043 max mem: 21002 Epoch: [235] [220/312] eta: 0:00:52 lr: 0.000499 min_lr: 0.000499 loss: 3.7020 (3.6983) weight_decay: 0.0500 (0.0500) time: 0.4956 data: 0.0859 max mem: 21002 Epoch: [235] [230/312] eta: 0:00:47 lr: 0.000499 min_lr: 0.000499 loss: 3.6767 (3.7044) weight_decay: 0.0500 (0.0500) time: 0.6251 data: 0.0854 max mem: 21002 Epoch: [235] [240/312] eta: 0:00:41 lr: 0.000498 min_lr: 0.000498 loss: 3.8416 (3.7104) weight_decay: 0.0500 (0.0500) time: 0.5256 data: 0.1089 max mem: 21002 Epoch: [235] [250/312] eta: 0:00:35 lr: 0.000498 min_lr: 0.000498 loss: 3.8416 (3.7065) weight_decay: 0.0500 (0.0500) time: 0.5952 data: 0.2113 max mem: 21002 Epoch: [235] [260/312] eta: 0:00:29 lr: 0.000497 min_lr: 0.000497 loss: 3.8838 (3.7141) weight_decay: 0.0500 (0.0500) time: 0.5400 data: 0.1031 max mem: 21002 Epoch: [235] [270/312] eta: 0:00:23 lr: 0.000497 min_lr: 0.000497 loss: 3.8838 (3.7174) weight_decay: 0.0500 (0.0500) time: 0.4981 data: 0.0989 max mem: 21002 Epoch: [235] [280/312] eta: 0:00:18 lr: 0.000496 min_lr: 0.000496 loss: 3.8702 (3.7251) weight_decay: 0.0500 (0.0500) time: 0.6439 data: 0.2038 max mem: 21002 Epoch: [235] [290/312] eta: 0:00:12 lr: 0.000496 min_lr: 0.000496 loss: 3.8922 (3.7251) weight_decay: 0.0500 (0.0500) time: 0.4837 data: 0.1054 max mem: 21002 Epoch: [235] [300/312] eta: 0:00:06 lr: 0.000495 min_lr: 0.000495 loss: 3.8922 (3.7263) weight_decay: 0.0500 (0.0500) time: 0.4026 data: 0.0514 max mem: 21002 Epoch: [235] [310/312] eta: 0:00:01 lr: 0.000495 min_lr: 0.000495 loss: 3.9323 (3.7248) weight_decay: 0.0500 (0.0500) time: 0.3981 data: 0.0513 max mem: 21002 Epoch: [235] [311/312] eta: 0:00:00 lr: 0.000495 min_lr: 0.000495 loss: 3.5339 (3.7207) weight_decay: 0.0500 (0.0500) time: 0.3692 data: 0.0513 max mem: 21002 Epoch: [235] Total time: 0:02:53 (0.5565 s / it) Averaged stats: lr: 0.000495 min_lr: 0.000495 loss: 3.5339 (3.6934) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.1387 (1.1387) acc1: 77.7344 (77.7344) acc5: 93.4896 (93.4896) time: 8.7013 data: 8.5825 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4772 (1.4169) acc1: 69.5312 (68.9760) acc5: 90.1042 (89.6160) time: 1.0791 data: 0.9818 max mem: 21002 Test: Total time: 0:00:09 (1.0894 s / it) * Acc@1 69.022 Acc@5 89.470 loss 1.428 Accuracy of the model on the 50000 test images: 69.0% Max accuracy: 69.87% Epoch: [236] [ 0/312] eta: 1:02:29 lr: 0.000495 min_lr: 0.000495 loss: 4.0137 (4.0137) weight_decay: 0.0500 (0.0500) time: 12.0164 data: 11.1782 max mem: 21002 Epoch: [236] [ 10/312] eta: 0:07:30 lr: 0.000494 min_lr: 0.000494 loss: 3.5156 (3.5803) weight_decay: 0.0500 (0.0500) time: 1.4913 data: 1.0639 max mem: 21002 Epoch: [236] [ 20/312] eta: 0:05:12 lr: 0.000494 min_lr: 0.000494 loss: 3.2590 (3.3710) weight_decay: 0.0500 (0.0500) time: 0.5234 data: 0.1428 max mem: 21002 Epoch: [236] [ 30/312] eta: 0:03:50 lr: 0.000493 min_lr: 0.000493 loss: 3.4744 (3.4930) weight_decay: 0.0500 (0.0500) time: 0.4485 data: 0.1169 max mem: 21002 Epoch: [236] [ 40/312] eta: 0:03:27 lr: 0.000493 min_lr: 0.000493 loss: 3.6846 (3.4869) weight_decay: 0.0500 (0.0500) time: 0.4369 data: 0.1027 max mem: 21002 Epoch: [236] [ 50/312] eta: 0:03:14 lr: 0.000492 min_lr: 0.000492 loss: 3.6272 (3.5283) weight_decay: 0.0500 (0.0500) time: 0.6201 data: 0.2145 max mem: 21002 Epoch: [236] [ 60/312] eta: 0:02:47 lr: 0.000492 min_lr: 0.000492 loss: 3.9257 (3.5373) weight_decay: 0.0500 (0.0500) time: 0.4717 data: 0.1125 max mem: 21002 Epoch: [236] [ 70/312] eta: 0:02:41 lr: 0.000491 min_lr: 0.000491 loss: 3.8635 (3.5331) weight_decay: 0.0500 (0.0500) time: 0.4746 data: 0.0957 max mem: 21002 Epoch: [236] [ 80/312] eta: 0:02:34 lr: 0.000491 min_lr: 0.000491 loss: 3.4227 (3.5230) weight_decay: 0.0500 (0.0500) time: 0.6577 data: 0.2207 max mem: 21002 Epoch: [236] [ 90/312] eta: 0:02:18 lr: 0.000490 min_lr: 0.000490 loss: 3.7801 (3.5644) weight_decay: 0.0500 (0.0500) time: 0.4749 data: 0.1257 max mem: 21002 Epoch: [236] [100/312] eta: 0:02:13 lr: 0.000490 min_lr: 0.000490 loss: 3.8727 (3.5585) weight_decay: 0.0500 (0.0500) time: 0.4970 data: 0.1338 max mem: 21002 Epoch: [236] [110/312] eta: 0:02:01 lr: 0.000490 min_lr: 0.000490 loss: 3.7087 (3.5742) weight_decay: 0.0500 (0.0500) time: 0.4944 data: 0.1346 max mem: 21002 Epoch: [236] [120/312] eta: 0:01:56 lr: 0.000489 min_lr: 0.000489 loss: 3.8332 (3.5753) weight_decay: 0.0500 (0.0500) time: 0.4716 data: 0.1215 max mem: 21002 Epoch: [236] [130/312] eta: 0:01:51 lr: 0.000489 min_lr: 0.000489 loss: 3.7880 (3.5825) weight_decay: 0.0500 (0.0500) time: 0.6722 data: 0.2364 max mem: 21002 Epoch: [236] [140/312] eta: 0:01:41 lr: 0.000488 min_lr: 0.000488 loss: 3.7880 (3.5841) weight_decay: 0.0500 (0.0500) time: 0.4896 data: 0.1171 max mem: 21002 Epoch: [236] [150/312] eta: 0:01:35 lr: 0.000488 min_lr: 0.000488 loss: 3.6056 (3.5832) weight_decay: 0.0500 (0.0500) time: 0.4613 data: 0.1048 max mem: 21002 Epoch: [236] [160/312] eta: 0:01:30 lr: 0.000487 min_lr: 0.000487 loss: 3.9413 (3.5929) weight_decay: 0.0500 (0.0500) time: 0.6339 data: 0.2436 max mem: 21002 Epoch: [236] [170/312] eta: 0:01:22 lr: 0.000487 min_lr: 0.000487 loss: 3.9762 (3.6087) weight_decay: 0.0500 (0.0500) time: 0.5173 data: 0.1401 max mem: 21002 Epoch: [236] [180/312] eta: 0:01:17 lr: 0.000486 min_lr: 0.000486 loss: 3.8252 (3.6046) weight_decay: 0.0500 (0.0500) time: 0.5238 data: 0.1032 max mem: 21002 Epoch: [236] [190/312] eta: 0:01:09 lr: 0.000486 min_lr: 0.000486 loss: 3.7174 (3.5981) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.1032 max mem: 21002 Epoch: [236] [200/312] eta: 0:01:04 lr: 0.000485 min_lr: 0.000485 loss: 3.6670 (3.6001) weight_decay: 0.0500 (0.0500) time: 0.4665 data: 0.1129 max mem: 21002 Epoch: [236] [210/312] eta: 0:00:59 lr: 0.000485 min_lr: 0.000485 loss: 3.6670 (3.6021) weight_decay: 0.0500 (0.0500) time: 0.6488 data: 0.2415 max mem: 21002 Epoch: [236] [220/312] eta: 0:00:52 lr: 0.000484 min_lr: 0.000484 loss: 3.6043 (3.6046) weight_decay: 0.0500 (0.0500) time: 0.4889 data: 0.1293 max mem: 21002 Epoch: [236] [230/312] eta: 0:00:46 lr: 0.000484 min_lr: 0.000484 loss: 3.8842 (3.6144) weight_decay: 0.0500 (0.0500) time: 0.4901 data: 0.1022 max mem: 21002 Epoch: [236] [240/312] eta: 0:00:41 lr: 0.000483 min_lr: 0.000483 loss: 3.9006 (3.6255) weight_decay: 0.0500 (0.0500) time: 0.6808 data: 0.2122 max mem: 21002 Epoch: [236] [250/312] eta: 0:00:35 lr: 0.000483 min_lr: 0.000483 loss: 3.7357 (3.6278) weight_decay: 0.0500 (0.0500) time: 0.4795 data: 0.1106 max mem: 21002 Epoch: [236] [260/312] eta: 0:00:29 lr: 0.000482 min_lr: 0.000482 loss: 3.7582 (3.6263) weight_decay: 0.0500 (0.0500) time: 0.4837 data: 0.0995 max mem: 21002 Epoch: [236] [270/312] eta: 0:00:23 lr: 0.000482 min_lr: 0.000482 loss: 3.4497 (3.6148) weight_decay: 0.0500 (0.0500) time: 0.5800 data: 0.1003 max mem: 21002 Epoch: [236] [280/312] eta: 0:00:18 lr: 0.000482 min_lr: 0.000482 loss: 3.4497 (3.6204) weight_decay: 0.0500 (0.0500) time: 0.4883 data: 0.0808 max mem: 21002 Epoch: [236] [290/312] eta: 0:00:12 lr: 0.000481 min_lr: 0.000481 loss: 3.9968 (3.6293) weight_decay: 0.0500 (0.0500) time: 0.5267 data: 0.1254 max mem: 21002 Epoch: [236] [300/312] eta: 0:00:06 lr: 0.000481 min_lr: 0.000481 loss: 3.9198 (3.6338) weight_decay: 0.0500 (0.0500) time: 0.4176 data: 0.0458 max mem: 21002 Epoch: [236] [310/312] eta: 0:00:01 lr: 0.000480 min_lr: 0.000480 loss: 3.8630 (3.6333) weight_decay: 0.0500 (0.0500) time: 0.2835 data: 0.0001 max mem: 21002 Epoch: [236] [311/312] eta: 0:00:00 lr: 0.000480 min_lr: 0.000480 loss: 3.8630 (3.6332) weight_decay: 0.0500 (0.0500) time: 0.2835 data: 0.0001 max mem: 21002 Epoch: [236] Total time: 0:02:50 (0.5480 s / it) Averaged stats: lr: 0.000480 min_lr: 0.000480 loss: 3.8630 (3.6870) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:10 loss: 1.2194 (1.2194) acc1: 75.5208 (75.5208) acc5: 93.7500 (93.7500) time: 7.7846 data: 7.6662 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6184 (1.4793) acc1: 68.4896 (70.0000) acc5: 88.9323 (89.6480) time: 1.0187 data: 0.9224 max mem: 21002 Test: Total time: 0:00:09 (1.0360 s / it) * Acc@1 69.628 Acc@5 89.790 loss 1.482 Accuracy of the model on the 50000 test images: 69.6% Max accuracy: 69.87% Epoch: [237] [ 0/312] eta: 1:03:28 lr: 0.000480 min_lr: 0.000480 loss: 3.5954 (3.5954) weight_decay: 0.0500 (0.0500) time: 12.2063 data: 11.9151 max mem: 21002 Epoch: [237] [ 10/312] eta: 0:07:33 lr: 0.000480 min_lr: 0.000480 loss: 3.7325 (3.6144) weight_decay: 0.0500 (0.0500) time: 1.5033 data: 1.0839 max mem: 21002 Epoch: [237] [ 20/312] eta: 0:05:04 lr: 0.000479 min_lr: 0.000479 loss: 3.7148 (3.5910) weight_decay: 0.0500 (0.0500) time: 0.4853 data: 0.1040 max mem: 21002 Epoch: [237] [ 30/312] eta: 0:03:45 lr: 0.000479 min_lr: 0.000479 loss: 3.3782 (3.5671) weight_decay: 0.0500 (0.0500) time: 0.4109 data: 0.1040 max mem: 21002 Epoch: [237] [ 40/312] eta: 0:03:20 lr: 0.000478 min_lr: 0.000478 loss: 3.6979 (3.6280) weight_decay: 0.0500 (0.0500) time: 0.4168 data: 0.0845 max mem: 21002 Epoch: [237] [ 50/312] eta: 0:03:19 lr: 0.000478 min_lr: 0.000478 loss: 3.6982 (3.5952) weight_decay: 0.0500 (0.0500) time: 0.7011 data: 0.2399 max mem: 21002 Epoch: [237] [ 60/312] eta: 0:02:52 lr: 0.000477 min_lr: 0.000477 loss: 3.7059 (3.6180) weight_decay: 0.0500 (0.0500) time: 0.5698 data: 0.1564 max mem: 21002 Epoch: [237] [ 70/312] eta: 0:02:42 lr: 0.000477 min_lr: 0.000477 loss: 3.6055 (3.6113) weight_decay: 0.0500 (0.0500) time: 0.4510 data: 0.0628 max mem: 21002 Epoch: [237] [ 80/312] eta: 0:02:31 lr: 0.000476 min_lr: 0.000476 loss: 3.5028 (3.5945) weight_decay: 0.0500 (0.0500) time: 0.5648 data: 0.1303 max mem: 21002 Epoch: [237] [ 90/312] eta: 0:02:20 lr: 0.000476 min_lr: 0.000476 loss: 3.7250 (3.6175) weight_decay: 0.0500 (0.0500) time: 0.4827 data: 0.1020 max mem: 21002 Epoch: [237] [100/312] eta: 0:02:12 lr: 0.000475 min_lr: 0.000475 loss: 3.7315 (3.6232) weight_decay: 0.0500 (0.0500) time: 0.5088 data: 0.1401 max mem: 21002 Epoch: [237] [110/312] eta: 0:02:00 lr: 0.000475 min_lr: 0.000475 loss: 3.7032 (3.6218) weight_decay: 0.0500 (0.0500) time: 0.4277 data: 0.1068 max mem: 21002 Epoch: [237] [120/312] eta: 0:01:56 lr: 0.000474 min_lr: 0.000474 loss: 3.7131 (3.6144) weight_decay: 0.0500 (0.0500) time: 0.5162 data: 0.1842 max mem: 21002 Epoch: [237] [130/312] eta: 0:01:51 lr: 0.000474 min_lr: 0.000474 loss: 3.4206 (3.5965) weight_decay: 0.0500 (0.0500) time: 0.7276 data: 0.3346 max mem: 21002 Epoch: [237] [140/312] eta: 0:01:41 lr: 0.000474 min_lr: 0.000474 loss: 3.7550 (3.6036) weight_decay: 0.0500 (0.0500) time: 0.4993 data: 0.1511 max mem: 21002 Epoch: [237] [150/312] eta: 0:01:37 lr: 0.000473 min_lr: 0.000473 loss: 3.8395 (3.6054) weight_decay: 0.0500 (0.0500) time: 0.5322 data: 0.1142 max mem: 21002 Epoch: [237] [160/312] eta: 0:01:30 lr: 0.000473 min_lr: 0.000473 loss: 3.7198 (3.5999) weight_decay: 0.0500 (0.0500) time: 0.6100 data: 0.1199 max mem: 21002 Epoch: [237] [170/312] eta: 0:01:23 lr: 0.000472 min_lr: 0.000472 loss: 3.7582 (3.6109) weight_decay: 0.0500 (0.0500) time: 0.4709 data: 0.0802 max mem: 21002 Epoch: [237] [180/312] eta: 0:01:18 lr: 0.000472 min_lr: 0.000472 loss: 3.7582 (3.6109) weight_decay: 0.0500 (0.0500) time: 0.5696 data: 0.1472 max mem: 21002 Epoch: [237] [190/312] eta: 0:01:10 lr: 0.000471 min_lr: 0.000471 loss: 3.8017 (3.6248) weight_decay: 0.0500 (0.0500) time: 0.4684 data: 0.0733 max mem: 21002 Epoch: [237] [200/312] eta: 0:01:04 lr: 0.000471 min_lr: 0.000471 loss: 3.5994 (3.6188) weight_decay: 0.0500 (0.0500) time: 0.4717 data: 0.0728 max mem: 21002 Epoch: [237] [210/312] eta: 0:00:59 lr: 0.000470 min_lr: 0.000470 loss: 3.5119 (3.6201) weight_decay: 0.0500 (0.0500) time: 0.7021 data: 0.1898 max mem: 21002 Epoch: [237] [220/312] eta: 0:00:52 lr: 0.000470 min_lr: 0.000470 loss: 3.8944 (3.6440) weight_decay: 0.0500 (0.0500) time: 0.5272 data: 0.1177 max mem: 21002 Epoch: [237] [230/312] eta: 0:00:47 lr: 0.000469 min_lr: 0.000469 loss: 3.7047 (3.6330) weight_decay: 0.0500 (0.0500) time: 0.4368 data: 0.0756 max mem: 21002 Epoch: [237] [240/312] eta: 0:00:41 lr: 0.000469 min_lr: 0.000469 loss: 3.5526 (3.6302) weight_decay: 0.0500 (0.0500) time: 0.5309 data: 0.1086 max mem: 21002 Epoch: [237] [250/312] eta: 0:00:35 lr: 0.000468 min_lr: 0.000468 loss: 3.8160 (3.6357) weight_decay: 0.0500 (0.0500) time: 0.5066 data: 0.1111 max mem: 21002 Epoch: [237] [260/312] eta: 0:00:29 lr: 0.000468 min_lr: 0.000468 loss: 3.6440 (3.6370) weight_decay: 0.0500 (0.0500) time: 0.5542 data: 0.1717 max mem: 21002 Epoch: [237] [270/312] eta: 0:00:23 lr: 0.000468 min_lr: 0.000468 loss: 3.5864 (3.6372) weight_decay: 0.0500 (0.0500) time: 0.4383 data: 0.0943 max mem: 21002 Epoch: [237] [280/312] eta: 0:00:18 lr: 0.000467 min_lr: 0.000467 loss: 3.9122 (3.6472) weight_decay: 0.0500 (0.0500) time: 0.4794 data: 0.1406 max mem: 21002 Epoch: [237] [290/312] eta: 0:00:12 lr: 0.000467 min_lr: 0.000467 loss: 3.9830 (3.6558) weight_decay: 0.0500 (0.0500) time: 0.6031 data: 0.2171 max mem: 21002 Epoch: [237] [300/312] eta: 0:00:06 lr: 0.000466 min_lr: 0.000466 loss: 3.9351 (3.6517) weight_decay: 0.0500 (0.0500) time: 0.4109 data: 0.0770 max mem: 21002 Epoch: [237] [310/312] eta: 0:00:01 lr: 0.000466 min_lr: 0.000466 loss: 3.5669 (3.6479) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [237] [311/312] eta: 0:00:00 lr: 0.000466 min_lr: 0.000466 loss: 3.3909 (3.6445) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [237] Total time: 0:02:50 (0.5470 s / it) Averaged stats: lr: 0.000466 min_lr: 0.000466 loss: 3.3909 (3.6701) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:11 loss: 1.0949 (1.0949) acc1: 78.7760 (78.7760) acc5: 94.7917 (94.7917) time: 7.9596 data: 7.8412 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4766 (1.4073) acc1: 70.7031 (70.9920) acc5: 89.3229 (90.0640) time: 1.0061 data: 0.9083 max mem: 21002 Test: Total time: 0:00:09 (1.0161 s / it) * Acc@1 70.618 Acc@5 90.278 loss 1.417 Accuracy of the model on the 50000 test images: 70.6% Max accuracy: 70.62% Epoch: [238] [ 0/312] eta: 1:01:25 lr: 0.000466 min_lr: 0.000466 loss: 3.8881 (3.8881) weight_decay: 0.0500 (0.0500) time: 11.8128 data: 8.5155 max mem: 21002 Epoch: [238] [ 10/312] eta: 0:08:02 lr: 0.000465 min_lr: 0.000465 loss: 3.8752 (3.6229) weight_decay: 0.0500 (0.0500) time: 1.5968 data: 1.0396 max mem: 21002 Epoch: [238] [ 20/312] eta: 0:05:19 lr: 0.000465 min_lr: 0.000465 loss: 3.6480 (3.4993) weight_decay: 0.0500 (0.0500) time: 0.5568 data: 0.2016 max mem: 21002 Epoch: [238] [ 30/312] eta: 0:03:56 lr: 0.000464 min_lr: 0.000464 loss: 3.6370 (3.5521) weight_decay: 0.0500 (0.0500) time: 0.4200 data: 0.0559 max mem: 21002 Epoch: [238] [ 40/312] eta: 0:03:18 lr: 0.000464 min_lr: 0.000464 loss: 3.6370 (3.5866) weight_decay: 0.0500 (0.0500) time: 0.3513 data: 0.0314 max mem: 21002 Epoch: [238] [ 50/312] eta: 0:03:12 lr: 0.000463 min_lr: 0.000463 loss: 3.9650 (3.6627) weight_decay: 0.0500 (0.0500) time: 0.5770 data: 0.1698 max mem: 21002 Epoch: [238] [ 60/312] eta: 0:02:46 lr: 0.000463 min_lr: 0.000463 loss: 3.9650 (3.6300) weight_decay: 0.0500 (0.0500) time: 0.5193 data: 0.1391 max mem: 21002 Epoch: [238] [ 70/312] eta: 0:02:37 lr: 0.000462 min_lr: 0.000462 loss: 3.5338 (3.6360) weight_decay: 0.0500 (0.0500) time: 0.4400 data: 0.0903 max mem: 21002 Epoch: [238] [ 80/312] eta: 0:02:27 lr: 0.000462 min_lr: 0.000462 loss: 3.7994 (3.6362) weight_decay: 0.0500 (0.0500) time: 0.5565 data: 0.1723 max mem: 21002 Epoch: [238] [ 90/312] eta: 0:02:15 lr: 0.000461 min_lr: 0.000461 loss: 3.8123 (3.6389) weight_decay: 0.0500 (0.0500) time: 0.4711 data: 0.1086 max mem: 21002 Epoch: [238] [100/312] eta: 0:02:12 lr: 0.000461 min_lr: 0.000461 loss: 3.8961 (3.6427) weight_decay: 0.0500 (0.0500) time: 0.5799 data: 0.1574 max mem: 21002 Epoch: [238] [110/312] eta: 0:02:00 lr: 0.000461 min_lr: 0.000461 loss: 3.9180 (3.6539) weight_decay: 0.0500 (0.0500) time: 0.5116 data: 0.1315 max mem: 21002 Epoch: [238] [120/312] eta: 0:01:55 lr: 0.000460 min_lr: 0.000460 loss: 3.9360 (3.6469) weight_decay: 0.0500 (0.0500) time: 0.4867 data: 0.1186 max mem: 21002 Epoch: [238] [130/312] eta: 0:01:49 lr: 0.000460 min_lr: 0.000460 loss: 3.8139 (3.6458) weight_decay: 0.0500 (0.0500) time: 0.6425 data: 0.1890 max mem: 21002 Epoch: [238] [140/312] eta: 0:01:40 lr: 0.000459 min_lr: 0.000459 loss: 3.7412 (3.6415) weight_decay: 0.0500 (0.0500) time: 0.4818 data: 0.1109 max mem: 21002 Epoch: [238] [150/312] eta: 0:01:35 lr: 0.000459 min_lr: 0.000459 loss: 3.6195 (3.6477) weight_decay: 0.0500 (0.0500) time: 0.5131 data: 0.1393 max mem: 21002 Epoch: [238] [160/312] eta: 0:01:28 lr: 0.000458 min_lr: 0.000458 loss: 3.7962 (3.6647) weight_decay: 0.0500 (0.0500) time: 0.5721 data: 0.1318 max mem: 21002 Epoch: [238] [170/312] eta: 0:01:21 lr: 0.000458 min_lr: 0.000458 loss: 3.9569 (3.6834) weight_decay: 0.0500 (0.0500) time: 0.4797 data: 0.1132 max mem: 21002 Epoch: [238] [180/312] eta: 0:01:17 lr: 0.000457 min_lr: 0.000457 loss: 3.8840 (3.6829) weight_decay: 0.0500 (0.0500) time: 0.5883 data: 0.1787 max mem: 21002 Epoch: [238] [190/312] eta: 0:01:09 lr: 0.000457 min_lr: 0.000457 loss: 3.6264 (3.6664) weight_decay: 0.0500 (0.0500) time: 0.4963 data: 0.0984 max mem: 21002 Epoch: [238] [200/312] eta: 0:01:04 lr: 0.000456 min_lr: 0.000456 loss: 3.5871 (3.6624) weight_decay: 0.0500 (0.0500) time: 0.4858 data: 0.0826 max mem: 21002 Epoch: [238] [210/312] eta: 0:00:58 lr: 0.000456 min_lr: 0.000456 loss: 3.8203 (3.6729) weight_decay: 0.0500 (0.0500) time: 0.6525 data: 0.1379 max mem: 21002 Epoch: [238] [220/312] eta: 0:00:52 lr: 0.000456 min_lr: 0.000456 loss: 3.8991 (3.6813) weight_decay: 0.0500 (0.0500) time: 0.4959 data: 0.0942 max mem: 21002 Epoch: [238] [230/312] eta: 0:00:46 lr: 0.000455 min_lr: 0.000455 loss: 3.8991 (3.6894) weight_decay: 0.0500 (0.0500) time: 0.4828 data: 0.0976 max mem: 21002 Epoch: [238] [240/312] eta: 0:00:40 lr: 0.000455 min_lr: 0.000455 loss: 3.9879 (3.7048) weight_decay: 0.0500 (0.0500) time: 0.5583 data: 0.0660 max mem: 21002 Epoch: [238] [250/312] eta: 0:00:34 lr: 0.000454 min_lr: 0.000454 loss: 3.8059 (3.6945) weight_decay: 0.0500 (0.0500) time: 0.4836 data: 0.0892 max mem: 21002 Epoch: [238] [260/312] eta: 0:00:29 lr: 0.000454 min_lr: 0.000454 loss: 3.2783 (3.6801) weight_decay: 0.0500 (0.0500) time: 0.5968 data: 0.1730 max mem: 21002 Epoch: [238] [270/312] eta: 0:00:23 lr: 0.000453 min_lr: 0.000453 loss: 3.5312 (3.6805) weight_decay: 0.0500 (0.0500) time: 0.5150 data: 0.0910 max mem: 21002 Epoch: [238] [280/312] eta: 0:00:18 lr: 0.000453 min_lr: 0.000453 loss: 3.7457 (3.6678) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.0878 max mem: 21002 Epoch: [238] [290/312] eta: 0:00:12 lr: 0.000452 min_lr: 0.000452 loss: 3.5846 (3.6715) weight_decay: 0.0500 (0.0500) time: 0.6427 data: 0.1517 max mem: 21002 Epoch: [238] [300/312] eta: 0:00:06 lr: 0.000452 min_lr: 0.000452 loss: 3.9338 (3.6754) weight_decay: 0.0500 (0.0500) time: 0.4401 data: 0.0643 max mem: 21002 Epoch: [238] [310/312] eta: 0:00:01 lr: 0.000451 min_lr: 0.000451 loss: 3.8092 (3.6791) weight_decay: 0.0500 (0.0500) time: 0.2785 data: 0.0001 max mem: 21002 Epoch: [238] [311/312] eta: 0:00:00 lr: 0.000451 min_lr: 0.000451 loss: 3.8092 (3.6815) weight_decay: 0.0500 (0.0500) time: 0.2784 data: 0.0001 max mem: 21002 Epoch: [238] Total time: 0:02:51 (0.5483 s / it) Averaged stats: lr: 0.000451 min_lr: 0.000451 loss: 3.8092 (3.6847) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.2790 (1.2790) acc1: 79.2969 (79.2969) acc5: 94.0104 (94.0104) time: 8.7994 data: 8.6824 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5342 (1.4851) acc1: 69.2708 (70.9600) acc5: 89.4531 (90.4000) time: 1.0669 data: 0.9648 max mem: 21002 Test: Total time: 0:00:09 (1.1000 s / it) * Acc@1 70.642 Acc@5 90.144 loss 1.499 Accuracy of the model on the 50000 test images: 70.6% Max accuracy: 70.64% Epoch: [239] [ 0/312] eta: 0:59:53 lr: 0.000451 min_lr: 0.000451 loss: 4.2469 (4.2469) weight_decay: 0.0500 (0.0500) time: 11.5183 data: 10.5436 max mem: 21002 Epoch: [239] [ 10/312] eta: 0:08:10 lr: 0.000451 min_lr: 0.000451 loss: 3.4868 (3.4234) weight_decay: 0.0500 (0.0500) time: 1.6240 data: 1.0276 max mem: 21002 Epoch: [239] [ 20/312] eta: 0:04:55 lr: 0.000450 min_lr: 0.000450 loss: 3.6476 (3.6415) weight_decay: 0.0500 (0.0500) time: 0.4856 data: 0.0383 max mem: 21002 Epoch: [239] [ 30/312] eta: 0:03:40 lr: 0.000450 min_lr: 0.000450 loss: 3.8633 (3.6414) weight_decay: 0.0500 (0.0500) time: 0.3180 data: 0.0049 max mem: 21002 Epoch: [239] [ 40/312] eta: 0:03:06 lr: 0.000449 min_lr: 0.000449 loss: 3.7707 (3.6340) weight_decay: 0.0500 (0.0500) time: 0.3445 data: 0.0525 max mem: 21002 Epoch: [239] [ 50/312] eta: 0:02:57 lr: 0.000449 min_lr: 0.000449 loss: 3.6091 (3.6356) weight_decay: 0.0500 (0.0500) time: 0.5212 data: 0.2127 max mem: 21002 Epoch: [239] [ 60/312] eta: 0:02:35 lr: 0.000449 min_lr: 0.000449 loss: 3.5478 (3.5923) weight_decay: 0.0500 (0.0500) time: 0.4718 data: 0.1651 max mem: 21002 Epoch: [239] [ 70/312] eta: 0:02:32 lr: 0.000448 min_lr: 0.000448 loss: 3.8460 (3.6355) weight_decay: 0.0500 (0.0500) time: 0.4999 data: 0.1808 max mem: 21002 Epoch: [239] [ 80/312] eta: 0:02:23 lr: 0.000448 min_lr: 0.000448 loss: 3.8460 (3.6197) weight_decay: 0.0500 (0.0500) time: 0.6318 data: 0.2712 max mem: 21002 Epoch: [239] [ 90/312] eta: 0:02:11 lr: 0.000447 min_lr: 0.000447 loss: 3.4965 (3.5999) weight_decay: 0.0500 (0.0500) time: 0.4648 data: 0.1355 max mem: 21002 Epoch: [239] [100/312] eta: 0:02:06 lr: 0.000447 min_lr: 0.000447 loss: 3.7602 (3.6309) weight_decay: 0.0500 (0.0500) time: 0.5079 data: 0.1828 max mem: 21002 Epoch: [239] [110/312] eta: 0:01:55 lr: 0.000446 min_lr: 0.000446 loss: 3.8868 (3.6408) weight_decay: 0.0500 (0.0500) time: 0.4771 data: 0.1539 max mem: 21002 Epoch: [239] [120/312] eta: 0:01:53 lr: 0.000446 min_lr: 0.000446 loss: 3.7419 (3.6308) weight_decay: 0.0500 (0.0500) time: 0.5445 data: 0.1705 max mem: 21002 Epoch: [239] [130/312] eta: 0:01:47 lr: 0.000445 min_lr: 0.000445 loss: 3.7854 (3.6402) weight_decay: 0.0500 (0.0500) time: 0.6816 data: 0.2134 max mem: 21002 Epoch: [239] [140/312] eta: 0:01:38 lr: 0.000445 min_lr: 0.000445 loss: 3.6709 (3.6249) weight_decay: 0.0500 (0.0500) time: 0.4652 data: 0.0873 max mem: 21002 Epoch: [239] [150/312] eta: 0:01:33 lr: 0.000445 min_lr: 0.000445 loss: 3.6455 (3.6338) weight_decay: 0.0500 (0.0500) time: 0.5143 data: 0.1415 max mem: 21002 Epoch: [239] [160/312] eta: 0:01:27 lr: 0.000444 min_lr: 0.000444 loss: 3.8351 (3.6363) weight_decay: 0.0500 (0.0500) time: 0.5951 data: 0.1358 max mem: 21002 Epoch: [239] [170/312] eta: 0:01:20 lr: 0.000444 min_lr: 0.000444 loss: 3.7602 (3.6306) weight_decay: 0.0500 (0.0500) time: 0.4712 data: 0.1031 max mem: 21002 Epoch: [239] [180/312] eta: 0:01:15 lr: 0.000443 min_lr: 0.000443 loss: 3.3608 (3.6185) weight_decay: 0.0500 (0.0500) time: 0.5343 data: 0.1480 max mem: 21002 Epoch: [239] [190/312] eta: 0:01:08 lr: 0.000443 min_lr: 0.000443 loss: 3.3608 (3.6075) weight_decay: 0.0500 (0.0500) time: 0.5035 data: 0.1143 max mem: 21002 Epoch: [239] [200/312] eta: 0:01:03 lr: 0.000442 min_lr: 0.000442 loss: 3.6802 (3.6090) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.1715 max mem: 21002 Epoch: [239] [210/312] eta: 0:00:57 lr: 0.000442 min_lr: 0.000442 loss: 3.6802 (3.6059) weight_decay: 0.0500 (0.0500) time: 0.6250 data: 0.1255 max mem: 21002 Epoch: [239] [220/312] eta: 0:00:51 lr: 0.000441 min_lr: 0.000441 loss: 3.5446 (3.6069) weight_decay: 0.0500 (0.0500) time: 0.4277 data: 0.0545 max mem: 21002 Epoch: [239] [230/312] eta: 0:00:46 lr: 0.000441 min_lr: 0.000441 loss: 3.7586 (3.6076) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.1759 max mem: 21002 Epoch: [239] [240/312] eta: 0:00:40 lr: 0.000440 min_lr: 0.000440 loss: 3.8105 (3.6207) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.1229 max mem: 21002 Epoch: [239] [250/312] eta: 0:00:34 lr: 0.000440 min_lr: 0.000440 loss: 3.8342 (3.6209) weight_decay: 0.0500 (0.0500) time: 0.4552 data: 0.0686 max mem: 21002 Epoch: [239] [260/312] eta: 0:00:29 lr: 0.000440 min_lr: 0.000440 loss: 3.8318 (3.6259) weight_decay: 0.0500 (0.0500) time: 0.5293 data: 0.0924 max mem: 21002 Epoch: [239] [270/312] eta: 0:00:23 lr: 0.000439 min_lr: 0.000439 loss: 3.9188 (3.6350) weight_decay: 0.0500 (0.0500) time: 0.4932 data: 0.0900 max mem: 21002 Epoch: [239] [280/312] eta: 0:00:17 lr: 0.000439 min_lr: 0.000439 loss: 3.6966 (3.6292) weight_decay: 0.0500 (0.0500) time: 0.5551 data: 0.1625 max mem: 21002 Epoch: [239] [290/312] eta: 0:00:12 lr: 0.000438 min_lr: 0.000438 loss: 3.6966 (3.6338) weight_decay: 0.0500 (0.0500) time: 0.5835 data: 0.0972 max mem: 21002 Epoch: [239] [300/312] eta: 0:00:06 lr: 0.000438 min_lr: 0.000438 loss: 3.9145 (3.6387) weight_decay: 0.0500 (0.0500) time: 0.4394 data: 0.0621 max mem: 21002 Epoch: [239] [310/312] eta: 0:00:01 lr: 0.000437 min_lr: 0.000437 loss: 3.8827 (3.6453) weight_decay: 0.0500 (0.0500) time: 0.3382 data: 0.0621 max mem: 21002 Epoch: [239] [311/312] eta: 0:00:00 lr: 0.000437 min_lr: 0.000437 loss: 3.9145 (3.6478) weight_decay: 0.0500 (0.0500) time: 0.3245 data: 0.0478 max mem: 21002 Epoch: [239] Total time: 0:02:49 (0.5442 s / it) Averaged stats: lr: 0.000437 min_lr: 0.000437 loss: 3.9145 (3.6634) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.2604 (1.2604) acc1: 79.6875 (79.6875) acc5: 94.0104 (94.0104) time: 8.4901 data: 8.3720 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.6540 (1.5792) acc1: 68.2292 (69.6640) acc5: 88.2812 (89.2480) time: 1.0370 data: 0.9400 max mem: 21002 Test: Total time: 0:00:09 (1.0473 s / it) * Acc@1 69.182 Acc@5 89.416 loss 1.582 Accuracy of the model on the 50000 test images: 69.2% Max accuracy: 70.64% Epoch: [240] [ 0/312] eta: 0:59:51 lr: 0.000437 min_lr: 0.000437 loss: 4.3861 (4.3861) weight_decay: 0.0500 (0.0500) time: 11.5111 data: 8.9708 max mem: 21002 Epoch: [240] [ 10/312] eta: 0:07:27 lr: 0.000437 min_lr: 0.000437 loss: 3.8459 (3.6848) weight_decay: 0.0500 (0.0500) time: 1.4813 data: 0.9573 max mem: 21002 Epoch: [240] [ 20/312] eta: 0:05:09 lr: 0.000436 min_lr: 0.000436 loss: 3.5274 (3.5959) weight_decay: 0.0500 (0.0500) time: 0.5377 data: 0.0935 max mem: 21002 Epoch: [240] [ 30/312] eta: 0:03:58 lr: 0.000436 min_lr: 0.000436 loss: 3.3835 (3.4945) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.0717 max mem: 21002 Epoch: [240] [ 40/312] eta: 0:03:35 lr: 0.000435 min_lr: 0.000435 loss: 3.6080 (3.5557) weight_decay: 0.0500 (0.0500) time: 0.5131 data: 0.0726 max mem: 21002 Epoch: [240] [ 50/312] eta: 0:03:20 lr: 0.000435 min_lr: 0.000435 loss: 3.7189 (3.5936) weight_decay: 0.0500 (0.0500) time: 0.6350 data: 0.0665 max mem: 21002 Epoch: [240] [ 60/312] eta: 0:02:52 lr: 0.000435 min_lr: 0.000435 loss: 3.7170 (3.5953) weight_decay: 0.0500 (0.0500) time: 0.4670 data: 0.0512 max mem: 21002 Epoch: [240] [ 70/312] eta: 0:02:46 lr: 0.000434 min_lr: 0.000434 loss: 3.4886 (3.5783) weight_decay: 0.0500 (0.0500) time: 0.4956 data: 0.0748 max mem: 21002 Epoch: [240] [ 80/312] eta: 0:02:34 lr: 0.000434 min_lr: 0.000434 loss: 3.4510 (3.5875) weight_decay: 0.0500 (0.0500) time: 0.6035 data: 0.0741 max mem: 21002 Epoch: [240] [ 90/312] eta: 0:02:21 lr: 0.000433 min_lr: 0.000433 loss: 3.8141 (3.6199) weight_decay: 0.0500 (0.0500) time: 0.4483 data: 0.0393 max mem: 21002 Epoch: [240] [100/312] eta: 0:02:15 lr: 0.000433 min_lr: 0.000433 loss: 3.8633 (3.6368) weight_decay: 0.0500 (0.0500) time: 0.5403 data: 0.0981 max mem: 21002 Epoch: [240] [110/312] eta: 0:02:03 lr: 0.000432 min_lr: 0.000432 loss: 3.8599 (3.6289) weight_decay: 0.0500 (0.0500) time: 0.4903 data: 0.0630 max mem: 21002 Epoch: [240] [120/312] eta: 0:01:58 lr: 0.000432 min_lr: 0.000432 loss: 3.4771 (3.6145) weight_decay: 0.0500 (0.0500) time: 0.4830 data: 0.0832 max mem: 21002 Epoch: [240] [130/312] eta: 0:01:51 lr: 0.000431 min_lr: 0.000431 loss: 3.7921 (3.6357) weight_decay: 0.0500 (0.0500) time: 0.6145 data: 0.1446 max mem: 21002 Epoch: [240] [140/312] eta: 0:01:41 lr: 0.000431 min_lr: 0.000431 loss: 3.7665 (3.6371) weight_decay: 0.0500 (0.0500) time: 0.4228 data: 0.0697 max mem: 21002 Epoch: [240] [150/312] eta: 0:01:37 lr: 0.000431 min_lr: 0.000431 loss: 3.7648 (3.6445) weight_decay: 0.0500 (0.0500) time: 0.5293 data: 0.0933 max mem: 21002 Epoch: [240] [160/312] eta: 0:01:30 lr: 0.000430 min_lr: 0.000430 loss: 3.9287 (3.6476) weight_decay: 0.0500 (0.0500) time: 0.6613 data: 0.0923 max mem: 21002 Epoch: [240] [170/312] eta: 0:01:23 lr: 0.000430 min_lr: 0.000430 loss: 3.9229 (3.6576) weight_decay: 0.0500 (0.0500) time: 0.4834 data: 0.0688 max mem: 21002 Epoch: [240] [180/312] eta: 0:01:18 lr: 0.000429 min_lr: 0.000429 loss: 3.8297 (3.6483) weight_decay: 0.0500 (0.0500) time: 0.5550 data: 0.1109 max mem: 21002 Epoch: [240] [190/312] eta: 0:01:10 lr: 0.000429 min_lr: 0.000429 loss: 3.5116 (3.6456) weight_decay: 0.0500 (0.0500) time: 0.5000 data: 0.0501 max mem: 21002 Epoch: [240] [200/312] eta: 0:01:04 lr: 0.000428 min_lr: 0.000428 loss: 3.7797 (3.6451) weight_decay: 0.0500 (0.0500) time: 0.4699 data: 0.0685 max mem: 21002 Epoch: [240] [210/312] eta: 0:00:59 lr: 0.000428 min_lr: 0.000428 loss: 3.7949 (3.6459) weight_decay: 0.0500 (0.0500) time: 0.6543 data: 0.1349 max mem: 21002 Epoch: [240] [220/312] eta: 0:00:52 lr: 0.000427 min_lr: 0.000427 loss: 3.4958 (3.6304) weight_decay: 0.0500 (0.0500) time: 0.5057 data: 0.0980 max mem: 21002 Epoch: [240] [230/312] eta: 0:00:47 lr: 0.000427 min_lr: 0.000427 loss: 3.2848 (3.6317) weight_decay: 0.0500 (0.0500) time: 0.5056 data: 0.0739 max mem: 21002 Epoch: [240] [240/312] eta: 0:00:41 lr: 0.000427 min_lr: 0.000427 loss: 3.7943 (3.6361) weight_decay: 0.0500 (0.0500) time: 0.5624 data: 0.0472 max mem: 21002 Epoch: [240] [250/312] eta: 0:00:35 lr: 0.000426 min_lr: 0.000426 loss: 3.8418 (3.6410) weight_decay: 0.0500 (0.0500) time: 0.4687 data: 0.0582 max mem: 21002 Epoch: [240] [260/312] eta: 0:00:29 lr: 0.000426 min_lr: 0.000426 loss: 3.8418 (3.6476) weight_decay: 0.0500 (0.0500) time: 0.5517 data: 0.0932 max mem: 21002 Epoch: [240] [270/312] eta: 0:00:23 lr: 0.000425 min_lr: 0.000425 loss: 3.8035 (3.6470) weight_decay: 0.0500 (0.0500) time: 0.4660 data: 0.0466 max mem: 21002 Epoch: [240] [280/312] eta: 0:00:18 lr: 0.000425 min_lr: 0.000425 loss: 3.7127 (3.6509) weight_decay: 0.0500 (0.0500) time: 0.5261 data: 0.0913 max mem: 21002 Epoch: [240] [290/312] eta: 0:00:12 lr: 0.000424 min_lr: 0.000424 loss: 3.6267 (3.6417) weight_decay: 0.0500 (0.0500) time: 0.5986 data: 0.0809 max mem: 21002 Epoch: [240] [300/312] eta: 0:00:06 lr: 0.000424 min_lr: 0.000424 loss: 3.3689 (3.6320) weight_decay: 0.0500 (0.0500) time: 0.4064 data: 0.0387 max mem: 21002 Epoch: [240] [310/312] eta: 0:00:01 lr: 0.000423 min_lr: 0.000423 loss: 3.6239 (3.6406) weight_decay: 0.0500 (0.0500) time: 0.3169 data: 0.0380 max mem: 21002 Epoch: [240] [311/312] eta: 0:00:00 lr: 0.000423 min_lr: 0.000423 loss: 3.6239 (3.6369) weight_decay: 0.0500 (0.0500) time: 0.3168 data: 0.0380 max mem: 21002 Epoch: [240] Total time: 0:02:51 (0.5508 s / it) Averaged stats: lr: 0.000423 min_lr: 0.000423 loss: 3.6239 (3.6312) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.1192 (1.1192) acc1: 79.4271 (79.4271) acc5: 94.5312 (94.5312) time: 8.3596 data: 8.2407 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5143 (1.4183) acc1: 70.9635 (71.3600) acc5: 90.6250 (90.3840) time: 1.0138 data: 0.9157 max mem: 21002 Test: Total time: 0:00:09 (1.0342 s / it) * Acc@1 70.506 Acc@5 90.368 loss 1.430 Accuracy of the model on the 50000 test images: 70.5% Max accuracy: 70.64% Epoch: [241] [ 0/312] eta: 1:00:17 lr: 0.000423 min_lr: 0.000423 loss: 3.4145 (3.4145) weight_decay: 0.0500 (0.0500) time: 11.5954 data: 9.3996 max mem: 21002 Epoch: [241] [ 10/312] eta: 0:08:52 lr: 0.000423 min_lr: 0.000423 loss: 3.8375 (3.6056) weight_decay: 0.0500 (0.0500) time: 1.7617 data: 1.0165 max mem: 21002 Epoch: [241] [ 20/312] eta: 0:05:16 lr: 0.000422 min_lr: 0.000422 loss: 3.9036 (3.6421) weight_decay: 0.0500 (0.0500) time: 0.5597 data: 0.1172 max mem: 21002 Epoch: [241] [ 30/312] eta: 0:03:53 lr: 0.000422 min_lr: 0.000422 loss: 3.6260 (3.5254) weight_decay: 0.0500 (0.0500) time: 0.3163 data: 0.0284 max mem: 21002 Epoch: [241] [ 40/312] eta: 0:03:09 lr: 0.000422 min_lr: 0.000422 loss: 3.5420 (3.5895) weight_decay: 0.0500 (0.0500) time: 0.2913 data: 0.0010 max mem: 21002 Epoch: [241] [ 50/312] eta: 0:02:50 lr: 0.000421 min_lr: 0.000421 loss: 3.9582 (3.6567) weight_decay: 0.0500 (0.0500) time: 0.3790 data: 0.0237 max mem: 21002 Epoch: [241] [ 60/312] eta: 0:02:36 lr: 0.000421 min_lr: 0.000421 loss: 3.9604 (3.6194) weight_decay: 0.0500 (0.0500) time: 0.4634 data: 0.1013 max mem: 21002 Epoch: [241] [ 70/312] eta: 0:02:32 lr: 0.000420 min_lr: 0.000420 loss: 3.9780 (3.6533) weight_decay: 0.0500 (0.0500) time: 0.5691 data: 0.1443 max mem: 21002 Epoch: [241] [ 80/312] eta: 0:02:20 lr: 0.000420 min_lr: 0.000420 loss: 3.8754 (3.6273) weight_decay: 0.0500 (0.0500) time: 0.5549 data: 0.1026 max mem: 21002 Epoch: [241] [ 90/312] eta: 0:02:14 lr: 0.000419 min_lr: 0.000419 loss: 3.3421 (3.6086) weight_decay: 0.0500 (0.0500) time: 0.5142 data: 0.1179 max mem: 21002 Epoch: [241] [100/312] eta: 0:02:08 lr: 0.000419 min_lr: 0.000419 loss: 3.8425 (3.6420) weight_decay: 0.0500 (0.0500) time: 0.6107 data: 0.1076 max mem: 21002 Epoch: [241] [110/312] eta: 0:01:58 lr: 0.000418 min_lr: 0.000418 loss: 3.9000 (3.6444) weight_decay: 0.0500 (0.0500) time: 0.5151 data: 0.0644 max mem: 21002 Epoch: [241] [120/312] eta: 0:01:53 lr: 0.000418 min_lr: 0.000418 loss: 3.7862 (3.6547) weight_decay: 0.0500 (0.0500) time: 0.5102 data: 0.0880 max mem: 21002 Epoch: [241] [130/312] eta: 0:01:44 lr: 0.000418 min_lr: 0.000418 loss: 3.8025 (3.6587) weight_decay: 0.0500 (0.0500) time: 0.4999 data: 0.0673 max mem: 21002 Epoch: [241] [140/312] eta: 0:01:39 lr: 0.000417 min_lr: 0.000417 loss: 3.8234 (3.6638) weight_decay: 0.0500 (0.0500) time: 0.5201 data: 0.0909 max mem: 21002 Epoch: [241] [150/312] eta: 0:01:34 lr: 0.000417 min_lr: 0.000417 loss: 3.8841 (3.6633) weight_decay: 0.0500 (0.0500) time: 0.6432 data: 0.1197 max mem: 21002 Epoch: [241] [160/312] eta: 0:01:26 lr: 0.000416 min_lr: 0.000416 loss: 3.6726 (3.6534) weight_decay: 0.0500 (0.0500) time: 0.4852 data: 0.0716 max mem: 21002 Epoch: [241] [170/312] eta: 0:01:22 lr: 0.000416 min_lr: 0.000416 loss: 3.8075 (3.6691) weight_decay: 0.0500 (0.0500) time: 0.5475 data: 0.0895 max mem: 21002 Epoch: [241] [180/312] eta: 0:01:16 lr: 0.000415 min_lr: 0.000415 loss: 3.7998 (3.6580) weight_decay: 0.0500 (0.0500) time: 0.6677 data: 0.1035 max mem: 21002 Epoch: [241] [190/312] eta: 0:01:09 lr: 0.000415 min_lr: 0.000415 loss: 3.7558 (3.6653) weight_decay: 0.0500 (0.0500) time: 0.4864 data: 0.0630 max mem: 21002 Epoch: [241] [200/312] eta: 0:01:04 lr: 0.000415 min_lr: 0.000415 loss: 3.8921 (3.6684) weight_decay: 0.0500 (0.0500) time: 0.5468 data: 0.0683 max mem: 21002 Epoch: [241] [210/312] eta: 0:00:57 lr: 0.000414 min_lr: 0.000414 loss: 3.8737 (3.6625) weight_decay: 0.0500 (0.0500) time: 0.5051 data: 0.0452 max mem: 21002 Epoch: [241] [220/312] eta: 0:00:52 lr: 0.000414 min_lr: 0.000414 loss: 2.9625 (3.6411) weight_decay: 0.0500 (0.0500) time: 0.5263 data: 0.0733 max mem: 21002 Epoch: [241] [230/312] eta: 0:00:46 lr: 0.000413 min_lr: 0.000413 loss: 3.3413 (3.6476) weight_decay: 0.0500 (0.0500) time: 0.6597 data: 0.0731 max mem: 21002 Epoch: [241] [240/312] eta: 0:00:40 lr: 0.000413 min_lr: 0.000413 loss: 3.8828 (3.6466) weight_decay: 0.0500 (0.0500) time: 0.4784 data: 0.0304 max mem: 21002 Epoch: [241] [250/312] eta: 0:00:35 lr: 0.000412 min_lr: 0.000412 loss: 3.6271 (3.6441) weight_decay: 0.0500 (0.0500) time: 0.5204 data: 0.0671 max mem: 21002 Epoch: [241] [260/312] eta: 0:00:29 lr: 0.000412 min_lr: 0.000412 loss: 3.6216 (3.6420) weight_decay: 0.0500 (0.0500) time: 0.6076 data: 0.0374 max mem: 21002 Epoch: [241] [270/312] eta: 0:00:23 lr: 0.000411 min_lr: 0.000411 loss: 3.7167 (3.6457) weight_decay: 0.0500 (0.0500) time: 0.5057 data: 0.0293 max mem: 21002 Epoch: [241] [280/312] eta: 0:00:18 lr: 0.000411 min_lr: 0.000411 loss: 3.7716 (3.6488) weight_decay: 0.0500 (0.0500) time: 0.6056 data: 0.0791 max mem: 21002 Epoch: [241] [290/312] eta: 0:00:12 lr: 0.000411 min_lr: 0.000411 loss: 3.8378 (3.6525) weight_decay: 0.0500 (0.0500) time: 0.5207 data: 0.0506 max mem: 21002 Epoch: [241] [300/312] eta: 0:00:06 lr: 0.000410 min_lr: 0.000410 loss: 3.9390 (3.6537) weight_decay: 0.0500 (0.0500) time: 0.3828 data: 0.0148 max mem: 21002 Epoch: [241] [310/312] eta: 0:00:01 lr: 0.000410 min_lr: 0.000410 loss: 3.6685 (3.6481) weight_decay: 0.0500 (0.0500) time: 0.3774 data: 0.0145 max mem: 21002 Epoch: [241] [311/312] eta: 0:00:00 lr: 0.000410 min_lr: 0.000410 loss: 3.8088 (3.6488) weight_decay: 0.0500 (0.0500) time: 0.3731 data: 0.0097 max mem: 21002 Epoch: [241] Total time: 0:02:52 (0.5514 s / it) Averaged stats: lr: 0.000410 min_lr: 0.000410 loss: 3.8088 (3.6475) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:11 loss: 1.2249 (1.2249) acc1: 77.3438 (77.3438) acc5: 93.3594 (93.3594) time: 7.9246 data: 7.8086 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5463 (1.4651) acc1: 68.3594 (69.7760) acc5: 88.8021 (89.3280) time: 1.0278 data: 0.9297 max mem: 21002 Test: Total time: 0:00:09 (1.0412 s / it) * Acc@1 69.482 Acc@5 89.796 loss 1.468 Accuracy of the model on the 50000 test images: 69.5% Max accuracy: 70.64% Epoch: [242] [ 0/312] eta: 1:06:29 lr: 0.000410 min_lr: 0.000410 loss: 2.7114 (2.7114) weight_decay: 0.0500 (0.0500) time: 12.7853 data: 10.2701 max mem: 21002 Epoch: [242] [ 10/312] eta: 0:08:20 lr: 0.000409 min_lr: 0.000409 loss: 3.8019 (3.5576) weight_decay: 0.0500 (0.0500) time: 1.6562 data: 1.0814 max mem: 21002 Epoch: [242] [ 20/312] eta: 0:05:15 lr: 0.000409 min_lr: 0.000409 loss: 3.8019 (3.5798) weight_decay: 0.0500 (0.0500) time: 0.4961 data: 0.1116 max mem: 21002 Epoch: [242] [ 30/312] eta: 0:03:53 lr: 0.000408 min_lr: 0.000408 loss: 3.7774 (3.6205) weight_decay: 0.0500 (0.0500) time: 0.3718 data: 0.0307 max mem: 21002 Epoch: [242] [ 40/312] eta: 0:03:19 lr: 0.000408 min_lr: 0.000408 loss: 3.6126 (3.5618) weight_decay: 0.0500 (0.0500) time: 0.3690 data: 0.0473 max mem: 21002 Epoch: [242] [ 50/312] eta: 0:03:10 lr: 0.000407 min_lr: 0.000407 loss: 3.4964 (3.5702) weight_decay: 0.0500 (0.0500) time: 0.5712 data: 0.2008 max mem: 21002 Epoch: [242] [ 60/312] eta: 0:02:45 lr: 0.000407 min_lr: 0.000407 loss: 3.8397 (3.6039) weight_decay: 0.0500 (0.0500) time: 0.4932 data: 0.1542 max mem: 21002 Epoch: [242] [ 70/312] eta: 0:02:39 lr: 0.000407 min_lr: 0.000407 loss: 3.8057 (3.5923) weight_decay: 0.0500 (0.0500) time: 0.4923 data: 0.1577 max mem: 21002 Epoch: [242] [ 80/312] eta: 0:02:30 lr: 0.000406 min_lr: 0.000406 loss: 3.5061 (3.5879) weight_decay: 0.0500 (0.0500) time: 0.6318 data: 0.2147 max mem: 21002 Epoch: [242] [ 90/312] eta: 0:02:18 lr: 0.000406 min_lr: 0.000406 loss: 3.8016 (3.6077) weight_decay: 0.0500 (0.0500) time: 0.5014 data: 0.1320 max mem: 21002 Epoch: [242] [100/312] eta: 0:02:13 lr: 0.000405 min_lr: 0.000405 loss: 3.6425 (3.5906) weight_decay: 0.0500 (0.0500) time: 0.5478 data: 0.2091 max mem: 21002 Epoch: [242] [110/312] eta: 0:02:00 lr: 0.000405 min_lr: 0.000405 loss: 3.6863 (3.6036) weight_decay: 0.0500 (0.0500) time: 0.4742 data: 0.1348 max mem: 21002 Epoch: [242] [120/312] eta: 0:01:56 lr: 0.000404 min_lr: 0.000404 loss: 3.8672 (3.6097) weight_decay: 0.0500 (0.0500) time: 0.4811 data: 0.1198 max mem: 21002 Epoch: [242] [130/312] eta: 0:01:50 lr: 0.000404 min_lr: 0.000404 loss: 3.9335 (3.6290) weight_decay: 0.0500 (0.0500) time: 0.6714 data: 0.2283 max mem: 21002 Epoch: [242] [140/312] eta: 0:01:40 lr: 0.000404 min_lr: 0.000404 loss: 3.9901 (3.6382) weight_decay: 0.0500 (0.0500) time: 0.4789 data: 0.1091 max mem: 21002 Epoch: [242] [150/312] eta: 0:01:36 lr: 0.000403 min_lr: 0.000403 loss: 3.5881 (3.6247) weight_decay: 0.0500 (0.0500) time: 0.4831 data: 0.1142 max mem: 21002 Epoch: [242] [160/312] eta: 0:01:29 lr: 0.000403 min_lr: 0.000403 loss: 3.6586 (3.6371) weight_decay: 0.0500 (0.0500) time: 0.6231 data: 0.2216 max mem: 21002 Epoch: [242] [170/312] eta: 0:01:22 lr: 0.000402 min_lr: 0.000402 loss: 3.6393 (3.6219) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.1537 max mem: 21002 Epoch: [242] [180/312] eta: 0:01:17 lr: 0.000402 min_lr: 0.000402 loss: 3.4437 (3.6221) weight_decay: 0.0500 (0.0500) time: 0.5648 data: 0.1758 max mem: 21002 Epoch: [242] [190/312] eta: 0:01:09 lr: 0.000401 min_lr: 0.000401 loss: 3.9045 (3.6320) weight_decay: 0.0500 (0.0500) time: 0.4965 data: 0.1302 max mem: 21002 Epoch: [242] [200/312] eta: 0:01:04 lr: 0.000401 min_lr: 0.000401 loss: 3.7474 (3.6171) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.1153 max mem: 21002 Epoch: [242] [210/312] eta: 0:00:59 lr: 0.000401 min_lr: 0.000401 loss: 3.4376 (3.6095) weight_decay: 0.0500 (0.0500) time: 0.7039 data: 0.2115 max mem: 21002 Epoch: [242] [220/312] eta: 0:00:52 lr: 0.000400 min_lr: 0.000400 loss: 3.8372 (3.6167) weight_decay: 0.0500 (0.0500) time: 0.5397 data: 0.1274 max mem: 21002 Epoch: [242] [230/312] eta: 0:00:47 lr: 0.000400 min_lr: 0.000400 loss: 3.8797 (3.6215) weight_decay: 0.0500 (0.0500) time: 0.5021 data: 0.1106 max mem: 21002 Epoch: [242] [240/312] eta: 0:00:41 lr: 0.000399 min_lr: 0.000399 loss: 3.8797 (3.6278) weight_decay: 0.0500 (0.0500) time: 0.5872 data: 0.0807 max mem: 21002 Epoch: [242] [250/312] eta: 0:00:35 lr: 0.000399 min_lr: 0.000399 loss: 3.6028 (3.6227) weight_decay: 0.0500 (0.0500) time: 0.4902 data: 0.0873 max mem: 21002 Epoch: [242] [260/312] eta: 0:00:30 lr: 0.000398 min_lr: 0.000398 loss: 3.6913 (3.6256) weight_decay: 0.0500 (0.0500) time: 0.6089 data: 0.1845 max mem: 21002 Epoch: [242] [270/312] eta: 0:00:23 lr: 0.000398 min_lr: 0.000398 loss: 3.7265 (3.6169) weight_decay: 0.0500 (0.0500) time: 0.5230 data: 0.0987 max mem: 21002 Epoch: [242] [280/312] eta: 0:00:18 lr: 0.000398 min_lr: 0.000398 loss: 3.7906 (3.6188) weight_decay: 0.0500 (0.0500) time: 0.4552 data: 0.0788 max mem: 21002 Epoch: [242] [290/312] eta: 0:00:12 lr: 0.000397 min_lr: 0.000397 loss: 3.8945 (3.6260) weight_decay: 0.0500 (0.0500) time: 0.5653 data: 0.1186 max mem: 21002 Epoch: [242] [300/312] eta: 0:00:06 lr: 0.000397 min_lr: 0.000397 loss: 3.9983 (3.6295) weight_decay: 0.0500 (0.0500) time: 0.4061 data: 0.0541 max mem: 21002 Epoch: [242] [310/312] eta: 0:00:01 lr: 0.000396 min_lr: 0.000396 loss: 4.0274 (3.6406) weight_decay: 0.0500 (0.0500) time: 0.2908 data: 0.0140 max mem: 21002 Epoch: [242] [311/312] eta: 0:00:00 lr: 0.000396 min_lr: 0.000396 loss: 4.0189 (3.6415) weight_decay: 0.0500 (0.0500) time: 0.2906 data: 0.0140 max mem: 21002 Epoch: [242] Total time: 0:02:52 (0.5524 s / it) Averaged stats: lr: 0.000396 min_lr: 0.000396 loss: 4.0189 (3.6635) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.2456 (1.2456) acc1: 79.2969 (79.2969) acc5: 94.1406 (94.1406) time: 8.7366 data: 8.6183 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5791 (1.5325) acc1: 71.0938 (71.4560) acc5: 90.8854 (90.5120) time: 1.0631 data: 0.9577 max mem: 21002 Test: Total time: 0:00:09 (1.0737 s / it) * Acc@1 70.722 Acc@5 90.362 loss 1.543 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 70.72% Epoch: [243] [ 0/312] eta: 1:01:20 lr: 0.000396 min_lr: 0.000396 loss: 3.8842 (3.8842) weight_decay: 0.0500 (0.0500) time: 11.7968 data: 10.4995 max mem: 21002 Epoch: [243] [ 10/312] eta: 0:08:04 lr: 0.000396 min_lr: 0.000396 loss: 3.8842 (3.8028) weight_decay: 0.0500 (0.0500) time: 1.6040 data: 1.0660 max mem: 21002 Epoch: [243] [ 20/312] eta: 0:05:04 lr: 0.000395 min_lr: 0.000395 loss: 3.8512 (3.7126) weight_decay: 0.0500 (0.0500) time: 0.5034 data: 0.1141 max mem: 21002 Epoch: [243] [ 30/312] eta: 0:03:45 lr: 0.000395 min_lr: 0.000395 loss: 3.7218 (3.6984) weight_decay: 0.0500 (0.0500) time: 0.3551 data: 0.0533 max mem: 21002 Epoch: [243] [ 40/312] eta: 0:03:19 lr: 0.000394 min_lr: 0.000394 loss: 3.6820 (3.6725) weight_decay: 0.0500 (0.0500) time: 0.4095 data: 0.0810 max mem: 21002 Epoch: [243] [ 50/312] eta: 0:03:09 lr: 0.000394 min_lr: 0.000394 loss: 3.6652 (3.6624) weight_decay: 0.0500 (0.0500) time: 0.6089 data: 0.2104 max mem: 21002 Epoch: [243] [ 60/312] eta: 0:02:44 lr: 0.000394 min_lr: 0.000394 loss: 3.6652 (3.6529) weight_decay: 0.0500 (0.0500) time: 0.4878 data: 0.1303 max mem: 21002 Epoch: [243] [ 70/312] eta: 0:02:39 lr: 0.000393 min_lr: 0.000393 loss: 3.7754 (3.6444) weight_decay: 0.0500 (0.0500) time: 0.5008 data: 0.1314 max mem: 21002 Epoch: [243] [ 80/312] eta: 0:02:29 lr: 0.000393 min_lr: 0.000393 loss: 3.8310 (3.6625) weight_decay: 0.0500 (0.0500) time: 0.6174 data: 0.2486 max mem: 21002 Epoch: [243] [ 90/312] eta: 0:02:18 lr: 0.000392 min_lr: 0.000392 loss: 3.7808 (3.6567) weight_decay: 0.0500 (0.0500) time: 0.4890 data: 0.1181 max mem: 21002 Epoch: [243] [100/312] eta: 0:02:13 lr: 0.000392 min_lr: 0.000392 loss: 3.7885 (3.6726) weight_decay: 0.0500 (0.0500) time: 0.5836 data: 0.1201 max mem: 21002 Epoch: [243] [110/312] eta: 0:02:01 lr: 0.000391 min_lr: 0.000391 loss: 3.7885 (3.6743) weight_decay: 0.0500 (0.0500) time: 0.4983 data: 0.1199 max mem: 21002 Epoch: [243] [120/312] eta: 0:01:56 lr: 0.000391 min_lr: 0.000391 loss: 3.7267 (3.6891) weight_decay: 0.0500 (0.0500) time: 0.4838 data: 0.0993 max mem: 21002 Epoch: [243] [130/312] eta: 0:01:52 lr: 0.000391 min_lr: 0.000391 loss: 3.8667 (3.6962) weight_decay: 0.0500 (0.0500) time: 0.7008 data: 0.2187 max mem: 21002 Epoch: [243] [140/312] eta: 0:01:41 lr: 0.000390 min_lr: 0.000390 loss: 3.9154 (3.6963) weight_decay: 0.0500 (0.0500) time: 0.5030 data: 0.1202 max mem: 21002 Epoch: [243] [150/312] eta: 0:01:37 lr: 0.000390 min_lr: 0.000390 loss: 3.8673 (3.7006) weight_decay: 0.0500 (0.0500) time: 0.4979 data: 0.0944 max mem: 21002 Epoch: [243] [160/312] eta: 0:01:29 lr: 0.000389 min_lr: 0.000389 loss: 4.0387 (3.7135) weight_decay: 0.0500 (0.0500) time: 0.5775 data: 0.1729 max mem: 21002 Epoch: [243] [170/312] eta: 0:01:23 lr: 0.000389 min_lr: 0.000389 loss: 3.9517 (3.7248) weight_decay: 0.0500 (0.0500) time: 0.4964 data: 0.0792 max mem: 21002 Epoch: [243] [180/312] eta: 0:01:18 lr: 0.000388 min_lr: 0.000388 loss: 3.9517 (3.7329) weight_decay: 0.0500 (0.0500) time: 0.6089 data: 0.0732 max mem: 21002 Epoch: [243] [190/312] eta: 0:01:10 lr: 0.000388 min_lr: 0.000388 loss: 3.9482 (3.7360) weight_decay: 0.0500 (0.0500) time: 0.4846 data: 0.0732 max mem: 21002 Epoch: [243] [200/312] eta: 0:01:05 lr: 0.000388 min_lr: 0.000388 loss: 3.6022 (3.7218) weight_decay: 0.0500 (0.0500) time: 0.4983 data: 0.0751 max mem: 21002 Epoch: [243] [210/312] eta: 0:01:00 lr: 0.000387 min_lr: 0.000387 loss: 3.5013 (3.7254) weight_decay: 0.0500 (0.0500) time: 0.6990 data: 0.1082 max mem: 21002 Epoch: [243] [220/312] eta: 0:00:52 lr: 0.000387 min_lr: 0.000387 loss: 3.6344 (3.7222) weight_decay: 0.0500 (0.0500) time: 0.4997 data: 0.0338 max mem: 21002 Epoch: [243] [230/312] eta: 0:00:47 lr: 0.000386 min_lr: 0.000386 loss: 3.8735 (3.7235) weight_decay: 0.0500 (0.0500) time: 0.5007 data: 0.0477 max mem: 21002 Epoch: [243] [240/312] eta: 0:00:41 lr: 0.000386 min_lr: 0.000386 loss: 3.8735 (3.7237) weight_decay: 0.0500 (0.0500) time: 0.5592 data: 0.1076 max mem: 21002 Epoch: [243] [250/312] eta: 0:00:35 lr: 0.000385 min_lr: 0.000385 loss: 3.6326 (3.7100) weight_decay: 0.0500 (0.0500) time: 0.5135 data: 0.0606 max mem: 21002 Epoch: [243] [260/312] eta: 0:00:30 lr: 0.000385 min_lr: 0.000385 loss: 3.3128 (3.7010) weight_decay: 0.0500 (0.0500) time: 0.6606 data: 0.0513 max mem: 21002 Epoch: [243] [270/312] eta: 0:00:23 lr: 0.000385 min_lr: 0.000385 loss: 3.5447 (3.6985) weight_decay: 0.0500 (0.0500) time: 0.5023 data: 0.0518 max mem: 21002 Epoch: [243] [280/312] eta: 0:00:18 lr: 0.000384 min_lr: 0.000384 loss: 3.6267 (3.7001) weight_decay: 0.0500 (0.0500) time: 0.4844 data: 0.0215 max mem: 21002 Epoch: [243] [290/312] eta: 0:00:12 lr: 0.000384 min_lr: 0.000384 loss: 3.9153 (3.7048) weight_decay: 0.0500 (0.0500) time: 0.5832 data: 0.0594 max mem: 21002 Epoch: [243] [300/312] eta: 0:00:06 lr: 0.000383 min_lr: 0.000383 loss: 3.9428 (3.7027) weight_decay: 0.0500 (0.0500) time: 0.3842 data: 0.0389 max mem: 21002 Epoch: [243] [310/312] eta: 0:00:01 lr: 0.000383 min_lr: 0.000383 loss: 3.8223 (3.7029) weight_decay: 0.0500 (0.0500) time: 0.2768 data: 0.0001 max mem: 21002 Epoch: [243] [311/312] eta: 0:00:00 lr: 0.000383 min_lr: 0.000383 loss: 3.8223 (3.7030) weight_decay: 0.0500 (0.0500) time: 0.2768 data: 0.0001 max mem: 21002 Epoch: [243] Total time: 0:02:53 (0.5553 s / it) Averaged stats: lr: 0.000383 min_lr: 0.000383 loss: 3.8223 (3.6605) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.1596 (1.1596) acc1: 79.1667 (79.1667) acc5: 94.9219 (94.9219) time: 8.8149 data: 8.6964 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5027 (1.4544) acc1: 70.5729 (70.9760) acc5: 90.4948 (90.3520) time: 1.0680 data: 0.9663 max mem: 21002 Test: Total time: 0:00:09 (1.0821 s / it) * Acc@1 70.678 Acc@5 90.356 loss 1.461 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 70.72% Epoch: [244] [ 0/312] eta: 1:01:57 lr: 0.000383 min_lr: 0.000383 loss: 3.6579 (3.6579) weight_decay: 0.0500 (0.0500) time: 11.9137 data: 11.5318 max mem: 21002 Epoch: [244] [ 10/312] eta: 0:08:38 lr: 0.000382 min_lr: 0.000382 loss: 4.0683 (3.8333) weight_decay: 0.0500 (0.0500) time: 1.7157 data: 1.0489 max mem: 21002 Epoch: [244] [ 20/312] eta: 0:05:08 lr: 0.000382 min_lr: 0.000382 loss: 3.6716 (3.7002) weight_decay: 0.0500 (0.0500) time: 0.5132 data: 0.0126 max mem: 21002 Epoch: [244] [ 30/312] eta: 0:03:48 lr: 0.000382 min_lr: 0.000382 loss: 3.6716 (3.6699) weight_decay: 0.0500 (0.0500) time: 0.3119 data: 0.0133 max mem: 21002 Epoch: [244] [ 40/312] eta: 0:03:05 lr: 0.000381 min_lr: 0.000381 loss: 3.6941 (3.6412) weight_decay: 0.0500 (0.0500) time: 0.2927 data: 0.0014 max mem: 21002 Epoch: [244] [ 50/312] eta: 0:02:55 lr: 0.000381 min_lr: 0.000381 loss: 3.2977 (3.6131) weight_decay: 0.0500 (0.0500) time: 0.4486 data: 0.1586 max mem: 21002 Epoch: [244] [ 60/312] eta: 0:02:33 lr: 0.000380 min_lr: 0.000380 loss: 3.2867 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.4512 data: 0.1586 max mem: 21002 Epoch: [244] [ 70/312] eta: 0:02:27 lr: 0.000380 min_lr: 0.000380 loss: 3.7589 (3.6447) weight_decay: 0.0500 (0.0500) time: 0.4614 data: 0.1277 max mem: 21002 Epoch: [244] [ 80/312] eta: 0:02:23 lr: 0.000379 min_lr: 0.000379 loss: 3.9779 (3.6424) weight_decay: 0.0500 (0.0500) time: 0.6438 data: 0.2206 max mem: 21002 Epoch: [244] [ 90/312] eta: 0:02:11 lr: 0.000379 min_lr: 0.000379 loss: 3.5315 (3.6579) weight_decay: 0.0500 (0.0500) time: 0.5250 data: 0.1441 max mem: 21002 Epoch: [244] [100/312] eta: 0:02:06 lr: 0.000379 min_lr: 0.000379 loss: 3.9188 (3.6608) weight_decay: 0.0500 (0.0500) time: 0.5119 data: 0.1660 max mem: 21002 Epoch: [244] [110/312] eta: 0:01:55 lr: 0.000378 min_lr: 0.000378 loss: 3.9153 (3.6800) weight_decay: 0.0500 (0.0500) time: 0.4929 data: 0.1411 max mem: 21002 Epoch: [244] [120/312] eta: 0:01:50 lr: 0.000378 min_lr: 0.000378 loss: 3.8296 (3.6802) weight_decay: 0.0500 (0.0500) time: 0.4804 data: 0.1250 max mem: 21002 Epoch: [244] [130/312] eta: 0:01:46 lr: 0.000377 min_lr: 0.000377 loss: 3.4361 (3.6560) weight_decay: 0.0500 (0.0500) time: 0.6581 data: 0.2298 max mem: 21002 Epoch: [244] [140/312] eta: 0:01:37 lr: 0.000377 min_lr: 0.000377 loss: 3.2997 (3.6389) weight_decay: 0.0500 (0.0500) time: 0.5179 data: 0.1316 max mem: 21002 Epoch: [244] [150/312] eta: 0:01:32 lr: 0.000377 min_lr: 0.000377 loss: 3.7578 (3.6570) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.1073 max mem: 21002 Epoch: [244] [160/312] eta: 0:01:26 lr: 0.000376 min_lr: 0.000376 loss: 3.7578 (3.6334) weight_decay: 0.0500 (0.0500) time: 0.5966 data: 0.2316 max mem: 21002 Epoch: [244] [170/312] eta: 0:01:20 lr: 0.000376 min_lr: 0.000376 loss: 3.4271 (3.6390) weight_decay: 0.0500 (0.0500) time: 0.4976 data: 0.1258 max mem: 21002 Epoch: [244] [180/312] eta: 0:01:15 lr: 0.000375 min_lr: 0.000375 loss: 3.6087 (3.6361) weight_decay: 0.0500 (0.0500) time: 0.5492 data: 0.1112 max mem: 21002 Epoch: [244] [190/312] eta: 0:01:08 lr: 0.000375 min_lr: 0.000375 loss: 3.6087 (3.6347) weight_decay: 0.0500 (0.0500) time: 0.5085 data: 0.1203 max mem: 21002 Epoch: [244] [200/312] eta: 0:01:03 lr: 0.000374 min_lr: 0.000374 loss: 3.9403 (3.6425) weight_decay: 0.0500 (0.0500) time: 0.5140 data: 0.1273 max mem: 21002 Epoch: [244] [210/312] eta: 0:00:57 lr: 0.000374 min_lr: 0.000374 loss: 3.9042 (3.6403) weight_decay: 0.0500 (0.0500) time: 0.6447 data: 0.2385 max mem: 21002 Epoch: [244] [220/312] eta: 0:00:51 lr: 0.000374 min_lr: 0.000374 loss: 3.6212 (3.6203) weight_decay: 0.0500 (0.0500) time: 0.4882 data: 0.1211 max mem: 21002 Epoch: [244] [230/312] eta: 0:00:46 lr: 0.000373 min_lr: 0.000373 loss: 3.3295 (3.6205) weight_decay: 0.0500 (0.0500) time: 0.5200 data: 0.1237 max mem: 21002 Epoch: [244] [240/312] eta: 0:00:40 lr: 0.000373 min_lr: 0.000373 loss: 3.7116 (3.6287) weight_decay: 0.0500 (0.0500) time: 0.6306 data: 0.2486 max mem: 21002 Epoch: [244] [250/312] eta: 0:00:34 lr: 0.000372 min_lr: 0.000372 loss: 3.8847 (3.6354) weight_decay: 0.0500 (0.0500) time: 0.5021 data: 0.1257 max mem: 21002 Epoch: [244] [260/312] eta: 0:00:29 lr: 0.000372 min_lr: 0.000372 loss: 3.7481 (3.6362) weight_decay: 0.0500 (0.0500) time: 0.4948 data: 0.1245 max mem: 21002 Epoch: [244] [270/312] eta: 0:00:23 lr: 0.000372 min_lr: 0.000372 loss: 3.5594 (3.6283) weight_decay: 0.0500 (0.0500) time: 0.4875 data: 0.1245 max mem: 21002 Epoch: [244] [280/312] eta: 0:00:17 lr: 0.000371 min_lr: 0.000371 loss: 3.7089 (3.6329) weight_decay: 0.0500 (0.0500) time: 0.5122 data: 0.1382 max mem: 21002 Epoch: [244] [290/312] eta: 0:00:12 lr: 0.000371 min_lr: 0.000371 loss: 3.8204 (3.6397) weight_decay: 0.0500 (0.0500) time: 0.6511 data: 0.2906 max mem: 21002 Epoch: [244] [300/312] eta: 0:00:06 lr: 0.000370 min_lr: 0.000370 loss: 3.8204 (3.6328) weight_decay: 0.0500 (0.0500) time: 0.4851 data: 0.1529 max mem: 21002 Epoch: [244] [310/312] eta: 0:00:01 lr: 0.000370 min_lr: 0.000370 loss: 3.3553 (3.6255) weight_decay: 0.0500 (0.0500) time: 0.3143 data: 0.0310 max mem: 21002 Epoch: [244] [311/312] eta: 0:00:00 lr: 0.000370 min_lr: 0.000370 loss: 3.3619 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.3140 data: 0.0310 max mem: 21002 Epoch: [244] Total time: 0:02:50 (0.5465 s / it) Averaged stats: lr: 0.000370 min_lr: 0.000370 loss: 3.3619 (3.6510) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.1336 (1.1336) acc1: 79.0365 (79.0365) acc5: 94.5312 (94.5312) time: 8.8020 data: 8.6833 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4598 (1.3654) acc1: 70.5729 (71.7120) acc5: 89.9740 (90.5440) time: 1.0631 data: 0.9649 max mem: 21002 Test: Total time: 0:00:09 (1.0888 s / it) * Acc@1 71.344 Acc@5 90.510 loss 1.373 Accuracy of the model on the 50000 test images: 71.3% Max accuracy: 71.34% Epoch: [245] [ 0/312] eta: 1:00:33 lr: 0.000370 min_lr: 0.000370 loss: 3.5391 (3.5391) weight_decay: 0.0500 (0.0500) time: 11.6447 data: 9.2924 max mem: 21002 Epoch: [245] [ 10/312] eta: 0:07:37 lr: 0.000369 min_lr: 0.000369 loss: 3.6434 (3.6717) weight_decay: 0.0500 (0.0500) time: 1.5146 data: 1.0077 max mem: 21002 Epoch: [245] [ 20/312] eta: 0:04:51 lr: 0.000369 min_lr: 0.000369 loss: 3.5752 (3.5458) weight_decay: 0.0500 (0.0500) time: 0.4666 data: 0.1363 max mem: 21002 Epoch: [245] [ 30/312] eta: 0:03:37 lr: 0.000369 min_lr: 0.000369 loss: 3.5354 (3.5537) weight_decay: 0.0500 (0.0500) time: 0.3626 data: 0.0521 max mem: 21002 Epoch: [245] [ 40/312] eta: 0:03:13 lr: 0.000368 min_lr: 0.000368 loss: 3.5354 (3.5372) weight_decay: 0.0500 (0.0500) time: 0.4102 data: 0.0928 max mem: 21002 Epoch: [245] [ 50/312] eta: 0:03:10 lr: 0.000368 min_lr: 0.000368 loss: 3.7071 (3.5838) weight_decay: 0.0500 (0.0500) time: 0.6588 data: 0.2394 max mem: 21002 Epoch: [245] [ 60/312] eta: 0:02:44 lr: 0.000367 min_lr: 0.000367 loss: 3.8285 (3.6057) weight_decay: 0.0500 (0.0500) time: 0.5366 data: 0.1524 max mem: 21002 Epoch: [245] [ 70/312] eta: 0:02:39 lr: 0.000367 min_lr: 0.000367 loss: 3.7084 (3.6132) weight_decay: 0.0500 (0.0500) time: 0.4870 data: 0.0684 max mem: 21002 Epoch: [245] [ 80/312] eta: 0:02:29 lr: 0.000366 min_lr: 0.000366 loss: 3.8080 (3.6391) weight_decay: 0.0500 (0.0500) time: 0.6086 data: 0.1063 max mem: 21002 Epoch: [245] [ 90/312] eta: 0:02:19 lr: 0.000366 min_lr: 0.000366 loss: 3.8152 (3.6485) weight_decay: 0.0500 (0.0500) time: 0.5127 data: 0.1347 max mem: 21002 Epoch: [245] [100/312] eta: 0:02:14 lr: 0.000366 min_lr: 0.000366 loss: 3.6288 (3.6376) weight_decay: 0.0500 (0.0500) time: 0.5961 data: 0.2017 max mem: 21002 Epoch: [245] [110/312] eta: 0:02:01 lr: 0.000365 min_lr: 0.000365 loss: 3.6288 (3.6310) weight_decay: 0.0500 (0.0500) time: 0.4939 data: 0.1056 max mem: 21002 Epoch: [245] [120/312] eta: 0:01:56 lr: 0.000365 min_lr: 0.000365 loss: 3.7600 (3.6289) weight_decay: 0.0500 (0.0500) time: 0.4673 data: 0.0829 max mem: 21002 Epoch: [245] [130/312] eta: 0:01:51 lr: 0.000364 min_lr: 0.000364 loss: 3.5472 (3.6002) weight_decay: 0.0500 (0.0500) time: 0.6660 data: 0.1940 max mem: 21002 Epoch: [245] [140/312] eta: 0:01:41 lr: 0.000364 min_lr: 0.000364 loss: 3.4825 (3.6032) weight_decay: 0.0500 (0.0500) time: 0.4911 data: 0.1118 max mem: 21002 Epoch: [245] [150/312] eta: 0:01:36 lr: 0.000364 min_lr: 0.000364 loss: 3.8376 (3.6142) weight_decay: 0.0500 (0.0500) time: 0.4787 data: 0.1041 max mem: 21002 Epoch: [245] [160/312] eta: 0:01:29 lr: 0.000363 min_lr: 0.000363 loss: 3.8902 (3.6171) weight_decay: 0.0500 (0.0500) time: 0.5650 data: 0.1041 max mem: 21002 Epoch: [245] [170/312] eta: 0:01:23 lr: 0.000363 min_lr: 0.000363 loss: 3.8644 (3.6364) weight_decay: 0.0500 (0.0500) time: 0.5143 data: 0.1403 max mem: 21002 Epoch: [245] [180/312] eta: 0:01:17 lr: 0.000362 min_lr: 0.000362 loss: 3.8654 (3.6435) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.2358 max mem: 21002 Epoch: [245] [190/312] eta: 0:01:09 lr: 0.000362 min_lr: 0.000362 loss: 3.7843 (3.6374) weight_decay: 0.0500 (0.0500) time: 0.4561 data: 0.0962 max mem: 21002 Epoch: [245] [200/312] eta: 0:01:04 lr: 0.000362 min_lr: 0.000362 loss: 3.6752 (3.6317) weight_decay: 0.0500 (0.0500) time: 0.5093 data: 0.1288 max mem: 21002 Epoch: [245] [210/312] eta: 0:00:58 lr: 0.000361 min_lr: 0.000361 loss: 3.7685 (3.6388) weight_decay: 0.0500 (0.0500) time: 0.6263 data: 0.2023 max mem: 21002 Epoch: [245] [220/312] eta: 0:00:51 lr: 0.000361 min_lr: 0.000361 loss: 3.6420 (3.6281) weight_decay: 0.0500 (0.0500) time: 0.4061 data: 0.0742 max mem: 21002 Epoch: [245] [230/312] eta: 0:00:47 lr: 0.000360 min_lr: 0.000360 loss: 3.5674 (3.6293) weight_decay: 0.0500 (0.0500) time: 0.5751 data: 0.1379 max mem: 21002 Epoch: [245] [240/312] eta: 0:00:40 lr: 0.000360 min_lr: 0.000360 loss: 3.4795 (3.6160) weight_decay: 0.0500 (0.0500) time: 0.6171 data: 0.1379 max mem: 21002 Epoch: [245] [250/312] eta: 0:00:35 lr: 0.000359 min_lr: 0.000359 loss: 3.5744 (3.6213) weight_decay: 0.0500 (0.0500) time: 0.4265 data: 0.0982 max mem: 21002 Epoch: [245] [260/312] eta: 0:00:29 lr: 0.000359 min_lr: 0.000359 loss: 3.8369 (3.6270) weight_decay: 0.0500 (0.0500) time: 0.6175 data: 0.2317 max mem: 21002 Epoch: [245] [270/312] eta: 0:00:23 lr: 0.000359 min_lr: 0.000359 loss: 3.9268 (3.6235) weight_decay: 0.0500 (0.0500) time: 0.5214 data: 0.1343 max mem: 21002 Epoch: [245] [280/312] eta: 0:00:18 lr: 0.000358 min_lr: 0.000358 loss: 3.7056 (3.6190) weight_decay: 0.0500 (0.0500) time: 0.4623 data: 0.1172 max mem: 21002 Epoch: [245] [290/312] eta: 0:00:12 lr: 0.000358 min_lr: 0.000358 loss: 3.8398 (3.6254) weight_decay: 0.0500 (0.0500) time: 0.5848 data: 0.1953 max mem: 21002 Epoch: [245] [300/312] eta: 0:00:06 lr: 0.000357 min_lr: 0.000357 loss: 3.7663 (3.6251) weight_decay: 0.0500 (0.0500) time: 0.4072 data: 0.0786 max mem: 21002 Epoch: [245] [310/312] eta: 0:00:01 lr: 0.000357 min_lr: 0.000357 loss: 3.7459 (3.6225) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [245] [311/312] eta: 0:00:00 lr: 0.000357 min_lr: 0.000357 loss: 3.7663 (3.6235) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [245] Total time: 0:02:50 (0.5478 s / it) Averaged stats: lr: 0.000357 min_lr: 0.000357 loss: 3.7663 (3.6531) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:14 loss: 1.1163 (1.1163) acc1: 79.1667 (79.1667) acc5: 94.6615 (94.6615) time: 8.2577 data: 8.1389 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4108 (1.3598) acc1: 70.7031 (71.6640) acc5: 91.1458 (90.8960) time: 1.0078 data: 0.9112 max mem: 21002 Test: Total time: 0:00:09 (1.0182 s / it) * Acc@1 71.648 Acc@5 90.912 loss 1.366 Accuracy of the model on the 50000 test images: 71.6% Max accuracy: 71.65% Epoch: [246] [ 0/312] eta: 0:57:45 lr: 0.000357 min_lr: 0.000357 loss: 3.9860 (3.9860) weight_decay: 0.0500 (0.0500) time: 11.1090 data: 9.9342 max mem: 21002 Epoch: [246] [ 10/312] eta: 0:07:31 lr: 0.000356 min_lr: 0.000356 loss: 3.2744 (3.3755) weight_decay: 0.0500 (0.0500) time: 1.4949 data: 1.0187 max mem: 21002 Epoch: [246] [ 20/312] eta: 0:04:56 lr: 0.000356 min_lr: 0.000356 loss: 3.3919 (3.4762) weight_decay: 0.0500 (0.0500) time: 0.5111 data: 0.1653 max mem: 21002 Epoch: [246] [ 30/312] eta: 0:03:40 lr: 0.000356 min_lr: 0.000356 loss: 3.7866 (3.5599) weight_decay: 0.0500 (0.0500) time: 0.3879 data: 0.1020 max mem: 21002 Epoch: [246] [ 40/312] eta: 0:03:19 lr: 0.000355 min_lr: 0.000355 loss: 3.8029 (3.6335) weight_decay: 0.0500 (0.0500) time: 0.4333 data: 0.1466 max mem: 21002 Epoch: [246] [ 50/312] eta: 0:03:06 lr: 0.000355 min_lr: 0.000355 loss: 3.9294 (3.6668) weight_decay: 0.0500 (0.0500) time: 0.6062 data: 0.3177 max mem: 21002 Epoch: [246] [ 60/312] eta: 0:02:43 lr: 0.000354 min_lr: 0.000354 loss: 3.7767 (3.6316) weight_decay: 0.0500 (0.0500) time: 0.4768 data: 0.1717 max mem: 21002 Epoch: [246] [ 70/312] eta: 0:02:37 lr: 0.000354 min_lr: 0.000354 loss: 3.6909 (3.6841) weight_decay: 0.0500 (0.0500) time: 0.4938 data: 0.1917 max mem: 21002 Epoch: [246] [ 80/312] eta: 0:02:28 lr: 0.000354 min_lr: 0.000354 loss: 3.9783 (3.7138) weight_decay: 0.0500 (0.0500) time: 0.6188 data: 0.3352 max mem: 21002 Epoch: [246] [ 90/312] eta: 0:02:15 lr: 0.000353 min_lr: 0.000353 loss: 3.8721 (3.6931) weight_decay: 0.0500 (0.0500) time: 0.4714 data: 0.1871 max mem: 21002 Epoch: [246] [100/312] eta: 0:02:13 lr: 0.000353 min_lr: 0.000353 loss: 3.4565 (3.6477) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.2416 max mem: 21002 Epoch: [246] [110/312] eta: 0:02:01 lr: 0.000352 min_lr: 0.000352 loss: 3.3311 (3.6338) weight_decay: 0.0500 (0.0500) time: 0.5466 data: 0.2024 max mem: 21002 Epoch: [246] [120/312] eta: 0:01:55 lr: 0.000352 min_lr: 0.000352 loss: 3.8221 (3.6386) weight_decay: 0.0500 (0.0500) time: 0.4583 data: 0.1303 max mem: 21002 Epoch: [246] [130/312] eta: 0:01:49 lr: 0.000352 min_lr: 0.000352 loss: 3.8546 (3.6305) weight_decay: 0.0500 (0.0500) time: 0.6154 data: 0.2718 max mem: 21002 Epoch: [246] [140/312] eta: 0:01:41 lr: 0.000351 min_lr: 0.000351 loss: 3.9405 (3.6469) weight_decay: 0.0500 (0.0500) time: 0.5114 data: 0.1461 max mem: 21002 Epoch: [246] [150/312] eta: 0:01:36 lr: 0.000351 min_lr: 0.000351 loss: 3.9405 (3.6551) weight_decay: 0.0500 (0.0500) time: 0.5677 data: 0.1300 max mem: 21002 Epoch: [246] [160/312] eta: 0:01:29 lr: 0.000350 min_lr: 0.000350 loss: 3.9137 (3.6596) weight_decay: 0.0500 (0.0500) time: 0.5962 data: 0.2219 max mem: 21002 Epoch: [246] [170/312] eta: 0:01:23 lr: 0.000350 min_lr: 0.000350 loss: 3.8436 (3.6638) weight_decay: 0.0500 (0.0500) time: 0.5089 data: 0.0976 max mem: 21002 Epoch: [246] [180/312] eta: 0:01:16 lr: 0.000350 min_lr: 0.000350 loss: 3.8574 (3.6667) weight_decay: 0.0500 (0.0500) time: 0.5076 data: 0.0657 max mem: 21002 Epoch: [246] [190/312] eta: 0:01:10 lr: 0.000349 min_lr: 0.000349 loss: 3.8285 (3.6792) weight_decay: 0.0500 (0.0500) time: 0.4866 data: 0.0608 max mem: 21002 Epoch: [246] [200/312] eta: 0:01:05 lr: 0.000349 min_lr: 0.000349 loss: 3.8285 (3.6911) weight_decay: 0.0500 (0.0500) time: 0.6003 data: 0.0902 max mem: 21002 Epoch: [246] [210/312] eta: 0:00:59 lr: 0.000348 min_lr: 0.000348 loss: 3.8342 (3.6885) weight_decay: 0.0500 (0.0500) time: 0.6039 data: 0.1417 max mem: 21002 Epoch: [246] [220/312] eta: 0:00:52 lr: 0.000348 min_lr: 0.000348 loss: 3.7236 (3.6764) weight_decay: 0.0500 (0.0500) time: 0.5041 data: 0.0524 max mem: 21002 Epoch: [246] [230/312] eta: 0:00:47 lr: 0.000348 min_lr: 0.000348 loss: 3.6543 (3.6712) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0286 max mem: 21002 Epoch: [246] [240/312] eta: 0:00:40 lr: 0.000347 min_lr: 0.000347 loss: 3.4834 (3.6629) weight_decay: 0.0500 (0.0500) time: 0.4841 data: 0.0285 max mem: 21002 Epoch: [246] [250/312] eta: 0:00:35 lr: 0.000347 min_lr: 0.000347 loss: 3.2644 (3.6510) weight_decay: 0.0500 (0.0500) time: 0.5198 data: 0.0184 max mem: 21002 Epoch: [246] [260/312] eta: 0:00:29 lr: 0.000346 min_lr: 0.000346 loss: 3.7561 (3.6549) weight_decay: 0.0500 (0.0500) time: 0.5556 data: 0.0351 max mem: 21002 Epoch: [246] [270/312] eta: 0:00:23 lr: 0.000346 min_lr: 0.000346 loss: 3.7855 (3.6563) weight_decay: 0.0500 (0.0500) time: 0.4785 data: 0.0173 max mem: 21002 Epoch: [246] [280/312] eta: 0:00:18 lr: 0.000346 min_lr: 0.000346 loss: 3.7857 (3.6582) weight_decay: 0.0500 (0.0500) time: 0.6758 data: 0.0037 max mem: 21002 Epoch: [246] [290/312] eta: 0:00:12 lr: 0.000345 min_lr: 0.000345 loss: 3.7161 (3.6541) weight_decay: 0.0500 (0.0500) time: 0.5212 data: 0.0035 max mem: 21002 Epoch: [246] [300/312] eta: 0:00:06 lr: 0.000345 min_lr: 0.000345 loss: 3.4456 (3.6460) weight_decay: 0.0500 (0.0500) time: 0.3187 data: 0.0002 max mem: 21002 Epoch: [246] [310/312] eta: 0:00:01 lr: 0.000344 min_lr: 0.000344 loss: 3.6478 (3.6499) weight_decay: 0.0500 (0.0500) time: 0.3169 data: 0.0001 max mem: 21002 Epoch: [246] [311/312] eta: 0:00:00 lr: 0.000344 min_lr: 0.000344 loss: 3.7191 (3.6504) weight_decay: 0.0500 (0.0500) time: 0.3167 data: 0.0001 max mem: 21002 Epoch: [246] Total time: 0:02:52 (0.5515 s / it) Averaged stats: lr: 0.000344 min_lr: 0.000344 loss: 3.7191 (3.6609) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.0835 (1.0835) acc1: 80.3385 (80.3385) acc5: 94.6615 (94.6615) time: 8.5946 data: 8.4757 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4416 (1.3713) acc1: 70.5729 (71.9840) acc5: 91.0156 (91.0880) time: 1.0753 data: 0.9768 max mem: 21002 Test: Total time: 0:00:09 (1.0847 s / it) * Acc@1 71.640 Acc@5 90.936 loss 1.381 Accuracy of the model on the 50000 test images: 71.6% Max accuracy: 71.65% Epoch: [247] [ 0/312] eta: 1:01:18 lr: 0.000344 min_lr: 0.000344 loss: 4.1294 (4.1294) weight_decay: 0.0500 (0.0500) time: 11.7890 data: 11.4943 max mem: 21002 Epoch: [247] [ 10/312] eta: 0:08:01 lr: 0.000344 min_lr: 0.000344 loss: 3.8434 (3.7379) weight_decay: 0.0500 (0.0500) time: 1.5930 data: 1.0455 max mem: 21002 Epoch: [247] [ 20/312] eta: 0:04:55 lr: 0.000343 min_lr: 0.000343 loss: 3.8086 (3.7234) weight_decay: 0.0500 (0.0500) time: 0.4739 data: 0.0406 max mem: 21002 Epoch: [247] [ 30/312] eta: 0:03:39 lr: 0.000343 min_lr: 0.000343 loss: 3.5349 (3.6062) weight_decay: 0.0500 (0.0500) time: 0.3318 data: 0.0405 max mem: 21002 Epoch: [247] [ 40/312] eta: 0:03:12 lr: 0.000343 min_lr: 0.000343 loss: 3.4700 (3.5797) weight_decay: 0.0500 (0.0500) time: 0.3892 data: 0.0867 max mem: 21002 Epoch: [247] [ 50/312] eta: 0:03:03 lr: 0.000342 min_lr: 0.000342 loss: 3.4664 (3.5129) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.2802 max mem: 21002 Epoch: [247] [ 60/312] eta: 0:02:39 lr: 0.000342 min_lr: 0.000342 loss: 3.4664 (3.5496) weight_decay: 0.0500 (0.0500) time: 0.4833 data: 0.1942 max mem: 21002 Epoch: [247] [ 70/312] eta: 0:02:34 lr: 0.000341 min_lr: 0.000341 loss: 3.7873 (3.5726) weight_decay: 0.0500 (0.0500) time: 0.4739 data: 0.1805 max mem: 21002 Epoch: [247] [ 80/312] eta: 0:02:26 lr: 0.000341 min_lr: 0.000341 loss: 3.9008 (3.6345) weight_decay: 0.0500 (0.0500) time: 0.6188 data: 0.3271 max mem: 21002 Epoch: [247] [ 90/312] eta: 0:02:14 lr: 0.000341 min_lr: 0.000341 loss: 3.8853 (3.6270) weight_decay: 0.0500 (0.0500) time: 0.4964 data: 0.1475 max mem: 21002 Epoch: [247] [100/312] eta: 0:02:08 lr: 0.000340 min_lr: 0.000340 loss: 3.7209 (3.6426) weight_decay: 0.0500 (0.0500) time: 0.5180 data: 0.1294 max mem: 21002 Epoch: [247] [110/312] eta: 0:01:57 lr: 0.000340 min_lr: 0.000340 loss: 3.7765 (3.6568) weight_decay: 0.0500 (0.0500) time: 0.4546 data: 0.1293 max mem: 21002 Epoch: [247] [120/312] eta: 0:01:52 lr: 0.000339 min_lr: 0.000339 loss: 3.8436 (3.6542) weight_decay: 0.0500 (0.0500) time: 0.4736 data: 0.1708 max mem: 21002 Epoch: [247] [130/312] eta: 0:01:48 lr: 0.000339 min_lr: 0.000339 loss: 3.8436 (3.6566) weight_decay: 0.0500 (0.0500) time: 0.6736 data: 0.3457 max mem: 21002 Epoch: [247] [140/312] eta: 0:01:38 lr: 0.000339 min_lr: 0.000339 loss: 3.9437 (3.6705) weight_decay: 0.0500 (0.0500) time: 0.4901 data: 0.1796 max mem: 21002 Epoch: [247] [150/312] eta: 0:01:33 lr: 0.000338 min_lr: 0.000338 loss: 3.9009 (3.6699) weight_decay: 0.0500 (0.0500) time: 0.4486 data: 0.1551 max mem: 21002 Epoch: [247] [160/312] eta: 0:01:28 lr: 0.000338 min_lr: 0.000338 loss: 3.7086 (3.6623) weight_decay: 0.0500 (0.0500) time: 0.6591 data: 0.3536 max mem: 21002 Epoch: [247] [170/312] eta: 0:01:20 lr: 0.000337 min_lr: 0.000337 loss: 3.7086 (3.6533) weight_decay: 0.0500 (0.0500) time: 0.5011 data: 0.2033 max mem: 21002 Epoch: [247] [180/312] eta: 0:01:15 lr: 0.000337 min_lr: 0.000337 loss: 3.7292 (3.6544) weight_decay: 0.0500 (0.0500) time: 0.4917 data: 0.2038 max mem: 21002 Epoch: [247] [190/312] eta: 0:01:08 lr: 0.000337 min_lr: 0.000337 loss: 3.7282 (3.6596) weight_decay: 0.0500 (0.0500) time: 0.4922 data: 0.2048 max mem: 21002 Epoch: [247] [200/312] eta: 0:01:03 lr: 0.000336 min_lr: 0.000336 loss: 3.9419 (3.6647) weight_decay: 0.0500 (0.0500) time: 0.4944 data: 0.1664 max mem: 21002 Epoch: [247] [210/312] eta: 0:00:58 lr: 0.000336 min_lr: 0.000336 loss: 3.9266 (3.6664) weight_decay: 0.0500 (0.0500) time: 0.7077 data: 0.3230 max mem: 21002 Epoch: [247] [220/312] eta: 0:00:51 lr: 0.000335 min_lr: 0.000335 loss: 3.8044 (3.6698) weight_decay: 0.0500 (0.0500) time: 0.5002 data: 0.1589 max mem: 21002 Epoch: [247] [230/312] eta: 0:00:46 lr: 0.000335 min_lr: 0.000335 loss: 3.8044 (3.6799) weight_decay: 0.0500 (0.0500) time: 0.4783 data: 0.1159 max mem: 21002 Epoch: [247] [240/312] eta: 0:00:40 lr: 0.000335 min_lr: 0.000335 loss: 3.7957 (3.6730) weight_decay: 0.0500 (0.0500) time: 0.5714 data: 0.1721 max mem: 21002 Epoch: [247] [250/312] eta: 0:00:34 lr: 0.000334 min_lr: 0.000334 loss: 3.6088 (3.6739) weight_decay: 0.0500 (0.0500) time: 0.4581 data: 0.1044 max mem: 21002 Epoch: [247] [260/312] eta: 0:00:29 lr: 0.000334 min_lr: 0.000334 loss: 3.6674 (3.6753) weight_decay: 0.0500 (0.0500) time: 0.5273 data: 0.1520 max mem: 21002 Epoch: [247] [270/312] eta: 0:00:23 lr: 0.000333 min_lr: 0.000333 loss: 3.7244 (3.6769) weight_decay: 0.0500 (0.0500) time: 0.5128 data: 0.1052 max mem: 21002 Epoch: [247] [280/312] eta: 0:00:17 lr: 0.000333 min_lr: 0.000333 loss: 3.6677 (3.6780) weight_decay: 0.0500 (0.0500) time: 0.5012 data: 0.1116 max mem: 21002 Epoch: [247] [290/312] eta: 0:00:12 lr: 0.000333 min_lr: 0.000333 loss: 3.8285 (3.6780) weight_decay: 0.0500 (0.0500) time: 0.6066 data: 0.1708 max mem: 21002 Epoch: [247] [300/312] eta: 0:00:06 lr: 0.000332 min_lr: 0.000332 loss: 3.7949 (3.6728) weight_decay: 0.0500 (0.0500) time: 0.4602 data: 0.0687 max mem: 21002 Epoch: [247] [310/312] eta: 0:00:01 lr: 0.000332 min_lr: 0.000332 loss: 3.4360 (3.6656) weight_decay: 0.0500 (0.0500) time: 0.2964 data: 0.0131 max mem: 21002 Epoch: [247] [311/312] eta: 0:00:00 lr: 0.000332 min_lr: 0.000332 loss: 3.4360 (3.6631) weight_decay: 0.0500 (0.0500) time: 0.2880 data: 0.0041 max mem: 21002 Epoch: [247] Total time: 0:02:49 (0.5424 s / it) Averaged stats: lr: 0.000332 min_lr: 0.000332 loss: 3.4360 (3.6291) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.2156 (1.2156) acc1: 78.6458 (78.6458) acc5: 94.0104 (94.0104) time: 8.8155 data: 8.6966 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4310 (1.3571) acc1: 71.6146 (71.9680) acc5: 91.0156 (90.9120) time: 1.0639 data: 0.9664 max mem: 21002 Test: Total time: 0:00:09 (1.0779 s / it) * Acc@1 71.536 Acc@5 90.830 loss 1.366 Accuracy of the model on the 50000 test images: 71.5% Max accuracy: 71.65% Epoch: [248] [ 0/312] eta: 1:02:14 lr: 0.000332 min_lr: 0.000332 loss: 3.0823 (3.0823) weight_decay: 0.0500 (0.0500) time: 11.9704 data: 8.5388 max mem: 21002 Epoch: [248] [ 10/312] eta: 0:07:57 lr: 0.000331 min_lr: 0.000331 loss: 3.7239 (3.5559) weight_decay: 0.0500 (0.0500) time: 1.5815 data: 1.0134 max mem: 21002 Epoch: [248] [ 20/312] eta: 0:05:16 lr: 0.000331 min_lr: 0.000331 loss: 3.7239 (3.5644) weight_decay: 0.0500 (0.0500) time: 0.5394 data: 0.1567 max mem: 21002 Epoch: [248] [ 30/312] eta: 0:03:53 lr: 0.000331 min_lr: 0.000331 loss: 3.6366 (3.5900) weight_decay: 0.0500 (0.0500) time: 0.4137 data: 0.0266 max mem: 21002 Epoch: [248] [ 40/312] eta: 0:03:19 lr: 0.000330 min_lr: 0.000330 loss: 3.8773 (3.6103) weight_decay: 0.0500 (0.0500) time: 0.3655 data: 0.0322 max mem: 21002 Epoch: [248] [ 50/312] eta: 0:03:14 lr: 0.000330 min_lr: 0.000330 loss: 3.8773 (3.6557) weight_decay: 0.0500 (0.0500) time: 0.6122 data: 0.1746 max mem: 21002 Epoch: [248] [ 60/312] eta: 0:02:48 lr: 0.000329 min_lr: 0.000329 loss: 3.5915 (3.5980) weight_decay: 0.0500 (0.0500) time: 0.5348 data: 0.1432 max mem: 21002 Epoch: [248] [ 70/312] eta: 0:02:40 lr: 0.000329 min_lr: 0.000329 loss: 3.5329 (3.6424) weight_decay: 0.0500 (0.0500) time: 0.4599 data: 0.1288 max mem: 21002 Epoch: [248] [ 80/312] eta: 0:02:29 lr: 0.000329 min_lr: 0.000329 loss: 3.8326 (3.6461) weight_decay: 0.0500 (0.0500) time: 0.5779 data: 0.1771 max mem: 21002 Epoch: [248] [ 90/312] eta: 0:02:18 lr: 0.000328 min_lr: 0.000328 loss: 3.6951 (3.6431) weight_decay: 0.0500 (0.0500) time: 0.4813 data: 0.1289 max mem: 21002 Epoch: [248] [100/312] eta: 0:02:13 lr: 0.000328 min_lr: 0.000328 loss: 3.6788 (3.6321) weight_decay: 0.0500 (0.0500) time: 0.5560 data: 0.1987 max mem: 21002 Epoch: [248] [110/312] eta: 0:02:00 lr: 0.000327 min_lr: 0.000327 loss: 3.8739 (3.6574) weight_decay: 0.0500 (0.0500) time: 0.4795 data: 0.1188 max mem: 21002 Epoch: [248] [120/312] eta: 0:01:55 lr: 0.000327 min_lr: 0.000327 loss: 3.8160 (3.6387) weight_decay: 0.0500 (0.0500) time: 0.4749 data: 0.1293 max mem: 21002 Epoch: [248] [130/312] eta: 0:01:50 lr: 0.000327 min_lr: 0.000327 loss: 3.4364 (3.6342) weight_decay: 0.0500 (0.0500) time: 0.6545 data: 0.2660 max mem: 21002 Epoch: [248] [140/312] eta: 0:01:40 lr: 0.000326 min_lr: 0.000326 loss: 3.3945 (3.6163) weight_decay: 0.0500 (0.0500) time: 0.4683 data: 0.1374 max mem: 21002 Epoch: [248] [150/312] eta: 0:01:35 lr: 0.000326 min_lr: 0.000326 loss: 3.6748 (3.6163) weight_decay: 0.0500 (0.0500) time: 0.4638 data: 0.1482 max mem: 21002 Epoch: [248] [160/312] eta: 0:01:29 lr: 0.000325 min_lr: 0.000325 loss: 3.5871 (3.6046) weight_decay: 0.0500 (0.0500) time: 0.6163 data: 0.2408 max mem: 21002 Epoch: [248] [170/312] eta: 0:01:21 lr: 0.000325 min_lr: 0.000325 loss: 3.5596 (3.5973) weight_decay: 0.0500 (0.0500) time: 0.4918 data: 0.1442 max mem: 21002 Epoch: [248] [180/312] eta: 0:01:16 lr: 0.000325 min_lr: 0.000325 loss: 3.8662 (3.6043) weight_decay: 0.0500 (0.0500) time: 0.4938 data: 0.2029 max mem: 21002 Epoch: [248] [190/312] eta: 0:01:08 lr: 0.000324 min_lr: 0.000324 loss: 3.7909 (3.6035) weight_decay: 0.0500 (0.0500) time: 0.4469 data: 0.1520 max mem: 21002 Epoch: [248] [200/312] eta: 0:01:03 lr: 0.000324 min_lr: 0.000324 loss: 3.5846 (3.5979) weight_decay: 0.0500 (0.0500) time: 0.4879 data: 0.1958 max mem: 21002 Epoch: [248] [210/312] eta: 0:00:58 lr: 0.000323 min_lr: 0.000323 loss: 3.6686 (3.6087) weight_decay: 0.0500 (0.0500) time: 0.6815 data: 0.3934 max mem: 21002 Epoch: [248] [220/312] eta: 0:00:51 lr: 0.000323 min_lr: 0.000323 loss: 3.6686 (3.6070) weight_decay: 0.0500 (0.0500) time: 0.4878 data: 0.1981 max mem: 21002 Epoch: [248] [230/312] eta: 0:00:46 lr: 0.000323 min_lr: 0.000323 loss: 3.8928 (3.6202) weight_decay: 0.0500 (0.0500) time: 0.5051 data: 0.1788 max mem: 21002 Epoch: [248] [240/312] eta: 0:00:40 lr: 0.000322 min_lr: 0.000322 loss: 3.9625 (3.6268) weight_decay: 0.0500 (0.0500) time: 0.5951 data: 0.2454 max mem: 21002 Epoch: [248] [250/312] eta: 0:00:34 lr: 0.000322 min_lr: 0.000322 loss: 3.6850 (3.6263) weight_decay: 0.0500 (0.0500) time: 0.5186 data: 0.1411 max mem: 21002 Epoch: [248] [260/312] eta: 0:00:29 lr: 0.000322 min_lr: 0.000322 loss: 3.6850 (3.6308) weight_decay: 0.0500 (0.0500) time: 0.5882 data: 0.1836 max mem: 21002 Epoch: [248] [270/312] eta: 0:00:23 lr: 0.000321 min_lr: 0.000321 loss: 3.6827 (3.6337) weight_decay: 0.0500 (0.0500) time: 0.4474 data: 0.1101 max mem: 21002 Epoch: [248] [280/312] eta: 0:00:17 lr: 0.000321 min_lr: 0.000321 loss: 3.6938 (3.6396) weight_decay: 0.0500 (0.0500) time: 0.4534 data: 0.1524 max mem: 21002 Epoch: [248] [290/312] eta: 0:00:12 lr: 0.000320 min_lr: 0.000320 loss: 3.6938 (3.6323) weight_decay: 0.0500 (0.0500) time: 0.6234 data: 0.2913 max mem: 21002 Epoch: [248] [300/312] eta: 0:00:06 lr: 0.000320 min_lr: 0.000320 loss: 3.7362 (3.6372) weight_decay: 0.0500 (0.0500) time: 0.4508 data: 0.1396 max mem: 21002 Epoch: [248] [310/312] eta: 0:00:01 lr: 0.000320 min_lr: 0.000320 loss: 3.7968 (3.6363) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [248] [311/312] eta: 0:00:00 lr: 0.000320 min_lr: 0.000320 loss: 3.7771 (3.6352) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [248] Total time: 0:02:49 (0.5448 s / it) Averaged stats: lr: 0.000320 min_lr: 0.000320 loss: 3.7771 (3.6417) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.1173 (1.1173) acc1: 79.2969 (79.2969) acc5: 94.4010 (94.4010) time: 8.3511 data: 8.2328 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4428 (1.3355) acc1: 70.5729 (71.8880) acc5: 90.3646 (90.9280) time: 1.0215 data: 0.9239 max mem: 21002 Test: Total time: 0:00:09 (1.0314 s / it) * Acc@1 71.804 Acc@5 90.836 loss 1.349 Accuracy of the model on the 50000 test images: 71.8% Max accuracy: 71.80% Epoch: [249] [ 0/312] eta: 1:00:13 lr: 0.000320 min_lr: 0.000320 loss: 3.5886 (3.5886) weight_decay: 0.0500 (0.0500) time: 11.5820 data: 9.0084 max mem: 21002 Epoch: [249] [ 10/312] eta: 0:07:54 lr: 0.000319 min_lr: 0.000319 loss: 3.5886 (3.5954) weight_decay: 0.0500 (0.0500) time: 1.5714 data: 1.0167 max mem: 21002 Epoch: [249] [ 20/312] eta: 0:05:15 lr: 0.000319 min_lr: 0.000319 loss: 3.5422 (3.5266) weight_decay: 0.0500 (0.0500) time: 0.5537 data: 0.1464 max mem: 21002 Epoch: [249] [ 30/312] eta: 0:03:52 lr: 0.000318 min_lr: 0.000318 loss: 3.5365 (3.5457) weight_decay: 0.0500 (0.0500) time: 0.4135 data: 0.0380 max mem: 21002 Epoch: [249] [ 40/312] eta: 0:03:10 lr: 0.000318 min_lr: 0.000318 loss: 3.5400 (3.5551) weight_decay: 0.0500 (0.0500) time: 0.3047 data: 0.0007 max mem: 21002 Epoch: [249] [ 50/312] eta: 0:03:02 lr: 0.000318 min_lr: 0.000318 loss: 3.8488 (3.6038) weight_decay: 0.0500 (0.0500) time: 0.4970 data: 0.1763 max mem: 21002 Epoch: [249] [ 60/312] eta: 0:02:38 lr: 0.000317 min_lr: 0.000317 loss: 3.8568 (3.5991) weight_decay: 0.0500 (0.0500) time: 0.4799 data: 0.1764 max mem: 21002 Epoch: [249] [ 70/312] eta: 0:02:34 lr: 0.000317 min_lr: 0.000317 loss: 3.8568 (3.6509) weight_decay: 0.0500 (0.0500) time: 0.4877 data: 0.1739 max mem: 21002 Epoch: [249] [ 80/312] eta: 0:02:25 lr: 0.000316 min_lr: 0.000316 loss: 3.8754 (3.6697) weight_decay: 0.0500 (0.0500) time: 0.6245 data: 0.2534 max mem: 21002 Epoch: [249] [ 90/312] eta: 0:02:16 lr: 0.000316 min_lr: 0.000316 loss: 3.8551 (3.6589) weight_decay: 0.0500 (0.0500) time: 0.5265 data: 0.1433 max mem: 21002 Epoch: [249] [100/312] eta: 0:02:09 lr: 0.000316 min_lr: 0.000316 loss: 3.8379 (3.6676) weight_decay: 0.0500 (0.0500) time: 0.5516 data: 0.1660 max mem: 21002 Epoch: [249] [110/312] eta: 0:01:57 lr: 0.000315 min_lr: 0.000315 loss: 3.8379 (3.6902) weight_decay: 0.0500 (0.0500) time: 0.4476 data: 0.1029 max mem: 21002 Epoch: [249] [120/312] eta: 0:01:52 lr: 0.000315 min_lr: 0.000315 loss: 3.6917 (3.6781) weight_decay: 0.0500 (0.0500) time: 0.4545 data: 0.0937 max mem: 21002 Epoch: [249] [130/312] eta: 0:01:48 lr: 0.000314 min_lr: 0.000314 loss: 3.8176 (3.7038) weight_decay: 0.0500 (0.0500) time: 0.6550 data: 0.1742 max mem: 21002 Epoch: [249] [140/312] eta: 0:01:39 lr: 0.000314 min_lr: 0.000314 loss: 3.9176 (3.6973) weight_decay: 0.0500 (0.0500) time: 0.5188 data: 0.1152 max mem: 21002 Epoch: [249] [150/312] eta: 0:01:34 lr: 0.000314 min_lr: 0.000314 loss: 3.4525 (3.6734) weight_decay: 0.0500 (0.0500) time: 0.5213 data: 0.0951 max mem: 21002 Epoch: [249] [160/312] eta: 0:01:27 lr: 0.000313 min_lr: 0.000313 loss: 3.4452 (3.6655) weight_decay: 0.0500 (0.0500) time: 0.5662 data: 0.0680 max mem: 21002 Epoch: [249] [170/312] eta: 0:01:21 lr: 0.000313 min_lr: 0.000313 loss: 3.5527 (3.6603) weight_decay: 0.0500 (0.0500) time: 0.4774 data: 0.0648 max mem: 21002 Epoch: [249] [180/312] eta: 0:01:16 lr: 0.000313 min_lr: 0.000313 loss: 3.5527 (3.6563) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.1094 max mem: 21002 Epoch: [249] [190/312] eta: 0:01:08 lr: 0.000312 min_lr: 0.000312 loss: 3.6545 (3.6606) weight_decay: 0.0500 (0.0500) time: 0.4806 data: 0.0565 max mem: 21002 Epoch: [249] [200/312] eta: 0:01:03 lr: 0.000312 min_lr: 0.000312 loss: 3.6545 (3.6508) weight_decay: 0.0500 (0.0500) time: 0.4863 data: 0.0630 max mem: 21002 Epoch: [249] [210/312] eta: 0:00:58 lr: 0.000311 min_lr: 0.000311 loss: 3.5850 (3.6453) weight_decay: 0.0500 (0.0500) time: 0.6512 data: 0.1003 max mem: 21002 Epoch: [249] [220/312] eta: 0:00:51 lr: 0.000311 min_lr: 0.000311 loss: 3.4050 (3.6368) weight_decay: 0.0500 (0.0500) time: 0.4854 data: 0.0718 max mem: 21002 Epoch: [249] [230/312] eta: 0:00:46 lr: 0.000311 min_lr: 0.000311 loss: 3.4264 (3.6373) weight_decay: 0.0500 (0.0500) time: 0.5113 data: 0.0836 max mem: 21002 Epoch: [249] [240/312] eta: 0:00:40 lr: 0.000310 min_lr: 0.000310 loss: 3.7432 (3.6324) weight_decay: 0.0500 (0.0500) time: 0.6301 data: 0.0542 max mem: 21002 Epoch: [249] [250/312] eta: 0:00:34 lr: 0.000310 min_lr: 0.000310 loss: 3.7432 (3.6322) weight_decay: 0.0500 (0.0500) time: 0.4754 data: 0.0346 max mem: 21002 Epoch: [249] [260/312] eta: 0:00:29 lr: 0.000309 min_lr: 0.000309 loss: 3.7240 (3.6321) weight_decay: 0.0500 (0.0500) time: 0.5223 data: 0.0504 max mem: 21002 Epoch: [249] [270/312] eta: 0:00:23 lr: 0.000309 min_lr: 0.000309 loss: 3.6738 (3.6301) weight_decay: 0.0500 (0.0500) time: 0.4964 data: 0.0314 max mem: 21002 Epoch: [249] [280/312] eta: 0:00:17 lr: 0.000309 min_lr: 0.000309 loss: 3.8097 (3.6352) weight_decay: 0.0500 (0.0500) time: 0.5108 data: 0.0550 max mem: 21002 Epoch: [249] [290/312] eta: 0:00:12 lr: 0.000308 min_lr: 0.000308 loss: 3.7033 (3.6278) weight_decay: 0.0500 (0.0500) time: 0.6335 data: 0.0505 max mem: 21002 Epoch: [249] [300/312] eta: 0:00:06 lr: 0.000308 min_lr: 0.000308 loss: 3.3602 (3.6261) weight_decay: 0.0500 (0.0500) time: 0.4431 data: 0.0351 max mem: 21002 Epoch: [249] [310/312] eta: 0:00:01 lr: 0.000308 min_lr: 0.000308 loss: 3.7005 (3.6255) weight_decay: 0.0500 (0.0500) time: 0.3004 data: 0.0244 max mem: 21002 Epoch: [249] [311/312] eta: 0:00:00 lr: 0.000308 min_lr: 0.000308 loss: 3.7381 (3.6271) weight_decay: 0.0500 (0.0500) time: 0.2926 data: 0.0160 max mem: 21002 Epoch: [249] Total time: 0:02:50 (0.5454 s / it) Averaged stats: lr: 0.000308 min_lr: 0.000308 loss: 3.7381 (3.6271) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.1653 (1.1653) acc1: 78.2552 (78.2552) acc5: 94.6615 (94.6615) time: 8.7596 data: 8.6409 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4441 (1.4043) acc1: 71.8750 (71.9200) acc5: 91.2760 (91.3440) time: 1.0589 data: 0.9602 max mem: 21002 Test: Total time: 0:00:09 (1.0858 s / it) * Acc@1 71.632 Acc@5 90.964 loss 1.414 Accuracy of the model on the 50000 test images: 71.6% Max accuracy: 71.80% Epoch: [250] [ 0/312] eta: 1:03:37 lr: 0.000307 min_lr: 0.000307 loss: 4.1180 (4.1180) weight_decay: 0.0500 (0.0500) time: 12.2350 data: 8.5905 max mem: 21002 Epoch: [250] [ 10/312] eta: 0:08:13 lr: 0.000307 min_lr: 0.000307 loss: 4.0028 (3.7599) weight_decay: 0.0500 (0.0500) time: 1.6337 data: 0.9893 max mem: 21002 Epoch: [250] [ 20/312] eta: 0:05:23 lr: 0.000307 min_lr: 0.000307 loss: 3.8929 (3.7260) weight_decay: 0.0500 (0.0500) time: 0.5500 data: 0.1660 max mem: 21002 Epoch: [250] [ 30/312] eta: 0:03:57 lr: 0.000306 min_lr: 0.000306 loss: 3.7333 (3.6636) weight_decay: 0.0500 (0.0500) time: 0.4095 data: 0.0533 max mem: 21002 Epoch: [250] [ 40/312] eta: 0:03:20 lr: 0.000306 min_lr: 0.000306 loss: 3.6906 (3.6522) weight_decay: 0.0500 (0.0500) time: 0.3507 data: 0.0024 max mem: 21002 Epoch: [250] [ 50/312] eta: 0:03:16 lr: 0.000306 min_lr: 0.000306 loss: 3.6906 (3.6426) weight_decay: 0.0500 (0.0500) time: 0.6000 data: 0.0139 max mem: 21002 Epoch: [250] [ 60/312] eta: 0:02:54 lr: 0.000305 min_lr: 0.000305 loss: 3.7956 (3.6882) weight_decay: 0.0500 (0.0500) time: 0.6034 data: 0.0748 max mem: 21002 Epoch: [250] [ 70/312] eta: 0:02:43 lr: 0.000305 min_lr: 0.000305 loss: 3.4195 (3.6219) weight_decay: 0.0500 (0.0500) time: 0.4846 data: 0.0618 max mem: 21002 Epoch: [250] [ 80/312] eta: 0:02:32 lr: 0.000304 min_lr: 0.000304 loss: 3.1487 (3.5860) weight_decay: 0.0500 (0.0500) time: 0.5478 data: 0.0017 max mem: 21002 Epoch: [250] [ 90/312] eta: 0:02:20 lr: 0.000304 min_lr: 0.000304 loss: 3.4762 (3.6166) weight_decay: 0.0500 (0.0500) time: 0.4844 data: 0.0159 max mem: 21002 Epoch: [250] [100/312] eta: 0:02:15 lr: 0.000304 min_lr: 0.000304 loss: 3.8466 (3.6215) weight_decay: 0.0500 (0.0500) time: 0.5669 data: 0.0548 max mem: 21002 Epoch: [250] [110/312] eta: 0:02:02 lr: 0.000303 min_lr: 0.000303 loss: 3.8466 (3.6335) weight_decay: 0.0500 (0.0500) time: 0.4989 data: 0.0405 max mem: 21002 Epoch: [250] [120/312] eta: 0:01:57 lr: 0.000303 min_lr: 0.000303 loss: 3.7318 (3.6323) weight_decay: 0.0500 (0.0500) time: 0.4843 data: 0.0501 max mem: 21002 Epoch: [250] [130/312] eta: 0:01:53 lr: 0.000303 min_lr: 0.000303 loss: 3.4965 (3.6148) weight_decay: 0.0500 (0.0500) time: 0.6970 data: 0.0811 max mem: 21002 Epoch: [250] [140/312] eta: 0:01:42 lr: 0.000302 min_lr: 0.000302 loss: 3.7549 (3.6222) weight_decay: 0.0500 (0.0500) time: 0.5023 data: 0.0326 max mem: 21002 Epoch: [250] [150/312] eta: 0:01:38 lr: 0.000302 min_lr: 0.000302 loss: 3.8219 (3.6377) weight_decay: 0.0500 (0.0500) time: 0.5027 data: 0.0335 max mem: 21002 Epoch: [250] [160/312] eta: 0:01:31 lr: 0.000301 min_lr: 0.000301 loss: 3.7931 (3.6266) weight_decay: 0.0500 (0.0500) time: 0.6317 data: 0.0566 max mem: 21002 Epoch: [250] [170/312] eta: 0:01:24 lr: 0.000301 min_lr: 0.000301 loss: 3.5204 (3.6204) weight_decay: 0.0500 (0.0500) time: 0.4964 data: 0.0413 max mem: 21002 Epoch: [250] [180/312] eta: 0:01:19 lr: 0.000301 min_lr: 0.000301 loss: 3.7237 (3.6293) weight_decay: 0.0500 (0.0500) time: 0.5796 data: 0.0241 max mem: 21002 Epoch: [250] [190/312] eta: 0:01:11 lr: 0.000300 min_lr: 0.000300 loss: 3.7431 (3.6330) weight_decay: 0.0500 (0.0500) time: 0.5093 data: 0.0135 max mem: 21002 Epoch: [250] [200/312] eta: 0:01:05 lr: 0.000300 min_lr: 0.000300 loss: 3.7869 (3.6407) weight_decay: 0.0500 (0.0500) time: 0.4603 data: 0.0193 max mem: 21002 Epoch: [250] [210/312] eta: 0:01:00 lr: 0.000299 min_lr: 0.000299 loss: 3.7841 (3.6427) weight_decay: 0.0500 (0.0500) time: 0.6388 data: 0.0314 max mem: 21002 Epoch: [250] [220/312] eta: 0:00:53 lr: 0.000299 min_lr: 0.000299 loss: 3.5892 (3.6332) weight_decay: 0.0500 (0.0500) time: 0.5079 data: 0.0505 max mem: 21002 Epoch: [250] [230/312] eta: 0:00:47 lr: 0.000299 min_lr: 0.000299 loss: 3.6645 (3.6366) weight_decay: 0.0500 (0.0500) time: 0.4629 data: 0.0768 max mem: 21002 Epoch: [250] [240/312] eta: 0:00:41 lr: 0.000298 min_lr: 0.000298 loss: 3.8002 (3.6401) weight_decay: 0.0500 (0.0500) time: 0.6061 data: 0.0452 max mem: 21002 Epoch: [250] [250/312] eta: 0:00:35 lr: 0.000298 min_lr: 0.000298 loss: 3.8337 (3.6383) weight_decay: 0.0500 (0.0500) time: 0.5564 data: 0.0008 max mem: 21002 Epoch: [250] [260/312] eta: 0:00:29 lr: 0.000298 min_lr: 0.000298 loss: 3.9192 (3.6508) weight_decay: 0.0500 (0.0500) time: 0.4680 data: 0.0571 max mem: 21002 Epoch: [250] [270/312] eta: 0:00:23 lr: 0.000297 min_lr: 0.000297 loss: 3.7754 (3.6504) weight_decay: 0.0500 (0.0500) time: 0.3769 data: 0.0571 max mem: 21002 Epoch: [250] [280/312] eta: 0:00:18 lr: 0.000297 min_lr: 0.000297 loss: 3.4518 (3.6425) weight_decay: 0.0500 (0.0500) time: 0.5783 data: 0.0218 max mem: 21002 Epoch: [250] [290/312] eta: 0:00:12 lr: 0.000296 min_lr: 0.000296 loss: 3.8858 (3.6528) weight_decay: 0.0500 (0.0500) time: 0.6494 data: 0.0325 max mem: 21002 Epoch: [250] [300/312] eta: 0:00:06 lr: 0.000296 min_lr: 0.000296 loss: 3.7939 (3.6524) weight_decay: 0.0500 (0.0500) time: 0.3554 data: 0.0111 max mem: 21002 Epoch: [250] [310/312] eta: 0:00:01 lr: 0.000296 min_lr: 0.000296 loss: 3.7349 (3.6602) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [250] [311/312] eta: 0:00:00 lr: 0.000296 min_lr: 0.000296 loss: 3.7349 (3.6576) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [250] Total time: 0:02:52 (0.5524 s / it) Averaged stats: lr: 0.000296 min_lr: 0.000296 loss: 3.7349 (3.6425) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 1.1918 (1.1918) acc1: 78.5156 (78.5156) acc5: 94.6615 (94.6615) time: 8.9842 data: 8.8655 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4628 (1.3902) acc1: 70.9635 (72.3520) acc5: 90.6250 (90.7200) time: 1.0839 data: 0.9851 max mem: 21002 Test: Total time: 0:00:09 (1.0944 s / it) * Acc@1 72.080 Acc@5 90.992 loss 1.394 Accuracy of the model on the 50000 test images: 72.1% Max accuracy: 72.08% Epoch: [251] [ 0/312] eta: 1:00:07 lr: 0.000296 min_lr: 0.000296 loss: 3.9908 (3.9908) weight_decay: 0.0500 (0.0500) time: 11.5623 data: 10.9183 max mem: 21002 Epoch: [251] [ 10/312] eta: 0:08:04 lr: 0.000295 min_lr: 0.000295 loss: 3.9908 (3.8718) weight_decay: 0.0500 (0.0500) time: 1.6054 data: 0.9932 max mem: 21002 Epoch: [251] [ 20/312] eta: 0:05:01 lr: 0.000295 min_lr: 0.000295 loss: 3.8876 (3.8268) weight_decay: 0.0500 (0.0500) time: 0.5060 data: 0.0281 max mem: 21002 Epoch: [251] [ 30/312] eta: 0:03:43 lr: 0.000295 min_lr: 0.000295 loss: 3.8249 (3.7891) weight_decay: 0.0500 (0.0500) time: 0.3450 data: 0.0281 max mem: 21002 Epoch: [251] [ 40/312] eta: 0:03:09 lr: 0.000294 min_lr: 0.000294 loss: 3.7646 (3.7279) weight_decay: 0.0500 (0.0500) time: 0.3447 data: 0.0580 max mem: 21002 Epoch: [251] [ 50/312] eta: 0:02:59 lr: 0.000294 min_lr: 0.000294 loss: 3.6712 (3.6800) weight_decay: 0.0500 (0.0500) time: 0.5202 data: 0.2347 max mem: 21002 Epoch: [251] [ 60/312] eta: 0:02:36 lr: 0.000293 min_lr: 0.000293 loss: 3.7713 (3.7088) weight_decay: 0.0500 (0.0500) time: 0.4620 data: 0.1774 max mem: 21002 Epoch: [251] [ 70/312] eta: 0:02:31 lr: 0.000293 min_lr: 0.000293 loss: 3.8724 (3.7108) weight_decay: 0.0500 (0.0500) time: 0.4670 data: 0.1819 max mem: 21002 Epoch: [251] [ 80/312] eta: 0:02:24 lr: 0.000293 min_lr: 0.000293 loss: 3.6738 (3.6874) weight_decay: 0.0500 (0.0500) time: 0.6398 data: 0.3537 max mem: 21002 Epoch: [251] [ 90/312] eta: 0:02:12 lr: 0.000292 min_lr: 0.000292 loss: 3.7612 (3.7003) weight_decay: 0.0500 (0.0500) time: 0.4928 data: 0.2058 max mem: 21002 Epoch: [251] [100/312] eta: 0:02:08 lr: 0.000292 min_lr: 0.000292 loss: 3.6682 (3.6930) weight_decay: 0.0500 (0.0500) time: 0.5290 data: 0.2269 max mem: 21002 Epoch: [251] [110/312] eta: 0:01:56 lr: 0.000292 min_lr: 0.000292 loss: 3.6682 (3.6996) weight_decay: 0.0500 (0.0500) time: 0.4969 data: 0.1936 max mem: 21002 Epoch: [251] [120/312] eta: 0:01:51 lr: 0.000291 min_lr: 0.000291 loss: 3.7862 (3.6920) weight_decay: 0.0500 (0.0500) time: 0.4551 data: 0.1648 max mem: 21002 Epoch: [251] [130/312] eta: 0:01:46 lr: 0.000291 min_lr: 0.000291 loss: 3.6517 (3.6821) weight_decay: 0.0500 (0.0500) time: 0.6361 data: 0.3324 max mem: 21002 Epoch: [251] [140/312] eta: 0:01:37 lr: 0.000290 min_lr: 0.000290 loss: 3.6629 (3.6813) weight_decay: 0.0500 (0.0500) time: 0.4824 data: 0.1684 max mem: 21002 Epoch: [251] [150/312] eta: 0:01:32 lr: 0.000290 min_lr: 0.000290 loss: 3.6228 (3.6692) weight_decay: 0.0500 (0.0500) time: 0.4840 data: 0.1861 max mem: 21002 Epoch: [251] [160/312] eta: 0:01:27 lr: 0.000290 min_lr: 0.000290 loss: 3.6053 (3.6742) weight_decay: 0.0500 (0.0500) time: 0.6431 data: 0.3413 max mem: 21002 Epoch: [251] [170/312] eta: 0:01:20 lr: 0.000289 min_lr: 0.000289 loss: 3.8912 (3.6770) weight_decay: 0.0500 (0.0500) time: 0.4950 data: 0.1937 max mem: 21002 Epoch: [251] [180/312] eta: 0:01:15 lr: 0.000289 min_lr: 0.000289 loss: 3.8558 (3.6779) weight_decay: 0.0500 (0.0500) time: 0.5099 data: 0.2001 max mem: 21002 Epoch: [251] [190/312] eta: 0:01:07 lr: 0.000289 min_lr: 0.000289 loss: 3.8361 (3.6796) weight_decay: 0.0500 (0.0500) time: 0.4733 data: 0.1623 max mem: 21002 Epoch: [251] [200/312] eta: 0:01:02 lr: 0.000288 min_lr: 0.000288 loss: 3.9802 (3.6805) weight_decay: 0.0500 (0.0500) time: 0.4746 data: 0.1860 max mem: 21002 Epoch: [251] [210/312] eta: 0:00:58 lr: 0.000288 min_lr: 0.000288 loss: 3.3604 (3.6580) weight_decay: 0.0500 (0.0500) time: 0.7163 data: 0.3786 max mem: 21002 Epoch: [251] [220/312] eta: 0:00:51 lr: 0.000287 min_lr: 0.000287 loss: 3.8866 (3.6670) weight_decay: 0.0500 (0.0500) time: 0.5284 data: 0.1932 max mem: 21002 Epoch: [251] [230/312] eta: 0:00:46 lr: 0.000287 min_lr: 0.000287 loss: 3.9003 (3.6726) weight_decay: 0.0500 (0.0500) time: 0.4731 data: 0.1428 max mem: 21002 Epoch: [251] [240/312] eta: 0:00:40 lr: 0.000287 min_lr: 0.000287 loss: 3.8252 (3.6730) weight_decay: 0.0500 (0.0500) time: 0.6219 data: 0.2262 max mem: 21002 Epoch: [251] [250/312] eta: 0:00:34 lr: 0.000286 min_lr: 0.000286 loss: 3.9237 (3.6809) weight_decay: 0.0500 (0.0500) time: 0.5681 data: 0.1723 max mem: 21002 Epoch: [251] [260/312] eta: 0:00:29 lr: 0.000286 min_lr: 0.000286 loss: 3.6617 (3.6719) weight_decay: 0.0500 (0.0500) time: 0.5797 data: 0.1509 max mem: 21002 Epoch: [251] [270/312] eta: 0:00:23 lr: 0.000286 min_lr: 0.000286 loss: 3.6617 (3.6662) weight_decay: 0.0500 (0.0500) time: 0.4483 data: 0.0626 max mem: 21002 Epoch: [251] [280/312] eta: 0:00:17 lr: 0.000285 min_lr: 0.000285 loss: 3.6846 (3.6593) weight_decay: 0.0500 (0.0500) time: 0.5146 data: 0.1127 max mem: 21002 Epoch: [251] [290/312] eta: 0:00:12 lr: 0.000285 min_lr: 0.000285 loss: 3.6921 (3.6584) weight_decay: 0.0500 (0.0500) time: 0.6344 data: 0.1799 max mem: 21002 Epoch: [251] [300/312] eta: 0:00:06 lr: 0.000284 min_lr: 0.000284 loss: 3.7091 (3.6638) weight_decay: 0.0500 (0.0500) time: 0.4031 data: 0.0676 max mem: 21002 Epoch: [251] [310/312] eta: 0:00:01 lr: 0.000284 min_lr: 0.000284 loss: 3.6435 (3.6618) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [251] [311/312] eta: 0:00:00 lr: 0.000284 min_lr: 0.000284 loss: 3.6435 (3.6630) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [251] Total time: 0:02:49 (0.5439 s / it) Averaged stats: lr: 0.000284 min_lr: 0.000284 loss: 3.6435 (3.6336) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:21 loss: 1.1007 (1.1007) acc1: 80.7292 (80.7292) acc5: 94.1406 (94.1406) time: 9.0145 data: 8.8957 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4347 (1.3840) acc1: 72.0052 (72.1440) acc5: 91.6667 (91.0400) time: 1.0889 data: 0.9885 max mem: 21002 Test: Total time: 0:00:10 (1.1275 s / it) * Acc@1 71.820 Acc@5 90.914 loss 1.397 Accuracy of the model on the 50000 test images: 71.8% Max accuracy: 72.08% Epoch: [252] [ 0/312] eta: 1:03:38 lr: 0.000284 min_lr: 0.000284 loss: 3.3697 (3.3697) weight_decay: 0.0500 (0.0500) time: 12.2390 data: 9.2059 max mem: 21002 Epoch: [252] [ 10/312] eta: 0:08:42 lr: 0.000284 min_lr: 0.000284 loss: 3.5072 (3.5612) weight_decay: 0.0500 (0.0500) time: 1.7315 data: 1.0404 max mem: 21002 Epoch: [252] [ 20/312] eta: 0:05:23 lr: 0.000283 min_lr: 0.000283 loss: 3.7896 (3.6447) weight_decay: 0.0500 (0.0500) time: 0.5501 data: 0.1606 max mem: 21002 Epoch: [252] [ 30/312] eta: 0:03:57 lr: 0.000283 min_lr: 0.000283 loss: 3.8168 (3.5689) weight_decay: 0.0500 (0.0500) time: 0.3557 data: 0.0498 max mem: 21002 Epoch: [252] [ 40/312] eta: 0:03:13 lr: 0.000283 min_lr: 0.000283 loss: 3.1919 (3.5072) weight_decay: 0.0500 (0.0500) time: 0.2936 data: 0.0015 max mem: 21002 Epoch: [252] [ 50/312] eta: 0:03:03 lr: 0.000282 min_lr: 0.000282 loss: 3.7013 (3.5777) weight_decay: 0.0500 (0.0500) time: 0.4743 data: 0.0327 max mem: 21002 Epoch: [252] [ 60/312] eta: 0:02:43 lr: 0.000282 min_lr: 0.000282 loss: 3.8855 (3.5709) weight_decay: 0.0500 (0.0500) time: 0.5188 data: 0.0845 max mem: 21002 Epoch: [252] [ 70/312] eta: 0:02:37 lr: 0.000281 min_lr: 0.000281 loss: 3.7078 (3.5655) weight_decay: 0.0500 (0.0500) time: 0.5227 data: 0.1256 max mem: 21002 Epoch: [252] [ 80/312] eta: 0:02:29 lr: 0.000281 min_lr: 0.000281 loss: 3.5925 (3.5603) weight_decay: 0.0500 (0.0500) time: 0.6416 data: 0.1076 max mem: 21002 Epoch: [252] [ 90/312] eta: 0:02:18 lr: 0.000281 min_lr: 0.000281 loss: 3.6110 (3.5709) weight_decay: 0.0500 (0.0500) time: 0.5250 data: 0.0731 max mem: 21002 Epoch: [252] [100/312] eta: 0:02:11 lr: 0.000280 min_lr: 0.000280 loss: 3.7106 (3.5826) weight_decay: 0.0500 (0.0500) time: 0.5281 data: 0.0614 max mem: 21002 Epoch: [252] [110/312] eta: 0:02:00 lr: 0.000280 min_lr: 0.000280 loss: 3.6446 (3.5736) weight_decay: 0.0500 (0.0500) time: 0.4710 data: 0.0235 max mem: 21002 Epoch: [252] [120/312] eta: 0:01:54 lr: 0.000280 min_lr: 0.000280 loss: 3.6050 (3.5569) weight_decay: 0.0500 (0.0500) time: 0.4747 data: 0.0397 max mem: 21002 Epoch: [252] [130/312] eta: 0:01:49 lr: 0.000279 min_lr: 0.000279 loss: 3.5874 (3.5556) weight_decay: 0.0500 (0.0500) time: 0.6397 data: 0.0729 max mem: 21002 Epoch: [252] [140/312] eta: 0:01:40 lr: 0.000279 min_lr: 0.000279 loss: 3.6923 (3.5706) weight_decay: 0.0500 (0.0500) time: 0.5118 data: 0.0696 max mem: 21002 Epoch: [252] [150/312] eta: 0:01:36 lr: 0.000279 min_lr: 0.000279 loss: 3.6923 (3.5706) weight_decay: 0.0500 (0.0500) time: 0.5383 data: 0.0749 max mem: 21002 Epoch: [252] [160/312] eta: 0:01:30 lr: 0.000278 min_lr: 0.000278 loss: 3.6150 (3.5790) weight_decay: 0.0500 (0.0500) time: 0.6407 data: 0.0678 max mem: 21002 Epoch: [252] [170/312] eta: 0:01:22 lr: 0.000278 min_lr: 0.000278 loss: 3.6086 (3.5685) weight_decay: 0.0500 (0.0500) time: 0.4697 data: 0.0487 max mem: 21002 Epoch: [252] [180/312] eta: 0:01:17 lr: 0.000277 min_lr: 0.000277 loss: 3.4173 (3.5622) weight_decay: 0.0500 (0.0500) time: 0.5373 data: 0.0738 max mem: 21002 Epoch: [252] [190/312] eta: 0:01:09 lr: 0.000277 min_lr: 0.000277 loss: 3.7781 (3.5683) weight_decay: 0.0500 (0.0500) time: 0.5098 data: 0.0596 max mem: 21002 Epoch: [252] [200/312] eta: 0:01:05 lr: 0.000277 min_lr: 0.000277 loss: 3.8230 (3.5689) weight_decay: 0.0500 (0.0500) time: 0.5400 data: 0.0512 max mem: 21002 Epoch: [252] [210/312] eta: 0:00:59 lr: 0.000276 min_lr: 0.000276 loss: 3.4201 (3.5629) weight_decay: 0.0500 (0.0500) time: 0.6904 data: 0.0453 max mem: 21002 Epoch: [252] [220/312] eta: 0:00:52 lr: 0.000276 min_lr: 0.000276 loss: 3.3248 (3.5499) weight_decay: 0.0500 (0.0500) time: 0.4785 data: 0.0350 max mem: 21002 Epoch: [252] [230/312] eta: 0:00:47 lr: 0.000276 min_lr: 0.000276 loss: 3.3713 (3.5498) weight_decay: 0.0500 (0.0500) time: 0.5694 data: 0.0804 max mem: 21002 Epoch: [252] [240/312] eta: 0:00:41 lr: 0.000275 min_lr: 0.000275 loss: 3.6038 (3.5523) weight_decay: 0.0500 (0.0500) time: 0.6252 data: 0.0461 max mem: 21002 Epoch: [252] [250/312] eta: 0:00:35 lr: 0.000275 min_lr: 0.000275 loss: 3.8363 (3.5624) weight_decay: 0.0500 (0.0500) time: 0.4487 data: 0.0489 max mem: 21002 Epoch: [252] [260/312] eta: 0:00:29 lr: 0.000275 min_lr: 0.000275 loss: 3.8363 (3.5658) weight_decay: 0.0500 (0.0500) time: 0.5674 data: 0.0863 max mem: 21002 Epoch: [252] [270/312] eta: 0:00:23 lr: 0.000274 min_lr: 0.000274 loss: 3.7453 (3.5647) weight_decay: 0.0500 (0.0500) time: 0.4968 data: 0.0382 max mem: 21002 Epoch: [252] [280/312] eta: 0:00:18 lr: 0.000274 min_lr: 0.000274 loss: 3.5112 (3.5645) weight_decay: 0.0500 (0.0500) time: 0.5370 data: 0.0440 max mem: 21002 Epoch: [252] [290/312] eta: 0:00:12 lr: 0.000273 min_lr: 0.000273 loss: 3.9603 (3.5783) weight_decay: 0.0500 (0.0500) time: 0.6278 data: 0.0437 max mem: 21002 Epoch: [252] [300/312] eta: 0:00:06 lr: 0.000273 min_lr: 0.000273 loss: 3.9356 (3.5830) weight_decay: 0.0500 (0.0500) time: 0.3842 data: 0.0114 max mem: 21002 Epoch: [252] [310/312] eta: 0:00:01 lr: 0.000273 min_lr: 0.000273 loss: 3.8843 (3.5884) weight_decay: 0.0500 (0.0500) time: 0.2875 data: 0.0113 max mem: 21002 Epoch: [252] [311/312] eta: 0:00:00 lr: 0.000273 min_lr: 0.000273 loss: 3.8843 (3.5909) weight_decay: 0.0500 (0.0500) time: 0.2768 data: 0.0001 max mem: 21002 Epoch: [252] Total time: 0:02:53 (0.5546 s / it) Averaged stats: lr: 0.000273 min_lr: 0.000273 loss: 3.8843 (3.6302) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:13 loss: 1.1315 (1.1315) acc1: 79.9479 (79.9479) acc5: 95.1823 (95.1823) time: 8.1459 data: 8.0290 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5054 (1.4405) acc1: 70.9635 (71.8720) acc5: 90.3646 (90.6880) time: 1.0032 data: 0.9032 max mem: 21002 Test: Total time: 0:00:09 (1.0142 s / it) * Acc@1 71.392 Acc@5 90.546 loss 1.453 Accuracy of the model on the 50000 test images: 71.4% Max accuracy: 72.08% Epoch: [253] [ 0/312] eta: 1:04:46 lr: 0.000273 min_lr: 0.000273 loss: 3.9073 (3.9073) weight_decay: 0.0500 (0.0500) time: 12.4572 data: 12.1594 max mem: 21002 Epoch: [253] [ 10/312] eta: 0:08:24 lr: 0.000272 min_lr: 0.000272 loss: 3.9073 (3.6394) weight_decay: 0.0500 (0.0500) time: 1.6702 data: 1.1061 max mem: 21002 Epoch: [253] [ 20/312] eta: 0:05:09 lr: 0.000272 min_lr: 0.000272 loss: 3.7884 (3.6085) weight_decay: 0.0500 (0.0500) time: 0.4900 data: 0.0464 max mem: 21002 Epoch: [253] [ 30/312] eta: 0:03:49 lr: 0.000272 min_lr: 0.000272 loss: 3.7714 (3.6532) weight_decay: 0.0500 (0.0500) time: 0.3408 data: 0.0463 max mem: 21002 Epoch: [253] [ 40/312] eta: 0:03:10 lr: 0.000271 min_lr: 0.000271 loss: 3.7714 (3.6316) weight_decay: 0.0500 (0.0500) time: 0.3219 data: 0.0246 max mem: 21002 Epoch: [253] [ 50/312] eta: 0:03:03 lr: 0.000271 min_lr: 0.000271 loss: 3.7593 (3.6269) weight_decay: 0.0500 (0.0500) time: 0.5271 data: 0.2004 max mem: 21002 Epoch: [253] [ 60/312] eta: 0:02:39 lr: 0.000270 min_lr: 0.000270 loss: 3.7593 (3.6397) weight_decay: 0.0500 (0.0500) time: 0.4956 data: 0.1765 max mem: 21002 Epoch: [253] [ 70/312] eta: 0:02:35 lr: 0.000270 min_lr: 0.000270 loss: 3.7984 (3.6608) weight_decay: 0.0500 (0.0500) time: 0.4978 data: 0.1529 max mem: 21002 Epoch: [253] [ 80/312] eta: 0:02:30 lr: 0.000270 min_lr: 0.000270 loss: 3.6572 (3.6238) weight_decay: 0.0500 (0.0500) time: 0.6901 data: 0.2855 max mem: 21002 Epoch: [253] [ 90/312] eta: 0:02:14 lr: 0.000269 min_lr: 0.000269 loss: 3.4183 (3.5976) weight_decay: 0.0500 (0.0500) time: 0.4822 data: 0.1338 max mem: 21002 Epoch: [253] [100/312] eta: 0:02:11 lr: 0.000269 min_lr: 0.000269 loss: 3.0120 (3.5459) weight_decay: 0.0500 (0.0500) time: 0.5020 data: 0.1379 max mem: 21002 Epoch: [253] [110/312] eta: 0:01:58 lr: 0.000269 min_lr: 0.000269 loss: 3.2629 (3.5627) weight_decay: 0.0500 (0.0500) time: 0.4984 data: 0.1375 max mem: 21002 Epoch: [253] [120/312] eta: 0:01:54 lr: 0.000268 min_lr: 0.000268 loss: 3.8386 (3.5692) weight_decay: 0.0500 (0.0500) time: 0.4847 data: 0.1284 max mem: 21002 Epoch: [253] [130/312] eta: 0:01:50 lr: 0.000268 min_lr: 0.000268 loss: 3.6875 (3.5792) weight_decay: 0.0500 (0.0500) time: 0.7192 data: 0.2326 max mem: 21002 Epoch: [253] [140/312] eta: 0:01:40 lr: 0.000268 min_lr: 0.000268 loss: 3.7897 (3.5933) weight_decay: 0.0500 (0.0500) time: 0.5212 data: 0.1048 max mem: 21002 Epoch: [253] [150/312] eta: 0:01:36 lr: 0.000267 min_lr: 0.000267 loss: 3.8275 (3.6084) weight_decay: 0.0500 (0.0500) time: 0.5331 data: 0.0854 max mem: 21002 Epoch: [253] [160/312] eta: 0:01:31 lr: 0.000267 min_lr: 0.000267 loss: 3.7513 (3.6102) weight_decay: 0.0500 (0.0500) time: 0.7370 data: 0.1266 max mem: 21002 Epoch: [253] [170/312] eta: 0:01:23 lr: 0.000267 min_lr: 0.000267 loss: 3.5261 (3.5948) weight_decay: 0.0500 (0.0500) time: 0.4974 data: 0.0420 max mem: 21002 Epoch: [253] [180/312] eta: 0:01:19 lr: 0.000266 min_lr: 0.000266 loss: 3.4621 (3.6022) weight_decay: 0.0500 (0.0500) time: 0.5539 data: 0.0109 max mem: 21002 Epoch: [253] [190/312] eta: 0:01:11 lr: 0.000266 min_lr: 0.000266 loss: 3.5588 (3.5999) weight_decay: 0.0500 (0.0500) time: 0.5633 data: 0.0109 max mem: 21002 Epoch: [253] [200/312] eta: 0:01:05 lr: 0.000265 min_lr: 0.000265 loss: 3.5340 (3.5942) weight_decay: 0.0500 (0.0500) time: 0.4431 data: 0.0461 max mem: 21002 Epoch: [253] [210/312] eta: 0:00:59 lr: 0.000265 min_lr: 0.000265 loss: 3.7271 (3.5925) weight_decay: 0.0500 (0.0500) time: 0.6252 data: 0.0698 max mem: 21002 Epoch: [253] [220/312] eta: 0:00:53 lr: 0.000265 min_lr: 0.000265 loss: 3.7271 (3.5904) weight_decay: 0.0500 (0.0500) time: 0.5466 data: 0.0246 max mem: 21002 Epoch: [253] [230/312] eta: 0:00:47 lr: 0.000264 min_lr: 0.000264 loss: 3.5916 (3.5885) weight_decay: 0.0500 (0.0500) time: 0.4948 data: 0.0324 max mem: 21002 Epoch: [253] [240/312] eta: 0:00:42 lr: 0.000264 min_lr: 0.000264 loss: 3.5916 (3.5871) weight_decay: 0.0500 (0.0500) time: 0.6319 data: 0.0792 max mem: 21002 Epoch: [253] [250/312] eta: 0:00:35 lr: 0.000264 min_lr: 0.000264 loss: 3.6617 (3.5771) weight_decay: 0.0500 (0.0500) time: 0.5224 data: 0.0476 max mem: 21002 Epoch: [253] [260/312] eta: 0:00:30 lr: 0.000263 min_lr: 0.000263 loss: 3.6705 (3.5826) weight_decay: 0.0500 (0.0500) time: 0.5395 data: 0.0009 max mem: 21002 Epoch: [253] [270/312] eta: 0:00:23 lr: 0.000263 min_lr: 0.000263 loss: 3.8532 (3.5850) weight_decay: 0.0500 (0.0500) time: 0.5045 data: 0.0009 max mem: 21002 Epoch: [253] [280/312] eta: 0:00:18 lr: 0.000263 min_lr: 0.000263 loss: 3.4942 (3.5764) weight_decay: 0.0500 (0.0500) time: 0.4597 data: 0.0621 max mem: 21002 Epoch: [253] [290/312] eta: 0:00:12 lr: 0.000262 min_lr: 0.000262 loss: 3.1071 (3.5723) weight_decay: 0.0500 (0.0500) time: 0.5496 data: 0.0849 max mem: 21002 Epoch: [253] [300/312] eta: 0:00:06 lr: 0.000262 min_lr: 0.000262 loss: 3.4824 (3.5686) weight_decay: 0.0500 (0.0500) time: 0.3809 data: 0.0232 max mem: 21002 Epoch: [253] [310/312] eta: 0:00:01 lr: 0.000262 min_lr: 0.000262 loss: 3.7386 (3.5759) weight_decay: 0.0500 (0.0500) time: 0.2851 data: 0.0001 max mem: 21002 Epoch: [253] [311/312] eta: 0:00:00 lr: 0.000261 min_lr: 0.000261 loss: 3.7386 (3.5774) weight_decay: 0.0500 (0.0500) time: 0.2768 data: 0.0001 max mem: 21002 Epoch: [253] Total time: 0:02:52 (0.5534 s / it) Averaged stats: lr: 0.000261 min_lr: 0.000261 loss: 3.7386 (3.6363) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.1525 (1.1525) acc1: 78.9062 (78.9062) acc5: 94.7917 (94.7917) time: 8.3537 data: 8.2349 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5081 (1.4207) acc1: 71.7448 (72.5920) acc5: 90.6250 (90.9920) time: 1.0228 data: 0.9151 max mem: 21002 Test: Total time: 0:00:09 (1.0508 s / it) * Acc@1 72.088 Acc@5 91.132 loss 1.426 Accuracy of the model on the 50000 test images: 72.1% Max accuracy: 72.09% Epoch: [254] [ 0/312] eta: 0:55:55 lr: 0.000261 min_lr: 0.000261 loss: 2.7564 (2.7564) weight_decay: 0.0500 (0.0500) time: 10.7546 data: 9.9672 max mem: 21002 Epoch: [254] [ 10/312] eta: 0:07:19 lr: 0.000261 min_lr: 0.000261 loss: 3.0281 (3.3236) weight_decay: 0.0500 (0.0500) time: 1.4564 data: 1.0872 max mem: 21002 Epoch: [254] [ 20/312] eta: 0:05:05 lr: 0.000261 min_lr: 0.000261 loss: 3.1639 (3.3721) weight_decay: 0.0500 (0.0500) time: 0.5612 data: 0.2403 max mem: 21002 Epoch: [254] [ 30/312] eta: 0:03:46 lr: 0.000260 min_lr: 0.000260 loss: 3.3154 (3.3580) weight_decay: 0.0500 (0.0500) time: 0.4412 data: 0.1411 max mem: 21002 Epoch: [254] [ 40/312] eta: 0:03:37 lr: 0.000260 min_lr: 0.000260 loss: 3.6715 (3.4714) weight_decay: 0.0500 (0.0500) time: 0.5432 data: 0.1633 max mem: 21002 Epoch: [254] [ 50/312] eta: 0:03:16 lr: 0.000260 min_lr: 0.000260 loss: 3.7255 (3.5122) weight_decay: 0.0500 (0.0500) time: 0.6716 data: 0.2865 max mem: 21002 Epoch: [254] [ 60/312] eta: 0:02:49 lr: 0.000259 min_lr: 0.000259 loss: 3.7255 (3.5274) weight_decay: 0.0500 (0.0500) time: 0.4139 data: 0.1238 max mem: 21002 Epoch: [254] [ 70/312] eta: 0:02:41 lr: 0.000259 min_lr: 0.000259 loss: 3.5399 (3.4759) weight_decay: 0.0500 (0.0500) time: 0.4487 data: 0.1549 max mem: 21002 Epoch: [254] [ 80/312] eta: 0:02:31 lr: 0.000259 min_lr: 0.000259 loss: 3.0664 (3.4668) weight_decay: 0.0500 (0.0500) time: 0.5957 data: 0.3004 max mem: 21002 Epoch: [254] [ 90/312] eta: 0:02:18 lr: 0.000258 min_lr: 0.000258 loss: 3.3518 (3.4498) weight_decay: 0.0500 (0.0500) time: 0.4742 data: 0.1895 max mem: 21002 Epoch: [254] [100/312] eta: 0:02:12 lr: 0.000258 min_lr: 0.000258 loss: 3.7722 (3.4786) weight_decay: 0.0500 (0.0500) time: 0.5055 data: 0.2080 max mem: 21002 Epoch: [254] [110/312] eta: 0:02:00 lr: 0.000258 min_lr: 0.000258 loss: 3.8747 (3.5019) weight_decay: 0.0500 (0.0500) time: 0.4649 data: 0.1645 max mem: 21002 Epoch: [254] [120/312] eta: 0:01:55 lr: 0.000257 min_lr: 0.000257 loss: 3.8747 (3.5180) weight_decay: 0.0500 (0.0500) time: 0.4704 data: 0.1836 max mem: 21002 Epoch: [254] [130/312] eta: 0:01:49 lr: 0.000257 min_lr: 0.000257 loss: 3.6386 (3.5167) weight_decay: 0.0500 (0.0500) time: 0.6435 data: 0.3541 max mem: 21002 Epoch: [254] [140/312] eta: 0:01:39 lr: 0.000257 min_lr: 0.000257 loss: 3.3431 (3.5187) weight_decay: 0.0500 (0.0500) time: 0.4658 data: 0.1711 max mem: 21002 Epoch: [254] [150/312] eta: 0:01:35 lr: 0.000256 min_lr: 0.000256 loss: 3.6202 (3.5382) weight_decay: 0.0500 (0.0500) time: 0.5041 data: 0.1969 max mem: 21002 Epoch: [254] [160/312] eta: 0:01:29 lr: 0.000256 min_lr: 0.000256 loss: 3.6818 (3.5419) weight_decay: 0.0500 (0.0500) time: 0.6312 data: 0.3022 max mem: 21002 Epoch: [254] [170/312] eta: 0:01:22 lr: 0.000255 min_lr: 0.000255 loss: 3.7814 (3.5536) weight_decay: 0.0500 (0.0500) time: 0.4921 data: 0.1651 max mem: 21002 Epoch: [254] [180/312] eta: 0:01:17 lr: 0.000255 min_lr: 0.000255 loss: 3.7814 (3.5541) weight_decay: 0.0500 (0.0500) time: 0.5699 data: 0.2286 max mem: 21002 Epoch: [254] [190/312] eta: 0:01:09 lr: 0.000255 min_lr: 0.000255 loss: 3.6043 (3.5664) weight_decay: 0.0500 (0.0500) time: 0.4987 data: 0.1696 max mem: 21002 Epoch: [254] [200/312] eta: 0:01:04 lr: 0.000254 min_lr: 0.000254 loss: 3.6713 (3.5701) weight_decay: 0.0500 (0.0500) time: 0.4866 data: 0.1473 max mem: 21002 Epoch: [254] [210/312] eta: 0:00:58 lr: 0.000254 min_lr: 0.000254 loss: 3.6799 (3.5729) weight_decay: 0.0500 (0.0500) time: 0.6294 data: 0.2700 max mem: 21002 Epoch: [254] [220/312] eta: 0:00:52 lr: 0.000254 min_lr: 0.000254 loss: 3.6462 (3.5689) weight_decay: 0.0500 (0.0500) time: 0.4882 data: 0.1235 max mem: 21002 Epoch: [254] [230/312] eta: 0:00:46 lr: 0.000253 min_lr: 0.000253 loss: 3.7140 (3.5781) weight_decay: 0.0500 (0.0500) time: 0.5402 data: 0.1026 max mem: 21002 Epoch: [254] [240/312] eta: 0:00:41 lr: 0.000253 min_lr: 0.000253 loss: 3.8858 (3.5847) weight_decay: 0.0500 (0.0500) time: 0.6078 data: 0.1587 max mem: 21002 Epoch: [254] [250/312] eta: 0:00:35 lr: 0.000253 min_lr: 0.000253 loss: 3.8189 (3.5824) weight_decay: 0.0500 (0.0500) time: 0.5193 data: 0.1128 max mem: 21002 Epoch: [254] [260/312] eta: 0:00:29 lr: 0.000252 min_lr: 0.000252 loss: 3.9428 (3.5888) weight_decay: 0.0500 (0.0500) time: 0.6034 data: 0.0910 max mem: 21002 Epoch: [254] [270/312] eta: 0:00:23 lr: 0.000252 min_lr: 0.000252 loss: 3.5999 (3.5789) weight_decay: 0.0500 (0.0500) time: 0.4961 data: 0.0349 max mem: 21002 Epoch: [254] [280/312] eta: 0:00:18 lr: 0.000252 min_lr: 0.000252 loss: 3.7747 (3.5853) weight_decay: 0.0500 (0.0500) time: 0.4811 data: 0.0414 max mem: 21002 Epoch: [254] [290/312] eta: 0:00:12 lr: 0.000251 min_lr: 0.000251 loss: 3.9252 (3.5971) weight_decay: 0.0500 (0.0500) time: 0.5363 data: 0.0518 max mem: 21002 Epoch: [254] [300/312] eta: 0:00:06 lr: 0.000251 min_lr: 0.000251 loss: 3.9325 (3.6046) weight_decay: 0.0500 (0.0500) time: 0.3653 data: 0.0108 max mem: 21002 Epoch: [254] [310/312] eta: 0:00:01 lr: 0.000251 min_lr: 0.000251 loss: 3.8661 (3.5991) weight_decay: 0.0500 (0.0500) time: 0.3041 data: 0.0001 max mem: 21002 Epoch: [254] [311/312] eta: 0:00:00 lr: 0.000251 min_lr: 0.000251 loss: 3.5894 (3.5975) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [254] Total time: 0:02:50 (0.5472 s / it) Averaged stats: lr: 0.000251 min_lr: 0.000251 loss: 3.5894 (3.6142) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.0520 (1.0520) acc1: 80.2083 (80.2083) acc5: 95.5729 (95.5729) time: 8.5384 data: 8.4201 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4028 (1.3186) acc1: 72.0052 (72.5600) acc5: 90.8854 (91.3440) time: 1.0376 data: 0.9356 max mem: 21002 Test: Total time: 0:00:09 (1.0538 s / it) * Acc@1 72.316 Acc@5 91.334 loss 1.327 Accuracy of the model on the 50000 test images: 72.3% Max accuracy: 72.32% Epoch: [255] [ 0/312] eta: 0:57:44 lr: 0.000250 min_lr: 0.000250 loss: 4.1130 (4.1130) weight_decay: 0.0500 (0.0500) time: 11.1056 data: 8.4944 max mem: 21002 Epoch: [255] [ 10/312] eta: 0:07:57 lr: 0.000250 min_lr: 0.000250 loss: 3.7792 (3.5560) weight_decay: 0.0500 (0.0500) time: 1.5804 data: 1.0856 max mem: 21002 Epoch: [255] [ 20/312] eta: 0:05:23 lr: 0.000250 min_lr: 0.000250 loss: 3.3233 (3.5369) weight_decay: 0.0500 (0.0500) time: 0.6085 data: 0.2479 max mem: 21002 Epoch: [255] [ 30/312] eta: 0:03:58 lr: 0.000249 min_lr: 0.000249 loss: 3.6070 (3.6627) weight_decay: 0.0500 (0.0500) time: 0.4390 data: 0.0758 max mem: 21002 Epoch: [255] [ 40/312] eta: 0:03:17 lr: 0.000249 min_lr: 0.000249 loss: 3.8364 (3.6651) weight_decay: 0.0500 (0.0500) time: 0.3214 data: 0.0243 max mem: 21002 Epoch: [255] [ 50/312] eta: 0:03:06 lr: 0.000249 min_lr: 0.000249 loss: 3.7578 (3.6394) weight_decay: 0.0500 (0.0500) time: 0.5058 data: 0.1776 max mem: 21002 Epoch: [255] [ 60/312] eta: 0:02:42 lr: 0.000248 min_lr: 0.000248 loss: 3.7724 (3.6731) weight_decay: 0.0500 (0.0500) time: 0.4824 data: 0.1659 max mem: 21002 Epoch: [255] [ 70/312] eta: 0:02:35 lr: 0.000248 min_lr: 0.000248 loss: 3.6561 (3.6180) weight_decay: 0.0500 (0.0500) time: 0.4702 data: 0.1576 max mem: 21002 Epoch: [255] [ 80/312] eta: 0:02:28 lr: 0.000248 min_lr: 0.000248 loss: 3.6022 (3.6420) weight_decay: 0.0500 (0.0500) time: 0.6271 data: 0.2698 max mem: 21002 Epoch: [255] [ 90/312] eta: 0:02:16 lr: 0.000247 min_lr: 0.000247 loss: 3.8158 (3.6563) weight_decay: 0.0500 (0.0500) time: 0.5105 data: 0.1859 max mem: 21002 Epoch: [255] [100/312] eta: 0:02:12 lr: 0.000247 min_lr: 0.000247 loss: 3.6687 (3.6503) weight_decay: 0.0500 (0.0500) time: 0.5583 data: 0.2178 max mem: 21002 Epoch: [255] [110/312] eta: 0:01:59 lr: 0.000247 min_lr: 0.000247 loss: 3.6348 (3.6361) weight_decay: 0.0500 (0.0500) time: 0.5013 data: 0.1566 max mem: 21002 Epoch: [255] [120/312] eta: 0:01:55 lr: 0.000246 min_lr: 0.000246 loss: 3.5580 (3.6328) weight_decay: 0.0500 (0.0500) time: 0.4975 data: 0.1613 max mem: 21002 Epoch: [255] [130/312] eta: 0:01:49 lr: 0.000246 min_lr: 0.000246 loss: 3.5055 (3.6222) weight_decay: 0.0500 (0.0500) time: 0.6594 data: 0.2850 max mem: 21002 Epoch: [255] [140/312] eta: 0:01:40 lr: 0.000246 min_lr: 0.000246 loss: 3.6629 (3.6351) weight_decay: 0.0500 (0.0500) time: 0.4600 data: 0.1373 max mem: 21002 Epoch: [255] [150/312] eta: 0:01:36 lr: 0.000245 min_lr: 0.000245 loss: 3.7912 (3.6398) weight_decay: 0.0500 (0.0500) time: 0.5378 data: 0.1631 max mem: 21002 Epoch: [255] [160/312] eta: 0:01:29 lr: 0.000245 min_lr: 0.000245 loss: 3.7318 (3.6363) weight_decay: 0.0500 (0.0500) time: 0.6430 data: 0.1701 max mem: 21002 Epoch: [255] [170/312] eta: 0:01:23 lr: 0.000245 min_lr: 0.000245 loss: 3.9041 (3.6520) weight_decay: 0.0500 (0.0500) time: 0.5072 data: 0.1240 max mem: 21002 Epoch: [255] [180/312] eta: 0:01:17 lr: 0.000244 min_lr: 0.000244 loss: 3.9140 (3.6506) weight_decay: 0.0500 (0.0500) time: 0.5681 data: 0.1992 max mem: 21002 Epoch: [255] [190/312] eta: 0:01:09 lr: 0.000244 min_lr: 0.000244 loss: 3.4920 (3.6360) weight_decay: 0.0500 (0.0500) time: 0.4669 data: 0.0960 max mem: 21002 Epoch: [255] [200/312] eta: 0:01:04 lr: 0.000244 min_lr: 0.000244 loss: 3.2415 (3.6235) weight_decay: 0.0500 (0.0500) time: 0.4800 data: 0.1296 max mem: 21002 Epoch: [255] [210/312] eta: 0:00:59 lr: 0.000243 min_lr: 0.000243 loss: 3.6743 (3.6228) weight_decay: 0.0500 (0.0500) time: 0.6506 data: 0.2611 max mem: 21002 Epoch: [255] [220/312] eta: 0:00:52 lr: 0.000243 min_lr: 0.000243 loss: 3.7866 (3.6236) weight_decay: 0.0500 (0.0500) time: 0.4599 data: 0.1322 max mem: 21002 Epoch: [255] [230/312] eta: 0:00:46 lr: 0.000243 min_lr: 0.000243 loss: 3.8323 (3.6354) weight_decay: 0.0500 (0.0500) time: 0.4578 data: 0.1629 max mem: 21002 Epoch: [255] [240/312] eta: 0:00:40 lr: 0.000242 min_lr: 0.000242 loss: 3.7954 (3.6275) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.2903 max mem: 21002 Epoch: [255] [250/312] eta: 0:00:34 lr: 0.000242 min_lr: 0.000242 loss: 3.3393 (3.6243) weight_decay: 0.0500 (0.0500) time: 0.4751 data: 0.1890 max mem: 21002 Epoch: [255] [260/312] eta: 0:00:29 lr: 0.000242 min_lr: 0.000242 loss: 3.3073 (3.6214) weight_decay: 0.0500 (0.0500) time: 0.5428 data: 0.2589 max mem: 21002 Epoch: [255] [270/312] eta: 0:00:23 lr: 0.000241 min_lr: 0.000241 loss: 3.6740 (3.6251) weight_decay: 0.0500 (0.0500) time: 0.4832 data: 0.1979 max mem: 21002 Epoch: [255] [280/312] eta: 0:00:17 lr: 0.000241 min_lr: 0.000241 loss: 3.6740 (3.6151) weight_decay: 0.0500 (0.0500) time: 0.4965 data: 0.2053 max mem: 21002 Epoch: [255] [290/312] eta: 0:00:12 lr: 0.000240 min_lr: 0.000240 loss: 3.3269 (3.6116) weight_decay: 0.0500 (0.0500) time: 0.6219 data: 0.3315 max mem: 21002 Epoch: [255] [300/312] eta: 0:00:06 lr: 0.000240 min_lr: 0.000240 loss: 3.3594 (3.6088) weight_decay: 0.0500 (0.0500) time: 0.4155 data: 0.1350 max mem: 21002 Epoch: [255] [310/312] eta: 0:00:01 lr: 0.000240 min_lr: 0.000240 loss: 3.7579 (3.6141) weight_decay: 0.0500 (0.0500) time: 0.2921 data: 0.0161 max mem: 21002 Epoch: [255] [311/312] eta: 0:00:00 lr: 0.000240 min_lr: 0.000240 loss: 3.7579 (3.6161) weight_decay: 0.0500 (0.0500) time: 0.2841 data: 0.0075 max mem: 21002 Epoch: [255] Total time: 0:02:50 (0.5461 s / it) Averaged stats: lr: 0.000240 min_lr: 0.000240 loss: 3.7579 (3.6253) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.1037 (1.1037) acc1: 78.3854 (78.3854) acc5: 95.0521 (95.0521) time: 8.7106 data: 8.5935 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4469 (1.3403) acc1: 71.2240 (72.0640) acc5: 90.4948 (90.9920) time: 1.0556 data: 0.9549 max mem: 21002 Test: Total time: 0:00:09 (1.0751 s / it) * Acc@1 72.160 Acc@5 91.118 loss 1.340 Accuracy of the model on the 50000 test images: 72.2% Max accuracy: 72.32% Epoch: [256] [ 0/312] eta: 0:59:44 lr: 0.000240 min_lr: 0.000240 loss: 2.7938 (2.7938) weight_decay: 0.0500 (0.0500) time: 11.4874 data: 11.0109 max mem: 21002 Epoch: [256] [ 10/312] eta: 0:07:25 lr: 0.000239 min_lr: 0.000239 loss: 3.3787 (3.3607) weight_decay: 0.0500 (0.0500) time: 1.4746 data: 1.0015 max mem: 21002 Epoch: [256] [ 20/312] eta: 0:05:00 lr: 0.000239 min_lr: 0.000239 loss: 3.8023 (3.5085) weight_decay: 0.0500 (0.0500) time: 0.5052 data: 0.0909 max mem: 21002 Epoch: [256] [ 30/312] eta: 0:03:42 lr: 0.000239 min_lr: 0.000239 loss: 3.8418 (3.6059) weight_decay: 0.0500 (0.0500) time: 0.4112 data: 0.0909 max mem: 21002 Epoch: [256] [ 40/312] eta: 0:03:21 lr: 0.000238 min_lr: 0.000238 loss: 3.9487 (3.5840) weight_decay: 0.0500 (0.0500) time: 0.4389 data: 0.1405 max mem: 21002 Epoch: [256] [ 50/312] eta: 0:03:11 lr: 0.000238 min_lr: 0.000238 loss: 3.9149 (3.5887) weight_decay: 0.0500 (0.0500) time: 0.6409 data: 0.3426 max mem: 21002 Epoch: [256] [ 60/312] eta: 0:02:45 lr: 0.000238 min_lr: 0.000238 loss: 3.8449 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.4884 data: 0.2027 max mem: 21002 Epoch: [256] [ 70/312] eta: 0:02:39 lr: 0.000237 min_lr: 0.000237 loss: 3.8320 (3.6031) weight_decay: 0.0500 (0.0500) time: 0.4712 data: 0.1743 max mem: 21002 Epoch: [256] [ 80/312] eta: 0:02:32 lr: 0.000237 min_lr: 0.000237 loss: 3.6945 (3.5946) weight_decay: 0.0500 (0.0500) time: 0.6537 data: 0.3576 max mem: 21002 Epoch: [256] [ 90/312] eta: 0:02:16 lr: 0.000237 min_lr: 0.000237 loss: 3.5775 (3.5881) weight_decay: 0.0500 (0.0500) time: 0.4697 data: 0.1838 max mem: 21002 Epoch: [256] [100/312] eta: 0:02:11 lr: 0.000236 min_lr: 0.000236 loss: 3.7409 (3.6075) weight_decay: 0.0500 (0.0500) time: 0.4786 data: 0.1935 max mem: 21002 Epoch: [256] [110/312] eta: 0:01:59 lr: 0.000236 min_lr: 0.000236 loss: 3.9068 (3.6391) weight_decay: 0.0500 (0.0500) time: 0.4776 data: 0.1935 max mem: 21002 Epoch: [256] [120/312] eta: 0:01:54 lr: 0.000236 min_lr: 0.000236 loss: 3.9068 (3.6604) weight_decay: 0.0500 (0.0500) time: 0.4691 data: 0.1849 max mem: 21002 Epoch: [256] [130/312] eta: 0:01:49 lr: 0.000235 min_lr: 0.000235 loss: 3.8997 (3.6800) weight_decay: 0.0500 (0.0500) time: 0.6612 data: 0.3710 max mem: 21002 Epoch: [256] [140/312] eta: 0:01:39 lr: 0.000235 min_lr: 0.000235 loss: 3.6637 (3.6576) weight_decay: 0.0500 (0.0500) time: 0.4767 data: 0.1868 max mem: 21002 Epoch: [256] [150/312] eta: 0:01:35 lr: 0.000235 min_lr: 0.000235 loss: 3.3847 (3.6428) weight_decay: 0.0500 (0.0500) time: 0.5021 data: 0.2156 max mem: 21002 Epoch: [256] [160/312] eta: 0:01:30 lr: 0.000234 min_lr: 0.000234 loss: 3.5079 (3.6453) weight_decay: 0.0500 (0.0500) time: 0.6937 data: 0.4046 max mem: 21002 Epoch: [256] [170/312] eta: 0:01:21 lr: 0.000234 min_lr: 0.000234 loss: 3.7932 (3.6502) weight_decay: 0.0500 (0.0500) time: 0.4770 data: 0.1897 max mem: 21002 Epoch: [256] [180/312] eta: 0:01:16 lr: 0.000234 min_lr: 0.000234 loss: 3.9328 (3.6680) weight_decay: 0.0500 (0.0500) time: 0.4809 data: 0.1945 max mem: 21002 Epoch: [256] [190/312] eta: 0:01:09 lr: 0.000233 min_lr: 0.000233 loss: 3.9780 (3.6725) weight_decay: 0.0500 (0.0500) time: 0.5078 data: 0.1944 max mem: 21002 Epoch: [256] [200/312] eta: 0:01:04 lr: 0.000233 min_lr: 0.000233 loss: 3.6225 (3.6704) weight_decay: 0.0500 (0.0500) time: 0.5325 data: 0.1769 max mem: 21002 Epoch: [256] [210/312] eta: 0:00:58 lr: 0.000233 min_lr: 0.000233 loss: 3.6405 (3.6770) weight_decay: 0.0500 (0.0500) time: 0.6448 data: 0.3122 max mem: 21002 Epoch: [256] [220/312] eta: 0:00:52 lr: 0.000232 min_lr: 0.000232 loss: 3.6176 (3.6699) weight_decay: 0.0500 (0.0500) time: 0.4926 data: 0.1365 max mem: 21002 Epoch: [256] [230/312] eta: 0:00:46 lr: 0.000232 min_lr: 0.000232 loss: 3.6861 (3.6731) weight_decay: 0.0500 (0.0500) time: 0.4761 data: 0.1226 max mem: 21002 Epoch: [256] [240/312] eta: 0:00:41 lr: 0.000232 min_lr: 0.000232 loss: 3.6933 (3.6747) weight_decay: 0.0500 (0.0500) time: 0.6070 data: 0.2349 max mem: 21002 Epoch: [256] [250/312] eta: 0:00:35 lr: 0.000231 min_lr: 0.000231 loss: 3.7214 (3.6776) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.1136 max mem: 21002 Epoch: [256] [260/312] eta: 0:00:29 lr: 0.000231 min_lr: 0.000231 loss: 3.8732 (3.6699) weight_decay: 0.0500 (0.0500) time: 0.4998 data: 0.1095 max mem: 21002 Epoch: [256] [270/312] eta: 0:00:23 lr: 0.000231 min_lr: 0.000231 loss: 3.7722 (3.6736) weight_decay: 0.0500 (0.0500) time: 0.4648 data: 0.1101 max mem: 21002 Epoch: [256] [280/312] eta: 0:00:18 lr: 0.000230 min_lr: 0.000230 loss: 3.7722 (3.6723) weight_decay: 0.0500 (0.0500) time: 0.5585 data: 0.1081 max mem: 21002 Epoch: [256] [290/312] eta: 0:00:12 lr: 0.000230 min_lr: 0.000230 loss: 3.8105 (3.6773) weight_decay: 0.0500 (0.0500) time: 0.5497 data: 0.1556 max mem: 21002 Epoch: [256] [300/312] eta: 0:00:06 lr: 0.000230 min_lr: 0.000230 loss: 3.9001 (3.6813) weight_decay: 0.0500 (0.0500) time: 0.3489 data: 0.0488 max mem: 21002 Epoch: [256] [310/312] eta: 0:00:01 lr: 0.000229 min_lr: 0.000229 loss: 3.8574 (3.6850) weight_decay: 0.0500 (0.0500) time: 0.2867 data: 0.0001 max mem: 21002 Epoch: [256] [311/312] eta: 0:00:00 lr: 0.000229 min_lr: 0.000229 loss: 3.8574 (3.6878) weight_decay: 0.0500 (0.0500) time: 0.2865 data: 0.0001 max mem: 21002 Epoch: [256] Total time: 0:02:50 (0.5454 s / it) Averaged stats: lr: 0.000229 min_lr: 0.000229 loss: 3.8574 (3.6373) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:14 loss: 1.2048 (1.2048) acc1: 79.4271 (79.4271) acc5: 94.7917 (94.7917) time: 8.2237 data: 8.1052 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5609 (1.4834) acc1: 71.2240 (72.4160) acc5: 91.1458 (91.3440) time: 1.0378 data: 0.9415 max mem: 21002 Test: Total time: 0:00:09 (1.0480 s / it) * Acc@1 72.126 Acc@5 91.242 loss 1.486 Accuracy of the model on the 50000 test images: 72.1% Max accuracy: 72.32% Epoch: [257] [ 0/312] eta: 1:01:37 lr: 0.000229 min_lr: 0.000229 loss: 3.0032 (3.0032) weight_decay: 0.0500 (0.0500) time: 11.8525 data: 8.8095 max mem: 21002 Epoch: [257] [ 10/312] eta: 0:07:54 lr: 0.000229 min_lr: 0.000229 loss: 3.7659 (3.5205) weight_decay: 0.0500 (0.0500) time: 1.5715 data: 1.0286 max mem: 21002 Epoch: [257] [ 20/312] eta: 0:05:06 lr: 0.000229 min_lr: 0.000229 loss: 3.8384 (3.6081) weight_decay: 0.0500 (0.0500) time: 0.5095 data: 0.1977 max mem: 21002 Epoch: [257] [ 30/312] eta: 0:03:46 lr: 0.000228 min_lr: 0.000228 loss: 3.7616 (3.5608) weight_decay: 0.0500 (0.0500) time: 0.3818 data: 0.0728 max mem: 21002 Epoch: [257] [ 40/312] eta: 0:03:30 lr: 0.000228 min_lr: 0.000228 loss: 3.6099 (3.5894) weight_decay: 0.0500 (0.0500) time: 0.4866 data: 0.0818 max mem: 21002 Epoch: [257] [ 50/312] eta: 0:03:12 lr: 0.000228 min_lr: 0.000228 loss: 3.8562 (3.6760) weight_decay: 0.0500 (0.0500) time: 0.6257 data: 0.1270 max mem: 21002 Epoch: [257] [ 60/312] eta: 0:02:50 lr: 0.000227 min_lr: 0.000227 loss: 3.8704 (3.6733) weight_decay: 0.0500 (0.0500) time: 0.4719 data: 0.0929 max mem: 21002 Epoch: [257] [ 70/312] eta: 0:02:42 lr: 0.000227 min_lr: 0.000227 loss: 3.7175 (3.6643) weight_decay: 0.0500 (0.0500) time: 0.5057 data: 0.1453 max mem: 21002 Epoch: [257] [ 80/312] eta: 0:02:29 lr: 0.000227 min_lr: 0.000227 loss: 3.5414 (3.6397) weight_decay: 0.0500 (0.0500) time: 0.5547 data: 0.1480 max mem: 21002 Epoch: [257] [ 90/312] eta: 0:02:18 lr: 0.000226 min_lr: 0.000226 loss: 3.6150 (3.6426) weight_decay: 0.0500 (0.0500) time: 0.4658 data: 0.1336 max mem: 21002 Epoch: [257] [100/312] eta: 0:02:13 lr: 0.000226 min_lr: 0.000226 loss: 3.6150 (3.6221) weight_decay: 0.0500 (0.0500) time: 0.5734 data: 0.2227 max mem: 21002 Epoch: [257] [110/312] eta: 0:02:01 lr: 0.000226 min_lr: 0.000226 loss: 3.7323 (3.6321) weight_decay: 0.0500 (0.0500) time: 0.4903 data: 0.1397 max mem: 21002 Epoch: [257] [120/312] eta: 0:01:57 lr: 0.000225 min_lr: 0.000225 loss: 3.8631 (3.6275) weight_decay: 0.0500 (0.0500) time: 0.5038 data: 0.1164 max mem: 21002 Epoch: [257] [130/312] eta: 0:01:51 lr: 0.000225 min_lr: 0.000225 loss: 3.1553 (3.5926) weight_decay: 0.0500 (0.0500) time: 0.6663 data: 0.1929 max mem: 21002 Epoch: [257] [140/312] eta: 0:01:42 lr: 0.000225 min_lr: 0.000225 loss: 3.5306 (3.6059) weight_decay: 0.0500 (0.0500) time: 0.4908 data: 0.1163 max mem: 21002 Epoch: [257] [150/312] eta: 0:01:37 lr: 0.000224 min_lr: 0.000224 loss: 3.8407 (3.6107) weight_decay: 0.0500 (0.0500) time: 0.5260 data: 0.1421 max mem: 21002 Epoch: [257] [160/312] eta: 0:01:29 lr: 0.000224 min_lr: 0.000224 loss: 3.8571 (3.6113) weight_decay: 0.0500 (0.0500) time: 0.5712 data: 0.1031 max mem: 21002 Epoch: [257] [170/312] eta: 0:01:23 lr: 0.000224 min_lr: 0.000224 loss: 3.8364 (3.6232) weight_decay: 0.0500 (0.0500) time: 0.4755 data: 0.1046 max mem: 21002 Epoch: [257] [180/312] eta: 0:01:17 lr: 0.000223 min_lr: 0.000223 loss: 3.8030 (3.6305) weight_decay: 0.0500 (0.0500) time: 0.5781 data: 0.1948 max mem: 21002 Epoch: [257] [190/312] eta: 0:01:09 lr: 0.000223 min_lr: 0.000223 loss: 3.8544 (3.6393) weight_decay: 0.0500 (0.0500) time: 0.4764 data: 0.0908 max mem: 21002 Epoch: [257] [200/312] eta: 0:01:04 lr: 0.000223 min_lr: 0.000223 loss: 3.8544 (3.6358) weight_decay: 0.0500 (0.0500) time: 0.4844 data: 0.1055 max mem: 21002 Epoch: [257] [210/312] eta: 0:00:59 lr: 0.000222 min_lr: 0.000222 loss: 3.4894 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.6550 data: 0.1794 max mem: 21002 Epoch: [257] [220/312] eta: 0:00:52 lr: 0.000222 min_lr: 0.000222 loss: 3.3443 (3.6134) weight_decay: 0.0500 (0.0500) time: 0.4705 data: 0.0864 max mem: 21002 Epoch: [257] [230/312] eta: 0:00:46 lr: 0.000222 min_lr: 0.000222 loss: 3.3443 (3.5966) weight_decay: 0.0500 (0.0500) time: 0.4856 data: 0.1053 max mem: 21002 Epoch: [257] [240/312] eta: 0:00:41 lr: 0.000221 min_lr: 0.000221 loss: 3.3639 (3.5939) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0994 max mem: 21002 Epoch: [257] [250/312] eta: 0:00:35 lr: 0.000221 min_lr: 0.000221 loss: 3.6406 (3.5896) weight_decay: 0.0500 (0.0500) time: 0.4994 data: 0.0874 max mem: 21002 Epoch: [257] [260/312] eta: 0:00:29 lr: 0.000221 min_lr: 0.000221 loss: 3.7689 (3.5916) weight_decay: 0.0500 (0.0500) time: 0.5672 data: 0.1498 max mem: 21002 Epoch: [257] [270/312] eta: 0:00:23 lr: 0.000220 min_lr: 0.000220 loss: 3.7720 (3.5937) weight_decay: 0.0500 (0.0500) time: 0.4690 data: 0.0691 max mem: 21002 Epoch: [257] [280/312] eta: 0:00:18 lr: 0.000220 min_lr: 0.000220 loss: 3.7720 (3.5976) weight_decay: 0.0500 (0.0500) time: 0.4974 data: 0.1056 max mem: 21002 Epoch: [257] [290/312] eta: 0:00:12 lr: 0.000220 min_lr: 0.000220 loss: 3.8308 (3.6016) weight_decay: 0.0500 (0.0500) time: 0.6021 data: 0.1469 max mem: 21002 Epoch: [257] [300/312] eta: 0:00:06 lr: 0.000219 min_lr: 0.000219 loss: 3.8925 (3.6044) weight_decay: 0.0500 (0.0500) time: 0.4119 data: 0.0655 max mem: 21002 Epoch: [257] [310/312] eta: 0:00:01 lr: 0.000219 min_lr: 0.000219 loss: 3.6853 (3.5955) weight_decay: 0.0500 (0.0500) time: 0.3004 data: 0.0239 max mem: 21002 Epoch: [257] [311/312] eta: 0:00:00 lr: 0.000219 min_lr: 0.000219 loss: 3.6853 (3.5954) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [257] Total time: 0:02:51 (0.5488 s / it) Averaged stats: lr: 0.000219 min_lr: 0.000219 loss: 3.6853 (3.6051) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.1084 (1.1084) acc1: 78.9062 (78.9062) acc5: 95.0521 (95.0521) time: 8.4633 data: 8.3443 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4214 (1.3278) acc1: 72.9167 (72.9120) acc5: 91.1458 (91.3760) time: 1.0255 data: 0.9272 max mem: 21002 Test: Total time: 0:00:09 (1.0595 s / it) * Acc@1 72.694 Acc@5 91.366 loss 1.336 Accuracy of the model on the 50000 test images: 72.7% Max accuracy: 72.69% Epoch: [258] [ 0/312] eta: 0:58:54 lr: 0.000219 min_lr: 0.000219 loss: 4.2753 (4.2753) weight_decay: 0.0500 (0.0500) time: 11.3275 data: 9.7593 max mem: 21002 Epoch: [258] [ 10/312] eta: 0:07:31 lr: 0.000219 min_lr: 0.000219 loss: 3.9915 (3.9612) weight_decay: 0.0500 (0.0500) time: 1.4943 data: 1.0074 max mem: 21002 Epoch: [258] [ 20/312] eta: 0:05:27 lr: 0.000218 min_lr: 0.000218 loss: 3.7935 (3.7249) weight_decay: 0.0500 (0.0500) time: 0.6126 data: 0.1595 max mem: 21002 Epoch: [258] [ 30/312] eta: 0:04:01 lr: 0.000218 min_lr: 0.000218 loss: 3.8829 (3.8017) weight_decay: 0.0500 (0.0500) time: 0.5057 data: 0.0937 max mem: 21002 Epoch: [258] [ 40/312] eta: 0:03:26 lr: 0.000218 min_lr: 0.000218 loss: 3.9693 (3.7809) weight_decay: 0.0500 (0.0500) time: 0.3800 data: 0.0055 max mem: 21002 Epoch: [258] [ 50/312] eta: 0:03:13 lr: 0.000217 min_lr: 0.000217 loss: 3.5463 (3.6805) weight_decay: 0.0500 (0.0500) time: 0.5598 data: 0.1327 max mem: 21002 Epoch: [258] [ 60/312] eta: 0:02:47 lr: 0.000217 min_lr: 0.000217 loss: 3.5309 (3.6699) weight_decay: 0.0500 (0.0500) time: 0.4726 data: 0.1279 max mem: 21002 Epoch: [258] [ 70/312] eta: 0:02:41 lr: 0.000217 min_lr: 0.000217 loss: 3.6350 (3.6545) weight_decay: 0.0500 (0.0500) time: 0.4890 data: 0.1452 max mem: 21002 Epoch: [258] [ 80/312] eta: 0:02:34 lr: 0.000216 min_lr: 0.000216 loss: 3.8381 (3.6553) weight_decay: 0.0500 (0.0500) time: 0.6716 data: 0.2977 max mem: 21002 Epoch: [258] [ 90/312] eta: 0:02:18 lr: 0.000216 min_lr: 0.000216 loss: 3.7174 (3.6242) weight_decay: 0.0500 (0.0500) time: 0.4703 data: 0.1532 max mem: 21002 Epoch: [258] [100/312] eta: 0:02:13 lr: 0.000216 min_lr: 0.000216 loss: 3.1730 (3.5917) weight_decay: 0.0500 (0.0500) time: 0.4683 data: 0.0946 max mem: 21002 Epoch: [258] [110/312] eta: 0:02:00 lr: 0.000215 min_lr: 0.000215 loss: 3.3712 (3.5768) weight_decay: 0.0500 (0.0500) time: 0.4698 data: 0.0947 max mem: 21002 Epoch: [258] [120/312] eta: 0:01:56 lr: 0.000215 min_lr: 0.000215 loss: 3.6248 (3.5879) weight_decay: 0.0500 (0.0500) time: 0.4866 data: 0.1070 max mem: 21002 Epoch: [258] [130/312] eta: 0:01:51 lr: 0.000215 min_lr: 0.000215 loss: 3.8684 (3.5874) weight_decay: 0.0500 (0.0500) time: 0.6804 data: 0.2345 max mem: 21002 Epoch: [258] [140/312] eta: 0:01:41 lr: 0.000214 min_lr: 0.000214 loss: 3.8684 (3.6069) weight_decay: 0.0500 (0.0500) time: 0.4851 data: 0.1333 max mem: 21002 Epoch: [258] [150/312] eta: 0:01:36 lr: 0.000214 min_lr: 0.000214 loss: 3.5750 (3.5965) weight_decay: 0.0500 (0.0500) time: 0.4887 data: 0.1561 max mem: 21002 Epoch: [258] [160/312] eta: 0:01:31 lr: 0.000214 min_lr: 0.000214 loss: 3.5649 (3.5972) weight_decay: 0.0500 (0.0500) time: 0.6857 data: 0.2628 max mem: 21002 Epoch: [258] [170/312] eta: 0:01:22 lr: 0.000213 min_lr: 0.000213 loss: 3.7522 (3.6014) weight_decay: 0.0500 (0.0500) time: 0.4892 data: 0.1161 max mem: 21002 Epoch: [258] [180/312] eta: 0:01:17 lr: 0.000213 min_lr: 0.000213 loss: 3.8082 (3.5975) weight_decay: 0.0500 (0.0500) time: 0.4765 data: 0.1331 max mem: 21002 Epoch: [258] [190/312] eta: 0:01:09 lr: 0.000213 min_lr: 0.000213 loss: 3.8082 (3.5996) weight_decay: 0.0500 (0.0500) time: 0.4757 data: 0.1296 max mem: 21002 Epoch: [258] [200/312] eta: 0:01:04 lr: 0.000212 min_lr: 0.000212 loss: 3.7705 (3.5945) weight_decay: 0.0500 (0.0500) time: 0.4839 data: 0.1113 max mem: 21002 Epoch: [258] [210/312] eta: 0:00:59 lr: 0.000212 min_lr: 0.000212 loss: 3.5172 (3.5978) weight_decay: 0.0500 (0.0500) time: 0.6976 data: 0.2186 max mem: 21002 Epoch: [258] [220/312] eta: 0:00:52 lr: 0.000212 min_lr: 0.000212 loss: 3.8286 (3.6011) weight_decay: 0.0500 (0.0500) time: 0.5016 data: 0.1080 max mem: 21002 Epoch: [258] [230/312] eta: 0:00:47 lr: 0.000211 min_lr: 0.000211 loss: 3.8523 (3.6043) weight_decay: 0.0500 (0.0500) time: 0.4789 data: 0.0992 max mem: 21002 Epoch: [258] [240/312] eta: 0:00:41 lr: 0.000211 min_lr: 0.000211 loss: 3.8523 (3.6141) weight_decay: 0.0500 (0.0500) time: 0.6719 data: 0.2004 max mem: 21002 Epoch: [258] [250/312] eta: 0:00:35 lr: 0.000211 min_lr: 0.000211 loss: 3.6747 (3.6062) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.1027 max mem: 21002 Epoch: [258] [260/312] eta: 0:00:29 lr: 0.000211 min_lr: 0.000211 loss: 3.4706 (3.6036) weight_decay: 0.0500 (0.0500) time: 0.4889 data: 0.1041 max mem: 21002 Epoch: [258] [270/312] eta: 0:00:23 lr: 0.000210 min_lr: 0.000210 loss: 3.6642 (3.6022) weight_decay: 0.0500 (0.0500) time: 0.4983 data: 0.1042 max mem: 21002 Epoch: [258] [280/312] eta: 0:00:18 lr: 0.000210 min_lr: 0.000210 loss: 3.4614 (3.5917) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.0838 max mem: 21002 Epoch: [258] [290/312] eta: 0:00:12 lr: 0.000210 min_lr: 0.000210 loss: 3.3316 (3.5889) weight_decay: 0.0500 (0.0500) time: 0.6228 data: 0.1457 max mem: 21002 Epoch: [258] [300/312] eta: 0:00:06 lr: 0.000209 min_lr: 0.000209 loss: 3.6578 (3.5858) weight_decay: 0.0500 (0.0500) time: 0.4305 data: 0.0633 max mem: 21002 Epoch: [258] [310/312] eta: 0:00:01 lr: 0.000209 min_lr: 0.000209 loss: 3.8570 (3.5958) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [258] [311/312] eta: 0:00:00 lr: 0.000209 min_lr: 0.000209 loss: 3.7281 (3.5927) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [258] Total time: 0:02:51 (0.5502 s / it) Averaged stats: lr: 0.000209 min_lr: 0.000209 loss: 3.7281 (3.6199) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.1557 (1.1557) acc1: 80.5990 (80.5990) acc5: 95.1823 (95.1823) time: 8.8161 data: 8.6973 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4608 (1.3574) acc1: 70.9635 (72.4800) acc5: 91.5365 (91.4880) time: 1.0657 data: 0.9664 max mem: 21002 Test: Total time: 0:00:09 (1.0820 s / it) * Acc@1 72.498 Acc@5 91.404 loss 1.362 Accuracy of the model on the 50000 test images: 72.5% Max accuracy: 72.69% Epoch: [259] [ 0/312] eta: 1:01:58 lr: 0.000209 min_lr: 0.000209 loss: 3.5129 (3.5129) weight_decay: 0.0500 (0.0500) time: 11.9183 data: 8.4844 max mem: 21002 Epoch: [259] [ 10/312] eta: 0:07:53 lr: 0.000209 min_lr: 0.000209 loss: 3.8294 (3.5907) weight_decay: 0.0500 (0.0500) time: 1.5678 data: 0.9945 max mem: 21002 Epoch: [259] [ 20/312] eta: 0:05:10 lr: 0.000208 min_lr: 0.000208 loss: 3.8845 (3.6990) weight_decay: 0.0500 (0.0500) time: 0.5209 data: 0.1668 max mem: 21002 Epoch: [259] [ 30/312] eta: 0:03:49 lr: 0.000208 min_lr: 0.000208 loss: 3.7203 (3.5612) weight_decay: 0.0500 (0.0500) time: 0.3992 data: 0.0443 max mem: 21002 Epoch: [259] [ 40/312] eta: 0:03:20 lr: 0.000208 min_lr: 0.000208 loss: 3.4481 (3.5736) weight_decay: 0.0500 (0.0500) time: 0.3970 data: 0.0466 max mem: 21002 Epoch: [259] [ 50/312] eta: 0:03:10 lr: 0.000207 min_lr: 0.000207 loss: 3.4481 (3.5559) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.1102 max mem: 21002 Epoch: [259] [ 60/312] eta: 0:02:48 lr: 0.000207 min_lr: 0.000207 loss: 3.6700 (3.5712) weight_decay: 0.0500 (0.0500) time: 0.5230 data: 0.1070 max mem: 21002 Epoch: [259] [ 70/312] eta: 0:02:40 lr: 0.000207 min_lr: 0.000207 loss: 3.7446 (3.5844) weight_decay: 0.0500 (0.0500) time: 0.5075 data: 0.1161 max mem: 21002 Epoch: [259] [ 80/312] eta: 0:02:29 lr: 0.000206 min_lr: 0.000206 loss: 3.5882 (3.5600) weight_decay: 0.0500 (0.0500) time: 0.5711 data: 0.0751 max mem: 21002 Epoch: [259] [ 90/312] eta: 0:02:19 lr: 0.000206 min_lr: 0.000206 loss: 3.5729 (3.5663) weight_decay: 0.0500 (0.0500) time: 0.5083 data: 0.1215 max mem: 21002 Epoch: [259] [100/312] eta: 0:02:12 lr: 0.000206 min_lr: 0.000206 loss: 3.5980 (3.5486) weight_decay: 0.0500 (0.0500) time: 0.5430 data: 0.1667 max mem: 21002 Epoch: [259] [110/312] eta: 0:02:00 lr: 0.000205 min_lr: 0.000205 loss: 3.8520 (3.5740) weight_decay: 0.0500 (0.0500) time: 0.4527 data: 0.0754 max mem: 21002 Epoch: [259] [120/312] eta: 0:01:58 lr: 0.000205 min_lr: 0.000205 loss: 3.8538 (3.5677) weight_decay: 0.0500 (0.0500) time: 0.5696 data: 0.1616 max mem: 21002 Epoch: [259] [130/312] eta: 0:01:49 lr: 0.000205 min_lr: 0.000205 loss: 3.7429 (3.5763) weight_decay: 0.0500 (0.0500) time: 0.6094 data: 0.1348 max mem: 21002 Epoch: [259] [140/312] eta: 0:01:40 lr: 0.000204 min_lr: 0.000204 loss: 3.7728 (3.5914) weight_decay: 0.0500 (0.0500) time: 0.4089 data: 0.0596 max mem: 21002 Epoch: [259] [150/312] eta: 0:01:37 lr: 0.000204 min_lr: 0.000204 loss: 3.6780 (3.6014) weight_decay: 0.0500 (0.0500) time: 0.6094 data: 0.1937 max mem: 21002 Epoch: [259] [160/312] eta: 0:01:29 lr: 0.000204 min_lr: 0.000204 loss: 3.5776 (3.5957) weight_decay: 0.0500 (0.0500) time: 0.6360 data: 0.1363 max mem: 21002 Epoch: [259] [170/312] eta: 0:01:23 lr: 0.000203 min_lr: 0.000203 loss: 3.5809 (3.5993) weight_decay: 0.0500 (0.0500) time: 0.4638 data: 0.0583 max mem: 21002 Epoch: [259] [180/312] eta: 0:01:16 lr: 0.000203 min_lr: 0.000203 loss: 3.8328 (3.6168) weight_decay: 0.0500 (0.0500) time: 0.4892 data: 0.0763 max mem: 21002 Epoch: [259] [190/312] eta: 0:01:09 lr: 0.000203 min_lr: 0.000203 loss: 3.8424 (3.6193) weight_decay: 0.0500 (0.0500) time: 0.4501 data: 0.0729 max mem: 21002 Epoch: [259] [200/312] eta: 0:01:04 lr: 0.000203 min_lr: 0.000203 loss: 3.4517 (3.5958) weight_decay: 0.0500 (0.0500) time: 0.5737 data: 0.1577 max mem: 21002 Epoch: [259] [210/312] eta: 0:00:58 lr: 0.000202 min_lr: 0.000202 loss: 3.6081 (3.5933) weight_decay: 0.0500 (0.0500) time: 0.6041 data: 0.1043 max mem: 21002 Epoch: [259] [220/312] eta: 0:00:52 lr: 0.000202 min_lr: 0.000202 loss: 3.7249 (3.6019) weight_decay: 0.0500 (0.0500) time: 0.4352 data: 0.0647 max mem: 21002 Epoch: [259] [230/312] eta: 0:00:47 lr: 0.000202 min_lr: 0.000202 loss: 3.8909 (3.6132) weight_decay: 0.0500 (0.0500) time: 0.5994 data: 0.1781 max mem: 21002 Epoch: [259] [240/312] eta: 0:00:41 lr: 0.000201 min_lr: 0.000201 loss: 3.8909 (3.6136) weight_decay: 0.0500 (0.0500) time: 0.6200 data: 0.1141 max mem: 21002 Epoch: [259] [250/312] eta: 0:00:35 lr: 0.000201 min_lr: 0.000201 loss: 3.7102 (3.6073) weight_decay: 0.0500 (0.0500) time: 0.4874 data: 0.0842 max mem: 21002 Epoch: [259] [260/312] eta: 0:00:29 lr: 0.000201 min_lr: 0.000201 loss: 3.8617 (3.6147) weight_decay: 0.0500 (0.0500) time: 0.5244 data: 0.0922 max mem: 21002 Epoch: [259] [270/312] eta: 0:00:23 lr: 0.000200 min_lr: 0.000200 loss: 3.8025 (3.6104) weight_decay: 0.0500 (0.0500) time: 0.4709 data: 0.0710 max mem: 21002 Epoch: [259] [280/312] eta: 0:00:18 lr: 0.000200 min_lr: 0.000200 loss: 3.6897 (3.6133) weight_decay: 0.0500 (0.0500) time: 0.5565 data: 0.1507 max mem: 21002 Epoch: [259] [290/312] eta: 0:00:12 lr: 0.000200 min_lr: 0.000200 loss: 3.8069 (3.6087) weight_decay: 0.0500 (0.0500) time: 0.5955 data: 0.0979 max mem: 21002 Epoch: [259] [300/312] eta: 0:00:06 lr: 0.000199 min_lr: 0.000199 loss: 3.7988 (3.6043) weight_decay: 0.0500 (0.0500) time: 0.4246 data: 0.0509 max mem: 21002 Epoch: [259] [310/312] eta: 0:00:01 lr: 0.000199 min_lr: 0.000199 loss: 3.6675 (3.6007) weight_decay: 0.0500 (0.0500) time: 0.3175 data: 0.0411 max mem: 21002 Epoch: [259] [311/312] eta: 0:00:00 lr: 0.000199 min_lr: 0.000199 loss: 2.9817 (3.5969) weight_decay: 0.0500 (0.0500) time: 0.3175 data: 0.0410 max mem: 21002 Epoch: [259] Total time: 0:02:51 (0.5513 s / it) Averaged stats: lr: 0.000199 min_lr: 0.000199 loss: 2.9817 (3.6131) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:11 loss: 1.0294 (1.0294) acc1: 80.5990 (80.5990) acc5: 95.5729 (95.5729) time: 7.9015 data: 7.7827 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3549 (1.2706) acc1: 72.7865 (73.2320) acc5: 91.1458 (91.3600) time: 1.0477 data: 0.9512 max mem: 21002 Test: Total time: 0:00:09 (1.0631 s / it) * Acc@1 72.744 Acc@5 91.528 loss 1.274 Accuracy of the model on the 50000 test images: 72.7% Max accuracy: 72.74% Epoch: [260] [ 0/312] eta: 1:01:22 lr: 0.000199 min_lr: 0.000199 loss: 3.9244 (3.9244) weight_decay: 0.0500 (0.0500) time: 11.8026 data: 9.4712 max mem: 21002 Epoch: [260] [ 10/312] eta: 0:07:59 lr: 0.000199 min_lr: 0.000199 loss: 3.3915 (3.3692) weight_decay: 0.0500 (0.0500) time: 1.5886 data: 1.0098 max mem: 21002 Epoch: [260] [ 20/312] eta: 0:04:58 lr: 0.000198 min_lr: 0.000198 loss: 3.2349 (3.3600) weight_decay: 0.0500 (0.0500) time: 0.4821 data: 0.1130 max mem: 21002 Epoch: [260] [ 30/312] eta: 0:03:41 lr: 0.000198 min_lr: 0.000198 loss: 3.6899 (3.3995) weight_decay: 0.0500 (0.0500) time: 0.3421 data: 0.0314 max mem: 21002 Epoch: [260] [ 40/312] eta: 0:03:20 lr: 0.000198 min_lr: 0.000198 loss: 3.5949 (3.4147) weight_decay: 0.0500 (0.0500) time: 0.4362 data: 0.0580 max mem: 21002 Epoch: [260] [ 50/312] eta: 0:03:05 lr: 0.000197 min_lr: 0.000197 loss: 3.5949 (3.4749) weight_decay: 0.0500 (0.0500) time: 0.5875 data: 0.1515 max mem: 21002 Epoch: [260] [ 60/312] eta: 0:02:41 lr: 0.000197 min_lr: 0.000197 loss: 3.8542 (3.5370) weight_decay: 0.0500 (0.0500) time: 0.4476 data: 0.0942 max mem: 21002 Epoch: [260] [ 70/312] eta: 0:02:33 lr: 0.000197 min_lr: 0.000197 loss: 3.8542 (3.5452) weight_decay: 0.0500 (0.0500) time: 0.4515 data: 0.1299 max mem: 21002 Epoch: [260] [ 80/312] eta: 0:02:28 lr: 0.000197 min_lr: 0.000197 loss: 3.7471 (3.5632) weight_decay: 0.0500 (0.0500) time: 0.6324 data: 0.3194 max mem: 21002 Epoch: [260] [ 90/312] eta: 0:02:14 lr: 0.000196 min_lr: 0.000196 loss: 3.7471 (3.5604) weight_decay: 0.0500 (0.0500) time: 0.5026 data: 0.2138 max mem: 21002 Epoch: [260] [100/312] eta: 0:02:09 lr: 0.000196 min_lr: 0.000196 loss: 3.4746 (3.5276) weight_decay: 0.0500 (0.0500) time: 0.5033 data: 0.2146 max mem: 21002 Epoch: [260] [110/312] eta: 0:01:57 lr: 0.000196 min_lr: 0.000196 loss: 3.1231 (3.5324) weight_decay: 0.0500 (0.0500) time: 0.4772 data: 0.1909 max mem: 21002 Epoch: [260] [120/312] eta: 0:01:53 lr: 0.000195 min_lr: 0.000195 loss: 3.6699 (3.5418) weight_decay: 0.0500 (0.0500) time: 0.4979 data: 0.2096 max mem: 21002 Epoch: [260] [130/312] eta: 0:01:48 lr: 0.000195 min_lr: 0.000195 loss: 3.6934 (3.5540) weight_decay: 0.0500 (0.0500) time: 0.6782 data: 0.3826 max mem: 21002 Epoch: [260] [140/312] eta: 0:01:38 lr: 0.000195 min_lr: 0.000195 loss: 3.7018 (3.5527) weight_decay: 0.0500 (0.0500) time: 0.4676 data: 0.1737 max mem: 21002 Epoch: [260] [150/312] eta: 0:01:34 lr: 0.000194 min_lr: 0.000194 loss: 3.4679 (3.5624) weight_decay: 0.0500 (0.0500) time: 0.4752 data: 0.1862 max mem: 21002 Epoch: [260] [160/312] eta: 0:01:29 lr: 0.000194 min_lr: 0.000194 loss: 3.7239 (3.5779) weight_decay: 0.0500 (0.0500) time: 0.6789 data: 0.3699 max mem: 21002 Epoch: [260] [170/312] eta: 0:01:21 lr: 0.000194 min_lr: 0.000194 loss: 3.7715 (3.5839) weight_decay: 0.0500 (0.0500) time: 0.5399 data: 0.1843 max mem: 21002 Epoch: [260] [180/312] eta: 0:01:16 lr: 0.000193 min_lr: 0.000193 loss: 3.7750 (3.5983) weight_decay: 0.0500 (0.0500) time: 0.4819 data: 0.1210 max mem: 21002 Epoch: [260] [190/312] eta: 0:01:09 lr: 0.000193 min_lr: 0.000193 loss: 3.7288 (3.5996) weight_decay: 0.0500 (0.0500) time: 0.4986 data: 0.1210 max mem: 21002 Epoch: [260] [200/312] eta: 0:01:03 lr: 0.000193 min_lr: 0.000193 loss: 3.7288 (3.6072) weight_decay: 0.0500 (0.0500) time: 0.4947 data: 0.1076 max mem: 21002 Epoch: [260] [210/312] eta: 0:00:58 lr: 0.000193 min_lr: 0.000193 loss: 3.8835 (3.6205) weight_decay: 0.0500 (0.0500) time: 0.6620 data: 0.2296 max mem: 21002 Epoch: [260] [220/312] eta: 0:00:52 lr: 0.000192 min_lr: 0.000192 loss: 3.8886 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.5436 data: 0.1229 max mem: 21002 Epoch: [260] [230/312] eta: 0:00:46 lr: 0.000192 min_lr: 0.000192 loss: 3.7520 (3.6254) weight_decay: 0.0500 (0.0500) time: 0.4788 data: 0.1218 max mem: 21002 Epoch: [260] [240/312] eta: 0:00:40 lr: 0.000192 min_lr: 0.000192 loss: 3.5846 (3.6287) weight_decay: 0.0500 (0.0500) time: 0.5969 data: 0.1924 max mem: 21002 Epoch: [260] [250/312] eta: 0:00:35 lr: 0.000191 min_lr: 0.000191 loss: 3.5733 (3.6193) weight_decay: 0.0500 (0.0500) time: 0.5239 data: 0.1364 max mem: 21002 Epoch: [260] [260/312] eta: 0:00:29 lr: 0.000191 min_lr: 0.000191 loss: 3.5733 (3.6141) weight_decay: 0.0500 (0.0500) time: 0.5949 data: 0.2136 max mem: 21002 Epoch: [260] [270/312] eta: 0:00:23 lr: 0.000191 min_lr: 0.000191 loss: 3.8088 (3.6168) weight_decay: 0.0500 (0.0500) time: 0.5000 data: 0.1487 max mem: 21002 Epoch: [260] [280/312] eta: 0:00:18 lr: 0.000190 min_lr: 0.000190 loss: 3.9226 (3.6278) weight_decay: 0.0500 (0.0500) time: 0.5145 data: 0.1325 max mem: 21002 Epoch: [260] [290/312] eta: 0:00:12 lr: 0.000190 min_lr: 0.000190 loss: 3.8318 (3.6248) weight_decay: 0.0500 (0.0500) time: 0.6428 data: 0.1932 max mem: 21002 Epoch: [260] [300/312] eta: 0:00:06 lr: 0.000190 min_lr: 0.000190 loss: 3.7376 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.4103 data: 0.0611 max mem: 21002 Epoch: [260] [310/312] eta: 0:00:01 lr: 0.000189 min_lr: 0.000189 loss: 3.6980 (3.6215) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [260] [311/312] eta: 0:00:00 lr: 0.000189 min_lr: 0.000189 loss: 3.6980 (3.6220) weight_decay: 0.0500 (0.0500) time: 0.2769 data: 0.0001 max mem: 21002 Epoch: [260] Total time: 0:02:51 (0.5503 s / it) Averaged stats: lr: 0.000189 min_lr: 0.000189 loss: 3.6980 (3.6141) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:14 loss: 1.1198 (1.1198) acc1: 81.1198 (81.1198) acc5: 94.9219 (94.9219) time: 8.2469 data: 8.1282 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4410 (1.3684) acc1: 72.5260 (73.0560) acc5: 90.6250 (91.5200) time: 1.0069 data: 0.9081 max mem: 21002 Test: Total time: 0:00:09 (1.0171 s / it) * Acc@1 72.868 Acc@5 91.462 loss 1.375 Accuracy of the model on the 50000 test images: 72.9% Max accuracy: 72.87% Epoch: [261] [ 0/312] eta: 1:01:14 lr: 0.000189 min_lr: 0.000189 loss: 3.6210 (3.6210) weight_decay: 0.0500 (0.0500) time: 11.7785 data: 9.9741 max mem: 21002 Epoch: [261] [ 10/312] eta: 0:07:45 lr: 0.000189 min_lr: 0.000189 loss: 3.9695 (3.7442) weight_decay: 0.0500 (0.0500) time: 1.5428 data: 0.9632 max mem: 21002 Epoch: [261] [ 20/312] eta: 0:05:09 lr: 0.000189 min_lr: 0.000189 loss: 3.7192 (3.5796) weight_decay: 0.0500 (0.0500) time: 0.5239 data: 0.1330 max mem: 21002 Epoch: [261] [ 30/312] eta: 0:03:49 lr: 0.000188 min_lr: 0.000188 loss: 3.5523 (3.5886) weight_decay: 0.0500 (0.0500) time: 0.4111 data: 0.1023 max mem: 21002 Epoch: [261] [ 40/312] eta: 0:03:26 lr: 0.000188 min_lr: 0.000188 loss: 3.5715 (3.5835) weight_decay: 0.0500 (0.0500) time: 0.4428 data: 0.1074 max mem: 21002 Epoch: [261] [ 50/312] eta: 0:03:14 lr: 0.000188 min_lr: 0.000188 loss: 3.6407 (3.5753) weight_decay: 0.0500 (0.0500) time: 0.6300 data: 0.2531 max mem: 21002 Epoch: [261] [ 60/312] eta: 0:02:48 lr: 0.000188 min_lr: 0.000188 loss: 3.7625 (3.5887) weight_decay: 0.0500 (0.0500) time: 0.4781 data: 0.1465 max mem: 21002 Epoch: [261] [ 70/312] eta: 0:02:39 lr: 0.000187 min_lr: 0.000187 loss: 3.5148 (3.5680) weight_decay: 0.0500 (0.0500) time: 0.4473 data: 0.1583 max mem: 21002 Epoch: [261] [ 80/312] eta: 0:02:31 lr: 0.000187 min_lr: 0.000187 loss: 3.3462 (3.5409) weight_decay: 0.0500 (0.0500) time: 0.6174 data: 0.3292 max mem: 21002 Epoch: [261] [ 90/312] eta: 0:02:16 lr: 0.000187 min_lr: 0.000187 loss: 3.4166 (3.5380) weight_decay: 0.0500 (0.0500) time: 0.4573 data: 0.1716 max mem: 21002 Epoch: [261] [100/312] eta: 0:02:11 lr: 0.000186 min_lr: 0.000186 loss: 3.6268 (3.5428) weight_decay: 0.0500 (0.0500) time: 0.4724 data: 0.1860 max mem: 21002 Epoch: [261] [110/312] eta: 0:01:58 lr: 0.000186 min_lr: 0.000186 loss: 3.5335 (3.5390) weight_decay: 0.0500 (0.0500) time: 0.4713 data: 0.1860 max mem: 21002 Epoch: [261] [120/312] eta: 0:01:54 lr: 0.000186 min_lr: 0.000186 loss: 3.6576 (3.5575) weight_decay: 0.0500 (0.0500) time: 0.4814 data: 0.1973 max mem: 21002 Epoch: [261] [130/312] eta: 0:01:49 lr: 0.000185 min_lr: 0.000185 loss: 3.8423 (3.5550) weight_decay: 0.0500 (0.0500) time: 0.6851 data: 0.4009 max mem: 21002 Epoch: [261] [140/312] eta: 0:01:39 lr: 0.000185 min_lr: 0.000185 loss: 3.3750 (3.5377) weight_decay: 0.0500 (0.0500) time: 0.4886 data: 0.2042 max mem: 21002 Epoch: [261] [150/312] eta: 0:01:35 lr: 0.000185 min_lr: 0.000185 loss: 3.5806 (3.5571) weight_decay: 0.0500 (0.0500) time: 0.4770 data: 0.1912 max mem: 21002 Epoch: [261] [160/312] eta: 0:01:29 lr: 0.000185 min_lr: 0.000185 loss: 3.9886 (3.5753) weight_decay: 0.0500 (0.0500) time: 0.6220 data: 0.3355 max mem: 21002 Epoch: [261] [170/312] eta: 0:01:21 lr: 0.000184 min_lr: 0.000184 loss: 3.9031 (3.5721) weight_decay: 0.0500 (0.0500) time: 0.4759 data: 0.1617 max mem: 21002 Epoch: [261] [180/312] eta: 0:01:16 lr: 0.000184 min_lr: 0.000184 loss: 3.8885 (3.5767) weight_decay: 0.0500 (0.0500) time: 0.5232 data: 0.1664 max mem: 21002 Epoch: [261] [190/312] eta: 0:01:08 lr: 0.000184 min_lr: 0.000184 loss: 3.8885 (3.5785) weight_decay: 0.0500 (0.0500) time: 0.4786 data: 0.1494 max mem: 21002 Epoch: [261] [200/312] eta: 0:01:03 lr: 0.000183 min_lr: 0.000183 loss: 3.6065 (3.5734) weight_decay: 0.0500 (0.0500) time: 0.4729 data: 0.1635 max mem: 21002 Epoch: [261] [210/312] eta: 0:00:58 lr: 0.000183 min_lr: 0.000183 loss: 3.5215 (3.5781) weight_decay: 0.0500 (0.0500) time: 0.6420 data: 0.3200 max mem: 21002 Epoch: [261] [220/312] eta: 0:00:51 lr: 0.000183 min_lr: 0.000183 loss: 3.7299 (3.5843) weight_decay: 0.0500 (0.0500) time: 0.4866 data: 0.1571 max mem: 21002 Epoch: [261] [230/312] eta: 0:00:46 lr: 0.000182 min_lr: 0.000182 loss: 3.7088 (3.5858) weight_decay: 0.0500 (0.0500) time: 0.5485 data: 0.1586 max mem: 21002 Epoch: [261] [240/312] eta: 0:00:40 lr: 0.000182 min_lr: 0.000182 loss: 3.7088 (3.5869) weight_decay: 0.0500 (0.0500) time: 0.6307 data: 0.2087 max mem: 21002 Epoch: [261] [250/312] eta: 0:00:34 lr: 0.000182 min_lr: 0.000182 loss: 3.7877 (3.5977) weight_decay: 0.0500 (0.0500) time: 0.4784 data: 0.0933 max mem: 21002 Epoch: [261] [260/312] eta: 0:00:29 lr: 0.000182 min_lr: 0.000182 loss: 3.7923 (3.5960) weight_decay: 0.0500 (0.0500) time: 0.5705 data: 0.1539 max mem: 21002 Epoch: [261] [270/312] eta: 0:00:23 lr: 0.000181 min_lr: 0.000181 loss: 3.8551 (3.6021) weight_decay: 0.0500 (0.0500) time: 0.4916 data: 0.1114 max mem: 21002 Epoch: [261] [280/312] eta: 0:00:18 lr: 0.000181 min_lr: 0.000181 loss: 3.7030 (3.5999) weight_decay: 0.0500 (0.0500) time: 0.5093 data: 0.0619 max mem: 21002 Epoch: [261] [290/312] eta: 0:00:12 lr: 0.000181 min_lr: 0.000181 loss: 3.7040 (3.6014) weight_decay: 0.0500 (0.0500) time: 0.6068 data: 0.0894 max mem: 21002 Epoch: [261] [300/312] eta: 0:00:06 lr: 0.000180 min_lr: 0.000180 loss: 3.8969 (3.6097) weight_decay: 0.0500 (0.0500) time: 0.3880 data: 0.0279 max mem: 21002 Epoch: [261] [310/312] eta: 0:00:01 lr: 0.000180 min_lr: 0.000180 loss: 3.8243 (3.6042) weight_decay: 0.0500 (0.0500) time: 0.2914 data: 0.0001 max mem: 21002 Epoch: [261] [311/312] eta: 0:00:00 lr: 0.000180 min_lr: 0.000180 loss: 3.8357 (3.6069) weight_decay: 0.0500 (0.0500) time: 0.2831 data: 0.0001 max mem: 21002 Epoch: [261] Total time: 0:02:50 (0.5456 s / it) Averaged stats: lr: 0.000180 min_lr: 0.000180 loss: 3.8357 (3.6201) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.0934 (1.0934) acc1: 81.6406 (81.6406) acc5: 95.0521 (95.0521) time: 8.7491 data: 8.6188 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4351 (1.3400) acc1: 72.1354 (72.9760) acc5: 91.5365 (91.5360) time: 1.0623 data: 0.9577 max mem: 21002 Test: Total time: 0:00:09 (1.0791 s / it) * Acc@1 72.728 Acc@5 91.606 loss 1.342 Accuracy of the model on the 50000 test images: 72.7% Max accuracy: 72.87% Epoch: [262] [ 0/312] eta: 1:02:46 lr: 0.000180 min_lr: 0.000180 loss: 4.2683 (4.2683) weight_decay: 0.0500 (0.0500) time: 12.0720 data: 8.4644 max mem: 21002 Epoch: [262] [ 10/312] eta: 0:08:07 lr: 0.000180 min_lr: 0.000180 loss: 2.9971 (3.2840) weight_decay: 0.0500 (0.0500) time: 1.6139 data: 0.9699 max mem: 21002 Epoch: [262] [ 20/312] eta: 0:05:16 lr: 0.000179 min_lr: 0.000179 loss: 3.3845 (3.4533) weight_decay: 0.0500 (0.0500) time: 0.5334 data: 0.1566 max mem: 21002 Epoch: [262] [ 30/312] eta: 0:03:53 lr: 0.000179 min_lr: 0.000179 loss: 3.7731 (3.5553) weight_decay: 0.0500 (0.0500) time: 0.3950 data: 0.0467 max mem: 21002 Epoch: [262] [ 40/312] eta: 0:03:19 lr: 0.000179 min_lr: 0.000179 loss: 3.6106 (3.5174) weight_decay: 0.0500 (0.0500) time: 0.3677 data: 0.0071 max mem: 21002 Epoch: [262] [ 50/312] eta: 0:03:09 lr: 0.000179 min_lr: 0.000179 loss: 3.6647 (3.5325) weight_decay: 0.0500 (0.0500) time: 0.5663 data: 0.0739 max mem: 21002 Epoch: [262] [ 60/312] eta: 0:02:44 lr: 0.000178 min_lr: 0.000178 loss: 3.6647 (3.5356) weight_decay: 0.0500 (0.0500) time: 0.4864 data: 0.0675 max mem: 21002 Epoch: [262] [ 70/312] eta: 0:02:40 lr: 0.000178 min_lr: 0.000178 loss: 3.8118 (3.6007) weight_decay: 0.0500 (0.0500) time: 0.5084 data: 0.0719 max mem: 21002 Epoch: [262] [ 80/312] eta: 0:02:30 lr: 0.000178 min_lr: 0.000178 loss: 3.8697 (3.6095) weight_decay: 0.0500 (0.0500) time: 0.6364 data: 0.0718 max mem: 21002 Epoch: [262] [ 90/312] eta: 0:02:17 lr: 0.000177 min_lr: 0.000177 loss: 3.7179 (3.6142) weight_decay: 0.0500 (0.0500) time: 0.4661 data: 0.0555 max mem: 21002 Epoch: [262] [100/312] eta: 0:02:13 lr: 0.000177 min_lr: 0.000177 loss: 3.7009 (3.5888) weight_decay: 0.0500 (0.0500) time: 0.5609 data: 0.1389 max mem: 21002 Epoch: [262] [110/312] eta: 0:02:01 lr: 0.000177 min_lr: 0.000177 loss: 3.5678 (3.5928) weight_decay: 0.0500 (0.0500) time: 0.5076 data: 0.0840 max mem: 21002 Epoch: [262] [120/312] eta: 0:01:55 lr: 0.000176 min_lr: 0.000176 loss: 3.6740 (3.6151) weight_decay: 0.0500 (0.0500) time: 0.4668 data: 0.0710 max mem: 21002 Epoch: [262] [130/312] eta: 0:01:50 lr: 0.000176 min_lr: 0.000176 loss: 3.8175 (3.6136) weight_decay: 0.0500 (0.0500) time: 0.6476 data: 0.1325 max mem: 21002 Epoch: [262] [140/312] eta: 0:01:40 lr: 0.000176 min_lr: 0.000176 loss: 3.8578 (3.6376) weight_decay: 0.0500 (0.0500) time: 0.4787 data: 0.0730 max mem: 21002 Epoch: [262] [150/312] eta: 0:01:36 lr: 0.000176 min_lr: 0.000176 loss: 3.7757 (3.6202) weight_decay: 0.0500 (0.0500) time: 0.5169 data: 0.0992 max mem: 21002 Epoch: [262] [160/312] eta: 0:01:29 lr: 0.000175 min_lr: 0.000175 loss: 3.5904 (3.6287) weight_decay: 0.0500 (0.0500) time: 0.6168 data: 0.0884 max mem: 21002 Epoch: [262] [170/312] eta: 0:01:22 lr: 0.000175 min_lr: 0.000175 loss: 3.7056 (3.6258) weight_decay: 0.0500 (0.0500) time: 0.4481 data: 0.0541 max mem: 21002 Epoch: [262] [180/312] eta: 0:01:16 lr: 0.000175 min_lr: 0.000175 loss: 3.7781 (3.6282) weight_decay: 0.0500 (0.0500) time: 0.5187 data: 0.1256 max mem: 21002 Epoch: [262] [190/312] eta: 0:01:09 lr: 0.000174 min_lr: 0.000174 loss: 3.8345 (3.6287) weight_decay: 0.0500 (0.0500) time: 0.4823 data: 0.0875 max mem: 21002 Epoch: [262] [200/312] eta: 0:01:04 lr: 0.000174 min_lr: 0.000174 loss: 3.7133 (3.6283) weight_decay: 0.0500 (0.0500) time: 0.4796 data: 0.1114 max mem: 21002 Epoch: [262] [210/312] eta: 0:00:58 lr: 0.000174 min_lr: 0.000174 loss: 3.6405 (3.6137) weight_decay: 0.0500 (0.0500) time: 0.6185 data: 0.1757 max mem: 21002 Epoch: [262] [220/312] eta: 0:00:51 lr: 0.000174 min_lr: 0.000174 loss: 3.5532 (3.6126) weight_decay: 0.0500 (0.0500) time: 0.4899 data: 0.1095 max mem: 21002 Epoch: [262] [230/312] eta: 0:00:46 lr: 0.000173 min_lr: 0.000173 loss: 3.5532 (3.6118) weight_decay: 0.0500 (0.0500) time: 0.5397 data: 0.1177 max mem: 21002 Epoch: [262] [240/312] eta: 0:00:40 lr: 0.000173 min_lr: 0.000173 loss: 3.7290 (3.6188) weight_decay: 0.0500 (0.0500) time: 0.6142 data: 0.1148 max mem: 21002 Epoch: [262] [250/312] eta: 0:00:35 lr: 0.000173 min_lr: 0.000173 loss: 3.7248 (3.6207) weight_decay: 0.0500 (0.0500) time: 0.5209 data: 0.0931 max mem: 21002 Epoch: [262] [260/312] eta: 0:00:29 lr: 0.000172 min_lr: 0.000172 loss: 3.6833 (3.6186) weight_decay: 0.0500 (0.0500) time: 0.6058 data: 0.1221 max mem: 21002 Epoch: [262] [270/312] eta: 0:00:23 lr: 0.000172 min_lr: 0.000172 loss: 3.6833 (3.6149) weight_decay: 0.0500 (0.0500) time: 0.5034 data: 0.0564 max mem: 21002 Epoch: [262] [280/312] eta: 0:00:18 lr: 0.000172 min_lr: 0.000172 loss: 3.7648 (3.6232) weight_decay: 0.0500 (0.0500) time: 0.4819 data: 0.0381 max mem: 21002 Epoch: [262] [290/312] eta: 0:00:12 lr: 0.000171 min_lr: 0.000171 loss: 3.8164 (3.6256) weight_decay: 0.0500 (0.0500) time: 0.5260 data: 0.0777 max mem: 21002 Epoch: [262] [300/312] eta: 0:00:06 lr: 0.000171 min_lr: 0.000171 loss: 3.7039 (3.6290) weight_decay: 0.0500 (0.0500) time: 0.3770 data: 0.0406 max mem: 21002 Epoch: [262] [310/312] eta: 0:00:01 lr: 0.000171 min_lr: 0.000171 loss: 3.6682 (3.6238) weight_decay: 0.0500 (0.0500) time: 0.3219 data: 0.0001 max mem: 21002 Epoch: [262] [311/312] eta: 0:00:00 lr: 0.000171 min_lr: 0.000171 loss: 3.7032 (3.6244) weight_decay: 0.0500 (0.0500) time: 0.3191 data: 0.0001 max mem: 21002 Epoch: [262] Total time: 0:02:50 (0.5465 s / it) Averaged stats: lr: 0.000171 min_lr: 0.000171 loss: 3.7032 (3.6166) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.0508 (1.0508) acc1: 80.4688 (80.4688) acc5: 95.1823 (95.1823) time: 8.5805 data: 8.4617 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4247 (1.3278) acc1: 71.3542 (72.5760) acc5: 91.4062 (91.6480) time: 1.0433 data: 0.9460 max mem: 21002 Test: Total time: 0:00:09 (1.0726 s / it) * Acc@1 72.924 Acc@5 91.606 loss 1.332 Accuracy of the model on the 50000 test images: 72.9% Max accuracy: 72.92% Epoch: [263] [ 0/312] eta: 1:03:16 lr: 0.000171 min_lr: 0.000171 loss: 2.3784 (2.3784) weight_decay: 0.0500 (0.0500) time: 12.1693 data: 9.9126 max mem: 21002 Epoch: [263] [ 10/312] eta: 0:08:25 lr: 0.000171 min_lr: 0.000171 loss: 3.8904 (3.6620) weight_decay: 0.0500 (0.0500) time: 1.6740 data: 1.0055 max mem: 21002 Epoch: [263] [ 20/312] eta: 0:04:58 lr: 0.000170 min_lr: 0.000170 loss: 3.7044 (3.5451) weight_decay: 0.0500 (0.0500) time: 0.4651 data: 0.0583 max mem: 21002 Epoch: [263] [ 30/312] eta: 0:03:41 lr: 0.000170 min_lr: 0.000170 loss: 3.5821 (3.5689) weight_decay: 0.0500 (0.0500) time: 0.2976 data: 0.0013 max mem: 21002 Epoch: [263] [ 40/312] eta: 0:03:06 lr: 0.000170 min_lr: 0.000170 loss: 3.5889 (3.5787) weight_decay: 0.0500 (0.0500) time: 0.3349 data: 0.0008 max mem: 21002 Epoch: [263] [ 50/312] eta: 0:02:54 lr: 0.000169 min_lr: 0.000169 loss: 3.1671 (3.5031) weight_decay: 0.0500 (0.0500) time: 0.4837 data: 0.1154 max mem: 21002 Epoch: [263] [ 60/312] eta: 0:02:38 lr: 0.000169 min_lr: 0.000169 loss: 3.5050 (3.5530) weight_decay: 0.0500 (0.0500) time: 0.5092 data: 0.1197 max mem: 21002 Epoch: [263] [ 70/312] eta: 0:02:35 lr: 0.000169 min_lr: 0.000169 loss: 3.7941 (3.5635) weight_decay: 0.0500 (0.0500) time: 0.5751 data: 0.0880 max mem: 21002 Epoch: [263] [ 80/312] eta: 0:02:23 lr: 0.000169 min_lr: 0.000169 loss: 3.7941 (3.5787) weight_decay: 0.0500 (0.0500) time: 0.5960 data: 0.1659 max mem: 21002 Epoch: [263] [ 90/312] eta: 0:02:16 lr: 0.000168 min_lr: 0.000168 loss: 3.5696 (3.5604) weight_decay: 0.0500 (0.0500) time: 0.5172 data: 0.0830 max mem: 21002 Epoch: [263] [100/312] eta: 0:02:12 lr: 0.000168 min_lr: 0.000168 loss: 3.5253 (3.5466) weight_decay: 0.0500 (0.0500) time: 0.6407 data: 0.0059 max mem: 21002 Epoch: [263] [110/312] eta: 0:02:01 lr: 0.000168 min_lr: 0.000168 loss: 3.4545 (3.5325) weight_decay: 0.0500 (0.0500) time: 0.5366 data: 0.0395 max mem: 21002 Epoch: [263] [120/312] eta: 0:01:56 lr: 0.000167 min_lr: 0.000167 loss: 3.1547 (3.5142) weight_decay: 0.0500 (0.0500) time: 0.5313 data: 0.0523 max mem: 21002 Epoch: [263] [130/312] eta: 0:01:46 lr: 0.000167 min_lr: 0.000167 loss: 3.4853 (3.5237) weight_decay: 0.0500 (0.0500) time: 0.5087 data: 0.0256 max mem: 21002 Epoch: [263] [140/312] eta: 0:01:41 lr: 0.000167 min_lr: 0.000167 loss: 3.7815 (3.5471) weight_decay: 0.0500 (0.0500) time: 0.4869 data: 0.0243 max mem: 21002 Epoch: [263] [150/312] eta: 0:01:36 lr: 0.000167 min_lr: 0.000167 loss: 3.7707 (3.5452) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.0659 max mem: 21002 Epoch: [263] [160/312] eta: 0:01:28 lr: 0.000166 min_lr: 0.000166 loss: 3.5659 (3.5501) weight_decay: 0.0500 (0.0500) time: 0.4920 data: 0.0493 max mem: 21002 Epoch: [263] [170/312] eta: 0:01:22 lr: 0.000166 min_lr: 0.000166 loss: 3.7438 (3.5557) weight_decay: 0.0500 (0.0500) time: 0.4740 data: 0.0194 max mem: 21002 Epoch: [263] [180/312] eta: 0:01:17 lr: 0.000166 min_lr: 0.000166 loss: 3.8256 (3.5607) weight_decay: 0.0500 (0.0500) time: 0.6571 data: 0.0196 max mem: 21002 Epoch: [263] [190/312] eta: 0:01:09 lr: 0.000165 min_lr: 0.000165 loss: 3.7120 (3.5649) weight_decay: 0.0500 (0.0500) time: 0.4865 data: 0.0135 max mem: 21002 Epoch: [263] [200/312] eta: 0:01:04 lr: 0.000165 min_lr: 0.000165 loss: 3.8202 (3.5766) weight_decay: 0.0500 (0.0500) time: 0.4958 data: 0.0545 max mem: 21002 Epoch: [263] [210/312] eta: 0:00:58 lr: 0.000165 min_lr: 0.000165 loss: 3.8721 (3.5856) weight_decay: 0.0500 (0.0500) time: 0.5208 data: 0.0721 max mem: 21002 Epoch: [263] [220/312] eta: 0:00:52 lr: 0.000165 min_lr: 0.000165 loss: 3.7953 (3.5869) weight_decay: 0.0500 (0.0500) time: 0.4661 data: 0.0311 max mem: 21002 Epoch: [263] [230/312] eta: 0:00:47 lr: 0.000164 min_lr: 0.000164 loss: 3.5260 (3.5835) weight_decay: 0.0500 (0.0500) time: 0.6252 data: 0.0396 max mem: 21002 Epoch: [263] [240/312] eta: 0:00:40 lr: 0.000164 min_lr: 0.000164 loss: 3.8403 (3.5933) weight_decay: 0.0500 (0.0500) time: 0.5101 data: 0.0466 max mem: 21002 Epoch: [263] [250/312] eta: 0:00:35 lr: 0.000164 min_lr: 0.000164 loss: 3.8524 (3.5899) weight_decay: 0.0500 (0.0500) time: 0.4918 data: 0.0442 max mem: 21002 Epoch: [263] [260/312] eta: 0:00:29 lr: 0.000163 min_lr: 0.000163 loss: 3.6647 (3.5932) weight_decay: 0.0500 (0.0500) time: 0.6621 data: 0.0525 max mem: 21002 Epoch: [263] [270/312] eta: 0:00:23 lr: 0.000163 min_lr: 0.000163 loss: 3.5845 (3.5884) weight_decay: 0.0500 (0.0500) time: 0.4911 data: 0.0225 max mem: 21002 Epoch: [263] [280/312] eta: 0:00:18 lr: 0.000163 min_lr: 0.000163 loss: 3.4508 (3.5822) weight_decay: 0.0500 (0.0500) time: 0.4687 data: 0.0440 max mem: 21002 Epoch: [263] [290/312] eta: 0:00:12 lr: 0.000163 min_lr: 0.000163 loss: 3.7527 (3.5938) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0672 max mem: 21002 Epoch: [263] [300/312] eta: 0:00:06 lr: 0.000162 min_lr: 0.000162 loss: 3.8424 (3.5955) weight_decay: 0.0500 (0.0500) time: 0.4350 data: 0.0300 max mem: 21002 Epoch: [263] [310/312] eta: 0:00:01 lr: 0.000162 min_lr: 0.000162 loss: 3.6291 (3.5977) weight_decay: 0.0500 (0.0500) time: 0.2956 data: 0.0001 max mem: 21002 Epoch: [263] [311/312] eta: 0:00:00 lr: 0.000162 min_lr: 0.000162 loss: 3.6291 (3.5987) weight_decay: 0.0500 (0.0500) time: 0.2955 data: 0.0001 max mem: 21002 Epoch: [263] Total time: 0:02:51 (0.5495 s / it) Averaged stats: lr: 0.000162 min_lr: 0.000162 loss: 3.6291 (3.6011) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.0914 (1.0914) acc1: 80.4688 (80.4688) acc5: 95.4427 (95.4427) time: 8.7826 data: 8.6639 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4245 (1.3336) acc1: 72.3958 (73.4400) acc5: 91.5365 (91.7120) time: 1.0612 data: 0.9627 max mem: 21002 Test: Total time: 0:00:09 (1.0738 s / it) * Acc@1 72.856 Acc@5 91.594 loss 1.345 Accuracy of the model on the 50000 test images: 72.9% Max accuracy: 72.92% Epoch: [264] [ 0/312] eta: 1:02:55 lr: 0.000162 min_lr: 0.000162 loss: 3.1852 (3.1852) weight_decay: 0.0500 (0.0500) time: 12.0999 data: 10.8783 max mem: 21002 Epoch: [264] [ 10/312] eta: 0:07:45 lr: 0.000162 min_lr: 0.000162 loss: 4.2198 (3.7921) weight_decay: 0.0500 (0.0500) time: 1.5407 data: 0.9899 max mem: 21002 Epoch: [264] [ 20/312] eta: 0:05:06 lr: 0.000161 min_lr: 0.000161 loss: 3.8817 (3.7210) weight_decay: 0.0500 (0.0500) time: 0.4982 data: 0.0607 max mem: 21002 Epoch: [264] [ 30/312] eta: 0:03:47 lr: 0.000161 min_lr: 0.000161 loss: 3.7361 (3.6666) weight_decay: 0.0500 (0.0500) time: 0.4008 data: 0.0605 max mem: 21002 Epoch: [264] [ 40/312] eta: 0:03:21 lr: 0.000161 min_lr: 0.000161 loss: 3.5564 (3.6544) weight_decay: 0.0500 (0.0500) time: 0.4175 data: 0.0927 max mem: 21002 Epoch: [264] [ 50/312] eta: 0:03:08 lr: 0.000160 min_lr: 0.000160 loss: 3.7612 (3.6616) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.1922 max mem: 21002 Epoch: [264] [ 60/312] eta: 0:02:43 lr: 0.000160 min_lr: 0.000160 loss: 3.7612 (3.6454) weight_decay: 0.0500 (0.0500) time: 0.4568 data: 0.1012 max mem: 21002 Epoch: [264] [ 70/312] eta: 0:02:41 lr: 0.000160 min_lr: 0.000160 loss: 3.8360 (3.6550) weight_decay: 0.0500 (0.0500) time: 0.5378 data: 0.1390 max mem: 21002 Epoch: [264] [ 80/312] eta: 0:02:28 lr: 0.000160 min_lr: 0.000160 loss: 3.7838 (3.6565) weight_decay: 0.0500 (0.0500) time: 0.6205 data: 0.1733 max mem: 21002 Epoch: [264] [ 90/312] eta: 0:02:18 lr: 0.000159 min_lr: 0.000159 loss: 3.9361 (3.7045) weight_decay: 0.0500 (0.0500) time: 0.4761 data: 0.0625 max mem: 21002 Epoch: [264] [100/312] eta: 0:02:14 lr: 0.000159 min_lr: 0.000159 loss: 3.9895 (3.7256) weight_decay: 0.0500 (0.0500) time: 0.6051 data: 0.1094 max mem: 21002 Epoch: [264] [110/312] eta: 0:02:01 lr: 0.000159 min_lr: 0.000159 loss: 3.8860 (3.7211) weight_decay: 0.0500 (0.0500) time: 0.5007 data: 0.0828 max mem: 21002 Epoch: [264] [120/312] eta: 0:01:57 lr: 0.000159 min_lr: 0.000159 loss: 3.6524 (3.7136) weight_decay: 0.0500 (0.0500) time: 0.4839 data: 0.0688 max mem: 21002 Epoch: [264] [130/312] eta: 0:01:51 lr: 0.000158 min_lr: 0.000158 loss: 3.8035 (3.7217) weight_decay: 0.0500 (0.0500) time: 0.6656 data: 0.0993 max mem: 21002 Epoch: [264] [140/312] eta: 0:01:41 lr: 0.000158 min_lr: 0.000158 loss: 3.3622 (3.6662) weight_decay: 0.0500 (0.0500) time: 0.4740 data: 0.0350 max mem: 21002 Epoch: [264] [150/312] eta: 0:01:36 lr: 0.000158 min_lr: 0.000158 loss: 3.2479 (3.6589) weight_decay: 0.0500 (0.0500) time: 0.4810 data: 0.0652 max mem: 21002 Epoch: [264] [160/312] eta: 0:01:28 lr: 0.000157 min_lr: 0.000157 loss: 3.5394 (3.6440) weight_decay: 0.0500 (0.0500) time: 0.5462 data: 0.0967 max mem: 21002 Epoch: [264] [170/312] eta: 0:01:22 lr: 0.000157 min_lr: 0.000157 loss: 3.6134 (3.6461) weight_decay: 0.0500 (0.0500) time: 0.4652 data: 0.0658 max mem: 21002 Epoch: [264] [180/312] eta: 0:01:16 lr: 0.000157 min_lr: 0.000157 loss: 3.7233 (3.6394) weight_decay: 0.0500 (0.0500) time: 0.5670 data: 0.1262 max mem: 21002 Epoch: [264] [190/312] eta: 0:01:09 lr: 0.000157 min_lr: 0.000157 loss: 3.5232 (3.6324) weight_decay: 0.0500 (0.0500) time: 0.4572 data: 0.0964 max mem: 21002 Epoch: [264] [200/312] eta: 0:01:04 lr: 0.000156 min_lr: 0.000156 loss: 3.7227 (3.6326) weight_decay: 0.0500 (0.0500) time: 0.4918 data: 0.1192 max mem: 21002 Epoch: [264] [210/312] eta: 0:00:58 lr: 0.000156 min_lr: 0.000156 loss: 3.8199 (3.6399) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.2359 max mem: 21002 Epoch: [264] [220/312] eta: 0:00:51 lr: 0.000156 min_lr: 0.000156 loss: 3.8731 (3.6456) weight_decay: 0.0500 (0.0500) time: 0.4785 data: 0.1172 max mem: 21002 Epoch: [264] [230/312] eta: 0:00:46 lr: 0.000155 min_lr: 0.000155 loss: 3.8479 (3.6435) weight_decay: 0.0500 (0.0500) time: 0.4935 data: 0.0949 max mem: 21002 Epoch: [264] [240/312] eta: 0:00:40 lr: 0.000155 min_lr: 0.000155 loss: 3.9186 (3.6513) weight_decay: 0.0500 (0.0500) time: 0.5706 data: 0.1591 max mem: 21002 Epoch: [264] [250/312] eta: 0:00:35 lr: 0.000155 min_lr: 0.000155 loss: 3.8391 (3.6402) weight_decay: 0.0500 (0.0500) time: 0.5135 data: 0.1104 max mem: 21002 Epoch: [264] [260/312] eta: 0:00:29 lr: 0.000155 min_lr: 0.000155 loss: 3.5068 (3.6341) weight_decay: 0.0500 (0.0500) time: 0.6290 data: 0.0861 max mem: 21002 Epoch: [264] [270/312] eta: 0:00:23 lr: 0.000154 min_lr: 0.000154 loss: 3.4479 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.0447 max mem: 21002 Epoch: [264] [280/312] eta: 0:00:18 lr: 0.000154 min_lr: 0.000154 loss: 3.5470 (3.6270) weight_decay: 0.0500 (0.0500) time: 0.4907 data: 0.0662 max mem: 21002 Epoch: [264] [290/312] eta: 0:00:12 lr: 0.000154 min_lr: 0.000154 loss: 3.8505 (3.6226) weight_decay: 0.0500 (0.0500) time: 0.6059 data: 0.0854 max mem: 21002 Epoch: [264] [300/312] eta: 0:00:06 lr: 0.000154 min_lr: 0.000154 loss: 3.7903 (3.6269) weight_decay: 0.0500 (0.0500) time: 0.4172 data: 0.0381 max mem: 21002 Epoch: [264] [310/312] eta: 0:00:01 lr: 0.000153 min_lr: 0.000153 loss: 3.8934 (3.6370) weight_decay: 0.0500 (0.0500) time: 0.2911 data: 0.0144 max mem: 21002 Epoch: [264] [311/312] eta: 0:00:00 lr: 0.000153 min_lr: 0.000153 loss: 3.8924 (3.6378) weight_decay: 0.0500 (0.0500) time: 0.2910 data: 0.0144 max mem: 21002 Epoch: [264] Total time: 0:02:51 (0.5489 s / it) Averaged stats: lr: 0.000153 min_lr: 0.000153 loss: 3.8924 (3.5976) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.1840 (1.1840) acc1: 80.2083 (80.2083) acc5: 95.1823 (95.1823) time: 8.8237 data: 8.7050 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.5042 (1.4067) acc1: 73.3073 (73.3760) acc5: 90.8854 (91.3920) time: 1.0740 data: 0.9673 max mem: 21002 Test: Total time: 0:00:09 (1.1044 s / it) * Acc@1 72.894 Acc@5 91.658 loss 1.411 Accuracy of the model on the 50000 test images: 72.9% Max accuracy: 72.92% Epoch: [265] [ 0/312] eta: 1:05:03 lr: 0.000153 min_lr: 0.000153 loss: 3.9088 (3.9088) weight_decay: 0.0500 (0.0500) time: 12.5104 data: 12.2171 max mem: 21002 Epoch: [265] [ 10/312] eta: 0:08:17 lr: 0.000153 min_lr: 0.000153 loss: 3.8821 (3.5967) weight_decay: 0.0500 (0.0500) time: 1.6472 data: 1.1112 max mem: 21002 Epoch: [265] [ 20/312] eta: 0:05:00 lr: 0.000153 min_lr: 0.000153 loss: 3.8848 (3.7710) weight_decay: 0.0500 (0.0500) time: 0.4561 data: 0.0228 max mem: 21002 Epoch: [265] [ 30/312] eta: 0:03:42 lr: 0.000152 min_lr: 0.000152 loss: 3.7172 (3.6437) weight_decay: 0.0500 (0.0500) time: 0.3195 data: 0.0229 max mem: 21002 Epoch: [265] [ 40/312] eta: 0:03:07 lr: 0.000152 min_lr: 0.000152 loss: 3.6780 (3.6756) weight_decay: 0.0500 (0.0500) time: 0.3323 data: 0.0196 max mem: 21002 Epoch: [265] [ 50/312] eta: 0:02:59 lr: 0.000152 min_lr: 0.000152 loss: 3.8151 (3.6741) weight_decay: 0.0500 (0.0500) time: 0.5196 data: 0.1536 max mem: 21002 Epoch: [265] [ 60/312] eta: 0:02:36 lr: 0.000152 min_lr: 0.000152 loss: 3.7430 (3.6496) weight_decay: 0.0500 (0.0500) time: 0.4760 data: 0.1349 max mem: 21002 Epoch: [265] [ 70/312] eta: 0:02:32 lr: 0.000151 min_lr: 0.000151 loss: 3.7430 (3.6497) weight_decay: 0.0500 (0.0500) time: 0.4948 data: 0.1485 max mem: 21002 Epoch: [265] [ 80/312] eta: 0:02:24 lr: 0.000151 min_lr: 0.000151 loss: 3.7273 (3.6546) weight_decay: 0.0500 (0.0500) time: 0.6325 data: 0.2277 max mem: 21002 Epoch: [265] [ 90/312] eta: 0:02:14 lr: 0.000151 min_lr: 0.000151 loss: 3.6851 (3.6397) weight_decay: 0.0500 (0.0500) time: 0.5089 data: 0.1100 max mem: 21002 Epoch: [265] [100/312] eta: 0:02:09 lr: 0.000150 min_lr: 0.000150 loss: 3.4387 (3.6088) weight_decay: 0.0500 (0.0500) time: 0.5558 data: 0.0864 max mem: 21002 Epoch: [265] [110/312] eta: 0:01:58 lr: 0.000150 min_lr: 0.000150 loss: 3.3358 (3.5972) weight_decay: 0.0500 (0.0500) time: 0.4977 data: 0.0826 max mem: 21002 Epoch: [265] [120/312] eta: 0:01:54 lr: 0.000150 min_lr: 0.000150 loss: 3.5990 (3.5928) weight_decay: 0.0500 (0.0500) time: 0.5245 data: 0.0795 max mem: 21002 Epoch: [265] [130/312] eta: 0:01:49 lr: 0.000150 min_lr: 0.000150 loss: 3.6845 (3.6015) weight_decay: 0.0500 (0.0500) time: 0.6864 data: 0.0860 max mem: 21002 Epoch: [265] [140/312] eta: 0:01:39 lr: 0.000149 min_lr: 0.000149 loss: 3.6886 (3.6058) weight_decay: 0.0500 (0.0500) time: 0.4805 data: 0.0376 max mem: 21002 Epoch: [265] [150/312] eta: 0:01:36 lr: 0.000149 min_lr: 0.000149 loss: 3.6302 (3.5902) weight_decay: 0.0500 (0.0500) time: 0.5418 data: 0.0551 max mem: 21002 Epoch: [265] [160/312] eta: 0:01:28 lr: 0.000149 min_lr: 0.000149 loss: 3.0942 (3.5600) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0688 max mem: 21002 Epoch: [265] [170/312] eta: 0:01:22 lr: 0.000149 min_lr: 0.000149 loss: 3.2970 (3.5645) weight_decay: 0.0500 (0.0500) time: 0.4749 data: 0.0188 max mem: 21002 Epoch: [265] [180/312] eta: 0:01:17 lr: 0.000148 min_lr: 0.000148 loss: 3.6498 (3.5650) weight_decay: 0.0500 (0.0500) time: 0.6392 data: 0.0059 max mem: 21002 Epoch: [265] [190/312] eta: 0:01:09 lr: 0.000148 min_lr: 0.000148 loss: 3.5223 (3.5626) weight_decay: 0.0500 (0.0500) time: 0.5115 data: 0.0122 max mem: 21002 Epoch: [265] [200/312] eta: 0:01:05 lr: 0.000148 min_lr: 0.000148 loss: 3.4987 (3.5595) weight_decay: 0.0500 (0.0500) time: 0.5283 data: 0.0200 max mem: 21002 Epoch: [265] [210/312] eta: 0:00:59 lr: 0.000147 min_lr: 0.000147 loss: 3.7823 (3.5715) weight_decay: 0.0500 (0.0500) time: 0.7182 data: 0.0375 max mem: 21002 Epoch: [265] [220/312] eta: 0:00:52 lr: 0.000147 min_lr: 0.000147 loss: 3.9039 (3.5770) weight_decay: 0.0500 (0.0500) time: 0.5044 data: 0.0426 max mem: 21002 Epoch: [265] [230/312] eta: 0:00:47 lr: 0.000147 min_lr: 0.000147 loss: 3.6411 (3.5641) weight_decay: 0.0500 (0.0500) time: 0.5191 data: 0.0567 max mem: 21002 Epoch: [265] [240/312] eta: 0:00:41 lr: 0.000147 min_lr: 0.000147 loss: 3.6411 (3.5744) weight_decay: 0.0500 (0.0500) time: 0.5316 data: 0.0686 max mem: 21002 Epoch: [265] [250/312] eta: 0:00:35 lr: 0.000146 min_lr: 0.000146 loss: 3.8825 (3.5868) weight_decay: 0.0500 (0.0500) time: 0.4981 data: 0.0494 max mem: 21002 Epoch: [265] [260/312] eta: 0:00:29 lr: 0.000146 min_lr: 0.000146 loss: 3.8927 (3.6000) weight_decay: 0.0500 (0.0500) time: 0.6421 data: 0.0385 max mem: 21002 Epoch: [265] [270/312] eta: 0:00:23 lr: 0.000146 min_lr: 0.000146 loss: 3.8927 (3.6073) weight_decay: 0.0500 (0.0500) time: 0.4835 data: 0.0391 max mem: 21002 Epoch: [265] [280/312] eta: 0:00:18 lr: 0.000146 min_lr: 0.000146 loss: 3.8417 (3.6088) weight_decay: 0.0500 (0.0500) time: 0.4845 data: 0.0724 max mem: 21002 Epoch: [265] [290/312] eta: 0:00:12 lr: 0.000145 min_lr: 0.000145 loss: 3.7883 (3.6104) weight_decay: 0.0500 (0.0500) time: 0.5683 data: 0.0729 max mem: 21002 Epoch: [265] [300/312] eta: 0:00:06 lr: 0.000145 min_lr: 0.000145 loss: 3.7060 (3.6060) weight_decay: 0.0500 (0.0500) time: 0.3875 data: 0.0204 max mem: 21002 Epoch: [265] [310/312] eta: 0:00:01 lr: 0.000145 min_lr: 0.000145 loss: 3.5791 (3.6032) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [265] [311/312] eta: 0:00:00 lr: 0.000145 min_lr: 0.000145 loss: 3.5791 (3.6011) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [265] Total time: 0:02:51 (0.5512 s / it) Averaged stats: lr: 0.000145 min_lr: 0.000145 loss: 3.5791 (3.6195) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 0.9893 (0.9893) acc1: 81.3802 (81.3802) acc5: 95.4427 (95.4427) time: 8.9483 data: 8.8296 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3280 (1.2408) acc1: 72.1354 (73.5040) acc5: 91.7969 (91.9680) time: 1.0832 data: 0.9811 max mem: 21002 Test: Total time: 0:00:10 (1.1202 s / it) * Acc@1 73.426 Acc@5 91.856 loss 1.245 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 73.43% Epoch: [266] [ 0/312] eta: 1:02:44 lr: 0.000145 min_lr: 0.000145 loss: 4.3335 (4.3335) weight_decay: 0.0500 (0.0500) time: 12.0670 data: 9.5352 max mem: 21002 Epoch: [266] [ 10/312] eta: 0:07:44 lr: 0.000144 min_lr: 0.000144 loss: 3.8490 (3.7454) weight_decay: 0.0500 (0.0500) time: 1.5364 data: 1.0466 max mem: 21002 Epoch: [266] [ 20/312] eta: 0:04:58 lr: 0.000144 min_lr: 0.000144 loss: 3.6954 (3.6629) weight_decay: 0.0500 (0.0500) time: 0.4716 data: 0.1878 max mem: 21002 Epoch: [266] [ 30/312] eta: 0:03:41 lr: 0.000144 min_lr: 0.000144 loss: 3.7705 (3.6773) weight_decay: 0.0500 (0.0500) time: 0.3747 data: 0.0892 max mem: 21002 Epoch: [266] [ 40/312] eta: 0:03:23 lr: 0.000144 min_lr: 0.000144 loss: 3.5796 (3.6497) weight_decay: 0.0500 (0.0500) time: 0.4550 data: 0.1614 max mem: 21002 Epoch: [266] [ 50/312] eta: 0:03:10 lr: 0.000143 min_lr: 0.000143 loss: 3.5276 (3.6285) weight_decay: 0.0500 (0.0500) time: 0.6357 data: 0.3442 max mem: 21002 Epoch: [266] [ 60/312] eta: 0:02:45 lr: 0.000143 min_lr: 0.000143 loss: 3.5276 (3.6160) weight_decay: 0.0500 (0.0500) time: 0.4682 data: 0.1834 max mem: 21002 Epoch: [266] [ 70/312] eta: 0:02:37 lr: 0.000143 min_lr: 0.000143 loss: 3.6854 (3.6061) weight_decay: 0.0500 (0.0500) time: 0.4565 data: 0.1702 max mem: 21002 Epoch: [266] [ 80/312] eta: 0:02:30 lr: 0.000143 min_lr: 0.000143 loss: 3.7909 (3.6283) weight_decay: 0.0500 (0.0500) time: 0.6299 data: 0.3450 max mem: 21002 Epoch: [266] [ 90/312] eta: 0:02:15 lr: 0.000142 min_lr: 0.000142 loss: 3.8219 (3.6412) weight_decay: 0.0500 (0.0500) time: 0.4682 data: 0.1826 max mem: 21002 Epoch: [266] [100/312] eta: 0:02:10 lr: 0.000142 min_lr: 0.000142 loss: 3.8119 (3.6224) weight_decay: 0.0500 (0.0500) time: 0.4900 data: 0.1671 max mem: 21002 Epoch: [266] [110/312] eta: 0:01:58 lr: 0.000142 min_lr: 0.000142 loss: 3.5849 (3.5999) weight_decay: 0.0500 (0.0500) time: 0.4802 data: 0.1606 max mem: 21002 Epoch: [266] [120/312] eta: 0:01:54 lr: 0.000142 min_lr: 0.000142 loss: 3.5627 (3.5978) weight_decay: 0.0500 (0.0500) time: 0.4992 data: 0.1460 max mem: 21002 Epoch: [266] [130/312] eta: 0:01:49 lr: 0.000141 min_lr: 0.000141 loss: 3.7534 (3.6045) weight_decay: 0.0500 (0.0500) time: 0.6626 data: 0.2727 max mem: 21002 Epoch: [266] [140/312] eta: 0:01:39 lr: 0.000141 min_lr: 0.000141 loss: 3.5795 (3.5924) weight_decay: 0.0500 (0.0500) time: 0.4483 data: 0.1281 max mem: 21002 Epoch: [266] [150/312] eta: 0:01:34 lr: 0.000141 min_lr: 0.000141 loss: 3.4631 (3.5781) weight_decay: 0.0500 (0.0500) time: 0.5039 data: 0.1433 max mem: 21002 Epoch: [266] [160/312] eta: 0:01:28 lr: 0.000140 min_lr: 0.000140 loss: 3.6549 (3.5900) weight_decay: 0.0500 (0.0500) time: 0.6185 data: 0.2190 max mem: 21002 Epoch: [266] [170/312] eta: 0:01:21 lr: 0.000140 min_lr: 0.000140 loss: 3.8221 (3.6109) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.1145 max mem: 21002 Epoch: [266] [180/312] eta: 0:01:16 lr: 0.000140 min_lr: 0.000140 loss: 3.8918 (3.6137) weight_decay: 0.0500 (0.0500) time: 0.5684 data: 0.1436 max mem: 21002 Epoch: [266] [190/312] eta: 0:01:08 lr: 0.000140 min_lr: 0.000140 loss: 3.6102 (3.6023) weight_decay: 0.0500 (0.0500) time: 0.4847 data: 0.1056 max mem: 21002 Epoch: [266] [200/312] eta: 0:01:03 lr: 0.000139 min_lr: 0.000139 loss: 3.2568 (3.5896) weight_decay: 0.0500 (0.0500) time: 0.4735 data: 0.1426 max mem: 21002 Epoch: [266] [210/312] eta: 0:00:58 lr: 0.000139 min_lr: 0.000139 loss: 3.6082 (3.5956) weight_decay: 0.0500 (0.0500) time: 0.6303 data: 0.2999 max mem: 21002 Epoch: [266] [220/312] eta: 0:00:51 lr: 0.000139 min_lr: 0.000139 loss: 3.8490 (3.5965) weight_decay: 0.0500 (0.0500) time: 0.4442 data: 0.1581 max mem: 21002 Epoch: [266] [230/312] eta: 0:00:46 lr: 0.000139 min_lr: 0.000139 loss: 3.4182 (3.5813) weight_decay: 0.0500 (0.0500) time: 0.4788 data: 0.1629 max mem: 21002 Epoch: [266] [240/312] eta: 0:00:40 lr: 0.000138 min_lr: 0.000138 loss: 3.1601 (3.5733) weight_decay: 0.0500 (0.0500) time: 0.6520 data: 0.3364 max mem: 21002 Epoch: [266] [250/312] eta: 0:00:34 lr: 0.000138 min_lr: 0.000138 loss: 3.5391 (3.5665) weight_decay: 0.0500 (0.0500) time: 0.4594 data: 0.1742 max mem: 21002 Epoch: [266] [260/312] eta: 0:00:29 lr: 0.000138 min_lr: 0.000138 loss: 3.2238 (3.5492) weight_decay: 0.0500 (0.0500) time: 0.5126 data: 0.2120 max mem: 21002 Epoch: [266] [270/312] eta: 0:00:23 lr: 0.000138 min_lr: 0.000138 loss: 3.1979 (3.5438) weight_decay: 0.0500 (0.0500) time: 0.5124 data: 0.2122 max mem: 21002 Epoch: [266] [280/312] eta: 0:00:17 lr: 0.000137 min_lr: 0.000137 loss: 3.5763 (3.5459) weight_decay: 0.0500 (0.0500) time: 0.4626 data: 0.1560 max mem: 21002 Epoch: [266] [290/312] eta: 0:00:12 lr: 0.000137 min_lr: 0.000137 loss: 3.7155 (3.5439) weight_decay: 0.0500 (0.0500) time: 0.6330 data: 0.2836 max mem: 21002 Epoch: [266] [300/312] eta: 0:00:06 lr: 0.000137 min_lr: 0.000137 loss: 3.7155 (3.5448) weight_decay: 0.0500 (0.0500) time: 0.4520 data: 0.1283 max mem: 21002 Epoch: [266] [310/312] eta: 0:00:01 lr: 0.000137 min_lr: 0.000137 loss: 3.7442 (3.5473) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [266] [311/312] eta: 0:00:00 lr: 0.000137 min_lr: 0.000137 loss: 3.7203 (3.5476) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [266] Total time: 0:02:49 (0.5423 s / it) Averaged stats: lr: 0.000137 min_lr: 0.000137 loss: 3.7203 (3.5841) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.1084 (1.1084) acc1: 80.5990 (80.5990) acc5: 95.4427 (95.4427) time: 8.7021 data: 8.5832 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4036 (1.3195) acc1: 73.5677 (73.7600) acc5: 91.7969 (91.7120) time: 1.0521 data: 0.9538 max mem: 21002 Test: Total time: 0:00:09 (1.0674 s / it) * Acc@1 73.372 Acc@5 91.692 loss 1.327 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 73.43% Epoch: [267] [ 0/312] eta: 1:02:12 lr: 0.000136 min_lr: 0.000136 loss: 3.1321 (3.1321) weight_decay: 0.0500 (0.0500) time: 11.9624 data: 8.8726 max mem: 21002 Epoch: [267] [ 10/312] eta: 0:08:50 lr: 0.000136 min_lr: 0.000136 loss: 3.8266 (3.7410) weight_decay: 0.0500 (0.0500) time: 1.7570 data: 1.1047 max mem: 21002 Epoch: [267] [ 20/312] eta: 0:05:10 lr: 0.000136 min_lr: 0.000136 loss: 3.6187 (3.5736) weight_decay: 0.0500 (0.0500) time: 0.5192 data: 0.1735 max mem: 21002 Epoch: [267] [ 30/312] eta: 0:03:50 lr: 0.000136 min_lr: 0.000136 loss: 3.8184 (3.7144) weight_decay: 0.0500 (0.0500) time: 0.2994 data: 0.0098 max mem: 21002 Epoch: [267] [ 40/312] eta: 0:03:07 lr: 0.000135 min_lr: 0.000135 loss: 3.8266 (3.6832) weight_decay: 0.0500 (0.0500) time: 0.2943 data: 0.0006 max mem: 21002 Epoch: [267] [ 50/312] eta: 0:02:54 lr: 0.000135 min_lr: 0.000135 loss: 3.7865 (3.6680) weight_decay: 0.0500 (0.0500) time: 0.4334 data: 0.0525 max mem: 21002 Epoch: [267] [ 60/312] eta: 0:02:39 lr: 0.000135 min_lr: 0.000135 loss: 3.6645 (3.6263) weight_decay: 0.0500 (0.0500) time: 0.5246 data: 0.1002 max mem: 21002 Epoch: [267] [ 70/312] eta: 0:02:30 lr: 0.000135 min_lr: 0.000135 loss: 3.6645 (3.6211) weight_decay: 0.0500 (0.0500) time: 0.5138 data: 0.0881 max mem: 21002 Epoch: [267] [ 80/312] eta: 0:02:23 lr: 0.000134 min_lr: 0.000134 loss: 3.7298 (3.6100) weight_decay: 0.0500 (0.0500) time: 0.5631 data: 0.1027 max mem: 21002 Epoch: [267] [ 90/312] eta: 0:02:13 lr: 0.000134 min_lr: 0.000134 loss: 3.7169 (3.5890) weight_decay: 0.0500 (0.0500) time: 0.5213 data: 0.1315 max mem: 21002 Epoch: [267] [100/312] eta: 0:02:06 lr: 0.000134 min_lr: 0.000134 loss: 3.2267 (3.5612) weight_decay: 0.0500 (0.0500) time: 0.5218 data: 0.1075 max mem: 21002 Epoch: [267] [110/312] eta: 0:01:59 lr: 0.000134 min_lr: 0.000134 loss: 3.2267 (3.5490) weight_decay: 0.0500 (0.0500) time: 0.5483 data: 0.0822 max mem: 21002 Epoch: [267] [120/312] eta: 0:01:51 lr: 0.000133 min_lr: 0.000133 loss: 3.5801 (3.5567) weight_decay: 0.0500 (0.0500) time: 0.5023 data: 0.0554 max mem: 21002 Epoch: [267] [130/312] eta: 0:01:46 lr: 0.000133 min_lr: 0.000133 loss: 3.8087 (3.5697) weight_decay: 0.0500 (0.0500) time: 0.5432 data: 0.0591 max mem: 21002 Epoch: [267] [140/312] eta: 0:01:40 lr: 0.000133 min_lr: 0.000133 loss: 3.8255 (3.5836) weight_decay: 0.0500 (0.0500) time: 0.5989 data: 0.0697 max mem: 21002 Epoch: [267] [150/312] eta: 0:01:33 lr: 0.000133 min_lr: 0.000133 loss: 3.9414 (3.5941) weight_decay: 0.0500 (0.0500) time: 0.5274 data: 0.0679 max mem: 21002 Epoch: [267] [160/312] eta: 0:01:27 lr: 0.000132 min_lr: 0.000132 loss: 3.6693 (3.5947) weight_decay: 0.0500 (0.0500) time: 0.5331 data: 0.0983 max mem: 21002 Epoch: [267] [170/312] eta: 0:01:21 lr: 0.000132 min_lr: 0.000132 loss: 3.6693 (3.5866) weight_decay: 0.0500 (0.0500) time: 0.5545 data: 0.0851 max mem: 21002 Epoch: [267] [180/312] eta: 0:01:15 lr: 0.000132 min_lr: 0.000132 loss: 3.8087 (3.5800) weight_decay: 0.0500 (0.0500) time: 0.5054 data: 0.0860 max mem: 21002 Epoch: [267] [190/312] eta: 0:01:09 lr: 0.000132 min_lr: 0.000132 loss: 3.4498 (3.5746) weight_decay: 0.0500 (0.0500) time: 0.5604 data: 0.0851 max mem: 21002 Epoch: [267] [200/312] eta: 0:01:03 lr: 0.000131 min_lr: 0.000131 loss: 3.5142 (3.5788) weight_decay: 0.0500 (0.0500) time: 0.5406 data: 0.0658 max mem: 21002 Epoch: [267] [210/312] eta: 0:00:57 lr: 0.000131 min_lr: 0.000131 loss: 3.4480 (3.5729) weight_decay: 0.0500 (0.0500) time: 0.5202 data: 0.0834 max mem: 21002 Epoch: [267] [220/312] eta: 0:00:52 lr: 0.000131 min_lr: 0.000131 loss: 3.6915 (3.5837) weight_decay: 0.0500 (0.0500) time: 0.6090 data: 0.0625 max mem: 21002 Epoch: [267] [230/312] eta: 0:00:46 lr: 0.000131 min_lr: 0.000131 loss: 3.7430 (3.5805) weight_decay: 0.0500 (0.0500) time: 0.5259 data: 0.0643 max mem: 21002 Epoch: [267] [240/312] eta: 0:00:40 lr: 0.000130 min_lr: 0.000130 loss: 3.5969 (3.5828) weight_decay: 0.0500 (0.0500) time: 0.5211 data: 0.0858 max mem: 21002 Epoch: [267] [250/312] eta: 0:00:34 lr: 0.000130 min_lr: 0.000130 loss: 3.7867 (3.5825) weight_decay: 0.0500 (0.0500) time: 0.5068 data: 0.0419 max mem: 21002 Epoch: [267] [260/312] eta: 0:00:29 lr: 0.000130 min_lr: 0.000130 loss: 3.6494 (3.5843) weight_decay: 0.0500 (0.0500) time: 0.4936 data: 0.0393 max mem: 21002 Epoch: [267] [270/312] eta: 0:00:23 lr: 0.000130 min_lr: 0.000130 loss: 3.7890 (3.5982) weight_decay: 0.0500 (0.0500) time: 0.6068 data: 0.0558 max mem: 21002 Epoch: [267] [280/312] eta: 0:00:17 lr: 0.000129 min_lr: 0.000129 loss: 3.9424 (3.5979) weight_decay: 0.0500 (0.0500) time: 0.5611 data: 0.0519 max mem: 21002 Epoch: [267] [290/312] eta: 0:00:12 lr: 0.000129 min_lr: 0.000129 loss: 3.8031 (3.6036) weight_decay: 0.0500 (0.0500) time: 0.5213 data: 0.0799 max mem: 21002 Epoch: [267] [300/312] eta: 0:00:06 lr: 0.000129 min_lr: 0.000129 loss: 3.4498 (3.5848) weight_decay: 0.0500 (0.0500) time: 0.4874 data: 0.0603 max mem: 21002 Epoch: [267] [310/312] eta: 0:00:01 lr: 0.000129 min_lr: 0.000129 loss: 3.2499 (3.5802) weight_decay: 0.0500 (0.0500) time: 0.3631 data: 0.0090 max mem: 21002 Epoch: [267] [311/312] eta: 0:00:00 lr: 0.000129 min_lr: 0.000129 loss: 3.2499 (3.5804) weight_decay: 0.0500 (0.0500) time: 0.3628 data: 0.0090 max mem: 21002 Epoch: [267] Total time: 0:02:51 (0.5504 s / it) Averaged stats: lr: 0.000129 min_lr: 0.000129 loss: 3.2499 (3.5898) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.0077 (1.0077) acc1: 81.2500 (81.2500) acc5: 95.8333 (95.8333) time: 8.7994 data: 8.6807 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3176 (1.2585) acc1: 72.7865 (73.6800) acc5: 91.4062 (91.9840) time: 1.0651 data: 0.9646 max mem: 21002 Test: Total time: 0:00:10 (1.1142 s / it) * Acc@1 73.386 Acc@5 91.914 loss 1.265 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 73.43% Epoch: [268] [ 0/312] eta: 1:00:50 lr: 0.000128 min_lr: 0.000128 loss: 3.8452 (3.8452) weight_decay: 0.0500 (0.0500) time: 11.6994 data: 10.7288 max mem: 21002 Epoch: [268] [ 10/312] eta: 0:07:39 lr: 0.000128 min_lr: 0.000128 loss: 3.5264 (3.2814) weight_decay: 0.0500 (0.0500) time: 1.5208 data: 1.0727 max mem: 21002 Epoch: [268] [ 20/312] eta: 0:05:00 lr: 0.000128 min_lr: 0.000128 loss: 3.5706 (3.4466) weight_decay: 0.0500 (0.0500) time: 0.4966 data: 0.1568 max mem: 21002 Epoch: [268] [ 30/312] eta: 0:03:43 lr: 0.000128 min_lr: 0.000128 loss: 3.6286 (3.4919) weight_decay: 0.0500 (0.0500) time: 0.3898 data: 0.1036 max mem: 21002 Epoch: [268] [ 40/312] eta: 0:03:17 lr: 0.000127 min_lr: 0.000127 loss: 3.6467 (3.4902) weight_decay: 0.0500 (0.0500) time: 0.4065 data: 0.1197 max mem: 21002 Epoch: [268] [ 50/312] eta: 0:03:08 lr: 0.000127 min_lr: 0.000127 loss: 3.6467 (3.5150) weight_decay: 0.0500 (0.0500) time: 0.6114 data: 0.2881 max mem: 21002 Epoch: [268] [ 60/312] eta: 0:02:43 lr: 0.000127 min_lr: 0.000127 loss: 3.4519 (3.4829) weight_decay: 0.0500 (0.0500) time: 0.4932 data: 0.1690 max mem: 21002 Epoch: [268] [ 70/312] eta: 0:02:36 lr: 0.000127 min_lr: 0.000127 loss: 3.5082 (3.5091) weight_decay: 0.0500 (0.0500) time: 0.4621 data: 0.1539 max mem: 21002 Epoch: [268] [ 80/312] eta: 0:02:30 lr: 0.000126 min_lr: 0.000126 loss: 3.7592 (3.5053) weight_decay: 0.0500 (0.0500) time: 0.6511 data: 0.3348 max mem: 21002 Epoch: [268] [ 90/312] eta: 0:02:17 lr: 0.000126 min_lr: 0.000126 loss: 3.4259 (3.4864) weight_decay: 0.0500 (0.0500) time: 0.5105 data: 0.2156 max mem: 21002 Epoch: [268] [100/312] eta: 0:02:12 lr: 0.000126 min_lr: 0.000126 loss: 3.5081 (3.4773) weight_decay: 0.0500 (0.0500) time: 0.5285 data: 0.1983 max mem: 21002 Epoch: [268] [110/312] eta: 0:02:00 lr: 0.000126 min_lr: 0.000126 loss: 3.5755 (3.4855) weight_decay: 0.0500 (0.0500) time: 0.4954 data: 0.1641 max mem: 21002 Epoch: [268] [120/312] eta: 0:01:55 lr: 0.000125 min_lr: 0.000125 loss: 3.6802 (3.4811) weight_decay: 0.0500 (0.0500) time: 0.4910 data: 0.1236 max mem: 21002 Epoch: [268] [130/312] eta: 0:01:50 lr: 0.000125 min_lr: 0.000125 loss: 3.6802 (3.5027) weight_decay: 0.0500 (0.0500) time: 0.6669 data: 0.2559 max mem: 21002 Epoch: [268] [140/312] eta: 0:01:40 lr: 0.000125 min_lr: 0.000125 loss: 3.8275 (3.5106) weight_decay: 0.0500 (0.0500) time: 0.4635 data: 0.1328 max mem: 21002 Epoch: [268] [150/312] eta: 0:01:36 lr: 0.000125 min_lr: 0.000125 loss: 3.5602 (3.5030) weight_decay: 0.0500 (0.0500) time: 0.5307 data: 0.1693 max mem: 21002 Epoch: [268] [160/312] eta: 0:01:29 lr: 0.000124 min_lr: 0.000124 loss: 3.4545 (3.4974) weight_decay: 0.0500 (0.0500) time: 0.6388 data: 0.2363 max mem: 21002 Epoch: [268] [170/312] eta: 0:01:21 lr: 0.000124 min_lr: 0.000124 loss: 3.4545 (3.4924) weight_decay: 0.0500 (0.0500) time: 0.4264 data: 0.0964 max mem: 21002 Epoch: [268] [180/312] eta: 0:01:16 lr: 0.000124 min_lr: 0.000124 loss: 3.7358 (3.4983) weight_decay: 0.0500 (0.0500) time: 0.5058 data: 0.1815 max mem: 21002 Epoch: [268] [190/312] eta: 0:01:09 lr: 0.000124 min_lr: 0.000124 loss: 3.7427 (3.5124) weight_decay: 0.0500 (0.0500) time: 0.4732 data: 0.1527 max mem: 21002 Epoch: [268] [200/312] eta: 0:01:04 lr: 0.000123 min_lr: 0.000123 loss: 3.7180 (3.5162) weight_decay: 0.0500 (0.0500) time: 0.5025 data: 0.1831 max mem: 21002 Epoch: [268] [210/312] eta: 0:00:59 lr: 0.000123 min_lr: 0.000123 loss: 3.5351 (3.5135) weight_decay: 0.0500 (0.0500) time: 0.7067 data: 0.3250 max mem: 21002 Epoch: [268] [220/312] eta: 0:00:52 lr: 0.000123 min_lr: 0.000123 loss: 3.4878 (3.5122) weight_decay: 0.0500 (0.0500) time: 0.4894 data: 0.1429 max mem: 21002 Epoch: [268] [230/312] eta: 0:00:46 lr: 0.000123 min_lr: 0.000123 loss: 3.7260 (3.5147) weight_decay: 0.0500 (0.0500) time: 0.4896 data: 0.1236 max mem: 21002 Epoch: [268] [240/312] eta: 0:00:41 lr: 0.000123 min_lr: 0.000123 loss: 3.6443 (3.5176) weight_decay: 0.0500 (0.0500) time: 0.6360 data: 0.1799 max mem: 21002 Epoch: [268] [250/312] eta: 0:00:35 lr: 0.000122 min_lr: 0.000122 loss: 3.3092 (3.5136) weight_decay: 0.0500 (0.0500) time: 0.5073 data: 0.1337 max mem: 21002 Epoch: [268] [260/312] eta: 0:00:29 lr: 0.000122 min_lr: 0.000122 loss: 3.3153 (3.5133) weight_decay: 0.0500 (0.0500) time: 0.5550 data: 0.1977 max mem: 21002 Epoch: [268] [270/312] eta: 0:00:23 lr: 0.000122 min_lr: 0.000122 loss: 3.6701 (3.5122) weight_decay: 0.0500 (0.0500) time: 0.4799 data: 0.1214 max mem: 21002 Epoch: [268] [280/312] eta: 0:00:18 lr: 0.000122 min_lr: 0.000122 loss: 3.8565 (3.5242) weight_decay: 0.0500 (0.0500) time: 0.4878 data: 0.1084 max mem: 21002 Epoch: [268] [290/312] eta: 0:00:12 lr: 0.000121 min_lr: 0.000121 loss: 3.8565 (3.5248) weight_decay: 0.0500 (0.0500) time: 0.6034 data: 0.1666 max mem: 21002 Epoch: [268] [300/312] eta: 0:00:06 lr: 0.000121 min_lr: 0.000121 loss: 3.6081 (3.5250) weight_decay: 0.0500 (0.0500) time: 0.3983 data: 0.0587 max mem: 21002 Epoch: [268] [310/312] eta: 0:00:01 lr: 0.000121 min_lr: 0.000121 loss: 3.4810 (3.5199) weight_decay: 0.0500 (0.0500) time: 0.2776 data: 0.0001 max mem: 21002 Epoch: [268] [311/312] eta: 0:00:00 lr: 0.000121 min_lr: 0.000121 loss: 3.5202 (3.5207) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [268] Total time: 0:02:50 (0.5473 s / it) Averaged stats: lr: 0.000121 min_lr: 0.000121 loss: 3.5202 (3.5960) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.0071 (1.0071) acc1: 82.1615 (82.1615) acc5: 95.5729 (95.5729) time: 8.8362 data: 8.7176 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3402 (1.2585) acc1: 72.2656 (73.8560) acc5: 91.4062 (91.6320) time: 1.0673 data: 0.9687 max mem: 21002 Test: Total time: 0:00:09 (1.0783 s / it) * Acc@1 73.342 Acc@5 91.832 loss 1.267 Accuracy of the model on the 50000 test images: 73.3% Max accuracy: 73.43% Epoch: [269] [ 0/312] eta: 1:01:40 lr: 0.000121 min_lr: 0.000121 loss: 3.3006 (3.3006) weight_decay: 0.0500 (0.0500) time: 11.8612 data: 10.9832 max mem: 21002 Epoch: [269] [ 10/312] eta: 0:08:03 lr: 0.000120 min_lr: 0.000120 loss: 3.8350 (3.6855) weight_decay: 0.0500 (0.0500) time: 1.6009 data: 0.9990 max mem: 21002 Epoch: [269] [ 20/312] eta: 0:05:08 lr: 0.000120 min_lr: 0.000120 loss: 3.7593 (3.6140) weight_decay: 0.0500 (0.0500) time: 0.5146 data: 0.0868 max mem: 21002 Epoch: [269] [ 30/312] eta: 0:03:48 lr: 0.000120 min_lr: 0.000120 loss: 3.6616 (3.5713) weight_decay: 0.0500 (0.0500) time: 0.3737 data: 0.0868 max mem: 21002 Epoch: [269] [ 40/312] eta: 0:03:16 lr: 0.000120 min_lr: 0.000120 loss: 3.6616 (3.5946) weight_decay: 0.0500 (0.0500) time: 0.3744 data: 0.0778 max mem: 21002 Epoch: [269] [ 50/312] eta: 0:03:04 lr: 0.000120 min_lr: 0.000120 loss: 3.8604 (3.6519) weight_decay: 0.0500 (0.0500) time: 0.5448 data: 0.2287 max mem: 21002 Epoch: [269] [ 60/312] eta: 0:02:42 lr: 0.000119 min_lr: 0.000119 loss: 3.8604 (3.6593) weight_decay: 0.0500 (0.0500) time: 0.4845 data: 0.1515 max mem: 21002 Epoch: [269] [ 70/312] eta: 0:02:37 lr: 0.000119 min_lr: 0.000119 loss: 3.7876 (3.6696) weight_decay: 0.0500 (0.0500) time: 0.5085 data: 0.1704 max mem: 21002 Epoch: [269] [ 80/312] eta: 0:02:29 lr: 0.000119 min_lr: 0.000119 loss: 3.8730 (3.6975) weight_decay: 0.0500 (0.0500) time: 0.6443 data: 0.3322 max mem: 21002 Epoch: [269] [ 90/312] eta: 0:02:16 lr: 0.000119 min_lr: 0.000119 loss: 3.7051 (3.6713) weight_decay: 0.0500 (0.0500) time: 0.4890 data: 0.1624 max mem: 21002 Epoch: [269] [100/312] eta: 0:02:11 lr: 0.000118 min_lr: 0.000118 loss: 3.7003 (3.6754) weight_decay: 0.0500 (0.0500) time: 0.5264 data: 0.1507 max mem: 21002 Epoch: [269] [110/312] eta: 0:01:59 lr: 0.000118 min_lr: 0.000118 loss: 3.5530 (3.6520) weight_decay: 0.0500 (0.0500) time: 0.4826 data: 0.1506 max mem: 21002 Epoch: [269] [120/312] eta: 0:01:54 lr: 0.000118 min_lr: 0.000118 loss: 3.7181 (3.6611) weight_decay: 0.0500 (0.0500) time: 0.4776 data: 0.1501 max mem: 21002 Epoch: [269] [130/312] eta: 0:01:49 lr: 0.000118 min_lr: 0.000118 loss: 3.7905 (3.6580) weight_decay: 0.0500 (0.0500) time: 0.6466 data: 0.2923 max mem: 21002 Epoch: [269] [140/312] eta: 0:01:40 lr: 0.000117 min_lr: 0.000117 loss: 3.7340 (3.6690) weight_decay: 0.0500 (0.0500) time: 0.5051 data: 0.1429 max mem: 21002 Epoch: [269] [150/312] eta: 0:01:35 lr: 0.000117 min_lr: 0.000117 loss: 3.6788 (3.6549) weight_decay: 0.0500 (0.0500) time: 0.5389 data: 0.1430 max mem: 21002 Epoch: [269] [160/312] eta: 0:01:29 lr: 0.000117 min_lr: 0.000117 loss: 3.6735 (3.6546) weight_decay: 0.0500 (0.0500) time: 0.5997 data: 0.2456 max mem: 21002 Epoch: [269] [170/312] eta: 0:01:22 lr: 0.000117 min_lr: 0.000117 loss: 3.8736 (3.6589) weight_decay: 0.0500 (0.0500) time: 0.4779 data: 0.1035 max mem: 21002 Epoch: [269] [180/312] eta: 0:01:16 lr: 0.000116 min_lr: 0.000116 loss: 3.8227 (3.6578) weight_decay: 0.0500 (0.0500) time: 0.5537 data: 0.1069 max mem: 21002 Epoch: [269] [190/312] eta: 0:01:09 lr: 0.000116 min_lr: 0.000116 loss: 3.5059 (3.6554) weight_decay: 0.0500 (0.0500) time: 0.4751 data: 0.1076 max mem: 21002 Epoch: [269] [200/312] eta: 0:01:04 lr: 0.000116 min_lr: 0.000116 loss: 3.5059 (3.6471) weight_decay: 0.0500 (0.0500) time: 0.4924 data: 0.0930 max mem: 21002 Epoch: [269] [210/312] eta: 0:00:58 lr: 0.000116 min_lr: 0.000116 loss: 3.8394 (3.6526) weight_decay: 0.0500 (0.0500) time: 0.6724 data: 0.1865 max mem: 21002 Epoch: [269] [220/312] eta: 0:00:52 lr: 0.000115 min_lr: 0.000115 loss: 3.8333 (3.6462) weight_decay: 0.0500 (0.0500) time: 0.4822 data: 0.0957 max mem: 21002 Epoch: [269] [230/312] eta: 0:00:46 lr: 0.000115 min_lr: 0.000115 loss: 3.8213 (3.6507) weight_decay: 0.0500 (0.0500) time: 0.5177 data: 0.0766 max mem: 21002 Epoch: [269] [240/312] eta: 0:00:40 lr: 0.000115 min_lr: 0.000115 loss: 3.6893 (3.6472) weight_decay: 0.0500 (0.0500) time: 0.6007 data: 0.1219 max mem: 21002 Epoch: [269] [250/312] eta: 0:00:35 lr: 0.000115 min_lr: 0.000115 loss: 3.6893 (3.6502) weight_decay: 0.0500 (0.0500) time: 0.5048 data: 0.0467 max mem: 21002 Epoch: [269] [260/312] eta: 0:00:29 lr: 0.000114 min_lr: 0.000114 loss: 3.8436 (3.6525) weight_decay: 0.0500 (0.0500) time: 0.6058 data: 0.0402 max mem: 21002 Epoch: [269] [270/312] eta: 0:00:23 lr: 0.000114 min_lr: 0.000114 loss: 3.5132 (3.6358) weight_decay: 0.0500 (0.0500) time: 0.4893 data: 0.0402 max mem: 21002 Epoch: [269] [280/312] eta: 0:00:18 lr: 0.000114 min_lr: 0.000114 loss: 3.3109 (3.6332) weight_decay: 0.0500 (0.0500) time: 0.4926 data: 0.0408 max mem: 21002 Epoch: [269] [290/312] eta: 0:00:12 lr: 0.000114 min_lr: 0.000114 loss: 3.5631 (3.6288) weight_decay: 0.0500 (0.0500) time: 0.5627 data: 0.0445 max mem: 21002 Epoch: [269] [300/312] eta: 0:00:06 lr: 0.000113 min_lr: 0.000113 loss: 3.7309 (3.6300) weight_decay: 0.0500 (0.0500) time: 0.3813 data: 0.0042 max mem: 21002 Epoch: [269] [310/312] eta: 0:00:01 lr: 0.000113 min_lr: 0.000113 loss: 3.8408 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.3022 data: 0.0001 max mem: 21002 Epoch: [269] [311/312] eta: 0:00:00 lr: 0.000113 min_lr: 0.000113 loss: 3.8408 (3.6276) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [269] Total time: 0:02:50 (0.5474 s / it) Averaged stats: lr: 0.000113 min_lr: 0.000113 loss: 3.8408 (3.5778) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.0543 (1.0543) acc1: 81.1198 (81.1198) acc5: 95.7031 (95.7031) time: 8.4674 data: 8.3491 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4124 (1.3008) acc1: 71.8750 (73.3120) acc5: 91.1458 (91.6960) time: 1.0241 data: 0.9278 max mem: 21002 Test: Total time: 0:00:09 (1.0361 s / it) * Acc@1 73.454 Acc@5 91.890 loss 1.308 Accuracy of the model on the 50000 test images: 73.5% Max accuracy: 73.45% Epoch: [270] [ 0/312] eta: 1:00:46 lr: 0.000113 min_lr: 0.000113 loss: 3.3902 (3.3902) weight_decay: 0.0500 (0.0500) time: 11.6865 data: 11.3696 max mem: 21002 Epoch: [270] [ 10/312] eta: 0:07:59 lr: 0.000113 min_lr: 0.000113 loss: 3.6925 (3.6341) weight_decay: 0.0500 (0.0500) time: 1.5866 data: 1.0592 max mem: 21002 Epoch: [270] [ 20/312] eta: 0:05:10 lr: 0.000113 min_lr: 0.000113 loss: 3.7592 (3.7540) weight_decay: 0.0500 (0.0500) time: 0.5305 data: 0.0774 max mem: 21002 Epoch: [270] [ 30/312] eta: 0:03:49 lr: 0.000112 min_lr: 0.000112 loss: 3.8670 (3.7462) weight_decay: 0.0500 (0.0500) time: 0.3897 data: 0.0645 max mem: 21002 Epoch: [270] [ 40/312] eta: 0:03:15 lr: 0.000112 min_lr: 0.000112 loss: 3.7367 (3.7123) weight_decay: 0.0500 (0.0500) time: 0.3565 data: 0.0663 max mem: 21002 Epoch: [270] [ 50/312] eta: 0:03:10 lr: 0.000112 min_lr: 0.000112 loss: 3.8717 (3.7207) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.2544 max mem: 21002 Epoch: [270] [ 60/312] eta: 0:02:45 lr: 0.000112 min_lr: 0.000112 loss: 3.6916 (3.6950) weight_decay: 0.0500 (0.0500) time: 0.5267 data: 0.1896 max mem: 21002 Epoch: [270] [ 70/312] eta: 0:02:37 lr: 0.000112 min_lr: 0.000112 loss: 3.5916 (3.6818) weight_decay: 0.0500 (0.0500) time: 0.4525 data: 0.1026 max mem: 21002 Epoch: [270] [ 80/312] eta: 0:02:26 lr: 0.000111 min_lr: 0.000111 loss: 3.7876 (3.6601) weight_decay: 0.0500 (0.0500) time: 0.5556 data: 0.1869 max mem: 21002 Epoch: [270] [ 90/312] eta: 0:02:16 lr: 0.000111 min_lr: 0.000111 loss: 3.6491 (3.6358) weight_decay: 0.0500 (0.0500) time: 0.4906 data: 0.1228 max mem: 21002 Epoch: [270] [100/312] eta: 0:02:10 lr: 0.000111 min_lr: 0.000111 loss: 3.4453 (3.6193) weight_decay: 0.0500 (0.0500) time: 0.5542 data: 0.1090 max mem: 21002 Epoch: [270] [110/312] eta: 0:01:58 lr: 0.000111 min_lr: 0.000111 loss: 3.4453 (3.6019) weight_decay: 0.0500 (0.0500) time: 0.4578 data: 0.0742 max mem: 21002 Epoch: [270] [120/312] eta: 0:01:54 lr: 0.000110 min_lr: 0.000110 loss: 3.2366 (3.5676) weight_decay: 0.0500 (0.0500) time: 0.5026 data: 0.1292 max mem: 21002 Epoch: [270] [130/312] eta: 0:01:50 lr: 0.000110 min_lr: 0.000110 loss: 3.4559 (3.5766) weight_decay: 0.0500 (0.0500) time: 0.7210 data: 0.2347 max mem: 21002 Epoch: [270] [140/312] eta: 0:01:40 lr: 0.000110 min_lr: 0.000110 loss: 3.8810 (3.6048) weight_decay: 0.0500 (0.0500) time: 0.5103 data: 0.1092 max mem: 21002 Epoch: [270] [150/312] eta: 0:01:35 lr: 0.000110 min_lr: 0.000110 loss: 3.9248 (3.6144) weight_decay: 0.0500 (0.0500) time: 0.4610 data: 0.0508 max mem: 21002 Epoch: [270] [160/312] eta: 0:01:29 lr: 0.000109 min_lr: 0.000109 loss: 3.7759 (3.6254) weight_decay: 0.0500 (0.0500) time: 0.6023 data: 0.0516 max mem: 21002 Epoch: [270] [170/312] eta: 0:01:21 lr: 0.000109 min_lr: 0.000109 loss: 3.5980 (3.6241) weight_decay: 0.0500 (0.0500) time: 0.4909 data: 0.0508 max mem: 21002 Epoch: [270] [180/312] eta: 0:01:17 lr: 0.000109 min_lr: 0.000109 loss: 3.4990 (3.6276) weight_decay: 0.0500 (0.0500) time: 0.5638 data: 0.1125 max mem: 21002 Epoch: [270] [190/312] eta: 0:01:09 lr: 0.000109 min_lr: 0.000109 loss: 3.7611 (3.6300) weight_decay: 0.0500 (0.0500) time: 0.5011 data: 0.0635 max mem: 21002 Epoch: [270] [200/312] eta: 0:01:04 lr: 0.000109 min_lr: 0.000109 loss: 3.6610 (3.6310) weight_decay: 0.0500 (0.0500) time: 0.4835 data: 0.0530 max mem: 21002 Epoch: [270] [210/312] eta: 0:00:59 lr: 0.000108 min_lr: 0.000108 loss: 3.6610 (3.6348) weight_decay: 0.0500 (0.0500) time: 0.6774 data: 0.0989 max mem: 21002 Epoch: [270] [220/312] eta: 0:00:52 lr: 0.000108 min_lr: 0.000108 loss: 3.5337 (3.6295) weight_decay: 0.0500 (0.0500) time: 0.4846 data: 0.0515 max mem: 21002 Epoch: [270] [230/312] eta: 0:00:47 lr: 0.000108 min_lr: 0.000108 loss: 3.4073 (3.6259) weight_decay: 0.0500 (0.0500) time: 0.5142 data: 0.0653 max mem: 21002 Epoch: [270] [240/312] eta: 0:00:41 lr: 0.000108 min_lr: 0.000108 loss: 3.4864 (3.6128) weight_decay: 0.0500 (0.0500) time: 0.6430 data: 0.0614 max mem: 21002 Epoch: [270] [250/312] eta: 0:00:34 lr: 0.000107 min_lr: 0.000107 loss: 3.4562 (3.6131) weight_decay: 0.0500 (0.0500) time: 0.4417 data: 0.0055 max mem: 21002 Epoch: [270] [260/312] eta: 0:00:29 lr: 0.000107 min_lr: 0.000107 loss: 3.7413 (3.6146) weight_decay: 0.0500 (0.0500) time: 0.5075 data: 0.0437 max mem: 21002 Epoch: [270] [270/312] eta: 0:00:23 lr: 0.000107 min_lr: 0.000107 loss: 3.7832 (3.6209) weight_decay: 0.0500 (0.0500) time: 0.4966 data: 0.0397 max mem: 21002 Epoch: [270] [280/312] eta: 0:00:18 lr: 0.000107 min_lr: 0.000107 loss: 3.7431 (3.6182) weight_decay: 0.0500 (0.0500) time: 0.5143 data: 0.0093 max mem: 21002 Epoch: [270] [290/312] eta: 0:00:12 lr: 0.000106 min_lr: 0.000106 loss: 3.6331 (3.6229) weight_decay: 0.0500 (0.0500) time: 0.6247 data: 0.0199 max mem: 21002 Epoch: [270] [300/312] eta: 0:00:06 lr: 0.000106 min_lr: 0.000106 loss: 3.8603 (3.6236) weight_decay: 0.0500 (0.0500) time: 0.4171 data: 0.0248 max mem: 21002 Epoch: [270] [310/312] eta: 0:00:01 lr: 0.000106 min_lr: 0.000106 loss: 3.7463 (3.6268) weight_decay: 0.0500 (0.0500) time: 0.2943 data: 0.0138 max mem: 21002 Epoch: [270] [311/312] eta: 0:00:00 lr: 0.000106 min_lr: 0.000106 loss: 3.7335 (3.6230) weight_decay: 0.0500 (0.0500) time: 0.2942 data: 0.0138 max mem: 21002 Epoch: [270] Total time: 0:02:51 (0.5482 s / it) Averaged stats: lr: 0.000106 min_lr: 0.000106 loss: 3.7335 (3.5974) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.0351 (1.0351) acc1: 81.2500 (81.2500) acc5: 95.4427 (95.4427) time: 8.8071 data: 8.6898 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3834 (1.3025) acc1: 72.1354 (73.4720) acc5: 91.2760 (91.7120) time: 1.0671 data: 0.9656 max mem: 21002 Test: Total time: 0:00:09 (1.0897 s / it) * Acc@1 73.200 Acc@5 91.874 loss 1.307 Accuracy of the model on the 50000 test images: 73.2% Max accuracy: 73.45% Epoch: [271] [ 0/312] eta: 0:59:07 lr: 0.000106 min_lr: 0.000106 loss: 2.3529 (2.3529) weight_decay: 0.0500 (0.0500) time: 11.3692 data: 8.4362 max mem: 21002 Epoch: [271] [ 10/312] eta: 0:07:57 lr: 0.000106 min_lr: 0.000106 loss: 3.3594 (3.3035) weight_decay: 0.0500 (0.0500) time: 1.5816 data: 1.0043 max mem: 21002 Epoch: [271] [ 20/312] eta: 0:05:28 lr: 0.000105 min_lr: 0.000105 loss: 3.6362 (3.5156) weight_decay: 0.0500 (0.0500) time: 0.6135 data: 0.1776 max mem: 21002 Epoch: [271] [ 30/312] eta: 0:04:01 lr: 0.000105 min_lr: 0.000105 loss: 3.8549 (3.6202) weight_decay: 0.0500 (0.0500) time: 0.4589 data: 0.0474 max mem: 21002 Epoch: [271] [ 40/312] eta: 0:03:26 lr: 0.000105 min_lr: 0.000105 loss: 3.9185 (3.6483) weight_decay: 0.0500 (0.0500) time: 0.3765 data: 0.0008 max mem: 21002 Epoch: [271] [ 50/312] eta: 0:03:15 lr: 0.000105 min_lr: 0.000105 loss: 3.7389 (3.6562) weight_decay: 0.0500 (0.0500) time: 0.5751 data: 0.0075 max mem: 21002 Epoch: [271] [ 60/312] eta: 0:02:49 lr: 0.000105 min_lr: 0.000105 loss: 3.6885 (3.6114) weight_decay: 0.0500 (0.0500) time: 0.4985 data: 0.0187 max mem: 21002 Epoch: [271] [ 70/312] eta: 0:02:46 lr: 0.000104 min_lr: 0.000104 loss: 3.6448 (3.6387) weight_decay: 0.0500 (0.0500) time: 0.5331 data: 0.0482 max mem: 21002 Epoch: [271] [ 80/312] eta: 0:02:34 lr: 0.000104 min_lr: 0.000104 loss: 3.6692 (3.6308) weight_decay: 0.0500 (0.0500) time: 0.6442 data: 0.0433 max mem: 21002 Epoch: [271] [ 90/312] eta: 0:02:22 lr: 0.000104 min_lr: 0.000104 loss: 3.7157 (3.6340) weight_decay: 0.0500 (0.0500) time: 0.4928 data: 0.0429 max mem: 21002 Epoch: [271] [100/312] eta: 0:02:16 lr: 0.000104 min_lr: 0.000104 loss: 3.7258 (3.6141) weight_decay: 0.0500 (0.0500) time: 0.5435 data: 0.0525 max mem: 21002 Epoch: [271] [110/312] eta: 0:02:05 lr: 0.000103 min_lr: 0.000103 loss: 3.5534 (3.6105) weight_decay: 0.0500 (0.0500) time: 0.5096 data: 0.0167 max mem: 21002 Epoch: [271] [120/312] eta: 0:01:59 lr: 0.000103 min_lr: 0.000103 loss: 3.8534 (3.6230) weight_decay: 0.0500 (0.0500) time: 0.5185 data: 0.0318 max mem: 21002 Epoch: [271] [130/312] eta: 0:01:52 lr: 0.000103 min_lr: 0.000103 loss: 3.8922 (3.6297) weight_decay: 0.0500 (0.0500) time: 0.6225 data: 0.0366 max mem: 21002 Epoch: [271] [140/312] eta: 0:01:44 lr: 0.000103 min_lr: 0.000103 loss: 3.6690 (3.6247) weight_decay: 0.0500 (0.0500) time: 0.5006 data: 0.0639 max mem: 21002 Epoch: [271] [150/312] eta: 0:01:38 lr: 0.000102 min_lr: 0.000102 loss: 3.5418 (3.6120) weight_decay: 0.0500 (0.0500) time: 0.5126 data: 0.0765 max mem: 21002 Epoch: [271] [160/312] eta: 0:01:31 lr: 0.000102 min_lr: 0.000102 loss: 3.5418 (3.6136) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0181 max mem: 21002 Epoch: [271] [170/312] eta: 0:01:24 lr: 0.000102 min_lr: 0.000102 loss: 3.6361 (3.6173) weight_decay: 0.0500 (0.0500) time: 0.5293 data: 0.0773 max mem: 21002 Epoch: [271] [180/312] eta: 0:01:17 lr: 0.000102 min_lr: 0.000102 loss: 3.4565 (3.6026) weight_decay: 0.0500 (0.0500) time: 0.4904 data: 0.0941 max mem: 21002 Epoch: [271] [190/312] eta: 0:01:10 lr: 0.000102 min_lr: 0.000102 loss: 3.3657 (3.5959) weight_decay: 0.0500 (0.0500) time: 0.3886 data: 0.0179 max mem: 21002 Epoch: [271] [200/312] eta: 0:01:05 lr: 0.000101 min_lr: 0.000101 loss: 3.6952 (3.6013) weight_decay: 0.0500 (0.0500) time: 0.4978 data: 0.0186 max mem: 21002 Epoch: [271] [210/312] eta: 0:00:59 lr: 0.000101 min_lr: 0.000101 loss: 3.8338 (3.6007) weight_decay: 0.0500 (0.0500) time: 0.6683 data: 0.0229 max mem: 21002 Epoch: [271] [220/312] eta: 0:00:52 lr: 0.000101 min_lr: 0.000101 loss: 3.8338 (3.6115) weight_decay: 0.0500 (0.0500) time: 0.4904 data: 0.0220 max mem: 21002 Epoch: [271] [230/312] eta: 0:00:47 lr: 0.000101 min_lr: 0.000101 loss: 3.6898 (3.6053) weight_decay: 0.0500 (0.0500) time: 0.5227 data: 0.0645 max mem: 21002 Epoch: [271] [240/312] eta: 0:00:41 lr: 0.000100 min_lr: 0.000100 loss: 3.3987 (3.5982) weight_decay: 0.0500 (0.0500) time: 0.6440 data: 0.0483 max mem: 21002 Epoch: [271] [250/312] eta: 0:00:35 lr: 0.000100 min_lr: 0.000100 loss: 3.7428 (3.6017) weight_decay: 0.0500 (0.0500) time: 0.4855 data: 0.0492 max mem: 21002 Epoch: [271] [260/312] eta: 0:00:29 lr: 0.000100 min_lr: 0.000100 loss: 3.8300 (3.6167) weight_decay: 0.0500 (0.0500) time: 0.5220 data: 0.0936 max mem: 21002 Epoch: [271] [270/312] eta: 0:00:23 lr: 0.000100 min_lr: 0.000100 loss: 3.7919 (3.6153) weight_decay: 0.0500 (0.0500) time: 0.4789 data: 0.0454 max mem: 21002 Epoch: [271] [280/312] eta: 0:00:18 lr: 0.000100 min_lr: 0.000100 loss: 3.7471 (3.6232) weight_decay: 0.0500 (0.0500) time: 0.4913 data: 0.0476 max mem: 21002 Epoch: [271] [290/312] eta: 0:00:12 lr: 0.000099 min_lr: 0.000099 loss: 3.8128 (3.6281) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0683 max mem: 21002 Epoch: [271] [300/312] eta: 0:00:06 lr: 0.000099 min_lr: 0.000099 loss: 3.7565 (3.6248) weight_decay: 0.0500 (0.0500) time: 0.3864 data: 0.0211 max mem: 21002 Epoch: [271] [310/312] eta: 0:00:01 lr: 0.000099 min_lr: 0.000099 loss: 3.7379 (3.6250) weight_decay: 0.0500 (0.0500) time: 0.2777 data: 0.0001 max mem: 21002 Epoch: [271] [311/312] eta: 0:00:00 lr: 0.000099 min_lr: 0.000099 loss: 3.7379 (3.6211) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [271] Total time: 0:02:51 (0.5506 s / it) Averaged stats: lr: 0.000099 min_lr: 0.000099 loss: 3.7379 (3.5714) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:13 loss: 1.0202 (1.0202) acc1: 81.1198 (81.1198) acc5: 95.4427 (95.4427) time: 8.1536 data: 8.0362 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3671 (1.2670) acc1: 73.4375 (73.9360) acc5: 91.2760 (91.8720) time: 1.0404 data: 0.9419 max mem: 21002 Test: Total time: 0:00:09 (1.0542 s / it) * Acc@1 73.620 Acc@5 92.002 loss 1.274 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 73.62% Epoch: [272] [ 0/312] eta: 0:57:40 lr: 0.000099 min_lr: 0.000099 loss: 3.3520 (3.3520) weight_decay: 0.0500 (0.0500) time: 11.0903 data: 10.2575 max mem: 21002 Epoch: [272] [ 10/312] eta: 0:07:00 lr: 0.000099 min_lr: 0.000099 loss: 3.8114 (3.8552) weight_decay: 0.0500 (0.0500) time: 1.3927 data: 1.0477 max mem: 21002 Epoch: [272] [ 20/312] eta: 0:05:02 lr: 0.000098 min_lr: 0.000098 loss: 3.8720 (3.8017) weight_decay: 0.0500 (0.0500) time: 0.5326 data: 0.2319 max mem: 21002 Epoch: [272] [ 30/312] eta: 0:03:44 lr: 0.000098 min_lr: 0.000098 loss: 3.5477 (3.7020) weight_decay: 0.0500 (0.0500) time: 0.4675 data: 0.1689 max mem: 21002 Epoch: [272] [ 40/312] eta: 0:03:26 lr: 0.000098 min_lr: 0.000098 loss: 3.4832 (3.6559) weight_decay: 0.0500 (0.0500) time: 0.4661 data: 0.1784 max mem: 21002 Epoch: [272] [ 50/312] eta: 0:03:15 lr: 0.000098 min_lr: 0.000098 loss: 3.6691 (3.6598) weight_decay: 0.0500 (0.0500) time: 0.6706 data: 0.3351 max mem: 21002 Epoch: [272] [ 60/312] eta: 0:02:50 lr: 0.000098 min_lr: 0.000098 loss: 3.7768 (3.6636) weight_decay: 0.0500 (0.0500) time: 0.5086 data: 0.1728 max mem: 21002 Epoch: [272] [ 70/312] eta: 0:02:44 lr: 0.000097 min_lr: 0.000097 loss: 3.5571 (3.6152) weight_decay: 0.0500 (0.0500) time: 0.5029 data: 0.1558 max mem: 21002 Epoch: [272] [ 80/312] eta: 0:02:33 lr: 0.000097 min_lr: 0.000097 loss: 3.2743 (3.5863) weight_decay: 0.0500 (0.0500) time: 0.6128 data: 0.2380 max mem: 21002 Epoch: [272] [ 90/312] eta: 0:02:20 lr: 0.000097 min_lr: 0.000097 loss: 3.2743 (3.5566) weight_decay: 0.0500 (0.0500) time: 0.4700 data: 0.1392 max mem: 21002 Epoch: [272] [100/312] eta: 0:02:16 lr: 0.000097 min_lr: 0.000097 loss: 3.2780 (3.5247) weight_decay: 0.0500 (0.0500) time: 0.5722 data: 0.2016 max mem: 21002 Epoch: [272] [110/312] eta: 0:02:03 lr: 0.000096 min_lr: 0.000096 loss: 3.7119 (3.5557) weight_decay: 0.0500 (0.0500) time: 0.5156 data: 0.1605 max mem: 21002 Epoch: [272] [120/312] eta: 0:01:58 lr: 0.000096 min_lr: 0.000096 loss: 3.8299 (3.5784) weight_decay: 0.0500 (0.0500) time: 0.4785 data: 0.1166 max mem: 21002 Epoch: [272] [130/312] eta: 0:01:53 lr: 0.000096 min_lr: 0.000096 loss: 3.8185 (3.5831) weight_decay: 0.0500 (0.0500) time: 0.6755 data: 0.2051 max mem: 21002 Epoch: [272] [140/312] eta: 0:01:43 lr: 0.000096 min_lr: 0.000096 loss: 3.6550 (3.5753) weight_decay: 0.0500 (0.0500) time: 0.5107 data: 0.1144 max mem: 21002 Epoch: [272] [150/312] eta: 0:01:36 lr: 0.000096 min_lr: 0.000096 loss: 3.6550 (3.5692) weight_decay: 0.0500 (0.0500) time: 0.4265 data: 0.1400 max mem: 21002 Epoch: [272] [160/312] eta: 0:01:30 lr: 0.000095 min_lr: 0.000095 loss: 3.5166 (3.5655) weight_decay: 0.0500 (0.0500) time: 0.5616 data: 0.2712 max mem: 21002 Epoch: [272] [170/312] eta: 0:01:22 lr: 0.000095 min_lr: 0.000095 loss: 3.5166 (3.5594) weight_decay: 0.0500 (0.0500) time: 0.4457 data: 0.1571 max mem: 21002 Epoch: [272] [180/312] eta: 0:01:17 lr: 0.000095 min_lr: 0.000095 loss: 3.8815 (3.5770) weight_decay: 0.0500 (0.0500) time: 0.5004 data: 0.1742 max mem: 21002 Epoch: [272] [190/312] eta: 0:01:09 lr: 0.000095 min_lr: 0.000095 loss: 3.8416 (3.5748) weight_decay: 0.0500 (0.0500) time: 0.5015 data: 0.1742 max mem: 21002 Epoch: [272] [200/312] eta: 0:01:04 lr: 0.000094 min_lr: 0.000094 loss: 3.5034 (3.5620) weight_decay: 0.0500 (0.0500) time: 0.5261 data: 0.1453 max mem: 21002 Epoch: [272] [210/312] eta: 0:00:59 lr: 0.000094 min_lr: 0.000094 loss: 3.2230 (3.5553) weight_decay: 0.0500 (0.0500) time: 0.6866 data: 0.2206 max mem: 21002 Epoch: [272] [220/312] eta: 0:00:52 lr: 0.000094 min_lr: 0.000094 loss: 3.6349 (3.5634) weight_decay: 0.0500 (0.0500) time: 0.4782 data: 0.1028 max mem: 21002 Epoch: [272] [230/312] eta: 0:00:47 lr: 0.000094 min_lr: 0.000094 loss: 3.6349 (3.5632) weight_decay: 0.0500 (0.0500) time: 0.5318 data: 0.1198 max mem: 21002 Epoch: [272] [240/312] eta: 0:00:41 lr: 0.000094 min_lr: 0.000094 loss: 3.4475 (3.5650) weight_decay: 0.0500 (0.0500) time: 0.6242 data: 0.1102 max mem: 21002 Epoch: [272] [250/312] eta: 0:00:35 lr: 0.000093 min_lr: 0.000093 loss: 3.7223 (3.5741) weight_decay: 0.0500 (0.0500) time: 0.5731 data: 0.0863 max mem: 21002 Epoch: [272] [260/312] eta: 0:00:29 lr: 0.000093 min_lr: 0.000093 loss: 3.7368 (3.5734) weight_decay: 0.0500 (0.0500) time: 0.5361 data: 0.0859 max mem: 21002 Epoch: [272] [270/312] eta: 0:00:23 lr: 0.000093 min_lr: 0.000093 loss: 3.7684 (3.5709) weight_decay: 0.0500 (0.0500) time: 0.3720 data: 0.0174 max mem: 21002 Epoch: [272] [280/312] eta: 0:00:18 lr: 0.000093 min_lr: 0.000093 loss: 3.7293 (3.5707) weight_decay: 0.0500 (0.0500) time: 0.5185 data: 0.0173 max mem: 21002 Epoch: [272] [290/312] eta: 0:00:12 lr: 0.000093 min_lr: 0.000093 loss: 3.5957 (3.5659) weight_decay: 0.0500 (0.0500) time: 0.6092 data: 0.0354 max mem: 21002 Epoch: [272] [300/312] eta: 0:00:06 lr: 0.000092 min_lr: 0.000092 loss: 3.5957 (3.5656) weight_decay: 0.0500 (0.0500) time: 0.3726 data: 0.0185 max mem: 21002 Epoch: [272] [310/312] eta: 0:00:01 lr: 0.000092 min_lr: 0.000092 loss: 3.8155 (3.5674) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [272] [311/312] eta: 0:00:00 lr: 0.000092 min_lr: 0.000092 loss: 3.8155 (3.5682) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [272] Total time: 0:02:51 (0.5489 s / it) Averaged stats: lr: 0.000092 min_lr: 0.000092 loss: 3.8155 (3.5982) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.0507 (1.0507) acc1: 80.9896 (80.9896) acc5: 95.9635 (95.9635) time: 8.7468 data: 8.6281 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3920 (1.2974) acc1: 72.6562 (73.9040) acc5: 91.4062 (92.0800) time: 1.0579 data: 0.9587 max mem: 21002 Test: Total time: 0:00:09 (1.0894 s / it) * Acc@1 73.558 Acc@5 91.952 loss 1.303 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 73.62% Epoch: [273] [ 0/312] eta: 0:58:04 lr: 0.000092 min_lr: 0.000092 loss: 2.5531 (2.5531) weight_decay: 0.0500 (0.0500) time: 11.1680 data: 9.1910 max mem: 21002 Epoch: [273] [ 10/312] eta: 0:07:52 lr: 0.000092 min_lr: 0.000092 loss: 3.1952 (3.2337) weight_decay: 0.0500 (0.0500) time: 1.5653 data: 0.9930 max mem: 21002 Epoch: [273] [ 20/312] eta: 0:05:07 lr: 0.000092 min_lr: 0.000092 loss: 3.4745 (3.3875) weight_decay: 0.0500 (0.0500) time: 0.5466 data: 0.0870 max mem: 21002 Epoch: [273] [ 30/312] eta: 0:03:47 lr: 0.000091 min_lr: 0.000091 loss: 3.6604 (3.4974) weight_decay: 0.0500 (0.0500) time: 0.3919 data: 0.0059 max mem: 21002 Epoch: [273] [ 40/312] eta: 0:03:24 lr: 0.000091 min_lr: 0.000091 loss: 3.8706 (3.5433) weight_decay: 0.0500 (0.0500) time: 0.4336 data: 0.0623 max mem: 21002 Epoch: [273] [ 50/312] eta: 0:03:12 lr: 0.000091 min_lr: 0.000091 loss: 3.6635 (3.5402) weight_decay: 0.0500 (0.0500) time: 0.6172 data: 0.1856 max mem: 21002 Epoch: [273] [ 60/312] eta: 0:02:46 lr: 0.000091 min_lr: 0.000091 loss: 3.5761 (3.5170) weight_decay: 0.0500 (0.0500) time: 0.4744 data: 0.1293 max mem: 21002 Epoch: [273] [ 70/312] eta: 0:02:41 lr: 0.000091 min_lr: 0.000091 loss: 3.7336 (3.5321) weight_decay: 0.0500 (0.0500) time: 0.4949 data: 0.1016 max mem: 21002 Epoch: [273] [ 80/312] eta: 0:02:34 lr: 0.000090 min_lr: 0.000090 loss: 3.7877 (3.5493) weight_decay: 0.0500 (0.0500) time: 0.6763 data: 0.1732 max mem: 21002 Epoch: [273] [ 90/312] eta: 0:02:18 lr: 0.000090 min_lr: 0.000090 loss: 3.8722 (3.5616) weight_decay: 0.0500 (0.0500) time: 0.4814 data: 0.0779 max mem: 21002 Epoch: [273] [100/312] eta: 0:02:13 lr: 0.000090 min_lr: 0.000090 loss: 3.8722 (3.5797) weight_decay: 0.0500 (0.0500) time: 0.4944 data: 0.0652 max mem: 21002 Epoch: [273] [110/312] eta: 0:02:01 lr: 0.000090 min_lr: 0.000090 loss: 3.7060 (3.5641) weight_decay: 0.0500 (0.0500) time: 0.4850 data: 0.0649 max mem: 21002 Epoch: [273] [120/312] eta: 0:01:56 lr: 0.000089 min_lr: 0.000089 loss: 3.5591 (3.5521) weight_decay: 0.0500 (0.0500) time: 0.4943 data: 0.0294 max mem: 21002 Epoch: [273] [130/312] eta: 0:01:51 lr: 0.000089 min_lr: 0.000089 loss: 3.5302 (3.5613) weight_decay: 0.0500 (0.0500) time: 0.6954 data: 0.0315 max mem: 21002 Epoch: [273] [140/312] eta: 0:01:41 lr: 0.000089 min_lr: 0.000089 loss: 3.5416 (3.5561) weight_decay: 0.0500 (0.0500) time: 0.4963 data: 0.0135 max mem: 21002 Epoch: [273] [150/312] eta: 0:01:37 lr: 0.000089 min_lr: 0.000089 loss: 3.5846 (3.5602) weight_decay: 0.0500 (0.0500) time: 0.5089 data: 0.0297 max mem: 21002 Epoch: [273] [160/312] eta: 0:01:31 lr: 0.000089 min_lr: 0.000089 loss: 3.6440 (3.5564) weight_decay: 0.0500 (0.0500) time: 0.6814 data: 0.0435 max mem: 21002 Epoch: [273] [170/312] eta: 0:01:24 lr: 0.000088 min_lr: 0.000088 loss: 3.4204 (3.5506) weight_decay: 0.0500 (0.0500) time: 0.5281 data: 0.0504 max mem: 21002 Epoch: [273] [180/312] eta: 0:01:18 lr: 0.000088 min_lr: 0.000088 loss: 3.6235 (3.5614) weight_decay: 0.0500 (0.0500) time: 0.5436 data: 0.0654 max mem: 21002 Epoch: [273] [190/312] eta: 0:01:10 lr: 0.000088 min_lr: 0.000088 loss: 3.6235 (3.5507) weight_decay: 0.0500 (0.0500) time: 0.4850 data: 0.0358 max mem: 21002 Epoch: [273] [200/312] eta: 0:01:05 lr: 0.000088 min_lr: 0.000088 loss: 3.4651 (3.5594) weight_decay: 0.0500 (0.0500) time: 0.4796 data: 0.0093 max mem: 21002 Epoch: [273] [210/312] eta: 0:00:59 lr: 0.000088 min_lr: 0.000088 loss: 3.8447 (3.5664) weight_decay: 0.0500 (0.0500) time: 0.6502 data: 0.0154 max mem: 21002 Epoch: [273] [220/312] eta: 0:00:53 lr: 0.000087 min_lr: 0.000087 loss: 3.5337 (3.5469) weight_decay: 0.0500 (0.0500) time: 0.4959 data: 0.0417 max mem: 21002 Epoch: [273] [230/312] eta: 0:00:47 lr: 0.000087 min_lr: 0.000087 loss: 3.3498 (3.5488) weight_decay: 0.0500 (0.0500) time: 0.5364 data: 0.0810 max mem: 21002 Epoch: [273] [240/312] eta: 0:00:41 lr: 0.000087 min_lr: 0.000087 loss: 3.7171 (3.5512) weight_decay: 0.0500 (0.0500) time: 0.6405 data: 0.0470 max mem: 21002 Epoch: [273] [250/312] eta: 0:00:35 lr: 0.000087 min_lr: 0.000087 loss: 3.7158 (3.5578) weight_decay: 0.0500 (0.0500) time: 0.4721 data: 0.0241 max mem: 21002 Epoch: [273] [260/312] eta: 0:00:29 lr: 0.000087 min_lr: 0.000087 loss: 3.6548 (3.5510) weight_decay: 0.0500 (0.0500) time: 0.4782 data: 0.0403 max mem: 21002 Epoch: [273] [270/312] eta: 0:00:23 lr: 0.000086 min_lr: 0.000086 loss: 3.6907 (3.5524) weight_decay: 0.0500 (0.0500) time: 0.4503 data: 0.0365 max mem: 21002 Epoch: [273] [280/312] eta: 0:00:18 lr: 0.000086 min_lr: 0.000086 loss: 3.4963 (3.5497) weight_decay: 0.0500 (0.0500) time: 0.5228 data: 0.0681 max mem: 21002 Epoch: [273] [290/312] eta: 0:00:12 lr: 0.000086 min_lr: 0.000086 loss: 3.4504 (3.5514) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0634 max mem: 21002 Epoch: [273] [300/312] eta: 0:00:06 lr: 0.000086 min_lr: 0.000086 loss: 3.7381 (3.5455) weight_decay: 0.0500 (0.0500) time: 0.3751 data: 0.0154 max mem: 21002 Epoch: [273] [310/312] eta: 0:00:01 lr: 0.000086 min_lr: 0.000086 loss: 3.5818 (3.5423) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [273] [311/312] eta: 0:00:00 lr: 0.000085 min_lr: 0.000085 loss: 3.3585 (3.5393) weight_decay: 0.0500 (0.0500) time: 0.2773 data: 0.0001 max mem: 21002 Epoch: [273] Total time: 0:02:52 (0.5514 s / it) Averaged stats: lr: 0.000085 min_lr: 0.000085 loss: 3.3585 (3.5915) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.0087 (1.0087) acc1: 81.6406 (81.6406) acc5: 96.2240 (96.2240) time: 8.8532 data: 8.7347 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3499 (1.2584) acc1: 72.1354 (73.7120) acc5: 91.5365 (92.0000) time: 1.0687 data: 0.9706 max mem: 21002 Test: Total time: 0:00:09 (1.0827 s / it) * Acc@1 73.692 Acc@5 92.050 loss 1.261 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.69% Epoch: [274] [ 0/312] eta: 1:04:40 lr: 0.000085 min_lr: 0.000085 loss: 4.2487 (4.2487) weight_decay: 0.0500 (0.0500) time: 12.4381 data: 8.7398 max mem: 21002 Epoch: [274] [ 10/312] eta: 0:08:32 lr: 0.000085 min_lr: 0.000085 loss: 3.9121 (3.7245) weight_decay: 0.0500 (0.0500) time: 1.6969 data: 1.0796 max mem: 21002 Epoch: [274] [ 20/312] eta: 0:05:09 lr: 0.000085 min_lr: 0.000085 loss: 3.7268 (3.6307) weight_decay: 0.0500 (0.0500) time: 0.4897 data: 0.1573 max mem: 21002 Epoch: [274] [ 30/312] eta: 0:03:48 lr: 0.000085 min_lr: 0.000085 loss: 3.4955 (3.5972) weight_decay: 0.0500 (0.0500) time: 0.3234 data: 0.0010 max mem: 21002 Epoch: [274] [ 40/312] eta: 0:03:21 lr: 0.000085 min_lr: 0.000085 loss: 3.6564 (3.6445) weight_decay: 0.0500 (0.0500) time: 0.4061 data: 0.0767 max mem: 21002 Epoch: [274] [ 50/312] eta: 0:03:10 lr: 0.000084 min_lr: 0.000084 loss: 3.8311 (3.6344) weight_decay: 0.0500 (0.0500) time: 0.5997 data: 0.1887 max mem: 21002 Epoch: [274] [ 60/312] eta: 0:02:46 lr: 0.000084 min_lr: 0.000084 loss: 3.8540 (3.6462) weight_decay: 0.0500 (0.0500) time: 0.4947 data: 0.1230 max mem: 21002 Epoch: [274] [ 70/312] eta: 0:02:37 lr: 0.000084 min_lr: 0.000084 loss: 3.8540 (3.6415) weight_decay: 0.0500 (0.0500) time: 0.4587 data: 0.0963 max mem: 21002 Epoch: [274] [ 80/312] eta: 0:02:29 lr: 0.000084 min_lr: 0.000084 loss: 3.8069 (3.6543) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.1477 max mem: 21002 Epoch: [274] [ 90/312] eta: 0:02:16 lr: 0.000084 min_lr: 0.000084 loss: 3.8495 (3.6777) weight_decay: 0.0500 (0.0500) time: 0.4885 data: 0.1205 max mem: 21002 Epoch: [274] [100/312] eta: 0:02:10 lr: 0.000083 min_lr: 0.000083 loss: 3.9220 (3.6844) weight_decay: 0.0500 (0.0500) time: 0.5130 data: 0.1861 max mem: 21002 Epoch: [274] [110/312] eta: 0:01:59 lr: 0.000083 min_lr: 0.000083 loss: 3.8825 (3.6890) weight_decay: 0.0500 (0.0500) time: 0.4812 data: 0.1494 max mem: 21002 Epoch: [274] [120/312] eta: 0:01:56 lr: 0.000083 min_lr: 0.000083 loss: 3.6642 (3.6734) weight_decay: 0.0500 (0.0500) time: 0.5411 data: 0.1521 max mem: 21002 Epoch: [274] [130/312] eta: 0:01:50 lr: 0.000083 min_lr: 0.000083 loss: 3.5529 (3.6514) weight_decay: 0.0500 (0.0500) time: 0.6848 data: 0.2163 max mem: 21002 Epoch: [274] [140/312] eta: 0:01:41 lr: 0.000083 min_lr: 0.000083 loss: 3.4497 (3.6297) weight_decay: 0.0500 (0.0500) time: 0.4873 data: 0.1185 max mem: 21002 Epoch: [274] [150/312] eta: 0:01:35 lr: 0.000082 min_lr: 0.000082 loss: 3.4511 (3.6277) weight_decay: 0.0500 (0.0500) time: 0.4789 data: 0.1165 max mem: 21002 Epoch: [274] [160/312] eta: 0:01:28 lr: 0.000082 min_lr: 0.000082 loss: 3.7196 (3.6303) weight_decay: 0.0500 (0.0500) time: 0.5609 data: 0.1599 max mem: 21002 Epoch: [274] [170/312] eta: 0:01:21 lr: 0.000082 min_lr: 0.000082 loss: 3.6400 (3.6185) weight_decay: 0.0500 (0.0500) time: 0.4882 data: 0.1313 max mem: 21002 Epoch: [274] [180/312] eta: 0:01:17 lr: 0.000082 min_lr: 0.000082 loss: 3.6820 (3.6219) weight_decay: 0.0500 (0.0500) time: 0.5785 data: 0.1756 max mem: 21002 Epoch: [274] [190/312] eta: 0:01:09 lr: 0.000082 min_lr: 0.000082 loss: 3.5680 (3.6132) weight_decay: 0.0500 (0.0500) time: 0.4975 data: 0.1295 max mem: 21002 Epoch: [274] [200/312] eta: 0:01:04 lr: 0.000081 min_lr: 0.000081 loss: 3.7111 (3.6213) weight_decay: 0.0500 (0.0500) time: 0.4873 data: 0.1419 max mem: 21002 Epoch: [274] [210/312] eta: 0:00:59 lr: 0.000081 min_lr: 0.000081 loss: 3.7863 (3.6243) weight_decay: 0.0500 (0.0500) time: 0.7083 data: 0.2693 max mem: 21002 Epoch: [274] [220/312] eta: 0:00:52 lr: 0.000081 min_lr: 0.000081 loss: 3.5270 (3.6137) weight_decay: 0.0500 (0.0500) time: 0.5375 data: 0.1564 max mem: 21002 Epoch: [274] [230/312] eta: 0:00:47 lr: 0.000081 min_lr: 0.000081 loss: 3.3845 (3.6114) weight_decay: 0.0500 (0.0500) time: 0.4867 data: 0.1133 max mem: 21002 Epoch: [274] [240/312] eta: 0:00:41 lr: 0.000081 min_lr: 0.000081 loss: 3.7130 (3.6139) weight_decay: 0.0500 (0.0500) time: 0.6309 data: 0.2149 max mem: 21002 Epoch: [274] [250/312] eta: 0:00:35 lr: 0.000080 min_lr: 0.000080 loss: 3.7347 (3.6114) weight_decay: 0.0500 (0.0500) time: 0.4930 data: 0.1214 max mem: 21002 Epoch: [274] [260/312] eta: 0:00:29 lr: 0.000080 min_lr: 0.000080 loss: 3.3360 (3.5987) weight_decay: 0.0500 (0.0500) time: 0.5155 data: 0.1161 max mem: 21002 Epoch: [274] [270/312] eta: 0:00:23 lr: 0.000080 min_lr: 0.000080 loss: 3.5155 (3.6015) weight_decay: 0.0500 (0.0500) time: 0.4781 data: 0.1169 max mem: 21002 Epoch: [274] [280/312] eta: 0:00:18 lr: 0.000080 min_lr: 0.000080 loss: 3.8285 (3.6027) weight_decay: 0.0500 (0.0500) time: 0.4846 data: 0.1157 max mem: 21002 Epoch: [274] [290/312] eta: 0:00:12 lr: 0.000080 min_lr: 0.000080 loss: 3.6660 (3.6031) weight_decay: 0.0500 (0.0500) time: 0.6407 data: 0.2263 max mem: 21002 Epoch: [274] [300/312] eta: 0:00:06 lr: 0.000079 min_lr: 0.000079 loss: 3.5425 (3.5960) weight_decay: 0.0500 (0.0500) time: 0.4418 data: 0.1118 max mem: 21002 Epoch: [274] [310/312] eta: 0:00:01 lr: 0.000079 min_lr: 0.000079 loss: 3.7263 (3.6020) weight_decay: 0.0500 (0.0500) time: 0.3046 data: 0.0102 max mem: 21002 Epoch: [274] [311/312] eta: 0:00:00 lr: 0.000079 min_lr: 0.000079 loss: 3.7254 (3.5999) weight_decay: 0.0500 (0.0500) time: 0.3045 data: 0.0102 max mem: 21002 Epoch: [274] Total time: 0:02:52 (0.5526 s / it) Averaged stats: lr: 0.000079 min_lr: 0.000079 loss: 3.7254 (3.5718) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.0535 (1.0535) acc1: 80.3385 (80.3385) acc5: 95.0521 (95.0521) time: 8.4903 data: 8.3722 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3928 (1.3019) acc1: 72.5260 (73.4240) acc5: 91.0156 (91.5520) time: 1.0298 data: 0.9303 max mem: 21002 Test: Total time: 0:00:09 (1.0428 s / it) * Acc@1 73.536 Acc@5 91.844 loss 1.308 Accuracy of the model on the 50000 test images: 73.5% Max accuracy: 73.69% Epoch: [275] [ 0/312] eta: 1:09:15 lr: 0.000079 min_lr: 0.000079 loss: 2.9091 (2.9091) weight_decay: 0.0500 (0.0500) time: 13.3183 data: 9.2037 max mem: 21002 Epoch: [275] [ 10/312] eta: 0:08:48 lr: 0.000079 min_lr: 0.000079 loss: 3.8147 (3.8857) weight_decay: 0.0500 (0.0500) time: 1.7496 data: 1.0506 max mem: 21002 Epoch: [275] [ 20/312] eta: 0:05:23 lr: 0.000079 min_lr: 0.000079 loss: 3.6633 (3.5808) weight_decay: 0.0500 (0.0500) time: 0.4975 data: 0.1296 max mem: 21002 Epoch: [275] [ 30/312] eta: 0:03:58 lr: 0.000079 min_lr: 0.000079 loss: 3.4891 (3.5794) weight_decay: 0.0500 (0.0500) time: 0.3474 data: 0.0127 max mem: 21002 Epoch: [275] [ 40/312] eta: 0:03:13 lr: 0.000078 min_lr: 0.000078 loss: 3.5190 (3.5647) weight_decay: 0.0500 (0.0500) time: 0.2922 data: 0.0020 max mem: 21002 Epoch: [275] [ 50/312] eta: 0:03:03 lr: 0.000078 min_lr: 0.000078 loss: 3.5190 (3.5656) weight_decay: 0.0500 (0.0500) time: 0.4729 data: 0.0105 max mem: 21002 Epoch: [275] [ 60/312] eta: 0:02:43 lr: 0.000078 min_lr: 0.000078 loss: 3.4303 (3.5113) weight_decay: 0.0500 (0.0500) time: 0.5263 data: 0.0658 max mem: 21002 Epoch: [275] [ 70/312] eta: 0:02:36 lr: 0.000078 min_lr: 0.000078 loss: 3.6753 (3.5617) weight_decay: 0.0500 (0.0500) time: 0.5060 data: 0.0929 max mem: 21002 Epoch: [275] [ 80/312] eta: 0:02:30 lr: 0.000078 min_lr: 0.000078 loss: 3.8081 (3.5524) weight_decay: 0.0500 (0.0500) time: 0.6492 data: 0.0873 max mem: 21002 Epoch: [275] [ 90/312] eta: 0:02:18 lr: 0.000077 min_lr: 0.000077 loss: 3.7789 (3.5611) weight_decay: 0.0500 (0.0500) time: 0.5462 data: 0.1127 max mem: 21002 Epoch: [275] [100/312] eta: 0:02:13 lr: 0.000077 min_lr: 0.000077 loss: 3.6976 (3.5749) weight_decay: 0.0500 (0.0500) time: 0.5476 data: 0.1152 max mem: 21002 Epoch: [275] [110/312] eta: 0:02:01 lr: 0.000077 min_lr: 0.000077 loss: 3.5984 (3.5651) weight_decay: 0.0500 (0.0500) time: 0.5042 data: 0.0720 max mem: 21002 Epoch: [275] [120/312] eta: 0:01:55 lr: 0.000077 min_lr: 0.000077 loss: 3.5499 (3.5531) weight_decay: 0.0500 (0.0500) time: 0.4635 data: 0.0480 max mem: 21002 Epoch: [275] [130/312] eta: 0:01:50 lr: 0.000077 min_lr: 0.000077 loss: 3.6477 (3.5576) weight_decay: 0.0500 (0.0500) time: 0.6392 data: 0.0826 max mem: 21002 Epoch: [275] [140/312] eta: 0:01:41 lr: 0.000076 min_lr: 0.000076 loss: 3.4478 (3.5455) weight_decay: 0.0500 (0.0500) time: 0.5207 data: 0.0996 max mem: 21002 Epoch: [275] [150/312] eta: 0:01:36 lr: 0.000076 min_lr: 0.000076 loss: 3.6779 (3.5643) weight_decay: 0.0500 (0.0500) time: 0.5200 data: 0.1018 max mem: 21002 Epoch: [275] [160/312] eta: 0:01:30 lr: 0.000076 min_lr: 0.000076 loss: 3.7341 (3.5551) weight_decay: 0.0500 (0.0500) time: 0.6425 data: 0.1061 max mem: 21002 Epoch: [275] [170/312] eta: 0:01:22 lr: 0.000076 min_lr: 0.000076 loss: 3.6472 (3.5534) weight_decay: 0.0500 (0.0500) time: 0.4906 data: 0.0910 max mem: 21002 Epoch: [275] [180/312] eta: 0:01:17 lr: 0.000076 min_lr: 0.000076 loss: 3.8077 (3.5693) weight_decay: 0.0500 (0.0500) time: 0.5177 data: 0.0942 max mem: 21002 Epoch: [275] [190/312] eta: 0:01:10 lr: 0.000075 min_lr: 0.000075 loss: 3.7524 (3.5777) weight_decay: 0.0500 (0.0500) time: 0.4980 data: 0.0755 max mem: 21002 Epoch: [275] [200/312] eta: 0:01:04 lr: 0.000075 min_lr: 0.000075 loss: 3.6552 (3.5824) weight_decay: 0.0500 (0.0500) time: 0.4745 data: 0.0493 max mem: 21002 Epoch: [275] [210/312] eta: 0:00:59 lr: 0.000075 min_lr: 0.000075 loss: 3.4174 (3.5712) weight_decay: 0.0500 (0.0500) time: 0.6432 data: 0.0894 max mem: 21002 Epoch: [275] [220/312] eta: 0:00:52 lr: 0.000075 min_lr: 0.000075 loss: 3.4174 (3.5674) weight_decay: 0.0500 (0.0500) time: 0.5486 data: 0.1157 max mem: 21002 Epoch: [275] [230/312] eta: 0:00:47 lr: 0.000075 min_lr: 0.000075 loss: 3.6752 (3.5725) weight_decay: 0.0500 (0.0500) time: 0.5090 data: 0.0930 max mem: 21002 Epoch: [275] [240/312] eta: 0:00:41 lr: 0.000074 min_lr: 0.000074 loss: 3.6752 (3.5680) weight_decay: 0.0500 (0.0500) time: 0.6060 data: 0.0893 max mem: 21002 Epoch: [275] [250/312] eta: 0:00:35 lr: 0.000074 min_lr: 0.000074 loss: 3.5154 (3.5695) weight_decay: 0.0500 (0.0500) time: 0.5588 data: 0.1063 max mem: 21002 Epoch: [275] [260/312] eta: 0:00:29 lr: 0.000074 min_lr: 0.000074 loss: 3.6210 (3.5645) weight_decay: 0.0500 (0.0500) time: 0.4934 data: 0.0844 max mem: 21002 Epoch: [275] [270/312] eta: 0:00:23 lr: 0.000074 min_lr: 0.000074 loss: 3.4873 (3.5527) weight_decay: 0.0500 (0.0500) time: 0.4678 data: 0.0704 max mem: 21002 Epoch: [275] [280/312] eta: 0:00:18 lr: 0.000074 min_lr: 0.000074 loss: 3.5675 (3.5608) weight_decay: 0.0500 (0.0500) time: 0.5269 data: 0.0851 max mem: 21002 Epoch: [275] [290/312] eta: 0:00:12 lr: 0.000073 min_lr: 0.000073 loss: 3.8606 (3.5655) weight_decay: 0.0500 (0.0500) time: 0.6085 data: 0.1068 max mem: 21002 Epoch: [275] [300/312] eta: 0:00:06 lr: 0.000073 min_lr: 0.000073 loss: 3.6521 (3.5648) weight_decay: 0.0500 (0.0500) time: 0.4563 data: 0.0724 max mem: 21002 Epoch: [275] [310/312] eta: 0:00:01 lr: 0.000073 min_lr: 0.000073 loss: 3.5108 (3.5601) weight_decay: 0.0500 (0.0500) time: 0.2934 data: 0.0086 max mem: 21002 Epoch: [275] [311/312] eta: 0:00:00 lr: 0.000073 min_lr: 0.000073 loss: 3.5108 (3.5574) weight_decay: 0.0500 (0.0500) time: 0.2933 data: 0.0086 max mem: 21002 Epoch: [275] Total time: 0:02:52 (0.5539 s / it) Averaged stats: lr: 0.000073 min_lr: 0.000073 loss: 3.5108 (3.5804) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.0021 (1.0021) acc1: 81.2500 (81.2500) acc5: 95.3125 (95.3125) time: 8.4605 data: 8.3423 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3356 (1.2570) acc1: 72.3958 (73.5040) acc5: 91.5365 (91.8080) time: 1.0260 data: 0.9270 max mem: 21002 Test: Total time: 0:00:09 (1.0384 s / it) * Acc@1 73.588 Acc@5 91.954 loss 1.260 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 73.69% Epoch: [276] [ 0/312] eta: 1:02:19 lr: 0.000073 min_lr: 0.000073 loss: 3.5127 (3.5127) weight_decay: 0.0500 (0.0500) time: 11.9851 data: 8.1573 max mem: 21002 Epoch: [276] [ 10/312] eta: 0:08:03 lr: 0.000073 min_lr: 0.000073 loss: 3.6729 (3.5423) weight_decay: 0.0500 (0.0500) time: 1.6000 data: 0.9964 max mem: 21002 Epoch: [276] [ 20/312] eta: 0:05:17 lr: 0.000073 min_lr: 0.000073 loss: 3.6076 (3.5259) weight_decay: 0.0500 (0.0500) time: 0.5411 data: 0.1405 max mem: 21002 Epoch: [276] [ 30/312] eta: 0:03:54 lr: 0.000072 min_lr: 0.000072 loss: 3.6076 (3.5211) weight_decay: 0.0500 (0.0500) time: 0.4062 data: 0.0008 max mem: 21002 Epoch: [276] [ 40/312] eta: 0:03:25 lr: 0.000072 min_lr: 0.000072 loss: 3.7013 (3.5154) weight_decay: 0.0500 (0.0500) time: 0.4058 data: 0.0009 max mem: 21002 Epoch: [276] [ 50/312] eta: 0:03:18 lr: 0.000072 min_lr: 0.000072 loss: 3.5285 (3.4829) weight_decay: 0.0500 (0.0500) time: 0.6414 data: 0.0693 max mem: 21002 Epoch: [276] [ 60/312] eta: 0:02:51 lr: 0.000072 min_lr: 0.000072 loss: 3.4464 (3.4896) weight_decay: 0.0500 (0.0500) time: 0.5284 data: 0.0693 max mem: 21002 Epoch: [276] [ 70/312] eta: 0:02:43 lr: 0.000072 min_lr: 0.000072 loss: 3.6327 (3.5133) weight_decay: 0.0500 (0.0500) time: 0.4769 data: 0.0315 max mem: 21002 Epoch: [276] [ 80/312] eta: 0:02:35 lr: 0.000072 min_lr: 0.000072 loss: 3.6758 (3.5179) weight_decay: 0.0500 (0.0500) time: 0.6419 data: 0.0314 max mem: 21002 Epoch: [276] [ 90/312] eta: 0:02:20 lr: 0.000071 min_lr: 0.000071 loss: 3.5814 (3.5091) weight_decay: 0.0500 (0.0500) time: 0.4841 data: 0.0301 max mem: 21002 Epoch: [276] [100/312] eta: 0:02:14 lr: 0.000071 min_lr: 0.000071 loss: 3.5814 (3.5175) weight_decay: 0.0500 (0.0500) time: 0.4944 data: 0.0632 max mem: 21002 Epoch: [276] [110/312] eta: 0:02:02 lr: 0.000071 min_lr: 0.000071 loss: 3.7151 (3.5282) weight_decay: 0.0500 (0.0500) time: 0.4655 data: 0.0343 max mem: 21002 Epoch: [276] [120/312] eta: 0:01:57 lr: 0.000071 min_lr: 0.000071 loss: 3.8195 (3.5496) weight_decay: 0.0500 (0.0500) time: 0.4818 data: 0.0191 max mem: 21002 Epoch: [276] [130/312] eta: 0:01:52 lr: 0.000071 min_lr: 0.000071 loss: 3.6086 (3.5479) weight_decay: 0.0500 (0.0500) time: 0.6998 data: 0.0485 max mem: 21002 Epoch: [276] [140/312] eta: 0:01:42 lr: 0.000070 min_lr: 0.000070 loss: 3.5655 (3.5465) weight_decay: 0.0500 (0.0500) time: 0.5061 data: 0.0315 max mem: 21002 Epoch: [276] [150/312] eta: 0:01:37 lr: 0.000070 min_lr: 0.000070 loss: 3.6132 (3.5562) weight_decay: 0.0500 (0.0500) time: 0.5050 data: 0.0177 max mem: 21002 Epoch: [276] [160/312] eta: 0:01:32 lr: 0.000070 min_lr: 0.000070 loss: 3.6443 (3.5560) weight_decay: 0.0500 (0.0500) time: 0.6803 data: 0.0170 max mem: 21002 Epoch: [276] [170/312] eta: 0:01:24 lr: 0.000070 min_lr: 0.000070 loss: 3.7344 (3.5700) weight_decay: 0.0500 (0.0500) time: 0.5013 data: 0.0370 max mem: 21002 Epoch: [276] [180/312] eta: 0:01:18 lr: 0.000070 min_lr: 0.000070 loss: 3.7738 (3.5598) weight_decay: 0.0500 (0.0500) time: 0.5141 data: 0.0668 max mem: 21002 Epoch: [276] [190/312] eta: 0:01:10 lr: 0.000069 min_lr: 0.000069 loss: 3.7485 (3.5699) weight_decay: 0.0500 (0.0500) time: 0.4787 data: 0.0306 max mem: 21002 Epoch: [276] [200/312] eta: 0:01:05 lr: 0.000069 min_lr: 0.000069 loss: 3.7869 (3.5703) weight_decay: 0.0500 (0.0500) time: 0.4807 data: 0.0292 max mem: 21002 Epoch: [276] [210/312] eta: 0:01:00 lr: 0.000069 min_lr: 0.000069 loss: 3.7818 (3.5754) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.0624 max mem: 21002 Epoch: [276] [220/312] eta: 0:00:52 lr: 0.000069 min_lr: 0.000069 loss: 3.7789 (3.5867) weight_decay: 0.0500 (0.0500) time: 0.4884 data: 0.0339 max mem: 21002 Epoch: [276] [230/312] eta: 0:00:47 lr: 0.000069 min_lr: 0.000069 loss: 3.9038 (3.6042) weight_decay: 0.0500 (0.0500) time: 0.4940 data: 0.0455 max mem: 21002 Epoch: [276] [240/312] eta: 0:00:41 lr: 0.000069 min_lr: 0.000069 loss: 3.8317 (3.5990) weight_decay: 0.0500 (0.0500) time: 0.6661 data: 0.0504 max mem: 21002 Epoch: [276] [250/312] eta: 0:00:35 lr: 0.000068 min_lr: 0.000068 loss: 3.6445 (3.5971) weight_decay: 0.0500 (0.0500) time: 0.5159 data: 0.0312 max mem: 21002 Epoch: [276] [260/312] eta: 0:00:30 lr: 0.000068 min_lr: 0.000068 loss: 3.6800 (3.5963) weight_decay: 0.0500 (0.0500) time: 0.5381 data: 0.0368 max mem: 21002 Epoch: [276] [270/312] eta: 0:00:23 lr: 0.000068 min_lr: 0.000068 loss: 3.6800 (3.6046) weight_decay: 0.0500 (0.0500) time: 0.4881 data: 0.0159 max mem: 21002 Epoch: [276] [280/312] eta: 0:00:18 lr: 0.000068 min_lr: 0.000068 loss: 3.5942 (3.5975) weight_decay: 0.0500 (0.0500) time: 0.5581 data: 0.0408 max mem: 21002 Epoch: [276] [290/312] eta: 0:00:12 lr: 0.000068 min_lr: 0.000068 loss: 3.4945 (3.6002) weight_decay: 0.0500 (0.0500) time: 0.6087 data: 0.0358 max mem: 21002 Epoch: [276] [300/312] eta: 0:00:06 lr: 0.000067 min_lr: 0.000067 loss: 3.7309 (3.6029) weight_decay: 0.0500 (0.0500) time: 0.3390 data: 0.0002 max mem: 21002 Epoch: [276] [310/312] eta: 0:00:01 lr: 0.000067 min_lr: 0.000067 loss: 3.8712 (3.6080) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [276] [311/312] eta: 0:00:00 lr: 0.000067 min_lr: 0.000067 loss: 3.8735 (3.6092) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [276] Total time: 0:02:53 (0.5562 s / it) Averaged stats: lr: 0.000067 min_lr: 0.000067 loss: 3.8735 (3.5801) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:11 loss: 1.1077 (1.1077) acc1: 81.3802 (81.3802) acc5: 96.0938 (96.0938) time: 7.9802 data: 7.8606 max mem: 21002 Test: [8/9] eta: 0:00:00 loss: 1.4208 (1.3437) acc1: 72.7865 (73.7440) acc5: 91.4062 (91.9680) time: 0.9715 data: 0.8735 max mem: 21002 Test: Total time: 0:00:08 (0.9837 s / it) * Acc@1 73.644 Acc@5 91.958 loss 1.350 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 73.69% Epoch: [277] [ 0/312] eta: 1:10:11 lr: 0.000067 min_lr: 0.000067 loss: 4.2867 (4.2867) weight_decay: 0.0500 (0.0500) time: 13.4978 data: 11.6528 max mem: 21002 Epoch: [277] [ 10/312] eta: 0:09:04 lr: 0.000067 min_lr: 0.000067 loss: 3.6758 (3.5229) weight_decay: 0.0500 (0.0500) time: 1.8025 data: 1.0598 max mem: 21002 Epoch: [277] [ 20/312] eta: 0:05:26 lr: 0.000067 min_lr: 0.000067 loss: 3.6758 (3.6311) weight_decay: 0.0500 (0.0500) time: 0.4991 data: 0.0399 max mem: 21002 Epoch: [277] [ 30/312] eta: 0:03:59 lr: 0.000067 min_lr: 0.000067 loss: 3.8202 (3.7040) weight_decay: 0.0500 (0.0500) time: 0.3275 data: 0.0399 max mem: 21002 Epoch: [277] [ 40/312] eta: 0:03:13 lr: 0.000066 min_lr: 0.000066 loss: 3.7169 (3.6393) weight_decay: 0.0500 (0.0500) time: 0.2872 data: 0.0006 max mem: 21002 Epoch: [277] [ 50/312] eta: 0:02:56 lr: 0.000066 min_lr: 0.000066 loss: 3.7169 (3.6411) weight_decay: 0.0500 (0.0500) time: 0.4022 data: 0.0133 max mem: 21002 Epoch: [277] [ 60/312] eta: 0:02:40 lr: 0.000066 min_lr: 0.000066 loss: 3.6208 (3.5903) weight_decay: 0.0500 (0.0500) time: 0.4865 data: 0.0837 max mem: 21002 Epoch: [277] [ 70/312] eta: 0:02:33 lr: 0.000066 min_lr: 0.000066 loss: 3.4157 (3.5458) weight_decay: 0.0500 (0.0500) time: 0.5378 data: 0.1067 max mem: 21002 Epoch: [277] [ 80/312] eta: 0:02:22 lr: 0.000066 min_lr: 0.000066 loss: 3.4157 (3.5421) weight_decay: 0.0500 (0.0500) time: 0.5436 data: 0.1025 max mem: 21002 Epoch: [277] [ 90/312] eta: 0:02:16 lr: 0.000066 min_lr: 0.000066 loss: 3.6179 (3.5566) weight_decay: 0.0500 (0.0500) time: 0.5406 data: 0.1103 max mem: 21002 Epoch: [277] [100/312] eta: 0:02:07 lr: 0.000065 min_lr: 0.000065 loss: 3.8180 (3.5675) weight_decay: 0.0500 (0.0500) time: 0.5535 data: 0.0633 max mem: 21002 Epoch: [277] [110/312] eta: 0:02:01 lr: 0.000065 min_lr: 0.000065 loss: 3.7205 (3.5596) weight_decay: 0.0500 (0.0500) time: 0.5285 data: 0.1079 max mem: 21002 Epoch: [277] [120/312] eta: 0:01:53 lr: 0.000065 min_lr: 0.000065 loss: 3.6382 (3.5556) weight_decay: 0.0500 (0.0500) time: 0.5398 data: 0.1186 max mem: 21002 Epoch: [277] [130/312] eta: 0:01:47 lr: 0.000065 min_lr: 0.000065 loss: 3.6929 (3.5583) weight_decay: 0.0500 (0.0500) time: 0.5576 data: 0.0734 max mem: 21002 Epoch: [277] [140/312] eta: 0:01:42 lr: 0.000065 min_lr: 0.000065 loss: 3.6227 (3.5461) weight_decay: 0.0500 (0.0500) time: 0.6050 data: 0.0950 max mem: 21002 Epoch: [277] [150/312] eta: 0:01:35 lr: 0.000064 min_lr: 0.000064 loss: 3.5948 (3.5576) weight_decay: 0.0500 (0.0500) time: 0.5674 data: 0.0680 max mem: 21002 Epoch: [277] [160/312] eta: 0:01:28 lr: 0.000064 min_lr: 0.000064 loss: 3.8819 (3.5738) weight_decay: 0.0500 (0.0500) time: 0.4940 data: 0.0703 max mem: 21002 Epoch: [277] [170/312] eta: 0:01:23 lr: 0.000064 min_lr: 0.000064 loss: 3.8513 (3.5870) weight_decay: 0.0500 (0.0500) time: 0.5748 data: 0.1080 max mem: 21002 Epoch: [277] [180/312] eta: 0:01:16 lr: 0.000064 min_lr: 0.000064 loss: 3.6584 (3.5812) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0548 max mem: 21002 Epoch: [277] [190/312] eta: 0:01:10 lr: 0.000064 min_lr: 0.000064 loss: 3.5462 (3.5660) weight_decay: 0.0500 (0.0500) time: 0.5031 data: 0.0514 max mem: 21002 Epoch: [277] [200/312] eta: 0:01:04 lr: 0.000064 min_lr: 0.000064 loss: 3.3250 (3.5613) weight_decay: 0.0500 (0.0500) time: 0.5130 data: 0.0510 max mem: 21002 Epoch: [277] [210/312] eta: 0:00:59 lr: 0.000063 min_lr: 0.000063 loss: 3.5522 (3.5562) weight_decay: 0.0500 (0.0500) time: 0.6048 data: 0.0667 max mem: 21002 Epoch: [277] [220/312] eta: 0:00:52 lr: 0.000063 min_lr: 0.000063 loss: 3.5804 (3.5452) weight_decay: 0.0500 (0.0500) time: 0.5715 data: 0.0877 max mem: 21002 Epoch: [277] [230/312] eta: 0:00:47 lr: 0.000063 min_lr: 0.000063 loss: 3.5804 (3.5478) weight_decay: 0.0500 (0.0500) time: 0.5360 data: 0.0714 max mem: 21002 Epoch: [277] [240/312] eta: 0:00:41 lr: 0.000063 min_lr: 0.000063 loss: 3.4148 (3.5404) weight_decay: 0.0500 (0.0500) time: 0.5146 data: 0.0816 max mem: 21002 Epoch: [277] [250/312] eta: 0:00:35 lr: 0.000063 min_lr: 0.000063 loss: 3.3222 (3.5326) weight_decay: 0.0500 (0.0500) time: 0.5386 data: 0.1000 max mem: 21002 Epoch: [277] [260/312] eta: 0:00:29 lr: 0.000063 min_lr: 0.000063 loss: 3.7514 (3.5425) weight_decay: 0.0500 (0.0500) time: 0.6422 data: 0.1059 max mem: 21002 Epoch: [277] [270/312] eta: 0:00:23 lr: 0.000062 min_lr: 0.000062 loss: 3.7514 (3.5385) weight_decay: 0.0500 (0.0500) time: 0.5043 data: 0.0931 max mem: 21002 Epoch: [277] [280/312] eta: 0:00:18 lr: 0.000062 min_lr: 0.000062 loss: 3.6555 (3.5498) weight_decay: 0.0500 (0.0500) time: 0.5412 data: 0.1179 max mem: 21002 Epoch: [277] [290/312] eta: 0:00:12 lr: 0.000062 min_lr: 0.000062 loss: 3.8642 (3.5474) weight_decay: 0.0500 (0.0500) time: 0.5969 data: 0.0918 max mem: 21002 Epoch: [277] [300/312] eta: 0:00:06 lr: 0.000062 min_lr: 0.000062 loss: 3.6350 (3.5470) weight_decay: 0.0500 (0.0500) time: 0.4128 data: 0.0492 max mem: 21002 Epoch: [277] [310/312] eta: 0:00:01 lr: 0.000062 min_lr: 0.000062 loss: 3.6350 (3.5427) weight_decay: 0.0500 (0.0500) time: 0.3029 data: 0.0199 max mem: 21002 Epoch: [277] [311/312] eta: 0:00:00 lr: 0.000062 min_lr: 0.000062 loss: 3.6287 (3.5414) weight_decay: 0.0500 (0.0500) time: 0.3028 data: 0.0199 max mem: 21002 Epoch: [277] Total time: 0:02:53 (0.5571 s / it) Averaged stats: lr: 0.000062 min_lr: 0.000062 loss: 3.6287 (3.5615) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:08 loss: 1.0385 (1.0385) acc1: 80.4688 (80.4688) acc5: 95.8333 (95.8333) time: 7.6162 data: 7.4969 max mem: 21002 Test: [8/9] eta: 0:00:00 loss: 1.3422 (1.2582) acc1: 73.5677 (73.9680) acc5: 91.6667 (92.0800) time: 0.9715 data: 0.8749 max mem: 21002 Test: Total time: 0:00:08 (0.9821 s / it) * Acc@1 73.650 Acc@5 91.988 loss 1.262 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.69% Epoch: [278] [ 0/312] eta: 1:01:13 lr: 0.000062 min_lr: 0.000062 loss: 3.9545 (3.9545) weight_decay: 0.0500 (0.0500) time: 11.7741 data: 9.8301 max mem: 21002 Epoch: [278] [ 10/312] eta: 0:08:09 lr: 0.000061 min_lr: 0.000061 loss: 3.7312 (3.6209) weight_decay: 0.0500 (0.0500) time: 1.6193 data: 1.0363 max mem: 21002 Epoch: [278] [ 20/312] eta: 0:05:20 lr: 0.000061 min_lr: 0.000061 loss: 3.6693 (3.6093) weight_decay: 0.0500 (0.0500) time: 0.5644 data: 0.0836 max mem: 21002 Epoch: [278] [ 30/312] eta: 0:03:56 lr: 0.000061 min_lr: 0.000061 loss: 3.5884 (3.5880) weight_decay: 0.0500 (0.0500) time: 0.4109 data: 0.0055 max mem: 21002 Epoch: [278] [ 40/312] eta: 0:03:31 lr: 0.000061 min_lr: 0.000061 loss: 3.5871 (3.5756) weight_decay: 0.0500 (0.0500) time: 0.4429 data: 0.0009 max mem: 21002 Epoch: [278] [ 50/312] eta: 0:03:20 lr: 0.000061 min_lr: 0.000061 loss: 3.6963 (3.6180) weight_decay: 0.0500 (0.0500) time: 0.6470 data: 0.0010 max mem: 21002 Epoch: [278] [ 60/312] eta: 0:02:52 lr: 0.000061 min_lr: 0.000061 loss: 3.6301 (3.5804) weight_decay: 0.0500 (0.0500) time: 0.4967 data: 0.0010 max mem: 21002 Epoch: [278] [ 70/312] eta: 0:02:46 lr: 0.000060 min_lr: 0.000060 loss: 3.7524 (3.6172) weight_decay: 0.0500 (0.0500) time: 0.4912 data: 0.0011 max mem: 21002 Epoch: [278] [ 80/312] eta: 0:02:36 lr: 0.000060 min_lr: 0.000060 loss: 3.7855 (3.6074) weight_decay: 0.0500 (0.0500) time: 0.6397 data: 0.0120 max mem: 21002 Epoch: [278] [ 90/312] eta: 0:02:24 lr: 0.000060 min_lr: 0.000060 loss: 3.4855 (3.5902) weight_decay: 0.0500 (0.0500) time: 0.5229 data: 0.0119 max mem: 21002 Epoch: [278] [100/312] eta: 0:02:16 lr: 0.000060 min_lr: 0.000060 loss: 3.5411 (3.5890) weight_decay: 0.0500 (0.0500) time: 0.5248 data: 0.0270 max mem: 21002 Epoch: [278] [110/312] eta: 0:02:03 lr: 0.000060 min_lr: 0.000060 loss: 3.5411 (3.5744) weight_decay: 0.0500 (0.0500) time: 0.4431 data: 0.0272 max mem: 21002 Epoch: [278] [120/312] eta: 0:01:58 lr: 0.000060 min_lr: 0.000060 loss: 3.5210 (3.5654) weight_decay: 0.0500 (0.0500) time: 0.4758 data: 0.0377 max mem: 21002 Epoch: [278] [130/312] eta: 0:01:54 lr: 0.000059 min_lr: 0.000059 loss: 3.6597 (3.5801) weight_decay: 0.0500 (0.0500) time: 0.7012 data: 0.0518 max mem: 21002 Epoch: [278] [140/312] eta: 0:01:43 lr: 0.000059 min_lr: 0.000059 loss: 3.8170 (3.5815) weight_decay: 0.0500 (0.0500) time: 0.5238 data: 0.0196 max mem: 21002 Epoch: [278] [150/312] eta: 0:01:38 lr: 0.000059 min_lr: 0.000059 loss: 3.5716 (3.5803) weight_decay: 0.0500 (0.0500) time: 0.5032 data: 0.0203 max mem: 21002 Epoch: [278] [160/312] eta: 0:01:32 lr: 0.000059 min_lr: 0.000059 loss: 3.5200 (3.5733) weight_decay: 0.0500 (0.0500) time: 0.6208 data: 0.0673 max mem: 21002 Epoch: [278] [170/312] eta: 0:01:23 lr: 0.000059 min_lr: 0.000059 loss: 3.4876 (3.5768) weight_decay: 0.0500 (0.0500) time: 0.4165 data: 0.0523 max mem: 21002 Epoch: [278] [180/312] eta: 0:01:19 lr: 0.000058 min_lr: 0.000058 loss: 3.4876 (3.5644) weight_decay: 0.0500 (0.0500) time: 0.5681 data: 0.0196 max mem: 21002 Epoch: [278] [190/312] eta: 0:01:11 lr: 0.000058 min_lr: 0.000058 loss: 3.7357 (3.5704) weight_decay: 0.0500 (0.0500) time: 0.5657 data: 0.0194 max mem: 21002 Epoch: [278] [200/312] eta: 0:01:05 lr: 0.000058 min_lr: 0.000058 loss: 3.7283 (3.5629) weight_decay: 0.0500 (0.0500) time: 0.4114 data: 0.0399 max mem: 21002 Epoch: [278] [210/312] eta: 0:01:00 lr: 0.000058 min_lr: 0.000058 loss: 3.6571 (3.5596) weight_decay: 0.0500 (0.0500) time: 0.6524 data: 0.0882 max mem: 21002 Epoch: [278] [220/312] eta: 0:00:53 lr: 0.000058 min_lr: 0.000058 loss: 3.6571 (3.5642) weight_decay: 0.0500 (0.0500) time: 0.5315 data: 0.0491 max mem: 21002 Epoch: [278] [230/312] eta: 0:00:47 lr: 0.000058 min_lr: 0.000058 loss: 3.6713 (3.5554) weight_decay: 0.0500 (0.0500) time: 0.4695 data: 0.0197 max mem: 21002 Epoch: [278] [240/312] eta: 0:00:42 lr: 0.000057 min_lr: 0.000057 loss: 3.6834 (3.5653) weight_decay: 0.0500 (0.0500) time: 0.6593 data: 0.1038 max mem: 21002 Epoch: [278] [250/312] eta: 0:00:35 lr: 0.000057 min_lr: 0.000057 loss: 3.5519 (3.5542) weight_decay: 0.0500 (0.0500) time: 0.4776 data: 0.0849 max mem: 21002 Epoch: [278] [260/312] eta: 0:00:30 lr: 0.000057 min_lr: 0.000057 loss: 3.6210 (3.5703) weight_decay: 0.0500 (0.0500) time: 0.5220 data: 0.0391 max mem: 21002 Epoch: [278] [270/312] eta: 0:00:23 lr: 0.000057 min_lr: 0.000057 loss: 3.9363 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.5373 data: 0.0391 max mem: 21002 Epoch: [278] [280/312] eta: 0:00:18 lr: 0.000057 min_lr: 0.000057 loss: 3.8221 (3.5906) weight_decay: 0.0500 (0.0500) time: 0.5185 data: 0.0473 max mem: 21002 Epoch: [278] [290/312] eta: 0:00:12 lr: 0.000057 min_lr: 0.000057 loss: 3.7993 (3.5881) weight_decay: 0.0500 (0.0500) time: 0.5987 data: 0.0511 max mem: 21002 Epoch: [278] [300/312] eta: 0:00:06 lr: 0.000056 min_lr: 0.000056 loss: 3.6931 (3.5913) weight_decay: 0.0500 (0.0500) time: 0.3780 data: 0.0042 max mem: 21002 Epoch: [278] [310/312] eta: 0:00:01 lr: 0.000056 min_lr: 0.000056 loss: 3.6888 (3.5922) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [278] [311/312] eta: 0:00:00 lr: 0.000056 min_lr: 0.000056 loss: 3.5470 (3.5920) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [278] Total time: 0:02:53 (0.5563 s / it) Averaged stats: lr: 0.000056 min_lr: 0.000056 loss: 3.5470 (3.5767) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.0507 (1.0507) acc1: 81.1198 (81.1198) acc5: 95.7031 (95.7031) time: 8.4250 data: 8.3063 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3810 (1.3015) acc1: 73.1771 (73.6960) acc5: 91.6667 (91.9360) time: 1.0208 data: 0.9230 max mem: 21002 Test: Total time: 0:00:09 (1.0317 s / it) * Acc@1 73.650 Acc@5 91.994 loss 1.308 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.69% Epoch: [279] [ 0/312] eta: 1:04:58 lr: 0.000056 min_lr: 0.000056 loss: 3.9999 (3.9999) weight_decay: 0.0500 (0.0500) time: 12.4946 data: 10.5417 max mem: 21002 Epoch: [279] [ 10/312] eta: 0:08:30 lr: 0.000056 min_lr: 0.000056 loss: 3.7510 (3.8174) weight_decay: 0.0500 (0.0500) time: 1.6888 data: 0.9973 max mem: 21002 Epoch: [279] [ 20/312] eta: 0:05:17 lr: 0.000056 min_lr: 0.000056 loss: 3.7685 (3.8177) weight_decay: 0.0500 (0.0500) time: 0.5178 data: 0.0218 max mem: 21002 Epoch: [279] [ 30/312] eta: 0:03:54 lr: 0.000056 min_lr: 0.000056 loss: 3.7913 (3.7010) weight_decay: 0.0500 (0.0500) time: 0.3600 data: 0.0010 max mem: 21002 Epoch: [279] [ 40/312] eta: 0:03:20 lr: 0.000056 min_lr: 0.000056 loss: 3.4966 (3.6560) weight_decay: 0.0500 (0.0500) time: 0.3700 data: 0.0010 max mem: 21002 Epoch: [279] [ 50/312] eta: 0:03:09 lr: 0.000055 min_lr: 0.000055 loss: 3.5381 (3.6431) weight_decay: 0.0500 (0.0500) time: 0.5507 data: 0.0010 max mem: 21002 Epoch: [279] [ 60/312] eta: 0:02:43 lr: 0.000055 min_lr: 0.000055 loss: 3.5447 (3.5930) weight_decay: 0.0500 (0.0500) time: 0.4710 data: 0.0010 max mem: 21002 Epoch: [279] [ 70/312] eta: 0:02:39 lr: 0.000055 min_lr: 0.000055 loss: 3.7313 (3.6165) weight_decay: 0.0500 (0.0500) time: 0.4976 data: 0.0010 max mem: 21002 Epoch: [279] [ 80/312] eta: 0:02:31 lr: 0.000055 min_lr: 0.000055 loss: 3.6953 (3.5947) weight_decay: 0.0500 (0.0500) time: 0.6637 data: 0.0009 max mem: 21002 Epoch: [279] [ 90/312] eta: 0:02:19 lr: 0.000055 min_lr: 0.000055 loss: 3.4984 (3.5927) weight_decay: 0.0500 (0.0500) time: 0.5169 data: 0.0410 max mem: 21002 Epoch: [279] [100/312] eta: 0:02:13 lr: 0.000055 min_lr: 0.000055 loss: 3.6821 (3.5934) weight_decay: 0.0500 (0.0500) time: 0.5395 data: 0.0666 max mem: 21002 Epoch: [279] [110/312] eta: 0:02:02 lr: 0.000054 min_lr: 0.000054 loss: 3.8279 (3.5926) weight_decay: 0.0500 (0.0500) time: 0.5045 data: 0.0553 max mem: 21002 Epoch: [279] [120/312] eta: 0:01:57 lr: 0.000054 min_lr: 0.000054 loss: 3.4009 (3.5651) weight_decay: 0.0500 (0.0500) time: 0.5030 data: 0.0886 max mem: 21002 Epoch: [279] [130/312] eta: 0:01:50 lr: 0.000054 min_lr: 0.000054 loss: 3.4009 (3.5647) weight_decay: 0.0500 (0.0500) time: 0.6357 data: 0.0841 max mem: 21002 Epoch: [279] [140/312] eta: 0:01:41 lr: 0.000054 min_lr: 0.000054 loss: 3.7455 (3.5697) weight_decay: 0.0500 (0.0500) time: 0.4567 data: 0.0371 max mem: 21002 Epoch: [279] [150/312] eta: 0:01:36 lr: 0.000054 min_lr: 0.000054 loss: 3.8068 (3.5843) weight_decay: 0.0500 (0.0500) time: 0.5238 data: 0.0649 max mem: 21002 Epoch: [279] [160/312] eta: 0:01:29 lr: 0.000054 min_lr: 0.000054 loss: 3.7695 (3.5815) weight_decay: 0.0500 (0.0500) time: 0.6061 data: 0.1066 max mem: 21002 Epoch: [279] [170/312] eta: 0:01:23 lr: 0.000053 min_lr: 0.000053 loss: 3.7957 (3.6001) weight_decay: 0.0500 (0.0500) time: 0.5095 data: 0.1011 max mem: 21002 Epoch: [279] [180/312] eta: 0:01:18 lr: 0.000053 min_lr: 0.000053 loss: 3.8022 (3.6079) weight_decay: 0.0500 (0.0500) time: 0.6049 data: 0.0743 max mem: 21002 Epoch: [279] [190/312] eta: 0:01:10 lr: 0.000053 min_lr: 0.000053 loss: 3.6572 (3.6015) weight_decay: 0.0500 (0.0500) time: 0.5097 data: 0.0647 max mem: 21002 Epoch: [279] [200/312] eta: 0:01:05 lr: 0.000053 min_lr: 0.000053 loss: 3.3351 (3.5945) weight_decay: 0.0500 (0.0500) time: 0.5085 data: 0.0849 max mem: 21002 Epoch: [279] [210/312] eta: 0:00:59 lr: 0.000053 min_lr: 0.000053 loss: 3.5092 (3.5890) weight_decay: 0.0500 (0.0500) time: 0.6502 data: 0.1034 max mem: 21002 Epoch: [279] [220/312] eta: 0:00:53 lr: 0.000053 min_lr: 0.000053 loss: 3.5366 (3.5819) weight_decay: 0.0500 (0.0500) time: 0.5111 data: 0.1043 max mem: 21002 Epoch: [279] [230/312] eta: 0:00:47 lr: 0.000052 min_lr: 0.000052 loss: 3.6181 (3.5829) weight_decay: 0.0500 (0.0500) time: 0.4998 data: 0.0853 max mem: 21002 Epoch: [279] [240/312] eta: 0:00:41 lr: 0.000052 min_lr: 0.000052 loss: 3.5554 (3.5813) weight_decay: 0.0500 (0.0500) time: 0.5486 data: 0.0895 max mem: 21002 Epoch: [279] [250/312] eta: 0:00:35 lr: 0.000052 min_lr: 0.000052 loss: 3.6110 (3.5893) weight_decay: 0.0500 (0.0500) time: 0.5002 data: 0.0884 max mem: 21002 Epoch: [279] [260/312] eta: 0:00:29 lr: 0.000052 min_lr: 0.000052 loss: 3.6385 (3.5870) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0500 max mem: 21002 Epoch: [279] [270/312] eta: 0:00:23 lr: 0.000052 min_lr: 0.000052 loss: 3.6336 (3.5801) weight_decay: 0.0500 (0.0500) time: 0.4976 data: 0.0403 max mem: 21002 Epoch: [279] [280/312] eta: 0:00:18 lr: 0.000052 min_lr: 0.000052 loss: 3.3719 (3.5734) weight_decay: 0.0500 (0.0500) time: 0.5204 data: 0.0827 max mem: 21002 Epoch: [279] [290/312] eta: 0:00:12 lr: 0.000051 min_lr: 0.000051 loss: 3.2787 (3.5647) weight_decay: 0.0500 (0.0500) time: 0.6064 data: 0.0942 max mem: 21002 Epoch: [279] [300/312] eta: 0:00:06 lr: 0.000051 min_lr: 0.000051 loss: 3.1839 (3.5566) weight_decay: 0.0500 (0.0500) time: 0.4059 data: 0.0500 max mem: 21002 Epoch: [279] [310/312] eta: 0:00:01 lr: 0.000051 min_lr: 0.000051 loss: 3.6543 (3.5686) weight_decay: 0.0500 (0.0500) time: 0.2890 data: 0.0124 max mem: 21002 Epoch: [279] [311/312] eta: 0:00:00 lr: 0.000051 min_lr: 0.000051 loss: 3.8015 (3.5696) weight_decay: 0.0500 (0.0500) time: 0.2889 data: 0.0124 max mem: 21002 Epoch: [279] Total time: 0:02:53 (0.5546 s / it) Averaged stats: lr: 0.000051 min_lr: 0.000051 loss: 3.8015 (3.5665) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:12 loss: 1.0866 (1.0866) acc1: 81.1198 (81.1198) acc5: 95.8333 (95.8333) time: 8.0546 data: 7.9362 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4335 (1.3363) acc1: 72.7865 (73.9040) acc5: 91.9271 (92.0480) time: 1.0004 data: 0.9028 max mem: 21002 Test: Total time: 0:00:09 (1.0100 s / it) * Acc@1 73.708 Acc@5 92.024 loss 1.341 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.71% Epoch: [280] [ 0/312] eta: 0:58:46 lr: 0.000051 min_lr: 0.000051 loss: 3.5431 (3.5431) weight_decay: 0.0500 (0.0500) time: 11.3030 data: 9.8386 max mem: 21002 Epoch: [280] [ 10/312] eta: 0:08:29 lr: 0.000051 min_lr: 0.000051 loss: 3.5431 (3.5943) weight_decay: 0.0500 (0.0500) time: 1.6887 data: 1.1095 max mem: 21002 Epoch: [280] [ 20/312] eta: 0:05:12 lr: 0.000051 min_lr: 0.000051 loss: 3.5238 (3.5312) weight_decay: 0.0500 (0.0500) time: 0.5583 data: 0.1226 max mem: 21002 Epoch: [280] [ 30/312] eta: 0:03:50 lr: 0.000051 min_lr: 0.000051 loss: 3.6158 (3.5810) weight_decay: 0.0500 (0.0500) time: 0.3402 data: 0.0046 max mem: 21002 Epoch: [280] [ 40/312] eta: 0:03:14 lr: 0.000050 min_lr: 0.000050 loss: 3.6697 (3.6151) weight_decay: 0.0500 (0.0500) time: 0.3401 data: 0.0013 max mem: 21002 Epoch: [280] [ 50/312] eta: 0:03:02 lr: 0.000050 min_lr: 0.000050 loss: 3.6726 (3.6370) weight_decay: 0.0500 (0.0500) time: 0.5093 data: 0.0285 max mem: 21002 Epoch: [280] [ 60/312] eta: 0:02:40 lr: 0.000050 min_lr: 0.000050 loss: 3.8673 (3.6483) weight_decay: 0.0500 (0.0500) time: 0.4765 data: 0.0449 max mem: 21002 Epoch: [280] [ 70/312] eta: 0:02:35 lr: 0.000050 min_lr: 0.000050 loss: 3.7028 (3.6332) weight_decay: 0.0500 (0.0500) time: 0.4999 data: 0.0746 max mem: 21002 Epoch: [280] [ 80/312] eta: 0:02:25 lr: 0.000050 min_lr: 0.000050 loss: 3.6632 (3.6351) weight_decay: 0.0500 (0.0500) time: 0.6009 data: 0.1029 max mem: 21002 Epoch: [280] [ 90/312] eta: 0:02:15 lr: 0.000050 min_lr: 0.000050 loss: 3.5751 (3.6196) weight_decay: 0.0500 (0.0500) time: 0.4893 data: 0.0767 max mem: 21002 Epoch: [280] [100/312] eta: 0:02:11 lr: 0.000050 min_lr: 0.000050 loss: 3.5016 (3.6085) weight_decay: 0.0500 (0.0500) time: 0.6032 data: 0.0827 max mem: 21002 Epoch: [280] [110/312] eta: 0:02:00 lr: 0.000049 min_lr: 0.000049 loss: 3.5016 (3.6038) weight_decay: 0.0500 (0.0500) time: 0.5406 data: 0.0666 max mem: 21002 Epoch: [280] [120/312] eta: 0:01:55 lr: 0.000049 min_lr: 0.000049 loss: 3.4848 (3.5943) weight_decay: 0.0500 (0.0500) time: 0.4877 data: 0.0374 max mem: 21002 Epoch: [280] [130/312] eta: 0:01:50 lr: 0.000049 min_lr: 0.000049 loss: 3.6746 (3.5955) weight_decay: 0.0500 (0.0500) time: 0.6550 data: 0.0872 max mem: 21002 Epoch: [280] [140/312] eta: 0:01:40 lr: 0.000049 min_lr: 0.000049 loss: 3.4459 (3.5666) weight_decay: 0.0500 (0.0500) time: 0.4811 data: 0.0711 max mem: 21002 Epoch: [280] [150/312] eta: 0:01:35 lr: 0.000049 min_lr: 0.000049 loss: 3.3882 (3.5711) weight_decay: 0.0500 (0.0500) time: 0.5078 data: 0.0416 max mem: 21002 Epoch: [280] [160/312] eta: 0:01:28 lr: 0.000049 min_lr: 0.000049 loss: 3.6710 (3.5762) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0504 max mem: 21002 Epoch: [280] [170/312] eta: 0:01:22 lr: 0.000048 min_lr: 0.000048 loss: 3.7123 (3.5898) weight_decay: 0.0500 (0.0500) time: 0.4781 data: 0.0541 max mem: 21002 Epoch: [280] [180/312] eta: 0:01:17 lr: 0.000048 min_lr: 0.000048 loss: 3.7571 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.5811 data: 0.0766 max mem: 21002 Epoch: [280] [190/312] eta: 0:01:09 lr: 0.000048 min_lr: 0.000048 loss: 3.7767 (3.5877) weight_decay: 0.0500 (0.0500) time: 0.5219 data: 0.0786 max mem: 21002 Epoch: [280] [200/312] eta: 0:01:04 lr: 0.000048 min_lr: 0.000048 loss: 3.8386 (3.5894) weight_decay: 0.0500 (0.0500) time: 0.5398 data: 0.0945 max mem: 21002 Epoch: [280] [210/312] eta: 0:00:59 lr: 0.000048 min_lr: 0.000048 loss: 3.7886 (3.5915) weight_decay: 0.0500 (0.0500) time: 0.6868 data: 0.1008 max mem: 21002 Epoch: [280] [220/312] eta: 0:00:52 lr: 0.000048 min_lr: 0.000048 loss: 3.5654 (3.5882) weight_decay: 0.0500 (0.0500) time: 0.4986 data: 0.0665 max mem: 21002 Epoch: [280] [230/312] eta: 0:00:47 lr: 0.000048 min_lr: 0.000048 loss: 3.5798 (3.5843) weight_decay: 0.0500 (0.0500) time: 0.4928 data: 0.0650 max mem: 21002 Epoch: [280] [240/312] eta: 0:00:41 lr: 0.000047 min_lr: 0.000047 loss: 3.6906 (3.5879) weight_decay: 0.0500 (0.0500) time: 0.6064 data: 0.0637 max mem: 21002 Epoch: [280] [250/312] eta: 0:00:35 lr: 0.000047 min_lr: 0.000047 loss: 3.5602 (3.5761) weight_decay: 0.0500 (0.0500) time: 0.5119 data: 0.1009 max mem: 21002 Epoch: [280] [260/312] eta: 0:00:29 lr: 0.000047 min_lr: 0.000047 loss: 3.3766 (3.5700) weight_decay: 0.0500 (0.0500) time: 0.5538 data: 0.0968 max mem: 21002 Epoch: [280] [270/312] eta: 0:00:23 lr: 0.000047 min_lr: 0.000047 loss: 3.6814 (3.5710) weight_decay: 0.0500 (0.0500) time: 0.5523 data: 0.0753 max mem: 21002 Epoch: [280] [280/312] eta: 0:00:18 lr: 0.000047 min_lr: 0.000047 loss: 3.7234 (3.5627) weight_decay: 0.0500 (0.0500) time: 0.4906 data: 0.0659 max mem: 21002 Epoch: [280] [290/312] eta: 0:00:12 lr: 0.000047 min_lr: 0.000047 loss: 3.5634 (3.5608) weight_decay: 0.0500 (0.0500) time: 0.5664 data: 0.0297 max mem: 21002 Epoch: [280] [300/312] eta: 0:00:06 lr: 0.000046 min_lr: 0.000046 loss: 3.6327 (3.5630) weight_decay: 0.0500 (0.0500) time: 0.4504 data: 0.0330 max mem: 21002 Epoch: [280] [310/312] eta: 0:00:01 lr: 0.000046 min_lr: 0.000046 loss: 3.8362 (3.5695) weight_decay: 0.0500 (0.0500) time: 0.2847 data: 0.0078 max mem: 21002 Epoch: [280] [311/312] eta: 0:00:00 lr: 0.000046 min_lr: 0.000046 loss: 3.8362 (3.5701) weight_decay: 0.0500 (0.0500) time: 0.2845 data: 0.0078 max mem: 21002 Epoch: [280] Total time: 0:02:52 (0.5539 s / it) Averaged stats: lr: 0.000046 min_lr: 0.000046 loss: 3.8362 (3.5750) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.1049 (1.1049) acc1: 80.9896 (80.9896) acc5: 95.8333 (95.8333) time: 8.6935 data: 8.5751 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4375 (1.3423) acc1: 72.6562 (73.8400) acc5: 91.7969 (91.8720) time: 1.0521 data: 0.9529 max mem: 21002 Test: Total time: 0:00:09 (1.0624 s / it) * Acc@1 73.586 Acc@5 91.964 loss 1.348 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 73.71% Epoch: [281] [ 0/312] eta: 0:54:04 lr: 0.000046 min_lr: 0.000046 loss: 4.3696 (4.3696) weight_decay: 0.0500 (0.0500) time: 10.3981 data: 9.3807 max mem: 21002 Epoch: [281] [ 10/312] eta: 0:08:01 lr: 0.000046 min_lr: 0.000046 loss: 3.9012 (3.6595) weight_decay: 0.0500 (0.0500) time: 1.5957 data: 1.0439 max mem: 21002 Epoch: [281] [ 20/312] eta: 0:05:20 lr: 0.000046 min_lr: 0.000046 loss: 3.9012 (3.7414) weight_decay: 0.0500 (0.0500) time: 0.6330 data: 0.1479 max mem: 21002 Epoch: [281] [ 30/312] eta: 0:03:56 lr: 0.000046 min_lr: 0.000046 loss: 3.7914 (3.6112) weight_decay: 0.0500 (0.0500) time: 0.4228 data: 0.0433 max mem: 21002 Epoch: [281] [ 40/312] eta: 0:03:17 lr: 0.000046 min_lr: 0.000046 loss: 3.7391 (3.6263) weight_decay: 0.0500 (0.0500) time: 0.3325 data: 0.0011 max mem: 21002 Epoch: [281] [ 50/312] eta: 0:03:06 lr: 0.000046 min_lr: 0.000046 loss: 3.7964 (3.6422) weight_decay: 0.0500 (0.0500) time: 0.5148 data: 0.1101 max mem: 21002 Epoch: [281] [ 60/312] eta: 0:02:42 lr: 0.000045 min_lr: 0.000045 loss: 3.6622 (3.6296) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.1229 max mem: 21002 Epoch: [281] [ 70/312] eta: 0:02:40 lr: 0.000045 min_lr: 0.000045 loss: 3.7213 (3.6213) weight_decay: 0.0500 (0.0500) time: 0.5368 data: 0.1322 max mem: 21002 Epoch: [281] [ 80/312] eta: 0:02:28 lr: 0.000045 min_lr: 0.000045 loss: 3.7074 (3.6008) weight_decay: 0.0500 (0.0500) time: 0.6270 data: 0.1191 max mem: 21002 Epoch: [281] [ 90/312] eta: 0:02:17 lr: 0.000045 min_lr: 0.000045 loss: 3.2943 (3.5803) weight_decay: 0.0500 (0.0500) time: 0.4729 data: 0.0769 max mem: 21002 Epoch: [281] [100/312] eta: 0:02:12 lr: 0.000045 min_lr: 0.000045 loss: 3.6911 (3.5907) weight_decay: 0.0500 (0.0500) time: 0.5668 data: 0.1399 max mem: 21002 Epoch: [281] [110/312] eta: 0:02:00 lr: 0.000045 min_lr: 0.000045 loss: 3.7382 (3.6082) weight_decay: 0.0500 (0.0500) time: 0.4851 data: 0.0681 max mem: 21002 Epoch: [281] [120/312] eta: 0:01:55 lr: 0.000044 min_lr: 0.000044 loss: 3.7382 (3.5994) weight_decay: 0.0500 (0.0500) time: 0.4798 data: 0.0767 max mem: 21002 Epoch: [281] [130/312] eta: 0:01:50 lr: 0.000044 min_lr: 0.000044 loss: 3.4197 (3.5798) weight_decay: 0.0500 (0.0500) time: 0.6820 data: 0.1408 max mem: 21002 Epoch: [281] [140/312] eta: 0:01:40 lr: 0.000044 min_lr: 0.000044 loss: 3.5313 (3.5753) weight_decay: 0.0500 (0.0500) time: 0.4908 data: 0.0692 max mem: 21002 Epoch: [281] [150/312] eta: 0:01:35 lr: 0.000044 min_lr: 0.000044 loss: 3.7480 (3.5964) weight_decay: 0.0500 (0.0500) time: 0.4667 data: 0.0676 max mem: 21002 Epoch: [281] [160/312] eta: 0:01:29 lr: 0.000044 min_lr: 0.000044 loss: 3.8970 (3.6032) weight_decay: 0.0500 (0.0500) time: 0.6068 data: 0.0683 max mem: 21002 Epoch: [281] [170/312] eta: 0:01:22 lr: 0.000044 min_lr: 0.000044 loss: 3.8171 (3.6060) weight_decay: 0.0500 (0.0500) time: 0.5243 data: 0.0591 max mem: 21002 Epoch: [281] [180/312] eta: 0:01:17 lr: 0.000044 min_lr: 0.000044 loss: 3.8281 (3.6164) weight_decay: 0.0500 (0.0500) time: 0.5679 data: 0.1164 max mem: 21002 Epoch: [281] [190/312] eta: 0:01:09 lr: 0.000043 min_lr: 0.000043 loss: 3.8281 (3.6168) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.0588 max mem: 21002 Epoch: [281] [200/312] eta: 0:01:04 lr: 0.000043 min_lr: 0.000043 loss: 3.8039 (3.6175) weight_decay: 0.0500 (0.0500) time: 0.4916 data: 0.0461 max mem: 21002 Epoch: [281] [210/312] eta: 0:00:59 lr: 0.000043 min_lr: 0.000043 loss: 3.7203 (3.6156) weight_decay: 0.0500 (0.0500) time: 0.6312 data: 0.1018 max mem: 21002 Epoch: [281] [220/312] eta: 0:00:52 lr: 0.000043 min_lr: 0.000043 loss: 3.7479 (3.6161) weight_decay: 0.0500 (0.0500) time: 0.5084 data: 0.0570 max mem: 21002 Epoch: [281] [230/312] eta: 0:00:47 lr: 0.000043 min_lr: 0.000043 loss: 3.6831 (3.6112) weight_decay: 0.0500 (0.0500) time: 0.5230 data: 0.0291 max mem: 21002 Epoch: [281] [240/312] eta: 0:00:41 lr: 0.000043 min_lr: 0.000043 loss: 3.6595 (3.6165) weight_decay: 0.0500 (0.0500) time: 0.5600 data: 0.0286 max mem: 21002 Epoch: [281] [250/312] eta: 0:00:35 lr: 0.000043 min_lr: 0.000043 loss: 3.5057 (3.6141) weight_decay: 0.0500 (0.0500) time: 0.5317 data: 0.0450 max mem: 21002 Epoch: [281] [260/312] eta: 0:00:29 lr: 0.000042 min_lr: 0.000042 loss: 3.5057 (3.6195) weight_decay: 0.0500 (0.0500) time: 0.5679 data: 0.0875 max mem: 21002 Epoch: [281] [270/312] eta: 0:00:23 lr: 0.000042 min_lr: 0.000042 loss: 3.5310 (3.6062) weight_decay: 0.0500 (0.0500) time: 0.5238 data: 0.0440 max mem: 21002 Epoch: [281] [280/312] eta: 0:00:18 lr: 0.000042 min_lr: 0.000042 loss: 3.2179 (3.5968) weight_decay: 0.0500 (0.0500) time: 0.5971 data: 0.0552 max mem: 21002 Epoch: [281] [290/312] eta: 0:00:12 lr: 0.000042 min_lr: 0.000042 loss: 3.3775 (3.5960) weight_decay: 0.0500 (0.0500) time: 0.5524 data: 0.0629 max mem: 21002 Epoch: [281] [300/312] eta: 0:00:06 lr: 0.000042 min_lr: 0.000042 loss: 3.6684 (3.6052) weight_decay: 0.0500 (0.0500) time: 0.3396 data: 0.0089 max mem: 21002 Epoch: [281] [310/312] eta: 0:00:01 lr: 0.000042 min_lr: 0.000042 loss: 3.8474 (3.6084) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [281] [311/312] eta: 0:00:00 lr: 0.000042 min_lr: 0.000042 loss: 3.8474 (3.6084) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [281] Total time: 0:02:51 (0.5503 s / it) Averaged stats: lr: 0.000042 min_lr: 0.000042 loss: 3.8474 (3.5969) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.1083 (1.1083) acc1: 81.2500 (81.2500) acc5: 95.8333 (95.8333) time: 8.8682 data: 8.7500 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4293 (1.3371) acc1: 73.0469 (73.7280) acc5: 91.1458 (91.9360) time: 1.0733 data: 0.9723 max mem: 21002 Test: Total time: 0:00:09 (1.1013 s / it) * Acc@1 73.702 Acc@5 92.020 loss 1.343 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.71% Epoch: [282] [ 0/312] eta: 1:03:27 lr: 0.000042 min_lr: 0.000042 loss: 3.5746 (3.5746) weight_decay: 0.0500 (0.0500) time: 12.2039 data: 10.4284 max mem: 21002 Epoch: [282] [ 10/312] eta: 0:07:55 lr: 0.000041 min_lr: 0.000041 loss: 3.6864 (3.5272) weight_decay: 0.0500 (0.0500) time: 1.5757 data: 1.0435 max mem: 21002 Epoch: [282] [ 20/312] eta: 0:05:32 lr: 0.000041 min_lr: 0.000041 loss: 3.6527 (3.4874) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.1475 max mem: 21002 Epoch: [282] [ 30/312] eta: 0:04:04 lr: 0.000041 min_lr: 0.000041 loss: 3.5054 (3.4740) weight_decay: 0.0500 (0.0500) time: 0.4752 data: 0.0954 max mem: 21002 Epoch: [282] [ 40/312] eta: 0:03:28 lr: 0.000041 min_lr: 0.000041 loss: 3.6432 (3.5322) weight_decay: 0.0500 (0.0500) time: 0.3778 data: 0.0291 max mem: 21002 Epoch: [282] [ 50/312] eta: 0:03:19 lr: 0.000041 min_lr: 0.000041 loss: 3.7923 (3.5429) weight_decay: 0.0500 (0.0500) time: 0.5976 data: 0.1237 max mem: 21002 Epoch: [282] [ 60/312] eta: 0:02:52 lr: 0.000041 min_lr: 0.000041 loss: 3.7080 (3.5681) weight_decay: 0.0500 (0.0500) time: 0.5087 data: 0.0954 max mem: 21002 Epoch: [282] [ 70/312] eta: 0:02:44 lr: 0.000041 min_lr: 0.000041 loss: 3.7080 (3.5634) weight_decay: 0.0500 (0.0500) time: 0.4766 data: 0.0941 max mem: 21002 Epoch: [282] [ 80/312] eta: 0:02:33 lr: 0.000040 min_lr: 0.000040 loss: 3.6864 (3.5730) weight_decay: 0.0500 (0.0500) time: 0.5965 data: 0.1163 max mem: 21002 Epoch: [282] [ 90/312] eta: 0:02:21 lr: 0.000040 min_lr: 0.000040 loss: 3.5638 (3.5600) weight_decay: 0.0500 (0.0500) time: 0.4781 data: 0.0961 max mem: 21002 Epoch: [282] [100/312] eta: 0:02:16 lr: 0.000040 min_lr: 0.000040 loss: 3.6003 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.5766 data: 0.1452 max mem: 21002 Epoch: [282] [110/312] eta: 0:02:04 lr: 0.000040 min_lr: 0.000040 loss: 3.6095 (3.5770) weight_decay: 0.0500 (0.0500) time: 0.5137 data: 0.0800 max mem: 21002 Epoch: [282] [120/312] eta: 0:01:58 lr: 0.000040 min_lr: 0.000040 loss: 3.5635 (3.5732) weight_decay: 0.0500 (0.0500) time: 0.4687 data: 0.0548 max mem: 21002 Epoch: [282] [130/312] eta: 0:01:53 lr: 0.000040 min_lr: 0.000040 loss: 3.6227 (3.5751) weight_decay: 0.0500 (0.0500) time: 0.6659 data: 0.1120 max mem: 21002 Epoch: [282] [140/312] eta: 0:01:43 lr: 0.000040 min_lr: 0.000040 loss: 3.6910 (3.5583) weight_decay: 0.0500 (0.0500) time: 0.5026 data: 0.0761 max mem: 21002 Epoch: [282] [150/312] eta: 0:01:38 lr: 0.000040 min_lr: 0.000040 loss: 3.0700 (3.5456) weight_decay: 0.0500 (0.0500) time: 0.5224 data: 0.0891 max mem: 21002 Epoch: [282] [160/312] eta: 0:01:32 lr: 0.000039 min_lr: 0.000039 loss: 3.5941 (3.5595) weight_decay: 0.0500 (0.0500) time: 0.6504 data: 0.0788 max mem: 21002 Epoch: [282] [170/312] eta: 0:01:23 lr: 0.000039 min_lr: 0.000039 loss: 3.8284 (3.5667) weight_decay: 0.0500 (0.0500) time: 0.4509 data: 0.0257 max mem: 21002 Epoch: [282] [180/312] eta: 0:01:19 lr: 0.000039 min_lr: 0.000039 loss: 3.7574 (3.5734) weight_decay: 0.0500 (0.0500) time: 0.5599 data: 0.1131 max mem: 21002 Epoch: [282] [190/312] eta: 0:01:11 lr: 0.000039 min_lr: 0.000039 loss: 3.6784 (3.5595) weight_decay: 0.0500 (0.0500) time: 0.5549 data: 0.1082 max mem: 21002 Epoch: [282] [200/312] eta: 0:01:05 lr: 0.000039 min_lr: 0.000039 loss: 3.6774 (3.5625) weight_decay: 0.0500 (0.0500) time: 0.4395 data: 0.0314 max mem: 21002 Epoch: [282] [210/312] eta: 0:01:00 lr: 0.000039 min_lr: 0.000039 loss: 3.7429 (3.5634) weight_decay: 0.0500 (0.0500) time: 0.6465 data: 0.0613 max mem: 21002 Epoch: [282] [220/312] eta: 0:00:53 lr: 0.000039 min_lr: 0.000039 loss: 3.7341 (3.5725) weight_decay: 0.0500 (0.0500) time: 0.5643 data: 0.0983 max mem: 21002 Epoch: [282] [230/312] eta: 0:00:48 lr: 0.000038 min_lr: 0.000038 loss: 3.7341 (3.5741) weight_decay: 0.0500 (0.0500) time: 0.5231 data: 0.0723 max mem: 21002 Epoch: [282] [240/312] eta: 0:00:42 lr: 0.000038 min_lr: 0.000038 loss: 3.5200 (3.5726) weight_decay: 0.0500 (0.0500) time: 0.6653 data: 0.0514 max mem: 21002 Epoch: [282] [250/312] eta: 0:00:35 lr: 0.000038 min_lr: 0.000038 loss: 3.3319 (3.5596) weight_decay: 0.0500 (0.0500) time: 0.5104 data: 0.0591 max mem: 21002 Epoch: [282] [260/312] eta: 0:00:30 lr: 0.000038 min_lr: 0.000038 loss: 3.0955 (3.5484) weight_decay: 0.0500 (0.0500) time: 0.5191 data: 0.0640 max mem: 21002 Epoch: [282] [270/312] eta: 0:00:24 lr: 0.000038 min_lr: 0.000038 loss: 3.0955 (3.5387) weight_decay: 0.0500 (0.0500) time: 0.5112 data: 0.0532 max mem: 21002 Epoch: [282] [280/312] eta: 0:00:18 lr: 0.000038 min_lr: 0.000038 loss: 3.1838 (3.5420) weight_decay: 0.0500 (0.0500) time: 0.4920 data: 0.0706 max mem: 21002 Epoch: [282] [290/312] eta: 0:00:12 lr: 0.000038 min_lr: 0.000038 loss: 3.7427 (3.5445) weight_decay: 0.0500 (0.0500) time: 0.5395 data: 0.0821 max mem: 21002 Epoch: [282] [300/312] eta: 0:00:06 lr: 0.000037 min_lr: 0.000037 loss: 3.5066 (3.5359) weight_decay: 0.0500 (0.0500) time: 0.3533 data: 0.0329 max mem: 21002 Epoch: [282] [310/312] eta: 0:00:01 lr: 0.000037 min_lr: 0.000037 loss: 3.3262 (3.5290) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [282] [311/312] eta: 0:00:00 lr: 0.000037 min_lr: 0.000037 loss: 3.3262 (3.5306) weight_decay: 0.0500 (0.0500) time: 0.2769 data: 0.0001 max mem: 21002 Epoch: [282] Total time: 0:02:53 (0.5566 s / it) Averaged stats: lr: 0.000037 min_lr: 0.000037 loss: 3.3262 (3.5708) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 0.9875 (0.9875) acc1: 81.3802 (81.3802) acc5: 95.9635 (95.9635) time: 8.4842 data: 8.3652 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3176 (1.2318) acc1: 73.5677 (74.0480) acc5: 91.6667 (92.0800) time: 1.0279 data: 0.9295 max mem: 21002 Test: Total time: 0:00:09 (1.0379 s / it) * Acc@1 73.986 Acc@5 92.112 loss 1.236 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 73.99% Epoch: [283] [ 0/312] eta: 1:01:51 lr: 0.000037 min_lr: 0.000037 loss: 3.8518 (3.8518) weight_decay: 0.0500 (0.0500) time: 11.8950 data: 11.5969 max mem: 21002 Epoch: [283] [ 10/312] eta: 0:08:32 lr: 0.000037 min_lr: 0.000037 loss: 3.6693 (3.5963) weight_decay: 0.0500 (0.0500) time: 1.6954 data: 1.0548 max mem: 21002 Epoch: [283] [ 20/312] eta: 0:05:03 lr: 0.000037 min_lr: 0.000037 loss: 3.6693 (3.5998) weight_decay: 0.0500 (0.0500) time: 0.4954 data: 0.0058 max mem: 21002 Epoch: [283] [ 30/312] eta: 0:03:45 lr: 0.000037 min_lr: 0.000037 loss: 3.6971 (3.5850) weight_decay: 0.0500 (0.0500) time: 0.3051 data: 0.0062 max mem: 21002 Epoch: [283] [ 40/312] eta: 0:03:03 lr: 0.000037 min_lr: 0.000037 loss: 3.6853 (3.5933) weight_decay: 0.0500 (0.0500) time: 0.2936 data: 0.0012 max mem: 21002 Epoch: [283] [ 50/312] eta: 0:02:53 lr: 0.000037 min_lr: 0.000037 loss: 3.7177 (3.6093) weight_decay: 0.0500 (0.0500) time: 0.4521 data: 0.1067 max mem: 21002 Epoch: [283] [ 60/312] eta: 0:02:35 lr: 0.000036 min_lr: 0.000036 loss: 3.6680 (3.5782) weight_decay: 0.0500 (0.0500) time: 0.4941 data: 0.1243 max mem: 21002 Epoch: [283] [ 70/312] eta: 0:02:29 lr: 0.000036 min_lr: 0.000036 loss: 3.5522 (3.5660) weight_decay: 0.0500 (0.0500) time: 0.5089 data: 0.1280 max mem: 21002 Epoch: [283] [ 80/312] eta: 0:02:25 lr: 0.000036 min_lr: 0.000036 loss: 3.7652 (3.6120) weight_decay: 0.0500 (0.0500) time: 0.6577 data: 0.1863 max mem: 21002 Epoch: [283] [ 90/312] eta: 0:02:14 lr: 0.000036 min_lr: 0.000036 loss: 3.8056 (3.6174) weight_decay: 0.0500 (0.0500) time: 0.5483 data: 0.1017 max mem: 21002 Epoch: [283] [100/312] eta: 0:02:08 lr: 0.000036 min_lr: 0.000036 loss: 3.7608 (3.5995) weight_decay: 0.0500 (0.0500) time: 0.5213 data: 0.0581 max mem: 21002 Epoch: [283] [110/312] eta: 0:01:57 lr: 0.000036 min_lr: 0.000036 loss: 3.1185 (3.5573) weight_decay: 0.0500 (0.0500) time: 0.4894 data: 0.0709 max mem: 21002 Epoch: [283] [120/312] eta: 0:01:52 lr: 0.000036 min_lr: 0.000036 loss: 3.2324 (3.5603) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.0724 max mem: 21002 Epoch: [283] [130/312] eta: 0:01:47 lr: 0.000036 min_lr: 0.000036 loss: 3.6979 (3.5494) weight_decay: 0.0500 (0.0500) time: 0.6292 data: 0.0887 max mem: 21002 Epoch: [283] [140/312] eta: 0:01:38 lr: 0.000035 min_lr: 0.000035 loss: 3.6942 (3.5568) weight_decay: 0.0500 (0.0500) time: 0.5097 data: 0.0919 max mem: 21002 Epoch: [283] [150/312] eta: 0:01:34 lr: 0.000035 min_lr: 0.000035 loss: 3.6942 (3.5649) weight_decay: 0.0500 (0.0500) time: 0.5271 data: 0.1007 max mem: 21002 Epoch: [283] [160/312] eta: 0:01:27 lr: 0.000035 min_lr: 0.000035 loss: 3.5958 (3.5585) weight_decay: 0.0500 (0.0500) time: 0.6016 data: 0.0954 max mem: 21002 Epoch: [283] [170/312] eta: 0:01:20 lr: 0.000035 min_lr: 0.000035 loss: 3.6187 (3.5585) weight_decay: 0.0500 (0.0500) time: 0.4702 data: 0.0740 max mem: 21002 Epoch: [283] [180/312] eta: 0:01:15 lr: 0.000035 min_lr: 0.000035 loss: 3.7579 (3.5692) weight_decay: 0.0500 (0.0500) time: 0.5489 data: 0.1152 max mem: 21002 Epoch: [283] [190/312] eta: 0:01:08 lr: 0.000035 min_lr: 0.000035 loss: 3.8786 (3.5779) weight_decay: 0.0500 (0.0500) time: 0.5126 data: 0.0739 max mem: 21002 Epoch: [283] [200/312] eta: 0:01:03 lr: 0.000035 min_lr: 0.000035 loss: 3.8148 (3.5845) weight_decay: 0.0500 (0.0500) time: 0.4929 data: 0.0489 max mem: 21002 Epoch: [283] [210/312] eta: 0:00:58 lr: 0.000034 min_lr: 0.000034 loss: 3.7090 (3.5839) weight_decay: 0.0500 (0.0500) time: 0.6479 data: 0.0871 max mem: 21002 Epoch: [283] [220/312] eta: 0:00:51 lr: 0.000034 min_lr: 0.000034 loss: 3.7090 (3.5929) weight_decay: 0.0500 (0.0500) time: 0.5151 data: 0.0493 max mem: 21002 Epoch: [283] [230/312] eta: 0:00:46 lr: 0.000034 min_lr: 0.000034 loss: 3.7768 (3.5984) weight_decay: 0.0500 (0.0500) time: 0.4981 data: 0.0743 max mem: 21002 Epoch: [283] [240/312] eta: 0:00:40 lr: 0.000034 min_lr: 0.000034 loss: 3.8320 (3.6168) weight_decay: 0.0500 (0.0500) time: 0.5967 data: 0.0649 max mem: 21002 Epoch: [283] [250/312] eta: 0:00:34 lr: 0.000034 min_lr: 0.000034 loss: 3.8935 (3.6261) weight_decay: 0.0500 (0.0500) time: 0.5389 data: 0.0697 max mem: 21002 Epoch: [283] [260/312] eta: 0:00:29 lr: 0.000034 min_lr: 0.000034 loss: 3.8741 (3.6229) weight_decay: 0.0500 (0.0500) time: 0.5480 data: 0.1165 max mem: 21002 Epoch: [283] [270/312] eta: 0:00:23 lr: 0.000034 min_lr: 0.000034 loss: 3.6531 (3.6211) weight_decay: 0.0500 (0.0500) time: 0.4950 data: 0.0539 max mem: 21002 Epoch: [283] [280/312] eta: 0:00:17 lr: 0.000034 min_lr: 0.000034 loss: 3.6531 (3.6226) weight_decay: 0.0500 (0.0500) time: 0.4917 data: 0.0419 max mem: 21002 Epoch: [283] [290/312] eta: 0:00:12 lr: 0.000033 min_lr: 0.000033 loss: 3.8954 (3.6335) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0567 max mem: 21002 Epoch: [283] [300/312] eta: 0:00:06 lr: 0.000033 min_lr: 0.000033 loss: 3.7894 (3.6348) weight_decay: 0.0500 (0.0500) time: 0.4811 data: 0.0209 max mem: 21002 Epoch: [283] [310/312] eta: 0:00:01 lr: 0.000033 min_lr: 0.000033 loss: 3.6824 (3.6303) weight_decay: 0.0500 (0.0500) time: 0.3225 data: 0.0001 max mem: 21002 Epoch: [283] [311/312] eta: 0:00:00 lr: 0.000033 min_lr: 0.000033 loss: 3.7527 (3.6308) weight_decay: 0.0500 (0.0500) time: 0.3222 data: 0.0001 max mem: 21002 Epoch: [283] Total time: 0:02:50 (0.5479 s / it) Averaged stats: lr: 0.000033 min_lr: 0.000033 loss: 3.7527 (3.5814) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:13 loss: 1.0585 (1.0585) acc1: 81.3802 (81.3802) acc5: 96.0938 (96.0938) time: 8.1594 data: 8.0431 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3832 (1.3002) acc1: 73.3073 (74.0800) acc5: 92.0573 (92.2560) time: 1.0085 data: 0.9095 max mem: 21002 Test: Total time: 0:00:09 (1.0193 s / it) * Acc@1 73.912 Acc@5 92.152 loss 1.305 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 73.99% Epoch: [284] [ 0/312] eta: 0:57:15 lr: 0.000033 min_lr: 0.000033 loss: 3.6591 (3.6591) weight_decay: 0.0500 (0.0500) time: 11.0098 data: 9.0775 max mem: 21002 Epoch: [284] [ 10/312] eta: 0:07:48 lr: 0.000033 min_lr: 0.000033 loss: 3.5778 (3.4120) weight_decay: 0.0500 (0.0500) time: 1.5524 data: 1.1155 max mem: 21002 Epoch: [284] [ 20/312] eta: 0:05:11 lr: 0.000033 min_lr: 0.000033 loss: 3.6195 (3.5190) weight_decay: 0.0500 (0.0500) time: 0.5699 data: 0.2374 max mem: 21002 Epoch: [284] [ 30/312] eta: 0:03:50 lr: 0.000033 min_lr: 0.000033 loss: 3.7316 (3.5806) weight_decay: 0.0500 (0.0500) time: 0.4116 data: 0.0791 max mem: 21002 Epoch: [284] [ 40/312] eta: 0:03:28 lr: 0.000033 min_lr: 0.000033 loss: 3.7316 (3.5757) weight_decay: 0.0500 (0.0500) time: 0.4518 data: 0.0778 max mem: 21002 Epoch: [284] [ 50/312] eta: 0:03:18 lr: 0.000032 min_lr: 0.000032 loss: 3.7756 (3.5908) weight_decay: 0.0500 (0.0500) time: 0.6665 data: 0.1894 max mem: 21002 Epoch: [284] [ 60/312] eta: 0:02:51 lr: 0.000032 min_lr: 0.000032 loss: 3.7756 (3.5791) weight_decay: 0.0500 (0.0500) time: 0.5043 data: 0.1134 max mem: 21002 Epoch: [284] [ 70/312] eta: 0:02:43 lr: 0.000032 min_lr: 0.000032 loss: 3.6401 (3.5496) weight_decay: 0.0500 (0.0500) time: 0.4660 data: 0.0699 max mem: 21002 Epoch: [284] [ 80/312] eta: 0:02:28 lr: 0.000032 min_lr: 0.000032 loss: 3.4464 (3.5397) weight_decay: 0.0500 (0.0500) time: 0.5149 data: 0.0697 max mem: 21002 Epoch: [284] [ 90/312] eta: 0:02:22 lr: 0.000032 min_lr: 0.000032 loss: 3.6905 (3.5541) weight_decay: 0.0500 (0.0500) time: 0.5156 data: 0.1042 max mem: 21002 Epoch: [284] [100/312] eta: 0:02:17 lr: 0.000032 min_lr: 0.000032 loss: 3.6459 (3.5447) weight_decay: 0.0500 (0.0500) time: 0.6734 data: 0.1801 max mem: 21002 Epoch: [284] [110/312] eta: 0:02:04 lr: 0.000032 min_lr: 0.000032 loss: 3.5967 (3.5386) weight_decay: 0.0500 (0.0500) time: 0.4961 data: 0.0765 max mem: 21002 Epoch: [284] [120/312] eta: 0:01:59 lr: 0.000032 min_lr: 0.000032 loss: 3.3890 (3.5242) weight_decay: 0.0500 (0.0500) time: 0.4875 data: 0.0665 max mem: 21002 Epoch: [284] [130/312] eta: 0:01:53 lr: 0.000031 min_lr: 0.000031 loss: 3.5455 (3.5375) weight_decay: 0.0500 (0.0500) time: 0.6865 data: 0.1044 max mem: 21002 Epoch: [284] [140/312] eta: 0:01:43 lr: 0.000031 min_lr: 0.000031 loss: 3.5886 (3.5374) weight_decay: 0.0500 (0.0500) time: 0.4921 data: 0.0390 max mem: 21002 Epoch: [284] [150/312] eta: 0:01:36 lr: 0.000031 min_lr: 0.000031 loss: 3.5554 (3.5452) weight_decay: 0.0500 (0.0500) time: 0.4206 data: 0.0083 max mem: 21002 Epoch: [284] [160/312] eta: 0:01:30 lr: 0.000031 min_lr: 0.000031 loss: 3.8431 (3.5752) weight_decay: 0.0500 (0.0500) time: 0.5458 data: 0.0080 max mem: 21002 Epoch: [284] [170/312] eta: 0:01:23 lr: 0.000031 min_lr: 0.000031 loss: 3.9767 (3.5821) weight_decay: 0.0500 (0.0500) time: 0.4832 data: 0.0641 max mem: 21002 Epoch: [284] [180/312] eta: 0:01:18 lr: 0.000031 min_lr: 0.000031 loss: 3.8747 (3.5893) weight_decay: 0.0500 (0.0500) time: 0.5773 data: 0.1469 max mem: 21002 Epoch: [284] [190/312] eta: 0:01:10 lr: 0.000031 min_lr: 0.000031 loss: 3.6416 (3.5862) weight_decay: 0.0500 (0.0500) time: 0.5107 data: 0.0837 max mem: 21002 Epoch: [284] [200/312] eta: 0:01:05 lr: 0.000031 min_lr: 0.000031 loss: 3.6416 (3.5963) weight_decay: 0.0500 (0.0500) time: 0.4905 data: 0.0401 max mem: 21002 Epoch: [284] [210/312] eta: 0:00:59 lr: 0.000030 min_lr: 0.000030 loss: 3.7678 (3.6043) weight_decay: 0.0500 (0.0500) time: 0.6526 data: 0.0472 max mem: 21002 Epoch: [284] [220/312] eta: 0:00:52 lr: 0.000030 min_lr: 0.000030 loss: 3.7349 (3.6140) weight_decay: 0.0500 (0.0500) time: 0.4666 data: 0.0246 max mem: 21002 Epoch: [284] [230/312] eta: 0:00:47 lr: 0.000030 min_lr: 0.000030 loss: 3.7125 (3.6137) weight_decay: 0.0500 (0.0500) time: 0.4904 data: 0.0290 max mem: 21002 Epoch: [284] [240/312] eta: 0:00:41 lr: 0.000030 min_lr: 0.000030 loss: 3.7165 (3.6225) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0274 max mem: 21002 Epoch: [284] [250/312] eta: 0:00:35 lr: 0.000030 min_lr: 0.000030 loss: 3.8765 (3.6293) weight_decay: 0.0500 (0.0500) time: 0.5117 data: 0.0447 max mem: 21002 Epoch: [284] [260/312] eta: 0:00:29 lr: 0.000030 min_lr: 0.000030 loss: 3.7874 (3.6280) weight_decay: 0.0500 (0.0500) time: 0.5498 data: 0.0510 max mem: 21002 Epoch: [284] [270/312] eta: 0:00:23 lr: 0.000030 min_lr: 0.000030 loss: 3.7671 (3.6302) weight_decay: 0.0500 (0.0500) time: 0.4438 data: 0.0220 max mem: 21002 Epoch: [284] [280/312] eta: 0:00:18 lr: 0.000030 min_lr: 0.000030 loss: 3.6828 (3.6203) weight_decay: 0.0500 (0.0500) time: 0.5771 data: 0.1186 max mem: 21002 Epoch: [284] [290/312] eta: 0:00:12 lr: 0.000030 min_lr: 0.000030 loss: 3.4660 (3.6159) weight_decay: 0.0500 (0.0500) time: 0.6446 data: 0.1184 max mem: 21002 Epoch: [284] [300/312] eta: 0:00:06 lr: 0.000029 min_lr: 0.000029 loss: 3.4660 (3.6099) weight_decay: 0.0500 (0.0500) time: 0.3619 data: 0.0113 max mem: 21002 Epoch: [284] [310/312] eta: 0:00:01 lr: 0.000029 min_lr: 0.000029 loss: 3.4282 (3.6052) weight_decay: 0.0500 (0.0500) time: 0.2877 data: 0.0113 max mem: 21002 Epoch: [284] [311/312] eta: 0:00:00 lr: 0.000029 min_lr: 0.000029 loss: 3.4282 (3.6061) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [284] Total time: 0:02:52 (0.5522 s / it) Averaged stats: lr: 0.000029 min_lr: 0.000029 loss: 3.4282 (3.5721) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.0166 (1.0166) acc1: 81.2500 (81.2500) acc5: 96.0938 (96.0938) time: 8.6790 data: 8.5605 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3536 (1.2649) acc1: 73.1771 (74.1440) acc5: 91.6667 (92.2240) time: 1.0533 data: 0.9567 max mem: 21002 Test: Total time: 0:00:09 (1.0634 s / it) * Acc@1 73.890 Acc@5 92.200 loss 1.269 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 73.99% Epoch: [285] [ 0/312] eta: 1:01:04 lr: 0.000029 min_lr: 0.000029 loss: 2.7616 (2.7616) weight_decay: 0.0500 (0.0500) time: 11.7468 data: 8.4125 max mem: 21002 Epoch: [285] [ 10/312] eta: 0:08:24 lr: 0.000029 min_lr: 0.000029 loss: 3.6388 (3.3763) weight_decay: 0.0500 (0.0500) time: 1.6698 data: 1.0022 max mem: 21002 Epoch: [285] [ 20/312] eta: 0:05:13 lr: 0.000029 min_lr: 0.000029 loss: 3.6388 (3.4596) weight_decay: 0.0500 (0.0500) time: 0.5392 data: 0.1573 max mem: 21002 Epoch: [285] [ 30/312] eta: 0:03:51 lr: 0.000029 min_lr: 0.000029 loss: 3.6805 (3.5212) weight_decay: 0.0500 (0.0500) time: 0.3536 data: 0.0270 max mem: 21002 Epoch: [285] [ 40/312] eta: 0:03:11 lr: 0.000029 min_lr: 0.000029 loss: 3.7890 (3.5654) weight_decay: 0.0500 (0.0500) time: 0.3200 data: 0.0009 max mem: 21002 Epoch: [285] [ 50/312] eta: 0:03:04 lr: 0.000029 min_lr: 0.000029 loss: 3.7996 (3.5845) weight_decay: 0.0500 (0.0500) time: 0.5230 data: 0.0581 max mem: 21002 Epoch: [285] [ 60/312] eta: 0:02:40 lr: 0.000029 min_lr: 0.000029 loss: 3.8338 (3.5700) weight_decay: 0.0500 (0.0500) time: 0.4945 data: 0.0579 max mem: 21002 Epoch: [285] [ 70/312] eta: 0:02:34 lr: 0.000028 min_lr: 0.000028 loss: 3.7732 (3.5741) weight_decay: 0.0500 (0.0500) time: 0.4693 data: 0.0493 max mem: 21002 Epoch: [285] [ 80/312] eta: 0:02:26 lr: 0.000028 min_lr: 0.000028 loss: 3.6407 (3.5952) weight_decay: 0.0500 (0.0500) time: 0.6141 data: 0.0492 max mem: 21002 Epoch: [285] [ 90/312] eta: 0:02:13 lr: 0.000028 min_lr: 0.000028 loss: 3.6407 (3.5891) weight_decay: 0.0500 (0.0500) time: 0.4819 data: 0.0365 max mem: 21002 Epoch: [285] [100/312] eta: 0:02:10 lr: 0.000028 min_lr: 0.000028 loss: 3.3077 (3.5497) weight_decay: 0.0500 (0.0500) time: 0.5524 data: 0.0536 max mem: 21002 Epoch: [285] [110/312] eta: 0:01:58 lr: 0.000028 min_lr: 0.000028 loss: 3.4448 (3.5500) weight_decay: 0.0500 (0.0500) time: 0.5052 data: 0.0178 max mem: 21002 Epoch: [285] [120/312] eta: 0:01:54 lr: 0.000028 min_lr: 0.000028 loss: 3.4094 (3.5230) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.0204 max mem: 21002 Epoch: [285] [130/312] eta: 0:01:49 lr: 0.000028 min_lr: 0.000028 loss: 3.5210 (3.5357) weight_decay: 0.0500 (0.0500) time: 0.7000 data: 0.0275 max mem: 21002 Epoch: [285] [140/312] eta: 0:01:40 lr: 0.000028 min_lr: 0.000028 loss: 3.8847 (3.5623) weight_decay: 0.0500 (0.0500) time: 0.5153 data: 0.0267 max mem: 21002 Epoch: [285] [150/312] eta: 0:01:34 lr: 0.000027 min_lr: 0.000027 loss: 3.6164 (3.5510) weight_decay: 0.0500 (0.0500) time: 0.4637 data: 0.0375 max mem: 21002 Epoch: [285] [160/312] eta: 0:01:28 lr: 0.000027 min_lr: 0.000027 loss: 3.5783 (3.5575) weight_decay: 0.0500 (0.0500) time: 0.5618 data: 0.0188 max mem: 21002 Epoch: [285] [170/312] eta: 0:01:21 lr: 0.000027 min_lr: 0.000027 loss: 3.7362 (3.5598) weight_decay: 0.0500 (0.0500) time: 0.4815 data: 0.0326 max mem: 21002 Epoch: [285] [180/312] eta: 0:01:16 lr: 0.000027 min_lr: 0.000027 loss: 3.6072 (3.5566) weight_decay: 0.0500 (0.0500) time: 0.5679 data: 0.0637 max mem: 21002 Epoch: [285] [190/312] eta: 0:01:09 lr: 0.000027 min_lr: 0.000027 loss: 3.8942 (3.5759) weight_decay: 0.0500 (0.0500) time: 0.5089 data: 0.0522 max mem: 21002 Epoch: [285] [200/312] eta: 0:01:04 lr: 0.000027 min_lr: 0.000027 loss: 3.7886 (3.5659) weight_decay: 0.0500 (0.0500) time: 0.5015 data: 0.0452 max mem: 21002 Epoch: [285] [210/312] eta: 0:00:58 lr: 0.000027 min_lr: 0.000027 loss: 3.5396 (3.5656) weight_decay: 0.0500 (0.0500) time: 0.6471 data: 0.0301 max mem: 21002 Epoch: [285] [220/312] eta: 0:00:52 lr: 0.000027 min_lr: 0.000027 loss: 3.5396 (3.5666) weight_decay: 0.0500 (0.0500) time: 0.5082 data: 0.0316 max mem: 21002 Epoch: [285] [230/312] eta: 0:00:46 lr: 0.000027 min_lr: 0.000027 loss: 3.8140 (3.5772) weight_decay: 0.0500 (0.0500) time: 0.5264 data: 0.0665 max mem: 21002 Epoch: [285] [240/312] eta: 0:00:40 lr: 0.000026 min_lr: 0.000026 loss: 3.9079 (3.5868) weight_decay: 0.0500 (0.0500) time: 0.5281 data: 0.0688 max mem: 21002 Epoch: [285] [250/312] eta: 0:00:35 lr: 0.000026 min_lr: 0.000026 loss: 3.8102 (3.5853) weight_decay: 0.0500 (0.0500) time: 0.5295 data: 0.0846 max mem: 21002 Epoch: [285] [260/312] eta: 0:00:29 lr: 0.000026 min_lr: 0.000026 loss: 3.6828 (3.5871) weight_decay: 0.0500 (0.0500) time: 0.6359 data: 0.0697 max mem: 21002 Epoch: [285] [270/312] eta: 0:00:23 lr: 0.000026 min_lr: 0.000026 loss: 3.6828 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.4915 data: 0.0496 max mem: 21002 Epoch: [285] [280/312] eta: 0:00:18 lr: 0.000026 min_lr: 0.000026 loss: 3.7346 (3.5885) weight_decay: 0.0500 (0.0500) time: 0.5349 data: 0.0807 max mem: 21002 Epoch: [285] [290/312] eta: 0:00:12 lr: 0.000026 min_lr: 0.000026 loss: 3.7346 (3.5867) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0514 max mem: 21002 Epoch: [285] [300/312] eta: 0:00:06 lr: 0.000026 min_lr: 0.000026 loss: 3.7708 (3.5896) weight_decay: 0.0500 (0.0500) time: 0.4025 data: 0.0194 max mem: 21002 Epoch: [285] [310/312] eta: 0:00:01 lr: 0.000026 min_lr: 0.000026 loss: 3.7624 (3.5874) weight_decay: 0.0500 (0.0500) time: 0.3050 data: 0.0127 max mem: 21002 Epoch: [285] [311/312] eta: 0:00:00 lr: 0.000026 min_lr: 0.000026 loss: 3.6587 (3.5847) weight_decay: 0.0500 (0.0500) time: 0.2887 data: 0.0126 max mem: 21002 Epoch: [285] Total time: 0:02:51 (0.5492 s / it) Averaged stats: lr: 0.000026 min_lr: 0.000026 loss: 3.6587 (3.5750) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.0234 (1.0234) acc1: 81.5104 (81.5104) acc5: 95.9635 (95.9635) time: 8.4013 data: 8.2823 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3445 (1.2597) acc1: 73.5677 (74.1280) acc5: 92.0573 (92.0480) time: 1.0325 data: 0.9336 max mem: 21002 Test: Total time: 0:00:09 (1.0428 s / it) * Acc@1 73.882 Acc@5 92.176 loss 1.265 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 73.99% Epoch: [286] [ 0/312] eta: 1:00:13 lr: 0.000026 min_lr: 0.000026 loss: 3.3895 (3.3895) weight_decay: 0.0500 (0.0500) time: 11.5825 data: 9.1283 max mem: 21002 Epoch: [286] [ 10/312] eta: 0:07:56 lr: 0.000026 min_lr: 0.000026 loss: 3.6815 (3.7661) weight_decay: 0.0500 (0.0500) time: 1.5782 data: 0.9148 max mem: 21002 Epoch: [286] [ 20/312] eta: 0:05:27 lr: 0.000025 min_lr: 0.000025 loss: 3.6815 (3.6642) weight_decay: 0.0500 (0.0500) time: 0.5995 data: 0.1342 max mem: 21002 Epoch: [286] [ 30/312] eta: 0:04:01 lr: 0.000025 min_lr: 0.000025 loss: 3.6656 (3.5815) weight_decay: 0.0500 (0.0500) time: 0.4590 data: 0.0879 max mem: 21002 Epoch: [286] [ 40/312] eta: 0:03:18 lr: 0.000025 min_lr: 0.000025 loss: 3.0310 (3.5120) weight_decay: 0.0500 (0.0500) time: 0.3160 data: 0.0007 max mem: 21002 Epoch: [286] [ 50/312] eta: 0:03:05 lr: 0.000025 min_lr: 0.000025 loss: 3.5871 (3.5055) weight_decay: 0.0500 (0.0500) time: 0.4803 data: 0.1372 max mem: 21002 Epoch: [286] [ 60/312] eta: 0:02:41 lr: 0.000025 min_lr: 0.000025 loss: 3.6840 (3.5403) weight_decay: 0.0500 (0.0500) time: 0.4585 data: 0.1380 max mem: 21002 Epoch: [286] [ 70/312] eta: 0:02:36 lr: 0.000025 min_lr: 0.000025 loss: 3.9168 (3.5860) weight_decay: 0.0500 (0.0500) time: 0.4893 data: 0.1313 max mem: 21002 Epoch: [286] [ 80/312] eta: 0:02:30 lr: 0.000025 min_lr: 0.000025 loss: 3.9474 (3.6119) weight_decay: 0.0500 (0.0500) time: 0.6819 data: 0.2492 max mem: 21002 Epoch: [286] [ 90/312] eta: 0:02:15 lr: 0.000025 min_lr: 0.000025 loss: 3.6456 (3.6018) weight_decay: 0.0500 (0.0500) time: 0.4811 data: 0.1193 max mem: 21002 Epoch: [286] [100/312] eta: 0:02:11 lr: 0.000025 min_lr: 0.000025 loss: 3.3536 (3.5737) weight_decay: 0.0500 (0.0500) time: 0.5049 data: 0.1064 max mem: 21002 Epoch: [286] [110/312] eta: 0:01:59 lr: 0.000024 min_lr: 0.000024 loss: 3.3161 (3.5658) weight_decay: 0.0500 (0.0500) time: 0.5053 data: 0.1063 max mem: 21002 Epoch: [286] [120/312] eta: 0:01:55 lr: 0.000024 min_lr: 0.000024 loss: 3.4158 (3.5568) weight_decay: 0.0500 (0.0500) time: 0.5026 data: 0.1161 max mem: 21002 Epoch: [286] [130/312] eta: 0:01:51 lr: 0.000024 min_lr: 0.000024 loss: 3.6753 (3.5697) weight_decay: 0.0500 (0.0500) time: 0.7399 data: 0.2044 max mem: 21002 Epoch: [286] [140/312] eta: 0:01:41 lr: 0.000024 min_lr: 0.000024 loss: 3.9095 (3.5862) weight_decay: 0.0500 (0.0500) time: 0.5249 data: 0.0898 max mem: 21002 Epoch: [286] [150/312] eta: 0:01:35 lr: 0.000024 min_lr: 0.000024 loss: 3.8578 (3.5748) weight_decay: 0.0500 (0.0500) time: 0.4478 data: 0.0606 max mem: 21002 Epoch: [286] [160/312] eta: 0:01:30 lr: 0.000024 min_lr: 0.000024 loss: 3.5642 (3.5775) weight_decay: 0.0500 (0.0500) time: 0.6374 data: 0.1329 max mem: 21002 Epoch: [286] [170/312] eta: 0:01:22 lr: 0.000024 min_lr: 0.000024 loss: 3.7960 (3.5942) weight_decay: 0.0500 (0.0500) time: 0.5231 data: 0.1198 max mem: 21002 Epoch: [286] [180/312] eta: 0:01:17 lr: 0.000024 min_lr: 0.000024 loss: 3.6721 (3.5851) weight_decay: 0.0500 (0.0500) time: 0.5208 data: 0.1058 max mem: 21002 Epoch: [286] [190/312] eta: 0:01:09 lr: 0.000024 min_lr: 0.000024 loss: 3.5317 (3.5956) weight_decay: 0.0500 (0.0500) time: 0.4769 data: 0.0597 max mem: 21002 Epoch: [286] [200/312] eta: 0:01:04 lr: 0.000023 min_lr: 0.000023 loss: 3.7619 (3.5963) weight_decay: 0.0500 (0.0500) time: 0.4732 data: 0.0657 max mem: 21002 Epoch: [286] [210/312] eta: 0:00:59 lr: 0.000023 min_lr: 0.000023 loss: 3.6684 (3.5956) weight_decay: 0.0500 (0.0500) time: 0.6961 data: 0.1471 max mem: 21002 Epoch: [286] [220/312] eta: 0:00:52 lr: 0.000023 min_lr: 0.000023 loss: 3.6682 (3.5903) weight_decay: 0.0500 (0.0500) time: 0.5228 data: 0.0822 max mem: 21002 Epoch: [286] [230/312] eta: 0:00:47 lr: 0.000023 min_lr: 0.000023 loss: 3.5151 (3.5829) weight_decay: 0.0500 (0.0500) time: 0.4862 data: 0.0608 max mem: 21002 Epoch: [286] [240/312] eta: 0:00:41 lr: 0.000023 min_lr: 0.000023 loss: 3.5151 (3.5869) weight_decay: 0.0500 (0.0500) time: 0.6360 data: 0.0790 max mem: 21002 Epoch: [286] [250/312] eta: 0:00:35 lr: 0.000023 min_lr: 0.000023 loss: 3.8179 (3.5912) weight_decay: 0.0500 (0.0500) time: 0.4949 data: 0.0456 max mem: 21002 Epoch: [286] [260/312] eta: 0:00:29 lr: 0.000023 min_lr: 0.000023 loss: 3.8712 (3.5985) weight_decay: 0.0500 (0.0500) time: 0.5439 data: 0.0798 max mem: 21002 Epoch: [286] [270/312] eta: 0:00:23 lr: 0.000023 min_lr: 0.000023 loss: 3.8630 (3.5988) weight_decay: 0.0500 (0.0500) time: 0.5021 data: 0.0536 max mem: 21002 Epoch: [286] [280/312] eta: 0:00:18 lr: 0.000023 min_lr: 0.000023 loss: 3.7781 (3.6038) weight_decay: 0.0500 (0.0500) time: 0.4858 data: 0.0322 max mem: 21002 Epoch: [286] [290/312] eta: 0:00:12 lr: 0.000022 min_lr: 0.000022 loss: 3.6998 (3.6045) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0518 max mem: 21002 Epoch: [286] [300/312] eta: 0:00:06 lr: 0.000022 min_lr: 0.000022 loss: 3.6220 (3.6022) weight_decay: 0.0500 (0.0500) time: 0.3915 data: 0.0205 max mem: 21002 Epoch: [286] [310/312] eta: 0:00:01 lr: 0.000022 min_lr: 0.000022 loss: 3.6439 (3.6019) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [286] [311/312] eta: 0:00:00 lr: 0.000022 min_lr: 0.000022 loss: 3.6220 (3.6010) weight_decay: 0.0500 (0.0500) time: 0.2770 data: 0.0001 max mem: 21002 Epoch: [286] Total time: 0:02:51 (0.5506 s / it) Averaged stats: lr: 0.000022 min_lr: 0.000022 loss: 3.6220 (3.5739) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.0496 (1.0496) acc1: 81.3802 (81.3802) acc5: 95.9635 (95.9635) time: 8.5020 data: 8.3832 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3748 (1.2870) acc1: 73.3073 (73.8720) acc5: 91.6667 (92.1760) time: 1.0399 data: 0.9315 max mem: 21002 Test: Total time: 0:00:09 (1.0511 s / it) * Acc@1 73.842 Acc@5 92.160 loss 1.292 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 73.99% Epoch: [287] [ 0/312] eta: 1:03:50 lr: 0.000022 min_lr: 0.000022 loss: 4.2577 (4.2577) weight_decay: 0.0500 (0.0500) time: 12.2780 data: 9.0011 max mem: 21002 Epoch: [287] [ 10/312] eta: 0:08:35 lr: 0.000022 min_lr: 0.000022 loss: 3.9554 (3.7764) weight_decay: 0.0500 (0.0500) time: 1.7078 data: 1.0304 max mem: 21002 Epoch: [287] [ 20/312] eta: 0:05:06 lr: 0.000022 min_lr: 0.000022 loss: 3.6063 (3.6179) weight_decay: 0.0500 (0.0500) time: 0.4885 data: 0.1279 max mem: 21002 Epoch: [287] [ 30/312] eta: 0:03:46 lr: 0.000022 min_lr: 0.000022 loss: 3.4851 (3.5027) weight_decay: 0.0500 (0.0500) time: 0.3068 data: 0.0116 max mem: 21002 Epoch: [287] [ 40/312] eta: 0:03:04 lr: 0.000022 min_lr: 0.000022 loss: 3.4921 (3.5213) weight_decay: 0.0500 (0.0500) time: 0.2899 data: 0.0008 max mem: 21002 Epoch: [287] [ 50/312] eta: 0:02:58 lr: 0.000022 min_lr: 0.000022 loss: 3.6435 (3.5403) weight_decay: 0.0500 (0.0500) time: 0.4915 data: 0.1580 max mem: 21002 Epoch: [287] [ 60/312] eta: 0:02:37 lr: 0.000022 min_lr: 0.000022 loss: 3.5056 (3.5300) weight_decay: 0.0500 (0.0500) time: 0.5084 data: 0.1730 max mem: 21002 Epoch: [287] [ 70/312] eta: 0:02:32 lr: 0.000022 min_lr: 0.000022 loss: 3.3590 (3.5175) weight_decay: 0.0500 (0.0500) time: 0.4944 data: 0.1622 max mem: 21002 Epoch: [287] [ 80/312] eta: 0:02:24 lr: 0.000021 min_lr: 0.000021 loss: 3.3590 (3.5246) weight_decay: 0.0500 (0.0500) time: 0.6185 data: 0.2244 max mem: 21002 Epoch: [287] [ 90/312] eta: 0:02:13 lr: 0.000021 min_lr: 0.000021 loss: 3.6739 (3.5421) weight_decay: 0.0500 (0.0500) time: 0.4947 data: 0.1390 max mem: 21002 Epoch: [287] [100/312] eta: 0:02:07 lr: 0.000021 min_lr: 0.000021 loss: 3.7961 (3.5671) weight_decay: 0.0500 (0.0500) time: 0.5224 data: 0.1899 max mem: 21002 Epoch: [287] [110/312] eta: 0:01:57 lr: 0.000021 min_lr: 0.000021 loss: 3.8261 (3.5784) weight_decay: 0.0500 (0.0500) time: 0.4913 data: 0.1456 max mem: 21002 Epoch: [287] [120/312] eta: 0:01:51 lr: 0.000021 min_lr: 0.000021 loss: 3.6979 (3.5858) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.1693 max mem: 21002 Epoch: [287] [130/312] eta: 0:01:47 lr: 0.000021 min_lr: 0.000021 loss: 3.7343 (3.5918) weight_decay: 0.0500 (0.0500) time: 0.6595 data: 0.2836 max mem: 21002 Epoch: [287] [140/312] eta: 0:01:38 lr: 0.000021 min_lr: 0.000021 loss: 3.7561 (3.5788) weight_decay: 0.0500 (0.0500) time: 0.5226 data: 0.1365 max mem: 21002 Epoch: [287] [150/312] eta: 0:01:34 lr: 0.000021 min_lr: 0.000021 loss: 3.5806 (3.5714) weight_decay: 0.0500 (0.0500) time: 0.5052 data: 0.1361 max mem: 21002 Epoch: [287] [160/312] eta: 0:01:27 lr: 0.000021 min_lr: 0.000021 loss: 3.2235 (3.5432) weight_decay: 0.0500 (0.0500) time: 0.6036 data: 0.2399 max mem: 21002 Epoch: [287] [170/312] eta: 0:01:21 lr: 0.000020 min_lr: 0.000020 loss: 3.3367 (3.5498) weight_decay: 0.0500 (0.0500) time: 0.5232 data: 0.1321 max mem: 21002 Epoch: [287] [180/312] eta: 0:01:16 lr: 0.000020 min_lr: 0.000020 loss: 3.7342 (3.5541) weight_decay: 0.0500 (0.0500) time: 0.5643 data: 0.1231 max mem: 21002 Epoch: [287] [190/312] eta: 0:01:09 lr: 0.000020 min_lr: 0.000020 loss: 3.6911 (3.5419) weight_decay: 0.0500 (0.0500) time: 0.5032 data: 0.1052 max mem: 21002 Epoch: [287] [200/312] eta: 0:01:03 lr: 0.000020 min_lr: 0.000020 loss: 3.6911 (3.5465) weight_decay: 0.0500 (0.0500) time: 0.4997 data: 0.0632 max mem: 21002 Epoch: [287] [210/312] eta: 0:00:58 lr: 0.000020 min_lr: 0.000020 loss: 3.7322 (3.5477) weight_decay: 0.0500 (0.0500) time: 0.6323 data: 0.1116 max mem: 21002 Epoch: [287] [220/312] eta: 0:00:52 lr: 0.000020 min_lr: 0.000020 loss: 3.4025 (3.5373) weight_decay: 0.0500 (0.0500) time: 0.5569 data: 0.1046 max mem: 21002 Epoch: [287] [230/312] eta: 0:00:46 lr: 0.000020 min_lr: 0.000020 loss: 3.4109 (3.5377) weight_decay: 0.0500 (0.0500) time: 0.5347 data: 0.0694 max mem: 21002 Epoch: [287] [240/312] eta: 0:00:40 lr: 0.000020 min_lr: 0.000020 loss: 3.5785 (3.5432) weight_decay: 0.0500 (0.0500) time: 0.5111 data: 0.0659 max mem: 21002 Epoch: [287] [250/312] eta: 0:00:34 lr: 0.000020 min_lr: 0.000020 loss: 3.6747 (3.5472) weight_decay: 0.0500 (0.0500) time: 0.4881 data: 0.0986 max mem: 21002 Epoch: [287] [260/312] eta: 0:00:29 lr: 0.000020 min_lr: 0.000020 loss: 3.6747 (3.5505) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0854 max mem: 21002 Epoch: [287] [270/312] eta: 0:00:23 lr: 0.000020 min_lr: 0.000020 loss: 3.6700 (3.5483) weight_decay: 0.0500 (0.0500) time: 0.5629 data: 0.0746 max mem: 21002 Epoch: [287] [280/312] eta: 0:00:17 lr: 0.000019 min_lr: 0.000019 loss: 3.6700 (3.5518) weight_decay: 0.0500 (0.0500) time: 0.4917 data: 0.0998 max mem: 21002 Epoch: [287] [290/312] eta: 0:00:12 lr: 0.000019 min_lr: 0.000019 loss: 3.6642 (3.5543) weight_decay: 0.0500 (0.0500) time: 0.5700 data: 0.1552 max mem: 21002 Epoch: [287] [300/312] eta: 0:00:06 lr: 0.000019 min_lr: 0.000019 loss: 3.8229 (3.5681) weight_decay: 0.0500 (0.0500) time: 0.4738 data: 0.0960 max mem: 21002 Epoch: [287] [310/312] eta: 0:00:01 lr: 0.000019 min_lr: 0.000019 loss: 3.7843 (3.5658) weight_decay: 0.0500 (0.0500) time: 0.2797 data: 0.0001 max mem: 21002 Epoch: [287] [311/312] eta: 0:00:00 lr: 0.000019 min_lr: 0.000019 loss: 3.6389 (3.5646) weight_decay: 0.0500 (0.0500) time: 0.2796 data: 0.0001 max mem: 21002 Epoch: [287] Total time: 0:02:50 (0.5481 s / it) Averaged stats: lr: 0.000019 min_lr: 0.000019 loss: 3.6389 (3.5620) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.0420 (1.0420) acc1: 81.2500 (81.2500) acc5: 95.5729 (95.5729) time: 8.3554 data: 8.2365 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3628 (1.2812) acc1: 73.1771 (74.1120) acc5: 91.7969 (92.0480) time: 1.0157 data: 0.9153 max mem: 21002 Test: Total time: 0:00:09 (1.0261 s / it) * Acc@1 73.902 Acc@5 92.110 loss 1.286 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 73.99% Epoch: [288] [ 0/312] eta: 0:59:03 lr: 0.000019 min_lr: 0.000019 loss: 4.3655 (4.3655) weight_decay: 0.0500 (0.0500) time: 11.3578 data: 9.2528 max mem: 21002 Epoch: [288] [ 10/312] eta: 0:08:41 lr: 0.000019 min_lr: 0.000019 loss: 3.8909 (3.7931) weight_decay: 0.0500 (0.0500) time: 1.7283 data: 1.0727 max mem: 21002 Epoch: [288] [ 20/312] eta: 0:05:06 lr: 0.000019 min_lr: 0.000019 loss: 3.8088 (3.7324) weight_decay: 0.0500 (0.0500) time: 0.5325 data: 0.1320 max mem: 21002 Epoch: [288] [ 30/312] eta: 0:03:46 lr: 0.000019 min_lr: 0.000019 loss: 3.7128 (3.7230) weight_decay: 0.0500 (0.0500) time: 0.2950 data: 0.0050 max mem: 21002 Epoch: [288] [ 40/312] eta: 0:03:04 lr: 0.000019 min_lr: 0.000019 loss: 3.7508 (3.7204) weight_decay: 0.0500 (0.0500) time: 0.2884 data: 0.0008 max mem: 21002 Epoch: [288] [ 50/312] eta: 0:02:53 lr: 0.000019 min_lr: 0.000019 loss: 3.8521 (3.7168) weight_decay: 0.0500 (0.0500) time: 0.4449 data: 0.1090 max mem: 21002 Epoch: [288] [ 60/312] eta: 0:02:34 lr: 0.000019 min_lr: 0.000019 loss: 3.7568 (3.7024) weight_decay: 0.0500 (0.0500) time: 0.4849 data: 0.1472 max mem: 21002 Epoch: [288] [ 70/312] eta: 0:02:28 lr: 0.000018 min_lr: 0.000018 loss: 3.6901 (3.6790) weight_decay: 0.0500 (0.0500) time: 0.4883 data: 0.1270 max mem: 21002 Epoch: [288] [ 80/312] eta: 0:02:20 lr: 0.000018 min_lr: 0.000018 loss: 3.6357 (3.6802) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.1239 max mem: 21002 Epoch: [288] [ 90/312] eta: 0:02:11 lr: 0.000018 min_lr: 0.000018 loss: 3.7091 (3.6788) weight_decay: 0.0500 (0.0500) time: 0.5163 data: 0.0899 max mem: 21002 Epoch: [288] [100/312] eta: 0:02:06 lr: 0.000018 min_lr: 0.000018 loss: 3.5397 (3.6419) weight_decay: 0.0500 (0.0500) time: 0.5539 data: 0.1343 max mem: 21002 Epoch: [288] [110/312] eta: 0:01:56 lr: 0.000018 min_lr: 0.000018 loss: 3.6044 (3.6462) weight_decay: 0.0500 (0.0500) time: 0.4940 data: 0.0915 max mem: 21002 Epoch: [288] [120/312] eta: 0:01:51 lr: 0.000018 min_lr: 0.000018 loss: 3.7210 (3.6373) weight_decay: 0.0500 (0.0500) time: 0.4963 data: 0.0782 max mem: 21002 Epoch: [288] [130/312] eta: 0:01:45 lr: 0.000018 min_lr: 0.000018 loss: 3.7210 (3.6355) weight_decay: 0.0500 (0.0500) time: 0.6249 data: 0.1241 max mem: 21002 Epoch: [288] [140/312] eta: 0:01:37 lr: 0.000018 min_lr: 0.000018 loss: 3.7812 (3.6366) weight_decay: 0.0500 (0.0500) time: 0.5019 data: 0.0842 max mem: 21002 Epoch: [288] [150/312] eta: 0:01:33 lr: 0.000018 min_lr: 0.000018 loss: 3.6118 (3.6241) weight_decay: 0.0500 (0.0500) time: 0.5306 data: 0.0963 max mem: 21002 Epoch: [288] [160/312] eta: 0:01:26 lr: 0.000018 min_lr: 0.000018 loss: 3.5830 (3.6175) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.1046 max mem: 21002 Epoch: [288] [170/312] eta: 0:01:20 lr: 0.000017 min_lr: 0.000017 loss: 3.7800 (3.6266) weight_decay: 0.0500 (0.0500) time: 0.4759 data: 0.0981 max mem: 21002 Epoch: [288] [180/312] eta: 0:01:14 lr: 0.000017 min_lr: 0.000017 loss: 3.8746 (3.6345) weight_decay: 0.0500 (0.0500) time: 0.5418 data: 0.1408 max mem: 21002 Epoch: [288] [190/312] eta: 0:01:08 lr: 0.000017 min_lr: 0.000017 loss: 3.8999 (3.6330) weight_decay: 0.0500 (0.0500) time: 0.5213 data: 0.1146 max mem: 21002 Epoch: [288] [200/312] eta: 0:01:03 lr: 0.000017 min_lr: 0.000017 loss: 3.8232 (3.6414) weight_decay: 0.0500 (0.0500) time: 0.5265 data: 0.1062 max mem: 21002 Epoch: [288] [210/312] eta: 0:00:57 lr: 0.000017 min_lr: 0.000017 loss: 3.8950 (3.6543) weight_decay: 0.0500 (0.0500) time: 0.6494 data: 0.1338 max mem: 21002 Epoch: [288] [220/312] eta: 0:00:51 lr: 0.000017 min_lr: 0.000017 loss: 3.6414 (3.6431) weight_decay: 0.0500 (0.0500) time: 0.5226 data: 0.1172 max mem: 21002 Epoch: [288] [230/312] eta: 0:00:46 lr: 0.000017 min_lr: 0.000017 loss: 3.4588 (3.6323) weight_decay: 0.0500 (0.0500) time: 0.5014 data: 0.1169 max mem: 21002 Epoch: [288] [240/312] eta: 0:00:40 lr: 0.000017 min_lr: 0.000017 loss: 3.4375 (3.6289) weight_decay: 0.0500 (0.0500) time: 0.5273 data: 0.0880 max mem: 21002 Epoch: [288] [250/312] eta: 0:00:34 lr: 0.000017 min_lr: 0.000017 loss: 3.6096 (3.6314) weight_decay: 0.0500 (0.0500) time: 0.5211 data: 0.1183 max mem: 21002 Epoch: [288] [260/312] eta: 0:00:29 lr: 0.000017 min_lr: 0.000017 loss: 3.7015 (3.6288) weight_decay: 0.0500 (0.0500) time: 0.6154 data: 0.1572 max mem: 21002 Epoch: [288] [270/312] eta: 0:00:23 lr: 0.000017 min_lr: 0.000017 loss: 3.8391 (3.6309) weight_decay: 0.0500 (0.0500) time: 0.5048 data: 0.1105 max mem: 21002 Epoch: [288] [280/312] eta: 0:00:17 lr: 0.000016 min_lr: 0.000016 loss: 3.8168 (3.6245) weight_decay: 0.0500 (0.0500) time: 0.5047 data: 0.1160 max mem: 21002 Epoch: [288] [290/312] eta: 0:00:12 lr: 0.000016 min_lr: 0.000016 loss: 3.5813 (3.6257) weight_decay: 0.0500 (0.0500) time: 0.6265 data: 0.1354 max mem: 21002 Epoch: [288] [300/312] eta: 0:00:06 lr: 0.000016 min_lr: 0.000016 loss: 3.6487 (3.6213) weight_decay: 0.0500 (0.0500) time: 0.4776 data: 0.0966 max mem: 21002 Epoch: [288] [310/312] eta: 0:00:01 lr: 0.000016 min_lr: 0.000016 loss: 3.8383 (3.6293) weight_decay: 0.0500 (0.0500) time: 0.3058 data: 0.0295 max mem: 21002 Epoch: [288] [311/312] eta: 0:00:00 lr: 0.000016 min_lr: 0.000016 loss: 3.8383 (3.6263) weight_decay: 0.0500 (0.0500) time: 0.2838 data: 0.0070 max mem: 21002 Epoch: [288] Total time: 0:02:50 (0.5462 s / it) Averaged stats: lr: 0.000016 min_lr: 0.000016 loss: 3.8383 (3.5739) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:21 loss: 1.0607 (1.0607) acc1: 80.7292 (80.7292) acc5: 95.5729 (95.5729) time: 9.0358 data: 8.9172 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3802 (1.2960) acc1: 73.0469 (73.6640) acc5: 90.7552 (91.8080) time: 1.0907 data: 0.9909 max mem: 21002 Test: Total time: 0:00:10 (1.1204 s / it) * Acc@1 73.712 Acc@5 91.964 loss 1.300 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.99% Epoch: [289] [ 0/312] eta: 1:03:17 lr: 0.000016 min_lr: 0.000016 loss: 2.9109 (2.9109) weight_decay: 0.0500 (0.0500) time: 12.1705 data: 11.2876 max mem: 21002 Epoch: [289] [ 10/312] eta: 0:07:27 lr: 0.000016 min_lr: 0.000016 loss: 3.4067 (3.4479) weight_decay: 0.0500 (0.0500) time: 1.4821 data: 1.0267 max mem: 21002 Epoch: [289] [ 20/312] eta: 0:05:12 lr: 0.000016 min_lr: 0.000016 loss: 3.4067 (3.5093) weight_decay: 0.0500 (0.0500) time: 0.5159 data: 0.0999 max mem: 21002 Epoch: [289] [ 30/312] eta: 0:03:51 lr: 0.000016 min_lr: 0.000016 loss: 3.8282 (3.5383) weight_decay: 0.0500 (0.0500) time: 0.4551 data: 0.1009 max mem: 21002 Epoch: [289] [ 40/312] eta: 0:03:36 lr: 0.000016 min_lr: 0.000016 loss: 3.6931 (3.5066) weight_decay: 0.0500 (0.0500) time: 0.5091 data: 0.0733 max mem: 21002 Epoch: [289] [ 50/312] eta: 0:03:20 lr: 0.000016 min_lr: 0.000016 loss: 3.4041 (3.4937) weight_decay: 0.0500 (0.0500) time: 0.6781 data: 0.1075 max mem: 21002 Epoch: [289] [ 60/312] eta: 0:02:53 lr: 0.000016 min_lr: 0.000016 loss: 3.6393 (3.5159) weight_decay: 0.0500 (0.0500) time: 0.4698 data: 0.0472 max mem: 21002 Epoch: [289] [ 70/312] eta: 0:02:45 lr: 0.000016 min_lr: 0.000016 loss: 3.5013 (3.5129) weight_decay: 0.0500 (0.0500) time: 0.4778 data: 0.0512 max mem: 21002 Epoch: [289] [ 80/312] eta: 0:02:35 lr: 0.000016 min_lr: 0.000016 loss: 3.4454 (3.5089) weight_decay: 0.0500 (0.0500) time: 0.6061 data: 0.0752 max mem: 21002 Epoch: [289] [ 90/312] eta: 0:02:21 lr: 0.000015 min_lr: 0.000015 loss: 3.5099 (3.5086) weight_decay: 0.0500 (0.0500) time: 0.4769 data: 0.0671 max mem: 21002 Epoch: [289] [100/312] eta: 0:02:15 lr: 0.000015 min_lr: 0.000015 loss: 3.5099 (3.4984) weight_decay: 0.0500 (0.0500) time: 0.5175 data: 0.0982 max mem: 21002 Epoch: [289] [110/312] eta: 0:02:02 lr: 0.000015 min_lr: 0.000015 loss: 3.3931 (3.4832) weight_decay: 0.0500 (0.0500) time: 0.4698 data: 0.0673 max mem: 21002 Epoch: [289] [120/312] eta: 0:01:57 lr: 0.000015 min_lr: 0.000015 loss: 3.7355 (3.5126) weight_decay: 0.0500 (0.0500) time: 0.4724 data: 0.0686 max mem: 21002 Epoch: [289] [130/312] eta: 0:01:51 lr: 0.000015 min_lr: 0.000015 loss: 3.8776 (3.5328) weight_decay: 0.0500 (0.0500) time: 0.6569 data: 0.1492 max mem: 21002 Epoch: [289] [140/312] eta: 0:01:41 lr: 0.000015 min_lr: 0.000015 loss: 3.7904 (3.5399) weight_decay: 0.0500 (0.0500) time: 0.4742 data: 0.0814 max mem: 21002 Epoch: [289] [150/312] eta: 0:01:36 lr: 0.000015 min_lr: 0.000015 loss: 3.5807 (3.5358) weight_decay: 0.0500 (0.0500) time: 0.4753 data: 0.1094 max mem: 21002 Epoch: [289] [160/312] eta: 0:01:31 lr: 0.000015 min_lr: 0.000015 loss: 3.4096 (3.5307) weight_decay: 0.0500 (0.0500) time: 0.6515 data: 0.2503 max mem: 21002 Epoch: [289] [170/312] eta: 0:01:22 lr: 0.000015 min_lr: 0.000015 loss: 3.3431 (3.5247) weight_decay: 0.0500 (0.0500) time: 0.4742 data: 0.1416 max mem: 21002 Epoch: [289] [180/312] eta: 0:01:17 lr: 0.000015 min_lr: 0.000015 loss: 3.3135 (3.5251) weight_decay: 0.0500 (0.0500) time: 0.4770 data: 0.1499 max mem: 21002 Epoch: [289] [190/312] eta: 0:01:09 lr: 0.000015 min_lr: 0.000015 loss: 3.8012 (3.5465) weight_decay: 0.0500 (0.0500) time: 0.4670 data: 0.1499 max mem: 21002 Epoch: [289] [200/312] eta: 0:01:04 lr: 0.000014 min_lr: 0.000014 loss: 3.8899 (3.5518) weight_decay: 0.0500 (0.0500) time: 0.4848 data: 0.1656 max mem: 21002 Epoch: [289] [210/312] eta: 0:00:59 lr: 0.000014 min_lr: 0.000014 loss: 3.5858 (3.5451) weight_decay: 0.0500 (0.0500) time: 0.6818 data: 0.3264 max mem: 21002 Epoch: [289] [220/312] eta: 0:00:52 lr: 0.000014 min_lr: 0.000014 loss: 3.6730 (3.5465) weight_decay: 0.0500 (0.0500) time: 0.4845 data: 0.1614 max mem: 21002 Epoch: [289] [230/312] eta: 0:00:47 lr: 0.000014 min_lr: 0.000014 loss: 3.7012 (3.5487) weight_decay: 0.0500 (0.0500) time: 0.5014 data: 0.1355 max mem: 21002 Epoch: [289] [240/312] eta: 0:00:41 lr: 0.000014 min_lr: 0.000014 loss: 3.6733 (3.5465) weight_decay: 0.0500 (0.0500) time: 0.6688 data: 0.2584 max mem: 21002 Epoch: [289] [250/312] eta: 0:00:35 lr: 0.000014 min_lr: 0.000014 loss: 3.4222 (3.5390) weight_decay: 0.0500 (0.0500) time: 0.4761 data: 0.1236 max mem: 21002 Epoch: [289] [260/312] eta: 0:00:29 lr: 0.000014 min_lr: 0.000014 loss: 3.3727 (3.5348) weight_decay: 0.0500 (0.0500) time: 0.4851 data: 0.1434 max mem: 21002 Epoch: [289] [270/312] eta: 0:00:23 lr: 0.000014 min_lr: 0.000014 loss: 3.6829 (3.5415) weight_decay: 0.0500 (0.0500) time: 0.4653 data: 0.1433 max mem: 21002 Epoch: [289] [280/312] eta: 0:00:18 lr: 0.000014 min_lr: 0.000014 loss: 3.8095 (3.5499) weight_decay: 0.0500 (0.0500) time: 0.5344 data: 0.1049 max mem: 21002 Epoch: [289] [290/312] eta: 0:00:12 lr: 0.000014 min_lr: 0.000014 loss: 3.7437 (3.5530) weight_decay: 0.0500 (0.0500) time: 0.6427 data: 0.1492 max mem: 21002 Epoch: [289] [300/312] eta: 0:00:06 lr: 0.000014 min_lr: 0.000014 loss: 3.5571 (3.5506) weight_decay: 0.0500 (0.0500) time: 0.3926 data: 0.0447 max mem: 21002 Epoch: [289] [310/312] eta: 0:00:01 lr: 0.000014 min_lr: 0.000014 loss: 3.4506 (3.5499) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [289] [311/312] eta: 0:00:00 lr: 0.000014 min_lr: 0.000014 loss: 3.4506 (3.5522) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [289] Total time: 0:02:51 (0.5491 s / it) Averaged stats: lr: 0.000014 min_lr: 0.000014 loss: 3.4506 (3.5817) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:11 loss: 1.0493 (1.0493) acc1: 81.3802 (81.3802) acc5: 96.0938 (96.0938) time: 7.9220 data: 7.8037 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3745 (1.2923) acc1: 73.5677 (74.0960) acc5: 91.7969 (92.2720) time: 1.0288 data: 0.9318 max mem: 21002 Test: Total time: 0:00:09 (1.0412 s / it) * Acc@1 73.980 Acc@5 92.208 loss 1.296 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 73.99% Epoch: [290] [ 0/312] eta: 1:04:33 lr: 0.000014 min_lr: 0.000014 loss: 2.4894 (2.4894) weight_decay: 0.0500 (0.0500) time: 12.4137 data: 11.9590 max mem: 21002 Epoch: [290] [ 10/312] eta: 0:08:09 lr: 0.000013 min_lr: 0.000013 loss: 3.7838 (3.5209) weight_decay: 0.0500 (0.0500) time: 1.6213 data: 1.0881 max mem: 21002 Epoch: [290] [ 20/312] eta: 0:05:09 lr: 0.000013 min_lr: 0.000013 loss: 3.7838 (3.5952) weight_decay: 0.0500 (0.0500) time: 0.4918 data: 0.0185 max mem: 21002 Epoch: [290] [ 30/312] eta: 0:03:49 lr: 0.000013 min_lr: 0.000013 loss: 3.5239 (3.6099) weight_decay: 0.0500 (0.0500) time: 0.3674 data: 0.0183 max mem: 21002 Epoch: [290] [ 40/312] eta: 0:03:17 lr: 0.000013 min_lr: 0.000013 loss: 3.6956 (3.6243) weight_decay: 0.0500 (0.0500) time: 0.3743 data: 0.0569 max mem: 21002 Epoch: [290] [ 50/312] eta: 0:03:05 lr: 0.000013 min_lr: 0.000013 loss: 3.6932 (3.5951) weight_decay: 0.0500 (0.0500) time: 0.5462 data: 0.1549 max mem: 21002 Epoch: [290] [ 60/312] eta: 0:02:41 lr: 0.000013 min_lr: 0.000013 loss: 3.6874 (3.6230) weight_decay: 0.0500 (0.0500) time: 0.4621 data: 0.0987 max mem: 21002 Epoch: [290] [ 70/312] eta: 0:02:35 lr: 0.000013 min_lr: 0.000013 loss: 3.9144 (3.6678) weight_decay: 0.0500 (0.0500) time: 0.4785 data: 0.1328 max mem: 21002 Epoch: [290] [ 80/312] eta: 0:02:27 lr: 0.000013 min_lr: 0.000013 loss: 3.7376 (3.6387) weight_decay: 0.0500 (0.0500) time: 0.6340 data: 0.2504 max mem: 21002 Epoch: [290] [ 90/312] eta: 0:02:14 lr: 0.000013 min_lr: 0.000013 loss: 3.8655 (3.6732) weight_decay: 0.0500 (0.0500) time: 0.4716 data: 0.1378 max mem: 21002 Epoch: [290] [100/312] eta: 0:02:09 lr: 0.000013 min_lr: 0.000013 loss: 3.8655 (3.6809) weight_decay: 0.0500 (0.0500) time: 0.5011 data: 0.1553 max mem: 21002 Epoch: [290] [110/312] eta: 0:01:57 lr: 0.000013 min_lr: 0.000013 loss: 3.7939 (3.6684) weight_decay: 0.0500 (0.0500) time: 0.4726 data: 0.1361 max mem: 21002 Epoch: [290] [120/312] eta: 0:01:53 lr: 0.000013 min_lr: 0.000013 loss: 3.5605 (3.6439) weight_decay: 0.0500 (0.0500) time: 0.4880 data: 0.1581 max mem: 21002 Epoch: [290] [130/312] eta: 0:01:48 lr: 0.000013 min_lr: 0.000013 loss: 3.5605 (3.6302) weight_decay: 0.0500 (0.0500) time: 0.6744 data: 0.2973 max mem: 21002 Epoch: [290] [140/312] eta: 0:01:38 lr: 0.000012 min_lr: 0.000012 loss: 3.6545 (3.6209) weight_decay: 0.0500 (0.0500) time: 0.4743 data: 0.1403 max mem: 21002 Epoch: [290] [150/312] eta: 0:01:33 lr: 0.000012 min_lr: 0.000012 loss: 3.6054 (3.6199) weight_decay: 0.0500 (0.0500) time: 0.4701 data: 0.1388 max mem: 21002 Epoch: [290] [160/312] eta: 0:01:28 lr: 0.000012 min_lr: 0.000012 loss: 3.5923 (3.6138) weight_decay: 0.0500 (0.0500) time: 0.6413 data: 0.2623 max mem: 21002 Epoch: [290] [170/312] eta: 0:01:20 lr: 0.000012 min_lr: 0.000012 loss: 3.6200 (3.6035) weight_decay: 0.0500 (0.0500) time: 0.4926 data: 0.1588 max mem: 21002 Epoch: [290] [180/312] eta: 0:01:16 lr: 0.000012 min_lr: 0.000012 loss: 3.8070 (3.6223) weight_decay: 0.0500 (0.0500) time: 0.5651 data: 0.1766 max mem: 21002 Epoch: [290] [190/312] eta: 0:01:08 lr: 0.000012 min_lr: 0.000012 loss: 3.8695 (3.6143) weight_decay: 0.0500 (0.0500) time: 0.5314 data: 0.1423 max mem: 21002 Epoch: [290] [200/312] eta: 0:01:03 lr: 0.000012 min_lr: 0.000012 loss: 3.3702 (3.6007) weight_decay: 0.0500 (0.0500) time: 0.4704 data: 0.1053 max mem: 21002 Epoch: [290] [210/312] eta: 0:00:58 lr: 0.000012 min_lr: 0.000012 loss: 3.4775 (3.5974) weight_decay: 0.0500 (0.0500) time: 0.6611 data: 0.1988 max mem: 21002 Epoch: [290] [220/312] eta: 0:00:51 lr: 0.000012 min_lr: 0.000012 loss: 3.4775 (3.5863) weight_decay: 0.0500 (0.0500) time: 0.4774 data: 0.0945 max mem: 21002 Epoch: [290] [230/312] eta: 0:00:46 lr: 0.000012 min_lr: 0.000012 loss: 3.4717 (3.5827) weight_decay: 0.0500 (0.0500) time: 0.5014 data: 0.1049 max mem: 21002 Epoch: [290] [240/312] eta: 0:00:40 lr: 0.000012 min_lr: 0.000012 loss: 3.6415 (3.5776) weight_decay: 0.0500 (0.0500) time: 0.6560 data: 0.1682 max mem: 21002 Epoch: [290] [250/312] eta: 0:00:34 lr: 0.000012 min_lr: 0.000012 loss: 3.6626 (3.5866) weight_decay: 0.0500 (0.0500) time: 0.4969 data: 0.1085 max mem: 21002 Epoch: [290] [260/312] eta: 0:00:29 lr: 0.000012 min_lr: 0.000012 loss: 3.7569 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.5605 data: 0.1448 max mem: 21002 Epoch: [290] [270/312] eta: 0:00:23 lr: 0.000011 min_lr: 0.000011 loss: 3.8322 (3.5919) weight_decay: 0.0500 (0.0500) time: 0.5052 data: 0.1005 max mem: 21002 Epoch: [290] [280/312] eta: 0:00:18 lr: 0.000011 min_lr: 0.000011 loss: 3.7911 (3.5923) weight_decay: 0.0500 (0.0500) time: 0.5259 data: 0.0747 max mem: 21002 Epoch: [290] [290/312] eta: 0:00:12 lr: 0.000011 min_lr: 0.000011 loss: 3.8712 (3.6065) weight_decay: 0.0500 (0.0500) time: 0.6368 data: 0.0860 max mem: 21002 Epoch: [290] [300/312] eta: 0:00:06 lr: 0.000011 min_lr: 0.000011 loss: 3.8712 (3.6131) weight_decay: 0.0500 (0.0500) time: 0.3940 data: 0.0116 max mem: 21002 Epoch: [290] [310/312] eta: 0:00:01 lr: 0.000011 min_lr: 0.000011 loss: 3.5296 (3.5998) weight_decay: 0.0500 (0.0500) time: 0.2834 data: 0.0001 max mem: 21002 Epoch: [290] [311/312] eta: 0:00:00 lr: 0.000011 min_lr: 0.000011 loss: 3.5429 (3.6008) weight_decay: 0.0500 (0.0500) time: 0.2832 data: 0.0001 max mem: 21002 Epoch: [290] Total time: 0:02:51 (0.5482 s / it) Averaged stats: lr: 0.000011 min_lr: 0.000011 loss: 3.5429 (3.5650) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:13 loss: 1.0031 (1.0031) acc1: 81.6406 (81.6406) acc5: 96.0938 (96.0938) time: 8.1188 data: 8.0001 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3290 (1.2501) acc1: 73.1771 (74.1760) acc5: 92.1875 (92.2880) time: 1.0023 data: 0.9053 max mem: 21002 Test: Total time: 0:00:09 (1.0135 s / it) * Acc@1 74.030 Acc@5 92.196 loss 1.255 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 74.03% Epoch: [291] [ 0/312] eta: 1:04:32 lr: 0.000011 min_lr: 0.000011 loss: 3.0437 (3.0437) weight_decay: 0.0500 (0.0500) time: 12.4116 data: 12.1098 max mem: 21002 Epoch: [291] [ 10/312] eta: 0:08:08 lr: 0.000011 min_lr: 0.000011 loss: 3.7049 (3.6954) weight_decay: 0.0500 (0.0500) time: 1.6165 data: 1.1016 max mem: 21002 Epoch: [291] [ 20/312] eta: 0:05:03 lr: 0.000011 min_lr: 0.000011 loss: 3.7936 (3.7102) weight_decay: 0.0500 (0.0500) time: 0.4698 data: 0.0607 max mem: 21002 Epoch: [291] [ 30/312] eta: 0:03:44 lr: 0.000011 min_lr: 0.000011 loss: 3.7260 (3.6481) weight_decay: 0.0500 (0.0500) time: 0.3473 data: 0.0608 max mem: 21002 Epoch: [291] [ 40/312] eta: 0:03:14 lr: 0.000011 min_lr: 0.000011 loss: 3.7703 (3.7093) weight_decay: 0.0500 (0.0500) time: 0.3757 data: 0.0198 max mem: 21002 Epoch: [291] [ 50/312] eta: 0:03:05 lr: 0.000011 min_lr: 0.000011 loss: 3.8269 (3.6862) weight_decay: 0.0500 (0.0500) time: 0.5645 data: 0.1129 max mem: 21002 Epoch: [291] [ 60/312] eta: 0:02:40 lr: 0.000011 min_lr: 0.000011 loss: 3.6927 (3.6605) weight_decay: 0.0500 (0.0500) time: 0.4816 data: 0.0986 max mem: 21002 Epoch: [291] [ 70/312] eta: 0:02:37 lr: 0.000011 min_lr: 0.000011 loss: 3.3646 (3.5777) weight_decay: 0.0500 (0.0500) time: 0.5158 data: 0.1047 max mem: 21002 Epoch: [291] [ 80/312] eta: 0:02:27 lr: 0.000011 min_lr: 0.000011 loss: 3.2948 (3.5515) weight_decay: 0.0500 (0.0500) time: 0.6349 data: 0.1445 max mem: 21002 Epoch: [291] [ 90/312] eta: 0:02:17 lr: 0.000011 min_lr: 0.000011 loss: 3.6828 (3.5700) weight_decay: 0.0500 (0.0500) time: 0.4961 data: 0.0793 max mem: 21002 Epoch: [291] [100/312] eta: 0:02:12 lr: 0.000010 min_lr: 0.000010 loss: 3.6618 (3.5718) weight_decay: 0.0500 (0.0500) time: 0.5623 data: 0.0752 max mem: 21002 Epoch: [291] [110/312] eta: 0:02:00 lr: 0.000010 min_lr: 0.000010 loss: 3.5835 (3.5681) weight_decay: 0.0500 (0.0500) time: 0.4905 data: 0.0565 max mem: 21002 Epoch: [291] [120/312] eta: 0:01:54 lr: 0.000010 min_lr: 0.000010 loss: 3.8134 (3.5716) weight_decay: 0.0500 (0.0500) time: 0.4648 data: 0.0696 max mem: 21002 Epoch: [291] [130/312] eta: 0:01:49 lr: 0.000010 min_lr: 0.000010 loss: 3.6050 (3.5674) weight_decay: 0.0500 (0.0500) time: 0.6270 data: 0.1042 max mem: 21002 Epoch: [291] [140/312] eta: 0:01:40 lr: 0.000010 min_lr: 0.000010 loss: 3.5592 (3.5538) weight_decay: 0.0500 (0.0500) time: 0.5014 data: 0.0887 max mem: 21002 Epoch: [291] [150/312] eta: 0:01:35 lr: 0.000010 min_lr: 0.000010 loss: 3.3382 (3.5426) weight_decay: 0.0500 (0.0500) time: 0.5193 data: 0.1121 max mem: 21002 Epoch: [291] [160/312] eta: 0:01:28 lr: 0.000010 min_lr: 0.000010 loss: 3.4698 (3.5489) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.1060 max mem: 21002 Epoch: [291] [170/312] eta: 0:01:21 lr: 0.000010 min_lr: 0.000010 loss: 3.8134 (3.5703) weight_decay: 0.0500 (0.0500) time: 0.4804 data: 0.0632 max mem: 21002 Epoch: [291] [180/312] eta: 0:01:16 lr: 0.000010 min_lr: 0.000010 loss: 3.8134 (3.5675) weight_decay: 0.0500 (0.0500) time: 0.5434 data: 0.0755 max mem: 21002 Epoch: [291] [190/312] eta: 0:01:09 lr: 0.000010 min_lr: 0.000010 loss: 3.5434 (3.5695) weight_decay: 0.0500 (0.0500) time: 0.4796 data: 0.0742 max mem: 21002 Epoch: [291] [200/312] eta: 0:01:04 lr: 0.000010 min_lr: 0.000010 loss: 3.5434 (3.5734) weight_decay: 0.0500 (0.0500) time: 0.5599 data: 0.1102 max mem: 21002 Epoch: [291] [210/312] eta: 0:00:58 lr: 0.000010 min_lr: 0.000010 loss: 3.5431 (3.5743) weight_decay: 0.0500 (0.0500) time: 0.6407 data: 0.0810 max mem: 21002 Epoch: [291] [220/312] eta: 0:00:52 lr: 0.000010 min_lr: 0.000010 loss: 3.6033 (3.5795) weight_decay: 0.0500 (0.0500) time: 0.4612 data: 0.0172 max mem: 21002 Epoch: [291] [230/312] eta: 0:00:47 lr: 0.000010 min_lr: 0.000010 loss: 3.7675 (3.5847) weight_decay: 0.0500 (0.0500) time: 0.6054 data: 0.0596 max mem: 21002 Epoch: [291] [240/312] eta: 0:00:40 lr: 0.000010 min_lr: 0.000010 loss: 3.6715 (3.5819) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0439 max mem: 21002 Epoch: [291] [250/312] eta: 0:00:35 lr: 0.000009 min_lr: 0.000009 loss: 3.7560 (3.5925) weight_decay: 0.0500 (0.0500) time: 0.4654 data: 0.0288 max mem: 21002 Epoch: [291] [260/312] eta: 0:00:29 lr: 0.000009 min_lr: 0.000009 loss: 3.8558 (3.5991) weight_decay: 0.0500 (0.0500) time: 0.5993 data: 0.0285 max mem: 21002 Epoch: [291] [270/312] eta: 0:00:23 lr: 0.000009 min_lr: 0.000009 loss: 3.7054 (3.5980) weight_decay: 0.0500 (0.0500) time: 0.4915 data: 0.0184 max mem: 21002 Epoch: [291] [280/312] eta: 0:00:18 lr: 0.000009 min_lr: 0.000009 loss: 3.8465 (3.6044) weight_decay: 0.0500 (0.0500) time: 0.5248 data: 0.0525 max mem: 21002 Epoch: [291] [290/312] eta: 0:00:12 lr: 0.000009 min_lr: 0.000009 loss: 3.7465 (3.5993) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0351 max mem: 21002 Epoch: [291] [300/312] eta: 0:00:06 lr: 0.000009 min_lr: 0.000009 loss: 3.2324 (3.5865) weight_decay: 0.0500 (0.0500) time: 0.3889 data: 0.0136 max mem: 21002 Epoch: [291] [310/312] eta: 0:00:01 lr: 0.000009 min_lr: 0.000009 loss: 3.5151 (3.5916) weight_decay: 0.0500 (0.0500) time: 0.2975 data: 0.0135 max mem: 21002 Epoch: [291] [311/312] eta: 0:00:00 lr: 0.000009 min_lr: 0.000009 loss: 3.5595 (3.5915) weight_decay: 0.0500 (0.0500) time: 0.2901 data: 0.0135 max mem: 21002 Epoch: [291] Total time: 0:02:51 (0.5498 s / it) Averaged stats: lr: 0.000009 min_lr: 0.000009 loss: 3.5595 (3.5700) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:16 loss: 1.0368 (1.0368) acc1: 81.5104 (81.5104) acc5: 96.0938 (96.0938) time: 8.5515 data: 8.4328 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3574 (1.2770) acc1: 73.1771 (74.0800) acc5: 91.1458 (91.9840) time: 1.0359 data: 0.9371 max mem: 21002 Test: Total time: 0:00:09 (1.0678 s / it) * Acc@1 73.992 Acc@5 92.184 loss 1.282 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 74.03% Epoch: [292] [ 0/312] eta: 1:04:08 lr: 0.000009 min_lr: 0.000009 loss: 3.9522 (3.9522) weight_decay: 0.0500 (0.0500) time: 12.3363 data: 10.0083 max mem: 21002 Epoch: [292] [ 10/312] eta: 0:08:46 lr: 0.000009 min_lr: 0.000009 loss: 3.7970 (3.6075) weight_decay: 0.0500 (0.0500) time: 1.7445 data: 1.0353 max mem: 21002 Epoch: [292] [ 20/312] eta: 0:05:14 lr: 0.000009 min_lr: 0.000009 loss: 3.5264 (3.5495) weight_decay: 0.0500 (0.0500) time: 0.5139 data: 0.0978 max mem: 21002 Epoch: [292] [ 30/312] eta: 0:03:52 lr: 0.000009 min_lr: 0.000009 loss: 3.1778 (3.4057) weight_decay: 0.0500 (0.0500) time: 0.3175 data: 0.0296 max mem: 21002 Epoch: [292] [ 40/312] eta: 0:03:09 lr: 0.000009 min_lr: 0.000009 loss: 3.3915 (3.4219) weight_decay: 0.0500 (0.0500) time: 0.2959 data: 0.0022 max mem: 21002 Epoch: [292] [ 50/312] eta: 0:02:56 lr: 0.000009 min_lr: 0.000009 loss: 3.5536 (3.4235) weight_decay: 0.0500 (0.0500) time: 0.4376 data: 0.0712 max mem: 21002 Epoch: [292] [ 60/312] eta: 0:02:38 lr: 0.000009 min_lr: 0.000009 loss: 3.5522 (3.4391) weight_decay: 0.0500 (0.0500) time: 0.4962 data: 0.0846 max mem: 21002 Epoch: [292] [ 70/312] eta: 0:02:29 lr: 0.000009 min_lr: 0.000009 loss: 3.5522 (3.4465) weight_decay: 0.0500 (0.0500) time: 0.4819 data: 0.1160 max mem: 21002 Epoch: [292] [ 80/312] eta: 0:02:24 lr: 0.000009 min_lr: 0.000009 loss: 3.5571 (3.4660) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.1946 max mem: 21002 Epoch: [292] [ 90/312] eta: 0:02:14 lr: 0.000008 min_lr: 0.000008 loss: 3.7906 (3.5195) weight_decay: 0.0500 (0.0500) time: 0.5702 data: 0.1407 max mem: 21002 Epoch: [292] [100/312] eta: 0:02:07 lr: 0.000008 min_lr: 0.000008 loss: 3.8684 (3.5545) weight_decay: 0.0500 (0.0500) time: 0.5179 data: 0.1225 max mem: 21002 Epoch: [292] [110/312] eta: 0:01:58 lr: 0.000008 min_lr: 0.000008 loss: 3.7088 (3.5365) weight_decay: 0.0500 (0.0500) time: 0.4907 data: 0.1175 max mem: 21002 Epoch: [292] [120/312] eta: 0:01:53 lr: 0.000008 min_lr: 0.000008 loss: 3.7554 (3.5636) weight_decay: 0.0500 (0.0500) time: 0.5286 data: 0.1358 max mem: 21002 Epoch: [292] [130/312] eta: 0:01:48 lr: 0.000008 min_lr: 0.000008 loss: 3.8282 (3.5753) weight_decay: 0.0500 (0.0500) time: 0.6467 data: 0.1662 max mem: 21002 Epoch: [292] [140/312] eta: 0:01:39 lr: 0.000008 min_lr: 0.000008 loss: 3.8304 (3.5831) weight_decay: 0.0500 (0.0500) time: 0.5370 data: 0.1139 max mem: 21002 Epoch: [292] [150/312] eta: 0:01:34 lr: 0.000008 min_lr: 0.000008 loss: 3.7549 (3.5901) weight_decay: 0.0500 (0.0500) time: 0.4965 data: 0.1212 max mem: 21002 Epoch: [292] [160/312] eta: 0:01:28 lr: 0.000008 min_lr: 0.000008 loss: 3.7204 (3.5895) weight_decay: 0.0500 (0.0500) time: 0.5791 data: 0.1735 max mem: 21002 Epoch: [292] [170/312] eta: 0:01:22 lr: 0.000008 min_lr: 0.000008 loss: 3.7204 (3.5854) weight_decay: 0.0500 (0.0500) time: 0.5665 data: 0.0978 max mem: 21002 Epoch: [292] [180/312] eta: 0:01:17 lr: 0.000008 min_lr: 0.000008 loss: 3.6886 (3.5923) weight_decay: 0.0500 (0.0500) time: 0.6331 data: 0.0588 max mem: 21002 Epoch: [292] [190/312] eta: 0:01:10 lr: 0.000008 min_lr: 0.000008 loss: 3.7313 (3.5955) weight_decay: 0.0500 (0.0500) time: 0.5437 data: 0.0596 max mem: 21002 Epoch: [292] [200/312] eta: 0:01:04 lr: 0.000008 min_lr: 0.000008 loss: 3.7313 (3.5961) weight_decay: 0.0500 (0.0500) time: 0.5101 data: 0.0558 max mem: 21002 Epoch: [292] [210/312] eta: 0:00:59 lr: 0.000008 min_lr: 0.000008 loss: 3.7852 (3.6025) weight_decay: 0.0500 (0.0500) time: 0.6259 data: 0.1160 max mem: 21002 Epoch: [292] [220/312] eta: 0:00:52 lr: 0.000008 min_lr: 0.000008 loss: 3.7731 (3.6006) weight_decay: 0.0500 (0.0500) time: 0.5005 data: 0.0657 max mem: 21002 Epoch: [292] [230/312] eta: 0:00:47 lr: 0.000008 min_lr: 0.000008 loss: 3.8801 (3.6138) weight_decay: 0.0500 (0.0500) time: 0.4878 data: 0.0443 max mem: 21002 Epoch: [292] [240/312] eta: 0:00:41 lr: 0.000008 min_lr: 0.000008 loss: 3.8801 (3.6122) weight_decay: 0.0500 (0.0500) time: 0.5477 data: 0.1143 max mem: 21002 Epoch: [292] [250/312] eta: 0:00:35 lr: 0.000008 min_lr: 0.000008 loss: 3.6914 (3.6117) weight_decay: 0.0500 (0.0500) time: 0.5109 data: 0.0709 max mem: 21002 Epoch: [292] [260/312] eta: 0:00:29 lr: 0.000007 min_lr: 0.000007 loss: 3.5633 (3.5921) weight_decay: 0.0500 (0.0500) time: 0.6072 data: 0.0934 max mem: 21002 Epoch: [292] [270/312] eta: 0:00:23 lr: 0.000007 min_lr: 0.000007 loss: 3.2941 (3.5861) weight_decay: 0.0500 (0.0500) time: 0.4942 data: 0.0935 max mem: 21002 Epoch: [292] [280/312] eta: 0:00:18 lr: 0.000007 min_lr: 0.000007 loss: 3.4724 (3.5879) weight_decay: 0.0500 (0.0500) time: 0.4987 data: 0.1067 max mem: 21002 Epoch: [292] [290/312] eta: 0:00:12 lr: 0.000007 min_lr: 0.000007 loss: 3.6236 (3.5880) weight_decay: 0.0500 (0.0500) time: 0.6649 data: 0.2134 max mem: 21002 Epoch: [292] [300/312] eta: 0:00:06 lr: 0.000007 min_lr: 0.000007 loss: 3.6058 (3.5800) weight_decay: 0.0500 (0.0500) time: 0.4593 data: 0.1072 max mem: 21002 Epoch: [292] [310/312] eta: 0:00:01 lr: 0.000007 min_lr: 0.000007 loss: 3.4460 (3.5773) weight_decay: 0.0500 (0.0500) time: 0.2776 data: 0.0001 max mem: 21002 Epoch: [292] [311/312] eta: 0:00:00 lr: 0.000007 min_lr: 0.000007 loss: 3.5086 (3.5771) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [292] Total time: 0:02:52 (0.5544 s / it) Averaged stats: lr: 0.000007 min_lr: 0.000007 loss: 3.5086 (3.5647) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:20 loss: 1.0198 (1.0198) acc1: 81.7708 (81.7708) acc5: 96.0938 (96.0938) time: 8.9395 data: 8.8229 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3480 (1.2651) acc1: 73.5677 (74.3040) acc5: 91.7969 (92.1440) time: 1.0790 data: 0.9804 max mem: 21002 Test: Total time: 0:00:09 (1.0944 s / it) * Acc@1 73.972 Acc@5 92.162 loss 1.270 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 74.03% Epoch: [293] [ 0/312] eta: 0:57:48 lr: 0.000007 min_lr: 0.000007 loss: 3.8553 (3.8553) weight_decay: 0.0500 (0.0500) time: 11.1161 data: 10.6059 max mem: 21002 Epoch: [293] [ 10/312] eta: 0:07:55 lr: 0.000007 min_lr: 0.000007 loss: 3.8553 (3.5658) weight_decay: 0.0500 (0.0500) time: 1.5751 data: 0.9648 max mem: 21002 Epoch: [293] [ 20/312] eta: 0:04:58 lr: 0.000007 min_lr: 0.000007 loss: 3.7460 (3.5477) weight_decay: 0.0500 (0.0500) time: 0.5170 data: 0.0574 max mem: 21002 Epoch: [293] [ 30/312] eta: 0:03:41 lr: 0.000007 min_lr: 0.000007 loss: 3.3289 (3.4072) weight_decay: 0.0500 (0.0500) time: 0.3515 data: 0.0574 max mem: 21002 Epoch: [293] [ 40/312] eta: 0:03:20 lr: 0.000007 min_lr: 0.000007 loss: 3.3289 (3.4341) weight_decay: 0.0500 (0.0500) time: 0.4346 data: 0.0966 max mem: 21002 Epoch: [293] [ 50/312] eta: 0:03:06 lr: 0.000007 min_lr: 0.000007 loss: 3.7841 (3.5245) weight_decay: 0.0500 (0.0500) time: 0.5964 data: 0.1990 max mem: 21002 Epoch: [293] [ 60/312] eta: 0:02:41 lr: 0.000007 min_lr: 0.000007 loss: 3.5754 (3.4861) weight_decay: 0.0500 (0.0500) time: 0.4517 data: 0.1031 max mem: 21002 Epoch: [293] [ 70/312] eta: 0:02:39 lr: 0.000007 min_lr: 0.000007 loss: 3.4986 (3.4741) weight_decay: 0.0500 (0.0500) time: 0.5224 data: 0.1456 max mem: 21002 Epoch: [293] [ 80/312] eta: 0:02:26 lr: 0.000007 min_lr: 0.000007 loss: 3.5422 (3.4866) weight_decay: 0.0500 (0.0500) time: 0.6054 data: 0.1813 max mem: 21002 Epoch: [293] [ 90/312] eta: 0:02:18 lr: 0.000007 min_lr: 0.000007 loss: 3.3186 (3.4656) weight_decay: 0.0500 (0.0500) time: 0.5032 data: 0.1173 max mem: 21002 Epoch: [293] [100/312] eta: 0:02:12 lr: 0.000007 min_lr: 0.000007 loss: 3.3497 (3.4623) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.1775 max mem: 21002 Epoch: [293] [110/312] eta: 0:02:00 lr: 0.000007 min_lr: 0.000007 loss: 3.8794 (3.5086) weight_decay: 0.0500 (0.0500) time: 0.4634 data: 0.0966 max mem: 21002 Epoch: [293] [120/312] eta: 0:01:55 lr: 0.000007 min_lr: 0.000007 loss: 3.8425 (3.5148) weight_decay: 0.0500 (0.0500) time: 0.4908 data: 0.1129 max mem: 21002 Epoch: [293] [130/312] eta: 0:01:50 lr: 0.000006 min_lr: 0.000006 loss: 3.7169 (3.5204) weight_decay: 0.0500 (0.0500) time: 0.6785 data: 0.1748 max mem: 21002 Epoch: [293] [140/312] eta: 0:01:41 lr: 0.000006 min_lr: 0.000006 loss: 3.7850 (3.5265) weight_decay: 0.0500 (0.0500) time: 0.4919 data: 0.0766 max mem: 21002 Epoch: [293] [150/312] eta: 0:01:36 lr: 0.000006 min_lr: 0.000006 loss: 3.8102 (3.5350) weight_decay: 0.0500 (0.0500) time: 0.5094 data: 0.1175 max mem: 21002 Epoch: [293] [160/312] eta: 0:01:28 lr: 0.000006 min_lr: 0.000006 loss: 3.8146 (3.5474) weight_decay: 0.0500 (0.0500) time: 0.5568 data: 0.1033 max mem: 21002 Epoch: [293] [170/312] eta: 0:01:22 lr: 0.000006 min_lr: 0.000006 loss: 3.8657 (3.5664) weight_decay: 0.0500 (0.0500) time: 0.5049 data: 0.0860 max mem: 21002 Epoch: [293] [180/312] eta: 0:01:16 lr: 0.000006 min_lr: 0.000006 loss: 3.9007 (3.5770) weight_decay: 0.0500 (0.0500) time: 0.5747 data: 0.0903 max mem: 21002 Epoch: [293] [190/312] eta: 0:01:10 lr: 0.000006 min_lr: 0.000006 loss: 3.7274 (3.5740) weight_decay: 0.0500 (0.0500) time: 0.4850 data: 0.0681 max mem: 21002 Epoch: [293] [200/312] eta: 0:01:05 lr: 0.000006 min_lr: 0.000006 loss: 3.6260 (3.5811) weight_decay: 0.0500 (0.0500) time: 0.5630 data: 0.1200 max mem: 21002 Epoch: [293] [210/312] eta: 0:00:59 lr: 0.000006 min_lr: 0.000006 loss: 3.6260 (3.5677) weight_decay: 0.0500 (0.0500) time: 0.6462 data: 0.0569 max mem: 21002 Epoch: [293] [220/312] eta: 0:00:52 lr: 0.000006 min_lr: 0.000006 loss: 3.3751 (3.5565) weight_decay: 0.0500 (0.0500) time: 0.4768 data: 0.0417 max mem: 21002 Epoch: [293] [230/312] eta: 0:00:47 lr: 0.000006 min_lr: 0.000006 loss: 3.4415 (3.5545) weight_decay: 0.0500 (0.0500) time: 0.5649 data: 0.0950 max mem: 21002 Epoch: [293] [240/312] eta: 0:00:41 lr: 0.000006 min_lr: 0.000006 loss: 3.6661 (3.5609) weight_decay: 0.0500 (0.0500) time: 0.5517 data: 0.0541 max mem: 21002 Epoch: [293] [250/312] eta: 0:00:35 lr: 0.000006 min_lr: 0.000006 loss: 3.5969 (3.5507) weight_decay: 0.0500 (0.0500) time: 0.4733 data: 0.0198 max mem: 21002 Epoch: [293] [260/312] eta: 0:00:29 lr: 0.000006 min_lr: 0.000006 loss: 2.9634 (3.5319) weight_decay: 0.0500 (0.0500) time: 0.6309 data: 0.0198 max mem: 21002 Epoch: [293] [270/312] eta: 0:00:23 lr: 0.000006 min_lr: 0.000006 loss: 3.2786 (3.5382) weight_decay: 0.0500 (0.0500) time: 0.5278 data: 0.0540 max mem: 21002 Epoch: [293] [280/312] eta: 0:00:18 lr: 0.000006 min_lr: 0.000006 loss: 3.5796 (3.5407) weight_decay: 0.0500 (0.0500) time: 0.5595 data: 0.1146 max mem: 21002 Epoch: [293] [290/312] eta: 0:00:12 lr: 0.000006 min_lr: 0.000006 loss: 3.5796 (3.5345) weight_decay: 0.0500 (0.0500) time: 0.5675 data: 0.0612 max mem: 21002 Epoch: [293] [300/312] eta: 0:00:06 lr: 0.000006 min_lr: 0.000006 loss: 3.6009 (3.5317) weight_decay: 0.0500 (0.0500) time: 0.3484 data: 0.0036 max mem: 21002 Epoch: [293] [310/312] eta: 0:00:01 lr: 0.000006 min_lr: 0.000006 loss: 3.7019 (3.5363) weight_decay: 0.0500 (0.0500) time: 0.2801 data: 0.0035 max mem: 21002 Epoch: [293] [311/312] eta: 0:00:00 lr: 0.000006 min_lr: 0.000006 loss: 3.5543 (3.5360) weight_decay: 0.0500 (0.0500) time: 0.2800 data: 0.0035 max mem: 21002 Epoch: [293] Total time: 0:02:52 (0.5525 s / it) Averaged stats: lr: 0.000006 min_lr: 0.000006 loss: 3.5543 (3.5548) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:19 loss: 1.0334 (1.0334) acc1: 81.3802 (81.3802) acc5: 96.0938 (96.0938) time: 8.8317 data: 8.7130 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3635 (1.2798) acc1: 73.3073 (74.1440) acc5: 92.0573 (92.1280) time: 1.0680 data: 0.9682 max mem: 21002 Test: Total time: 0:00:09 (1.0896 s / it) * Acc@1 73.964 Acc@5 92.174 loss 1.284 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 74.03% Epoch: [294] [ 0/312] eta: 1:03:06 lr: 0.000006 min_lr: 0.000006 loss: 2.9238 (2.9238) weight_decay: 0.0500 (0.0500) time: 12.1369 data: 11.8521 max mem: 21002 Epoch: [294] [ 10/312] eta: 0:07:44 lr: 0.000005 min_lr: 0.000005 loss: 3.6570 (3.4880) weight_decay: 0.0500 (0.0500) time: 1.5366 data: 1.0780 max mem: 21002 Epoch: [294] [ 20/312] eta: 0:05:01 lr: 0.000005 min_lr: 0.000005 loss: 3.5594 (3.4810) weight_decay: 0.0500 (0.0500) time: 0.4782 data: 0.0738 max mem: 21002 Epoch: [294] [ 30/312] eta: 0:03:43 lr: 0.000005 min_lr: 0.000005 loss: 3.5661 (3.5365) weight_decay: 0.0500 (0.0500) time: 0.3846 data: 0.0739 max mem: 21002 Epoch: [294] [ 40/312] eta: 0:03:25 lr: 0.000005 min_lr: 0.000005 loss: 3.7246 (3.5439) weight_decay: 0.0500 (0.0500) time: 0.4636 data: 0.1300 max mem: 21002 Epoch: [294] [ 50/312] eta: 0:03:11 lr: 0.000005 min_lr: 0.000005 loss: 3.6617 (3.5340) weight_decay: 0.0500 (0.0500) time: 0.6306 data: 0.2730 max mem: 21002 Epoch: [294] [ 60/312] eta: 0:02:48 lr: 0.000005 min_lr: 0.000005 loss: 3.6039 (3.5538) weight_decay: 0.0500 (0.0500) time: 0.4927 data: 0.1439 max mem: 21002 Epoch: [294] [ 70/312] eta: 0:02:41 lr: 0.000005 min_lr: 0.000005 loss: 3.6039 (3.5695) weight_decay: 0.0500 (0.0500) time: 0.5038 data: 0.1251 max mem: 21002 Epoch: [294] [ 80/312] eta: 0:02:34 lr: 0.000005 min_lr: 0.000005 loss: 3.3416 (3.5378) weight_decay: 0.0500 (0.0500) time: 0.6488 data: 0.2573 max mem: 21002 Epoch: [294] [ 90/312] eta: 0:02:18 lr: 0.000005 min_lr: 0.000005 loss: 3.5363 (3.5622) weight_decay: 0.0500 (0.0500) time: 0.4817 data: 0.1330 max mem: 21002 Epoch: [294] [100/312] eta: 0:02:13 lr: 0.000005 min_lr: 0.000005 loss: 3.6251 (3.5353) weight_decay: 0.0500 (0.0500) time: 0.4841 data: 0.1140 max mem: 21002 Epoch: [294] [110/312] eta: 0:02:00 lr: 0.000005 min_lr: 0.000005 loss: 3.5229 (3.5391) weight_decay: 0.0500 (0.0500) time: 0.4700 data: 0.1140 max mem: 21002 Epoch: [294] [120/312] eta: 0:01:56 lr: 0.000005 min_lr: 0.000005 loss: 3.5578 (3.5360) weight_decay: 0.0500 (0.0500) time: 0.4905 data: 0.0481 max mem: 21002 Epoch: [294] [130/312] eta: 0:01:50 lr: 0.000005 min_lr: 0.000005 loss: 3.7449 (3.5547) weight_decay: 0.0500 (0.0500) time: 0.6785 data: 0.1084 max mem: 21002 Epoch: [294] [140/312] eta: 0:01:41 lr: 0.000005 min_lr: 0.000005 loss: 3.6748 (3.5367) weight_decay: 0.0500 (0.0500) time: 0.4760 data: 0.0614 max mem: 21002 Epoch: [294] [150/312] eta: 0:01:36 lr: 0.000005 min_lr: 0.000005 loss: 3.4802 (3.5474) weight_decay: 0.0500 (0.0500) time: 0.5229 data: 0.0593 max mem: 21002 Epoch: [294] [160/312] eta: 0:01:30 lr: 0.000005 min_lr: 0.000005 loss: 3.7432 (3.5447) weight_decay: 0.0500 (0.0500) time: 0.6560 data: 0.0588 max mem: 21002 Epoch: [294] [170/312] eta: 0:01:22 lr: 0.000005 min_lr: 0.000005 loss: 3.3766 (3.5298) weight_decay: 0.0500 (0.0500) time: 0.4479 data: 0.0168 max mem: 21002 Epoch: [294] [180/312] eta: 0:01:17 lr: 0.000005 min_lr: 0.000005 loss: 3.6482 (3.5465) weight_decay: 0.0500 (0.0500) time: 0.5400 data: 0.0618 max mem: 21002 Epoch: [294] [190/312] eta: 0:01:10 lr: 0.000005 min_lr: 0.000005 loss: 3.6226 (3.5237) weight_decay: 0.0500 (0.0500) time: 0.5184 data: 0.0469 max mem: 21002 Epoch: [294] [200/312] eta: 0:01:04 lr: 0.000005 min_lr: 0.000005 loss: 3.2582 (3.5226) weight_decay: 0.0500 (0.0500) time: 0.4564 data: 0.0215 max mem: 21002 Epoch: [294] [210/312] eta: 0:00:59 lr: 0.000005 min_lr: 0.000005 loss: 3.3797 (3.5182) weight_decay: 0.0500 (0.0500) time: 0.6562 data: 0.0322 max mem: 21002 Epoch: [294] [220/312] eta: 0:00:52 lr: 0.000005 min_lr: 0.000005 loss: 3.1518 (3.4980) weight_decay: 0.0500 (0.0500) time: 0.4907 data: 0.0125 max mem: 21002 Epoch: [294] [230/312] eta: 0:00:46 lr: 0.000004 min_lr: 0.000004 loss: 3.5444 (3.5113) weight_decay: 0.0500 (0.0500) time: 0.4735 data: 0.0405 max mem: 21002 Epoch: [294] [240/312] eta: 0:00:41 lr: 0.000004 min_lr: 0.000004 loss: 3.8480 (3.5103) weight_decay: 0.0500 (0.0500) time: 0.6625 data: 0.0863 max mem: 21002 Epoch: [294] [250/312] eta: 0:00:35 lr: 0.000004 min_lr: 0.000004 loss: 3.7232 (3.5087) weight_decay: 0.0500 (0.0500) time: 0.4761 data: 0.0464 max mem: 21002 Epoch: [294] [260/312] eta: 0:00:29 lr: 0.000004 min_lr: 0.000004 loss: 3.4363 (3.5099) weight_decay: 0.0500 (0.0500) time: 0.5094 data: 0.0352 max mem: 21002 Epoch: [294] [270/312] eta: 0:00:23 lr: 0.000004 min_lr: 0.000004 loss: 3.7263 (3.5161) weight_decay: 0.0500 (0.0500) time: 0.5098 data: 0.0352 max mem: 21002 Epoch: [294] [280/312] eta: 0:00:18 lr: 0.000004 min_lr: 0.000004 loss: 3.7263 (3.5219) weight_decay: 0.0500 (0.0500) time: 0.4645 data: 0.0246 max mem: 21002 Epoch: [294] [290/312] eta: 0:00:12 lr: 0.000004 min_lr: 0.000004 loss: 3.5250 (3.5192) weight_decay: 0.0500 (0.0500) time: 0.6211 data: 0.0570 max mem: 21002 Epoch: [294] [300/312] eta: 0:00:06 lr: 0.000004 min_lr: 0.000004 loss: 3.5545 (3.5261) weight_decay: 0.0500 (0.0500) time: 0.4389 data: 0.0327 max mem: 21002 Epoch: [294] [310/312] eta: 0:00:01 lr: 0.000004 min_lr: 0.000004 loss: 3.7591 (3.5291) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [294] [311/312] eta: 0:00:00 lr: 0.000004 min_lr: 0.000004 loss: 3.7591 (3.5298) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [294] Total time: 0:02:51 (0.5494 s / it) Averaged stats: lr: 0.000004 min_lr: 0.000004 loss: 3.7591 (3.5533) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:15 loss: 1.0984 (1.0984) acc1: 81.5104 (81.5104) acc5: 95.8333 (95.8333) time: 8.3842 data: 8.2653 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4139 (1.3307) acc1: 73.6979 (74.0000) acc5: 92.1875 (92.1440) time: 1.0194 data: 0.9185 max mem: 21002 Test: Total time: 0:00:09 (1.0532 s / it) * Acc@1 73.824 Acc@5 92.126 loss 1.337 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 74.03% Epoch: [295] [ 0/312] eta: 1:00:05 lr: 0.000004 min_lr: 0.000004 loss: 3.8172 (3.8172) weight_decay: 0.0500 (0.0500) time: 11.5551 data: 9.6874 max mem: 21002 Epoch: [295] [ 10/312] eta: 0:07:59 lr: 0.000004 min_lr: 0.000004 loss: 3.8236 (3.6820) weight_decay: 0.0500 (0.0500) time: 1.5884 data: 0.9690 max mem: 21002 Epoch: [295] [ 20/312] eta: 0:05:15 lr: 0.000004 min_lr: 0.000004 loss: 3.7258 (3.4745) weight_decay: 0.0500 (0.0500) time: 0.5552 data: 0.1205 max mem: 21002 Epoch: [295] [ 30/312] eta: 0:03:53 lr: 0.000004 min_lr: 0.000004 loss: 3.3659 (3.4503) weight_decay: 0.0500 (0.0500) time: 0.4076 data: 0.0723 max mem: 21002 Epoch: [295] [ 40/312] eta: 0:03:22 lr: 0.000004 min_lr: 0.000004 loss: 3.7719 (3.5287) weight_decay: 0.0500 (0.0500) time: 0.3945 data: 0.0049 max mem: 21002 Epoch: [295] [ 50/312] eta: 0:03:16 lr: 0.000004 min_lr: 0.000004 loss: 3.8947 (3.5868) weight_decay: 0.0500 (0.0500) time: 0.6331 data: 0.0564 max mem: 21002 Epoch: [295] [ 60/312] eta: 0:02:51 lr: 0.000004 min_lr: 0.000004 loss: 3.8739 (3.5996) weight_decay: 0.0500 (0.0500) time: 0.5448 data: 0.0673 max mem: 21002 Epoch: [295] [ 70/312] eta: 0:02:40 lr: 0.000004 min_lr: 0.000004 loss: 3.6508 (3.5925) weight_decay: 0.0500 (0.0500) time: 0.4468 data: 0.0473 max mem: 21002 Epoch: [295] [ 80/312] eta: 0:02:29 lr: 0.000004 min_lr: 0.000004 loss: 3.5718 (3.5971) weight_decay: 0.0500 (0.0500) time: 0.5320 data: 0.0324 max mem: 21002 Epoch: [295] [ 90/312] eta: 0:02:17 lr: 0.000004 min_lr: 0.000004 loss: 3.6456 (3.6095) weight_decay: 0.0500 (0.0500) time: 0.4632 data: 0.0281 max mem: 21002 Epoch: [295] [100/312] eta: 0:02:12 lr: 0.000004 min_lr: 0.000004 loss: 3.6020 (3.5823) weight_decay: 0.0500 (0.0500) time: 0.5610 data: 0.0873 max mem: 21002 Epoch: [295] [110/312] eta: 0:02:00 lr: 0.000004 min_lr: 0.000004 loss: 3.4164 (3.5681) weight_decay: 0.0500 (0.0500) time: 0.4840 data: 0.0600 max mem: 21002 Epoch: [295] [120/312] eta: 0:01:56 lr: 0.000004 min_lr: 0.000004 loss: 3.4343 (3.5683) weight_decay: 0.0500 (0.0500) time: 0.5127 data: 0.0278 max mem: 21002 Epoch: [295] [130/312] eta: 0:01:50 lr: 0.000004 min_lr: 0.000004 loss: 3.7788 (3.5734) weight_decay: 0.0500 (0.0500) time: 0.6745 data: 0.0642 max mem: 21002 Epoch: [295] [140/312] eta: 0:01:41 lr: 0.000004 min_lr: 0.000004 loss: 3.4865 (3.5636) weight_decay: 0.0500 (0.0500) time: 0.4625 data: 0.0371 max mem: 21002 Epoch: [295] [150/312] eta: 0:01:36 lr: 0.000004 min_lr: 0.000004 loss: 3.4364 (3.5492) weight_decay: 0.0500 (0.0500) time: 0.4968 data: 0.0469 max mem: 21002 Epoch: [295] [160/312] eta: 0:01:29 lr: 0.000004 min_lr: 0.000004 loss: 3.5221 (3.5623) weight_decay: 0.0500 (0.0500) time: 0.6058 data: 0.0471 max mem: 21002 Epoch: [295] [170/312] eta: 0:01:22 lr: 0.000003 min_lr: 0.000003 loss: 3.8392 (3.5776) weight_decay: 0.0500 (0.0500) time: 0.4898 data: 0.0222 max mem: 21002 Epoch: [295] [180/312] eta: 0:01:17 lr: 0.000003 min_lr: 0.000003 loss: 3.7260 (3.5771) weight_decay: 0.0500 (0.0500) time: 0.5733 data: 0.0593 max mem: 21002 Epoch: [295] [190/312] eta: 0:01:09 lr: 0.000003 min_lr: 0.000003 loss: 3.5411 (3.5640) weight_decay: 0.0500 (0.0500) time: 0.4976 data: 0.0380 max mem: 21002 Epoch: [295] [200/312] eta: 0:01:04 lr: 0.000003 min_lr: 0.000003 loss: 3.4938 (3.5696) weight_decay: 0.0500 (0.0500) time: 0.4854 data: 0.0265 max mem: 21002 Epoch: [295] [210/312] eta: 0:00:59 lr: 0.000003 min_lr: 0.000003 loss: 3.4938 (3.5655) weight_decay: 0.0500 (0.0500) time: 0.6865 data: 0.0316 max mem: 21002 Epoch: [295] [220/312] eta: 0:00:52 lr: 0.000003 min_lr: 0.000003 loss: 3.5609 (3.5659) weight_decay: 0.0500 (0.0500) time: 0.4949 data: 0.0059 max mem: 21002 Epoch: [295] [230/312] eta: 0:00:47 lr: 0.000003 min_lr: 0.000003 loss: 3.8153 (3.5695) weight_decay: 0.0500 (0.0500) time: 0.4753 data: 0.0274 max mem: 21002 Epoch: [295] [240/312] eta: 0:00:40 lr: 0.000003 min_lr: 0.000003 loss: 3.7624 (3.5640) weight_decay: 0.0500 (0.0500) time: 0.5555 data: 0.0273 max mem: 21002 Epoch: [295] [250/312] eta: 0:00:35 lr: 0.000003 min_lr: 0.000003 loss: 3.6167 (3.5601) weight_decay: 0.0500 (0.0500) time: 0.5207 data: 0.0470 max mem: 21002 Epoch: [295] [260/312] eta: 0:00:29 lr: 0.000003 min_lr: 0.000003 loss: 3.6088 (3.5605) weight_decay: 0.0500 (0.0500) time: 0.6183 data: 0.0742 max mem: 21002 Epoch: [295] [270/312] eta: 0:00:23 lr: 0.000003 min_lr: 0.000003 loss: 3.6830 (3.5642) weight_decay: 0.0500 (0.0500) time: 0.4689 data: 0.0279 max mem: 21002 Epoch: [295] [280/312] eta: 0:00:18 lr: 0.000003 min_lr: 0.000003 loss: 3.6325 (3.5568) weight_decay: 0.0500 (0.0500) time: 0.4978 data: 0.0094 max mem: 21002 Epoch: [295] [290/312] eta: 0:00:12 lr: 0.000003 min_lr: 0.000003 loss: 3.5479 (3.5559) weight_decay: 0.0500 (0.0500) time: 0.6374 data: 0.0204 max mem: 21002 Epoch: [295] [300/312] eta: 0:00:06 lr: 0.000003 min_lr: 0.000003 loss: 3.5390 (3.5452) weight_decay: 0.0500 (0.0500) time: 0.4261 data: 0.0115 max mem: 21002 Epoch: [295] [310/312] eta: 0:00:01 lr: 0.000003 min_lr: 0.000003 loss: 3.5843 (3.5417) weight_decay: 0.0500 (0.0500) time: 0.2807 data: 0.0001 max mem: 21002 Epoch: [295] [311/312] eta: 0:00:00 lr: 0.000003 min_lr: 0.000003 loss: 3.5843 (3.5422) weight_decay: 0.0500 (0.0500) time: 0.2806 data: 0.0001 max mem: 21002 Epoch: [295] Total time: 0:02:52 (0.5519 s / it) Averaged stats: lr: 0.000003 min_lr: 0.000003 loss: 3.5843 (3.5659) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.0009 (1.0009) acc1: 81.5104 (81.5104) acc5: 96.2240 (96.2240) time: 8.6212 data: 8.5031 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3263 (1.2433) acc1: 73.5677 (74.1920) acc5: 92.0573 (92.1280) time: 1.0586 data: 0.9618 max mem: 21002 Test: Total time: 0:00:09 (1.0692 s / it) * Acc@1 74.050 Acc@5 92.198 loss 1.247 Accuracy of the model on the 50000 test images: 74.1% Max accuracy: 74.05% Epoch: [296] [ 0/312] eta: 0:59:05 lr: 0.000003 min_lr: 0.000003 loss: 3.8821 (3.8821) weight_decay: 0.0500 (0.0500) time: 11.3627 data: 10.0849 max mem: 21002 Epoch: [296] [ 10/312] eta: 0:07:35 lr: 0.000003 min_lr: 0.000003 loss: 3.2353 (3.3003) weight_decay: 0.0500 (0.0500) time: 1.5076 data: 1.0168 max mem: 21002 Epoch: [296] [ 20/312] eta: 0:05:04 lr: 0.000003 min_lr: 0.000003 loss: 3.2353 (3.3894) weight_decay: 0.0500 (0.0500) time: 0.5273 data: 0.1309 max mem: 21002 Epoch: [296] [ 30/312] eta: 0:03:45 lr: 0.000003 min_lr: 0.000003 loss: 3.4436 (3.4123) weight_decay: 0.0500 (0.0500) time: 0.4112 data: 0.0763 max mem: 21002 Epoch: [296] [ 40/312] eta: 0:03:27 lr: 0.000003 min_lr: 0.000003 loss: 3.6565 (3.4958) weight_decay: 0.0500 (0.0500) time: 0.4711 data: 0.1007 max mem: 21002 Epoch: [296] [ 50/312] eta: 0:03:10 lr: 0.000003 min_lr: 0.000003 loss: 3.6565 (3.4538) weight_decay: 0.0500 (0.0500) time: 0.6186 data: 0.2438 max mem: 21002 Epoch: [296] [ 60/312] eta: 0:02:46 lr: 0.000003 min_lr: 0.000003 loss: 3.7391 (3.5415) weight_decay: 0.0500 (0.0500) time: 0.4508 data: 0.1437 max mem: 21002 Epoch: [296] [ 70/312] eta: 0:02:41 lr: 0.000003 min_lr: 0.000003 loss: 3.9255 (3.5879) weight_decay: 0.0500 (0.0500) time: 0.5079 data: 0.1144 max mem: 21002 Epoch: [296] [ 80/312] eta: 0:02:32 lr: 0.000003 min_lr: 0.000003 loss: 3.8717 (3.6300) weight_decay: 0.0500 (0.0500) time: 0.6441 data: 0.2667 max mem: 21002 Epoch: [296] [ 90/312] eta: 0:02:18 lr: 0.000003 min_lr: 0.000003 loss: 3.8052 (3.6232) weight_decay: 0.0500 (0.0500) time: 0.4623 data: 0.1530 max mem: 21002 Epoch: [296] [100/312] eta: 0:02:14 lr: 0.000003 min_lr: 0.000003 loss: 3.7054 (3.6281) weight_decay: 0.0500 (0.0500) time: 0.5337 data: 0.1341 max mem: 21002 Epoch: [296] [110/312] eta: 0:02:02 lr: 0.000003 min_lr: 0.000003 loss: 3.5479 (3.6051) weight_decay: 0.0500 (0.0500) time: 0.5407 data: 0.1340 max mem: 21002 Epoch: [296] [120/312] eta: 0:01:56 lr: 0.000003 min_lr: 0.000003 loss: 3.5121 (3.6090) weight_decay: 0.0500 (0.0500) time: 0.4735 data: 0.1036 max mem: 21002 Epoch: [296] [130/312] eta: 0:01:50 lr: 0.000003 min_lr: 0.000003 loss: 3.7258 (3.6174) weight_decay: 0.0500 (0.0500) time: 0.6120 data: 0.2417 max mem: 21002 Epoch: [296] [140/312] eta: 0:01:40 lr: 0.000003 min_lr: 0.000003 loss: 3.8684 (3.6279) weight_decay: 0.0500 (0.0500) time: 0.4662 data: 0.1389 max mem: 21002 Epoch: [296] [150/312] eta: 0:01:35 lr: 0.000003 min_lr: 0.000003 loss: 3.7649 (3.6335) weight_decay: 0.0500 (0.0500) time: 0.4897 data: 0.1940 max mem: 21002 Epoch: [296] [160/312] eta: 0:01:30 lr: 0.000003 min_lr: 0.000003 loss: 3.4988 (3.6114) weight_decay: 0.0500 (0.0500) time: 0.6526 data: 0.3668 max mem: 21002 Epoch: [296] [170/312] eta: 0:01:22 lr: 0.000003 min_lr: 0.000003 loss: 3.1205 (3.5968) weight_decay: 0.0500 (0.0500) time: 0.4712 data: 0.1863 max mem: 21002 Epoch: [296] [180/312] eta: 0:01:17 lr: 0.000002 min_lr: 0.000002 loss: 3.7406 (3.6044) weight_decay: 0.0500 (0.0500) time: 0.5001 data: 0.1904 max mem: 21002 Epoch: [296] [190/312] eta: 0:01:09 lr: 0.000002 min_lr: 0.000002 loss: 3.7049 (3.5978) weight_decay: 0.0500 (0.0500) time: 0.4884 data: 0.1776 max mem: 21002 Epoch: [296] [200/312] eta: 0:01:04 lr: 0.000002 min_lr: 0.000002 loss: 3.6633 (3.6019) weight_decay: 0.0500 (0.0500) time: 0.5005 data: 0.1601 max mem: 21002 Epoch: [296] [210/312] eta: 0:00:59 lr: 0.000002 min_lr: 0.000002 loss: 3.7214 (3.6033) weight_decay: 0.0500 (0.0500) time: 0.6812 data: 0.3057 max mem: 21002 Epoch: [296] [220/312] eta: 0:00:52 lr: 0.000002 min_lr: 0.000002 loss: 3.7166 (3.6028) weight_decay: 0.0500 (0.0500) time: 0.4665 data: 0.1466 max mem: 21002 Epoch: [296] [230/312] eta: 0:00:47 lr: 0.000002 min_lr: 0.000002 loss: 3.7166 (3.6095) weight_decay: 0.0500 (0.0500) time: 0.5222 data: 0.1498 max mem: 21002 Epoch: [296] [240/312] eta: 0:00:41 lr: 0.000002 min_lr: 0.000002 loss: 3.7798 (3.6134) weight_decay: 0.0500 (0.0500) time: 0.7135 data: 0.2346 max mem: 21002 Epoch: [296] [250/312] eta: 0:00:35 lr: 0.000002 min_lr: 0.000002 loss: 3.7607 (3.6069) weight_decay: 0.0500 (0.0500) time: 0.5019 data: 0.1120 max mem: 21002 Epoch: [296] [260/312] eta: 0:00:29 lr: 0.000002 min_lr: 0.000002 loss: 3.3480 (3.5970) weight_decay: 0.0500 (0.0500) time: 0.5331 data: 0.1605 max mem: 21002 Epoch: [296] [270/312] eta: 0:00:23 lr: 0.000002 min_lr: 0.000002 loss: 3.6691 (3.6029) weight_decay: 0.0500 (0.0500) time: 0.5099 data: 0.1342 max mem: 21002 Epoch: [296] [280/312] eta: 0:00:18 lr: 0.000002 min_lr: 0.000002 loss: 3.5849 (3.5960) weight_decay: 0.0500 (0.0500) time: 0.5213 data: 0.1071 max mem: 21002 Epoch: [296] [290/312] eta: 0:00:12 lr: 0.000002 min_lr: 0.000002 loss: 3.5849 (3.5980) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.1300 max mem: 21002 Epoch: [296] [300/312] eta: 0:00:06 lr: 0.000002 min_lr: 0.000002 loss: 3.8254 (3.6036) weight_decay: 0.0500 (0.0500) time: 0.3555 data: 0.0233 max mem: 21002 Epoch: [296] [310/312] eta: 0:00:01 lr: 0.000002 min_lr: 0.000002 loss: 3.5230 (3.5980) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [296] [311/312] eta: 0:00:00 lr: 0.000002 min_lr: 0.000002 loss: 3.5230 (3.5995) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [296] Total time: 0:02:51 (0.5506 s / it) Averaged stats: lr: 0.000002 min_lr: 0.000002 loss: 3.5230 (3.5614) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:18 loss: 1.0477 (1.0477) acc1: 81.5104 (81.5104) acc5: 95.9635 (95.9635) time: 8.6990 data: 8.5802 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3736 (1.2861) acc1: 73.0469 (73.9520) acc5: 91.7969 (92.1600) time: 1.0521 data: 0.9534 max mem: 21002 Test: Total time: 0:00:09 (1.0692 s / it) * Acc@1 73.910 Acc@5 92.128 loss 1.292 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 74.05% Epoch: [297] [ 0/312] eta: 0:52:54 lr: 0.000002 min_lr: 0.000002 loss: 3.7613 (3.7613) weight_decay: 0.0500 (0.0500) time: 10.1755 data: 9.2655 max mem: 21002 Epoch: [297] [ 10/312] eta: 0:07:54 lr: 0.000002 min_lr: 0.000002 loss: 3.8690 (3.8602) weight_decay: 0.0500 (0.0500) time: 1.5728 data: 1.0509 max mem: 21002 Epoch: [297] [ 20/312] eta: 0:05:15 lr: 0.000002 min_lr: 0.000002 loss: 3.8312 (3.8072) weight_decay: 0.0500 (0.0500) time: 0.6261 data: 0.1772 max mem: 21002 Epoch: [297] [ 30/312] eta: 0:03:53 lr: 0.000002 min_lr: 0.000002 loss: 3.8020 (3.7432) weight_decay: 0.0500 (0.0500) time: 0.4163 data: 0.0632 max mem: 21002 Epoch: [297] [ 40/312] eta: 0:03:26 lr: 0.000002 min_lr: 0.000002 loss: 3.2991 (3.6017) weight_decay: 0.0500 (0.0500) time: 0.4191 data: 0.0887 max mem: 21002 Epoch: [297] [ 50/312] eta: 0:03:09 lr: 0.000002 min_lr: 0.000002 loss: 3.2368 (3.5714) weight_decay: 0.0500 (0.0500) time: 0.5612 data: 0.1590 max mem: 21002 Epoch: [297] [ 60/312] eta: 0:02:50 lr: 0.000002 min_lr: 0.000002 loss: 3.6818 (3.5912) weight_decay: 0.0500 (0.0500) time: 0.5151 data: 0.1553 max mem: 21002 Epoch: [297] [ 70/312] eta: 0:02:45 lr: 0.000002 min_lr: 0.000002 loss: 3.5172 (3.5483) weight_decay: 0.0500 (0.0500) time: 0.5839 data: 0.1885 max mem: 21002 Epoch: [297] [ 80/312] eta: 0:02:31 lr: 0.000002 min_lr: 0.000002 loss: 3.2258 (3.5378) weight_decay: 0.0500 (0.0500) time: 0.5740 data: 0.1046 max mem: 21002 Epoch: [297] [ 90/312] eta: 0:02:22 lr: 0.000002 min_lr: 0.000002 loss: 3.5632 (3.5283) weight_decay: 0.0500 (0.0500) time: 0.4863 data: 0.0738 max mem: 21002 Epoch: [297] [100/312] eta: 0:02:14 lr: 0.000002 min_lr: 0.000002 loss: 3.6857 (3.5237) weight_decay: 0.0500 (0.0500) time: 0.5724 data: 0.1483 max mem: 21002 Epoch: [297] [110/312] eta: 0:02:02 lr: 0.000002 min_lr: 0.000002 loss: 3.6474 (3.5298) weight_decay: 0.0500 (0.0500) time: 0.4467 data: 0.0751 max mem: 21002 Epoch: [297] [120/312] eta: 0:01:58 lr: 0.000002 min_lr: 0.000002 loss: 3.4225 (3.5207) weight_decay: 0.0500 (0.0500) time: 0.5113 data: 0.1201 max mem: 21002 Epoch: [297] [130/312] eta: 0:01:50 lr: 0.000002 min_lr: 0.000002 loss: 3.7512 (3.5369) weight_decay: 0.0500 (0.0500) time: 0.6030 data: 0.1203 max mem: 21002 Epoch: [297] [140/312] eta: 0:01:42 lr: 0.000002 min_lr: 0.000002 loss: 3.6039 (3.5161) weight_decay: 0.0500 (0.0500) time: 0.4699 data: 0.0929 max mem: 21002 Epoch: [297] [150/312] eta: 0:01:37 lr: 0.000002 min_lr: 0.000002 loss: 3.5277 (3.5359) weight_decay: 0.0500 (0.0500) time: 0.6025 data: 0.1782 max mem: 21002 Epoch: [297] [160/312] eta: 0:01:30 lr: 0.000002 min_lr: 0.000002 loss: 3.4767 (3.5252) weight_decay: 0.0500 (0.0500) time: 0.6114 data: 0.0861 max mem: 21002 Epoch: [297] [170/312] eta: 0:01:23 lr: 0.000002 min_lr: 0.000002 loss: 3.6140 (3.5354) weight_decay: 0.0500 (0.0500) time: 0.4652 data: 0.0768 max mem: 21002 Epoch: [297] [180/312] eta: 0:01:18 lr: 0.000002 min_lr: 0.000002 loss: 3.8094 (3.5530) weight_decay: 0.0500 (0.0500) time: 0.5832 data: 0.1456 max mem: 21002 Epoch: [297] [190/312] eta: 0:01:10 lr: 0.000002 min_lr: 0.000002 loss: 3.9005 (3.5717) weight_decay: 0.0500 (0.0500) time: 0.5053 data: 0.0702 max mem: 21002 Epoch: [297] [200/312] eta: 0:01:05 lr: 0.000002 min_lr: 0.000002 loss: 3.8110 (3.5723) weight_decay: 0.0500 (0.0500) time: 0.4659 data: 0.0597 max mem: 21002 Epoch: [297] [210/312] eta: 0:00:59 lr: 0.000002 min_lr: 0.000002 loss: 3.7881 (3.5783) weight_decay: 0.0500 (0.0500) time: 0.6203 data: 0.0593 max mem: 21002 Epoch: [297] [220/312] eta: 0:00:52 lr: 0.000002 min_lr: 0.000002 loss: 3.8126 (3.5830) weight_decay: 0.0500 (0.0500) time: 0.4878 data: 0.0486 max mem: 21002 Epoch: [297] [230/312] eta: 0:00:47 lr: 0.000002 min_lr: 0.000002 loss: 3.8810 (3.5881) weight_decay: 0.0500 (0.0500) time: 0.5258 data: 0.1050 max mem: 21002 Epoch: [297] [240/312] eta: 0:00:41 lr: 0.000002 min_lr: 0.000002 loss: 3.7948 (3.5941) weight_decay: 0.0500 (0.0500) time: 0.6048 data: 0.0573 max mem: 21002 Epoch: [297] [250/312] eta: 0:00:35 lr: 0.000002 min_lr: 0.000002 loss: 3.7344 (3.5995) weight_decay: 0.0500 (0.0500) time: 0.4936 data: 0.0617 max mem: 21002 Epoch: [297] [260/312] eta: 0:00:29 lr: 0.000002 min_lr: 0.000002 loss: 3.8361 (3.6066) weight_decay: 0.0500 (0.0500) time: 0.5348 data: 0.1198 max mem: 21002 Epoch: [297] [270/312] eta: 0:00:23 lr: 0.000002 min_lr: 0.000002 loss: 3.8630 (3.6069) weight_decay: 0.0500 (0.0500) time: 0.4530 data: 0.0591 max mem: 21002 Epoch: [297] [280/312] eta: 0:00:18 lr: 0.000002 min_lr: 0.000002 loss: 3.5899 (3.6039) weight_decay: 0.0500 (0.0500) time: 0.4786 data: 0.0678 max mem: 21002 Epoch: [297] [290/312] eta: 0:00:12 lr: 0.000002 min_lr: 0.000002 loss: 3.5809 (3.6047) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0673 max mem: 21002 Epoch: [297] [300/312] eta: 0:00:06 lr: 0.000002 min_lr: 0.000002 loss: 3.7679 (3.6092) weight_decay: 0.0500 (0.0500) time: 0.4240 data: 0.0358 max mem: 21002 Epoch: [297] [310/312] eta: 0:00:01 lr: 0.000002 min_lr: 0.000002 loss: 3.8632 (3.6211) weight_decay: 0.0500 (0.0500) time: 0.3125 data: 0.0357 max mem: 21002 Epoch: [297] [311/312] eta: 0:00:00 lr: 0.000002 min_lr: 0.000002 loss: 3.8099 (3.6174) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [297] Total time: 0:02:51 (0.5512 s / it) Averaged stats: lr: 0.000002 min_lr: 0.000002 loss: 3.8099 (3.5784) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:22 loss: 1.0953 (1.0953) acc1: 81.1198 (81.1198) acc5: 95.7031 (95.7031) time: 9.1634 data: 9.0449 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4156 (1.3278) acc1: 73.1771 (73.9680) acc5: 91.9271 (92.1440) time: 1.1038 data: 1.0050 max mem: 21002 Test: Total time: 0:00:10 (1.1241 s / it) * Acc@1 73.836 Acc@5 92.088 loss 1.333 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 74.05% Epoch: [298] [ 0/312] eta: 0:56:23 lr: 0.000002 min_lr: 0.000002 loss: 4.0898 (4.0898) weight_decay: 0.0500 (0.0500) time: 10.8443 data: 9.2186 max mem: 21002 Epoch: [298] [ 10/312] eta: 0:07:55 lr: 0.000001 min_lr: 0.000001 loss: 3.7131 (3.5169) weight_decay: 0.0500 (0.0500) time: 1.5741 data: 1.0239 max mem: 21002 Epoch: [298] [ 20/312] eta: 0:05:46 lr: 0.000001 min_lr: 0.000001 loss: 3.4223 (3.4342) weight_decay: 0.0500 (0.0500) time: 0.7030 data: 0.1207 max mem: 21002 Epoch: [298] [ 30/312] eta: 0:04:12 lr: 0.000001 min_lr: 0.000001 loss: 3.4223 (3.4388) weight_decay: 0.0500 (0.0500) time: 0.5219 data: 0.0188 max mem: 21002 Epoch: [298] [ 40/312] eta: 0:03:30 lr: 0.000001 min_lr: 0.000001 loss: 3.4026 (3.4376) weight_decay: 0.0500 (0.0500) time: 0.3424 data: 0.0009 max mem: 21002 Epoch: [298] [ 50/312] eta: 0:03:12 lr: 0.000001 min_lr: 0.000001 loss: 3.4026 (3.4533) weight_decay: 0.0500 (0.0500) time: 0.4820 data: 0.0012 max mem: 21002 Epoch: [298] [ 60/312] eta: 0:02:50 lr: 0.000001 min_lr: 0.000001 loss: 3.5819 (3.4894) weight_decay: 0.0500 (0.0500) time: 0.4700 data: 0.0013 max mem: 21002 Epoch: [298] [ 70/312] eta: 0:02:43 lr: 0.000001 min_lr: 0.000001 loss: 3.5819 (3.4786) weight_decay: 0.0500 (0.0500) time: 0.5277 data: 0.0012 max mem: 21002 Epoch: [298] [ 80/312] eta: 0:02:33 lr: 0.000001 min_lr: 0.000001 loss: 3.6976 (3.4837) weight_decay: 0.0500 (0.0500) time: 0.6162 data: 0.0086 max mem: 21002 Epoch: [298] [ 90/312] eta: 0:02:19 lr: 0.000001 min_lr: 0.000001 loss: 3.5426 (3.4753) weight_decay: 0.0500 (0.0500) time: 0.4664 data: 0.0330 max mem: 21002 Epoch: [298] [100/312] eta: 0:02:16 lr: 0.000001 min_lr: 0.000001 loss: 3.4021 (3.4545) weight_decay: 0.0500 (0.0500) time: 0.5786 data: 0.0317 max mem: 21002 Epoch: [298] [110/312] eta: 0:02:04 lr: 0.000001 min_lr: 0.000001 loss: 3.3417 (3.4558) weight_decay: 0.0500 (0.0500) time: 0.5435 data: 0.0188 max mem: 21002 Epoch: [298] [120/312] eta: 0:01:59 lr: 0.000001 min_lr: 0.000001 loss: 3.3992 (3.4527) weight_decay: 0.0500 (0.0500) time: 0.5007 data: 0.0268 max mem: 21002 Epoch: [298] [130/312] eta: 0:01:52 lr: 0.000001 min_lr: 0.000001 loss: 3.5241 (3.4614) weight_decay: 0.0500 (0.0500) time: 0.6457 data: 0.0423 max mem: 21002 Epoch: [298] [140/312] eta: 0:01:43 lr: 0.000001 min_lr: 0.000001 loss: 3.2887 (3.4322) weight_decay: 0.0500 (0.0500) time: 0.4837 data: 0.0626 max mem: 21002 Epoch: [298] [150/312] eta: 0:01:38 lr: 0.000001 min_lr: 0.000001 loss: 3.2887 (3.4380) weight_decay: 0.0500 (0.0500) time: 0.5240 data: 0.0627 max mem: 21002 Epoch: [298] [160/312] eta: 0:01:32 lr: 0.000001 min_lr: 0.000001 loss: 3.6859 (3.4464) weight_decay: 0.0500 (0.0500) time: 0.6562 data: 0.0839 max mem: 21002 Epoch: [298] [170/312] eta: 0:01:24 lr: 0.000001 min_lr: 0.000001 loss: 3.5320 (3.4571) weight_decay: 0.0500 (0.0500) time: 0.5193 data: 0.1059 max mem: 21002 Epoch: [298] [180/312] eta: 0:01:18 lr: 0.000001 min_lr: 0.000001 loss: 3.7494 (3.4699) weight_decay: 0.0500 (0.0500) time: 0.5188 data: 0.0994 max mem: 21002 Epoch: [298] [190/312] eta: 0:01:11 lr: 0.000001 min_lr: 0.000001 loss: 3.7174 (3.4738) weight_decay: 0.0500 (0.0500) time: 0.5036 data: 0.0921 max mem: 21002 Epoch: [298] [200/312] eta: 0:01:05 lr: 0.000001 min_lr: 0.000001 loss: 3.4998 (3.4665) weight_decay: 0.0500 (0.0500) time: 0.5095 data: 0.0838 max mem: 21002 Epoch: [298] [210/312] eta: 0:01:00 lr: 0.000001 min_lr: 0.000001 loss: 3.5820 (3.4790) weight_decay: 0.0500 (0.0500) time: 0.6250 data: 0.0635 max mem: 21002 Epoch: [298] [220/312] eta: 0:00:53 lr: 0.000001 min_lr: 0.000001 loss: 3.7315 (3.4903) weight_decay: 0.0500 (0.0500) time: 0.4591 data: 0.0368 max mem: 21002 Epoch: [298] [230/312] eta: 0:00:47 lr: 0.000001 min_lr: 0.000001 loss: 3.6946 (3.4962) weight_decay: 0.0500 (0.0500) time: 0.4903 data: 0.0522 max mem: 21002 Epoch: [298] [240/312] eta: 0:00:42 lr: 0.000001 min_lr: 0.000001 loss: 3.6828 (3.5000) weight_decay: 0.0500 (0.0500) time: 0.6749 data: 0.0755 max mem: 21002 Epoch: [298] [250/312] eta: 0:00:35 lr: 0.000001 min_lr: 0.000001 loss: 3.6651 (3.4970) weight_decay: 0.0500 (0.0500) time: 0.5150 data: 0.0709 max mem: 21002 Epoch: [298] [260/312] eta: 0:00:30 lr: 0.000001 min_lr: 0.000001 loss: 3.6026 (3.5041) weight_decay: 0.0500 (0.0500) time: 0.5108 data: 0.0690 max mem: 21002 Epoch: [298] [270/312] eta: 0:00:24 lr: 0.000001 min_lr: 0.000001 loss: 3.8380 (3.5092) weight_decay: 0.0500 (0.0500) time: 0.5172 data: 0.0724 max mem: 21002 Epoch: [298] [280/312] eta: 0:00:18 lr: 0.000001 min_lr: 0.000001 loss: 3.2066 (3.4961) weight_decay: 0.0500 (0.0500) time: 0.5325 data: 0.1020 max mem: 21002 Epoch: [298] [290/312] eta: 0:00:12 lr: 0.000001 min_lr: 0.000001 loss: 3.5426 (3.5047) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0764 max mem: 21002 Epoch: [298] [300/312] eta: 0:00:06 lr: 0.000001 min_lr: 0.000001 loss: 3.7770 (3.5039) weight_decay: 0.0500 (0.0500) time: 0.3692 data: 0.0107 max mem: 21002 Epoch: [298] [310/312] eta: 0:00:01 lr: 0.000001 min_lr: 0.000001 loss: 3.5827 (3.5089) weight_decay: 0.0500 (0.0500) time: 0.2772 data: 0.0001 max mem: 21002 Epoch: [298] [311/312] eta: 0:00:00 lr: 0.000001 min_lr: 0.000001 loss: 3.7395 (3.5114) weight_decay: 0.0500 (0.0500) time: 0.2771 data: 0.0001 max mem: 21002 Epoch: [298] Total time: 0:02:53 (0.5564 s / it) Averaged stats: lr: 0.000001 min_lr: 0.000001 loss: 3.7395 (3.5578) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.0769 (1.0769) acc1: 81.3802 (81.3802) acc5: 95.7031 (95.7031) time: 8.6371 data: 8.5182 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.4026 (1.3183) acc1: 73.4375 (74.0320) acc5: 91.7969 (92.0480) time: 1.0447 data: 0.9465 max mem: 21002 Test: Total time: 0:00:09 (1.0632 s / it) * Acc@1 73.888 Acc@5 92.148 loss 1.324 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 74.05% Epoch: [299] [ 0/312] eta: 0:59:09 lr: 0.000001 min_lr: 0.000001 loss: 3.0004 (3.0004) weight_decay: 0.0500 (0.0500) time: 11.3768 data: 10.8890 max mem: 21002 Epoch: [299] [ 10/312] eta: 0:08:00 lr: 0.000001 min_lr: 0.000001 loss: 3.8005 (3.6724) weight_decay: 0.0500 (0.0500) time: 1.5925 data: 1.0454 max mem: 21002 Epoch: [299] [ 20/312] eta: 0:05:06 lr: 0.000001 min_lr: 0.000001 loss: 3.8582 (3.6376) weight_decay: 0.0500 (0.0500) time: 0.5328 data: 0.0799 max mem: 21002 Epoch: [299] [ 30/312] eta: 0:03:46 lr: 0.000001 min_lr: 0.000001 loss: 3.8132 (3.6684) weight_decay: 0.0500 (0.0500) time: 0.3700 data: 0.0497 max mem: 21002 Epoch: [299] [ 40/312] eta: 0:03:22 lr: 0.000001 min_lr: 0.000001 loss: 3.8942 (3.7470) weight_decay: 0.0500 (0.0500) time: 0.4239 data: 0.0584 max mem: 21002 Epoch: [299] [ 50/312] eta: 0:03:12 lr: 0.000001 min_lr: 0.000001 loss: 3.8842 (3.6877) weight_decay: 0.0500 (0.0500) time: 0.6285 data: 0.1468 max mem: 21002 Epoch: [299] [ 60/312] eta: 0:02:46 lr: 0.000001 min_lr: 0.000001 loss: 3.7932 (3.7373) weight_decay: 0.0500 (0.0500) time: 0.4936 data: 0.0891 max mem: 21002 Epoch: [299] [ 70/312] eta: 0:02:40 lr: 0.000001 min_lr: 0.000001 loss: 3.7932 (3.7087) weight_decay: 0.0500 (0.0500) time: 0.4846 data: 0.0493 max mem: 21002 Epoch: [299] [ 80/312] eta: 0:02:31 lr: 0.000001 min_lr: 0.000001 loss: 3.4209 (3.6391) weight_decay: 0.0500 (0.0500) time: 0.6262 data: 0.0562 max mem: 21002 Epoch: [299] [ 90/312] eta: 0:02:19 lr: 0.000001 min_lr: 0.000001 loss: 3.6796 (3.6503) weight_decay: 0.0500 (0.0500) time: 0.4961 data: 0.0499 max mem: 21002 Epoch: [299] [100/312] eta: 0:02:14 lr: 0.000001 min_lr: 0.000001 loss: 3.6796 (3.6086) weight_decay: 0.0500 (0.0500) time: 0.5568 data: 0.0791 max mem: 21002 Epoch: [299] [110/312] eta: 0:02:01 lr: 0.000001 min_lr: 0.000001 loss: 3.2383 (3.5930) weight_decay: 0.0500 (0.0500) time: 0.4899 data: 0.0368 max mem: 21002 Epoch: [299] [120/312] eta: 0:01:56 lr: 0.000001 min_lr: 0.000001 loss: 3.7672 (3.5781) weight_decay: 0.0500 (0.0500) time: 0.4799 data: 0.0416 max mem: 21002 Epoch: [299] [130/312] eta: 0:01:51 lr: 0.000001 min_lr: 0.000001 loss: 3.7756 (3.5836) weight_decay: 0.0500 (0.0500) time: 0.6755 data: 0.0688 max mem: 21002 Epoch: [299] [140/312] eta: 0:01:41 lr: 0.000001 min_lr: 0.000001 loss: 3.7587 (3.5804) weight_decay: 0.0500 (0.0500) time: 0.4819 data: 0.0278 max mem: 21002 Epoch: [299] [150/312] eta: 0:01:36 lr: 0.000001 min_lr: 0.000001 loss: 3.1561 (3.5482) weight_decay: 0.0500 (0.0500) time: 0.4735 data: 0.0099 max mem: 21002 Epoch: [299] [160/312] eta: 0:01:31 lr: 0.000001 min_lr: 0.000001 loss: 3.3749 (3.5597) weight_decay: 0.0500 (0.0500) time: 0.6555 data: 0.0249 max mem: 21002 Epoch: [299] [170/312] eta: 0:01:22 lr: 0.000001 min_lr: 0.000001 loss: 3.5732 (3.5473) weight_decay: 0.0500 (0.0500) time: 0.4936 data: 0.0428 max mem: 21002 Epoch: [299] [180/312] eta: 0:01:17 lr: 0.000001 min_lr: 0.000001 loss: 3.6476 (3.5580) weight_decay: 0.0500 (0.0500) time: 0.5249 data: 0.0554 max mem: 21002 Epoch: [299] [190/312] eta: 0:01:10 lr: 0.000001 min_lr: 0.000001 loss: 3.8616 (3.5721) weight_decay: 0.0500 (0.0500) time: 0.5017 data: 0.0284 max mem: 21002 Epoch: [299] [200/312] eta: 0:01:04 lr: 0.000001 min_lr: 0.000001 loss: 3.5716 (3.5404) weight_decay: 0.0500 (0.0500) time: 0.4597 data: 0.0464 max mem: 21002 Epoch: [299] [210/312] eta: 0:00:59 lr: 0.000001 min_lr: 0.000001 loss: 3.3210 (3.5394) weight_decay: 0.0500 (0.0500) time: 0.6739 data: 0.0959 max mem: 21002 Epoch: [299] [220/312] eta: 0:00:52 lr: 0.000001 min_lr: 0.000001 loss: 3.5893 (3.5469) weight_decay: 0.0500 (0.0500) time: 0.5034 data: 0.0504 max mem: 21002 Epoch: [299] [230/312] eta: 0:00:47 lr: 0.000001 min_lr: 0.000001 loss: 3.7683 (3.5575) weight_decay: 0.0500 (0.0500) time: 0.4643 data: 0.0169 max mem: 21002 Epoch: [299] [240/312] eta: 0:00:41 lr: 0.000001 min_lr: 0.000001 loss: 3.8533 (3.5569) weight_decay: 0.0500 (0.0500) time: 0.6286 data: 0.0169 max mem: 21002 Epoch: [299] [250/312] eta: 0:00:35 lr: 0.000001 min_lr: 0.000001 loss: 3.4302 (3.5541) weight_decay: 0.0500 (0.0500) time: 0.4645 data: 0.0125 max mem: 21002 Epoch: [299] [260/312] eta: 0:00:29 lr: 0.000001 min_lr: 0.000001 loss: 3.5556 (3.5596) weight_decay: 0.0500 (0.0500) time: 0.4981 data: 0.0589 max mem: 21002 Epoch: [299] [270/312] eta: 0:00:23 lr: 0.000001 min_lr: 0.000001 loss: 3.7115 (3.5601) weight_decay: 0.0500 (0.0500) time: 0.4865 data: 0.0471 max mem: 21002 Epoch: [299] [280/312] eta: 0:00:18 lr: 0.000001 min_lr: 0.000001 loss: 3.7115 (3.5619) weight_decay: 0.0500 (0.0500) time: 0.4928 data: 0.0422 max mem: 21002 Epoch: [299] [290/312] eta: 0:00:12 lr: 0.000001 min_lr: 0.000001 loss: 3.5895 (3.5520) weight_decay: 0.0500 (0.0500) time: 0.6231 data: 0.0558 max mem: 21002 Epoch: [299] [300/312] eta: 0:00:06 lr: 0.000001 min_lr: 0.000001 loss: 3.2974 (3.5457) weight_decay: 0.0500 (0.0500) time: 0.4134 data: 0.0140 max mem: 21002 Epoch: [299] [310/312] eta: 0:00:01 lr: 0.000001 min_lr: 0.000001 loss: 3.5698 (3.5485) weight_decay: 0.0500 (0.0500) time: 0.2775 data: 0.0001 max mem: 21002 Epoch: [299] [311/312] eta: 0:00:00 lr: 0.000001 min_lr: 0.000001 loss: 3.5698 (3.5468) weight_decay: 0.0500 (0.0500) time: 0.2774 data: 0.0001 max mem: 21002 Epoch: [299] Total time: 0:02:50 (0.5480 s / it) Averaged stats: lr: 0.000001 min_lr: 0.000001 loss: 3.5698 (3.5541) weight_decay: 0.0500 (0.0500) Test: [0/9] eta: 0:01:17 loss: 1.0065 (1.0065) acc1: 81.6406 (81.6406) acc5: 95.9635 (95.9635) time: 8.5887 data: 8.4703 max mem: 21002 Test: [8/9] eta: 0:00:01 loss: 1.3224 (1.2430) acc1: 73.1771 (74.1760) acc5: 91.7969 (92.1920) time: 1.0741 data: 0.9752 max mem: 21002 Test: Total time: 0:00:09 (1.1016 s / it) * Acc@1 74.020 Acc@5 92.228 loss 1.248 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 74.05% Training time 5:51:10