Namespace(aa='rand-m9-mstd0.5-inc1', auto_resume=True, batch_size=512, cfg_path='iFormer_l.yaml', clip_grad=None, color_jitter=0.4, crop_pct=None, cutmix=1.0, cutmix_minmax=None, data_path='imagenet', data_set='IMNET', device='cuda', disable_eval=False, dist_backend='nccl', dist_eval=True, dist_on_itp=False, dist_url='env://', distillation_alpha=0.5, distillation_tau=1.0, distillation_type='none', distributed=True, drop_path=0.1, enable_wandb=False, epochs=300, eval=False, eval_data_path=None, finetune='', gpu=0, head_init_scale=1.0, imagenet_default_mean_and_std=True, input_size=224, layer_decay=1.0, layer_scale_init_value=0, local_rank=-1, log_dir=None, lr=0.008, min_lr=1e-06, mixup=0.8, mixup_mode='batch', mixup_prob=1.0, mixup_switch_prob=0.5, model='iFormer_l', model_ema=True, model_ema_decay=0.9999, model_ema_eval=True, model_ema_force_cpu=False, model_key='model|module', model_prefix='', momentum=0.9, nb_classes=1000, num_workers=16, opt='adamw', opt_betas=None, opt_eps=1e-08, output_dir='', pin_mem=True, project='iFormer', rank=0, recount=1, remode='pixel', reprob=0.25, resplit=False, resume='', save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, seed=0, smoothing=0.1, start_epoch=0, teacher_model='regnety_160', teacher_path='regnety_160-a5fe301d.pth', train_interpolation='bicubic', update_freq=1, use_amp=False, wandb_ckpt=False, warmup_epochs=20, warmup_steps=-1, weight_decay=0.05, weight_decay_end=None, world_size=16) Transform = RandomResizedCropAndInterpolation(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BICUBIC) RandomHorizontalFlip(p=0.5) ToTensor() Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250])) --------------------------- reading from datapath imagenet Number of the class = 1000 Transform = Resize(size=256, interpolation=bicubic, max_size=None, antialias=warn) CenterCrop(size=(224, 224)) ToTensor() Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) --------------------------- reading from datapath imagenet Number of the class = 1000 Sampler_train = Mixup is activated! Using EMA with decay = 0.99990000 Model = iFormer( (downsample_layers): ModuleList( (0): Sequential( (0): Conv2d_BN( (c): Conv2d(3, 24, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False) (bn): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): EdgeResidual( (conv_exp_bn1): Conv2d_BN( (c): Conv2d(24, 96, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (act): GELU(approximate='none') (conv_pwl_bn2): Conv2d_BN( (c): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (1): Sequential( (0): Conv2d_BN( (c): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (2): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (3): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (stages): ModuleList( (0): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(48, 48, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=48, bias=False) (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(48, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(192, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): Identity() ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(48, 48, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=48, bias=False) (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(48, 192, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(192, 48, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) (1): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96, bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96, bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) (2): Sequential( (0): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (1): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (2): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (3): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (4): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (5): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (6): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (7): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (8): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (9): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (10): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (11): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (12): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (13): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (14): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (15): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (16): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (17): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (18): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (19): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (20): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (21): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (22): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (23): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (24): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (25): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (26): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (27): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (28): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (29): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (30): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (31): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (32): BasicBlock( (block): ConvBlock( (token_channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256, bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): Conv2d_BN( (c): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): GELU(approximate='none') (3): Conv2d_BN( (c): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) (3): Sequential( (0): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (1): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (2): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1152, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (3): BasicBlock( (block): RepCPE( (cpe): Residual( (m): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (drop_path): Identity() ) ) ) (4): BasicBlock( (block): SHMABlock( (token_channel_mixer): Residual( (m): SHMA( (q): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (k): Conv2d_BN( (c): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (gate_act): Sigmoid() (attn_drop): Dropout(p=0.0, inplace=False) (proj): Conv2d_BN( (c): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (v_gate): Conv2d_BN( (c): Conv2d(384, 768, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) (5): BasicBlock( (block): FFN2d( (channel_mixer): Residual( (m): Sequential( (0): Conv2d_BN( (c): Conv2d(384, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): GELU(approximate='none') (2): Conv2d_BN( (c): Conv2d(1152, 384, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (drop_path): DropPath() ) ) ) ) ) (classifier): Classfier( (classifier): BN_Linear( (bn): BatchNorm1d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (l): Linear(in_features=384, out_features=1000, bias=True) ) ) ) number of params: 14779808 LR = 0.00800000 Batch size = 8192 Update frequent = 1 Number of training examples = 1281167 Number of training training per epoch = 156 Param groups = { "decay": { "weight_decay": 0.05, "params": [ "downsample_layers.0.0.c.weight", "downsample_layers.0.2.conv_exp_bn1.c.weight", "downsample_layers.0.2.conv_pwl_bn2.c.weight", "downsample_layers.1.0.c.weight", "downsample_layers.2.0.c.weight", "downsample_layers.3.0.c.weight", "stages.0.0.block.token_channel_mixer.m.0.c.weight", "stages.0.0.block.token_channel_mixer.m.1.c.weight", "stages.0.0.block.token_channel_mixer.m.3.c.weight", "stages.0.1.block.token_channel_mixer.m.0.c.weight", "stages.0.1.block.token_channel_mixer.m.1.c.weight", "stages.0.1.block.token_channel_mixer.m.3.c.weight", "stages.1.0.block.token_channel_mixer.m.0.c.weight", "stages.1.0.block.token_channel_mixer.m.1.c.weight", "stages.1.0.block.token_channel_mixer.m.3.c.weight", "stages.1.1.block.token_channel_mixer.m.0.c.weight", "stages.1.1.block.token_channel_mixer.m.1.c.weight", "stages.1.1.block.token_channel_mixer.m.3.c.weight", "stages.2.0.block.token_channel_mixer.m.0.c.weight", "stages.2.0.block.token_channel_mixer.m.1.c.weight", "stages.2.0.block.token_channel_mixer.m.3.c.weight", "stages.2.1.block.token_channel_mixer.m.0.c.weight", "stages.2.1.block.token_channel_mixer.m.1.c.weight", "stages.2.1.block.token_channel_mixer.m.3.c.weight", "stages.2.2.block.token_channel_mixer.m.0.c.weight", "stages.2.2.block.token_channel_mixer.m.1.c.weight", "stages.2.2.block.token_channel_mixer.m.3.c.weight", "stages.2.3.block.token_channel_mixer.m.0.c.weight", "stages.2.3.block.token_channel_mixer.m.1.c.weight", "stages.2.3.block.token_channel_mixer.m.3.c.weight", "stages.2.4.block.token_channel_mixer.m.0.c.weight", "stages.2.4.block.token_channel_mixer.m.1.c.weight", "stages.2.4.block.token_channel_mixer.m.3.c.weight", "stages.2.5.block.token_channel_mixer.m.0.c.weight", "stages.2.5.block.token_channel_mixer.m.1.c.weight", "stages.2.5.block.token_channel_mixer.m.3.c.weight", "stages.2.6.block.token_channel_mixer.m.0.c.weight", "stages.2.6.block.token_channel_mixer.m.1.c.weight", "stages.2.6.block.token_channel_mixer.m.3.c.weight", "stages.2.7.block.token_channel_mixer.m.0.c.weight", "stages.2.7.block.token_channel_mixer.m.1.c.weight", "stages.2.7.block.token_channel_mixer.m.3.c.weight", "stages.2.8.block.cpe.m.c.weight", "stages.2.9.block.token_channel_mixer.m.q.c.weight", "stages.2.9.block.token_channel_mixer.m.k.c.weight", "stages.2.9.block.token_channel_mixer.m.proj.c.weight", "stages.2.9.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.10.block.channel_mixer.m.0.c.weight", "stages.2.10.block.channel_mixer.m.2.c.weight", "stages.2.11.block.cpe.m.c.weight", "stages.2.12.block.token_channel_mixer.m.q.c.weight", "stages.2.12.block.token_channel_mixer.m.k.c.weight", "stages.2.12.block.token_channel_mixer.m.proj.c.weight", "stages.2.12.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.13.block.channel_mixer.m.0.c.weight", "stages.2.13.block.channel_mixer.m.2.c.weight", "stages.2.14.block.cpe.m.c.weight", "stages.2.15.block.token_channel_mixer.m.q.c.weight", "stages.2.15.block.token_channel_mixer.m.k.c.weight", "stages.2.15.block.token_channel_mixer.m.proj.c.weight", "stages.2.15.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.16.block.channel_mixer.m.0.c.weight", "stages.2.16.block.channel_mixer.m.2.c.weight", "stages.2.17.block.cpe.m.c.weight", "stages.2.18.block.token_channel_mixer.m.q.c.weight", "stages.2.18.block.token_channel_mixer.m.k.c.weight", "stages.2.18.block.token_channel_mixer.m.proj.c.weight", "stages.2.18.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.19.block.channel_mixer.m.0.c.weight", "stages.2.19.block.channel_mixer.m.2.c.weight", "stages.2.20.block.cpe.m.c.weight", "stages.2.21.block.token_channel_mixer.m.q.c.weight", "stages.2.21.block.token_channel_mixer.m.k.c.weight", "stages.2.21.block.token_channel_mixer.m.proj.c.weight", "stages.2.21.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.22.block.channel_mixer.m.0.c.weight", "stages.2.22.block.channel_mixer.m.2.c.weight", "stages.2.23.block.cpe.m.c.weight", "stages.2.24.block.token_channel_mixer.m.q.c.weight", "stages.2.24.block.token_channel_mixer.m.k.c.weight", "stages.2.24.block.token_channel_mixer.m.proj.c.weight", "stages.2.24.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.25.block.channel_mixer.m.0.c.weight", "stages.2.25.block.channel_mixer.m.2.c.weight", "stages.2.26.block.cpe.m.c.weight", "stages.2.27.block.token_channel_mixer.m.q.c.weight", "stages.2.27.block.token_channel_mixer.m.k.c.weight", "stages.2.27.block.token_channel_mixer.m.proj.c.weight", "stages.2.27.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.28.block.channel_mixer.m.0.c.weight", "stages.2.28.block.channel_mixer.m.2.c.weight", "stages.2.29.block.cpe.m.c.weight", "stages.2.30.block.token_channel_mixer.m.q.c.weight", "stages.2.30.block.token_channel_mixer.m.k.c.weight", "stages.2.30.block.token_channel_mixer.m.proj.c.weight", "stages.2.30.block.token_channel_mixer.m.v_gate.c.weight", "stages.2.31.block.channel_mixer.m.0.c.weight", "stages.2.31.block.channel_mixer.m.2.c.weight", "stages.2.32.block.token_channel_mixer.m.0.c.weight", "stages.2.32.block.token_channel_mixer.m.1.c.weight", "stages.2.32.block.token_channel_mixer.m.3.c.weight", "stages.3.0.block.cpe.m.c.weight", "stages.3.1.block.token_channel_mixer.m.q.c.weight", "stages.3.1.block.token_channel_mixer.m.k.c.weight", "stages.3.1.block.token_channel_mixer.m.proj.c.weight", "stages.3.1.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.2.block.channel_mixer.m.0.c.weight", "stages.3.2.block.channel_mixer.m.2.c.weight", "stages.3.3.block.cpe.m.c.weight", "stages.3.4.block.token_channel_mixer.m.q.c.weight", "stages.3.4.block.token_channel_mixer.m.k.c.weight", "stages.3.4.block.token_channel_mixer.m.proj.c.weight", "stages.3.4.block.token_channel_mixer.m.v_gate.c.weight", "stages.3.5.block.channel_mixer.m.0.c.weight", "stages.3.5.block.channel_mixer.m.2.c.weight", "classifier.classifier.l.weight" ], "lr_scale": 1.0 }, "no_decay": { "weight_decay": 0.0, "params": [ "downsample_layers.0.0.bn.weight", "downsample_layers.0.0.bn.bias", "downsample_layers.0.2.conv_exp_bn1.bn.weight", "downsample_layers.0.2.conv_exp_bn1.bn.bias", "downsample_layers.0.2.conv_pwl_bn2.bn.weight", "downsample_layers.0.2.conv_pwl_bn2.bn.bias", "downsample_layers.1.0.bn.weight", "downsample_layers.1.0.bn.bias", "downsample_layers.2.0.bn.weight", "downsample_layers.2.0.bn.bias", "downsample_layers.3.0.bn.weight", "downsample_layers.3.0.bn.bias", "stages.0.0.block.token_channel_mixer.m.0.bn.weight", "stages.0.0.block.token_channel_mixer.m.0.bn.bias", "stages.0.0.block.token_channel_mixer.m.1.bn.weight", "stages.0.0.block.token_channel_mixer.m.1.bn.bias", "stages.0.0.block.token_channel_mixer.m.3.bn.weight", "stages.0.0.block.token_channel_mixer.m.3.bn.bias", "stages.0.1.block.token_channel_mixer.m.0.bn.weight", "stages.0.1.block.token_channel_mixer.m.0.bn.bias", "stages.0.1.block.token_channel_mixer.m.1.bn.weight", "stages.0.1.block.token_channel_mixer.m.1.bn.bias", "stages.0.1.block.token_channel_mixer.m.3.bn.weight", "stages.0.1.block.token_channel_mixer.m.3.bn.bias", "stages.1.0.block.token_channel_mixer.m.0.bn.weight", "stages.1.0.block.token_channel_mixer.m.0.bn.bias", "stages.1.0.block.token_channel_mixer.m.1.bn.weight", "stages.1.0.block.token_channel_mixer.m.1.bn.bias", "stages.1.0.block.token_channel_mixer.m.3.bn.weight", "stages.1.0.block.token_channel_mixer.m.3.bn.bias", "stages.1.1.block.token_channel_mixer.m.0.bn.weight", "stages.1.1.block.token_channel_mixer.m.0.bn.bias", "stages.1.1.block.token_channel_mixer.m.1.bn.weight", "stages.1.1.block.token_channel_mixer.m.1.bn.bias", "stages.1.1.block.token_channel_mixer.m.3.bn.weight", "stages.1.1.block.token_channel_mixer.m.3.bn.bias", "stages.2.0.block.token_channel_mixer.m.0.bn.weight", "stages.2.0.block.token_channel_mixer.m.0.bn.bias", "stages.2.0.block.token_channel_mixer.m.1.bn.weight", "stages.2.0.block.token_channel_mixer.m.1.bn.bias", "stages.2.0.block.token_channel_mixer.m.3.bn.weight", "stages.2.0.block.token_channel_mixer.m.3.bn.bias", "stages.2.1.block.token_channel_mixer.m.0.bn.weight", "stages.2.1.block.token_channel_mixer.m.0.bn.bias", "stages.2.1.block.token_channel_mixer.m.1.bn.weight", "stages.2.1.block.token_channel_mixer.m.1.bn.bias", "stages.2.1.block.token_channel_mixer.m.3.bn.weight", "stages.2.1.block.token_channel_mixer.m.3.bn.bias", "stages.2.2.block.token_channel_mixer.m.0.bn.weight", "stages.2.2.block.token_channel_mixer.m.0.bn.bias", "stages.2.2.block.token_channel_mixer.m.1.bn.weight", "stages.2.2.block.token_channel_mixer.m.1.bn.bias", "stages.2.2.block.token_channel_mixer.m.3.bn.weight", "stages.2.2.block.token_channel_mixer.m.3.bn.bias", "stages.2.3.block.token_channel_mixer.m.0.bn.weight", "stages.2.3.block.token_channel_mixer.m.0.bn.bias", "stages.2.3.block.token_channel_mixer.m.1.bn.weight", "stages.2.3.block.token_channel_mixer.m.1.bn.bias", "stages.2.3.block.token_channel_mixer.m.3.bn.weight", "stages.2.3.block.token_channel_mixer.m.3.bn.bias", "stages.2.4.block.token_channel_mixer.m.0.bn.weight", "stages.2.4.block.token_channel_mixer.m.0.bn.bias", "stages.2.4.block.token_channel_mixer.m.1.bn.weight", "stages.2.4.block.token_channel_mixer.m.1.bn.bias", "stages.2.4.block.token_channel_mixer.m.3.bn.weight", "stages.2.4.block.token_channel_mixer.m.3.bn.bias", "stages.2.5.block.token_channel_mixer.m.0.bn.weight", "stages.2.5.block.token_channel_mixer.m.0.bn.bias", "stages.2.5.block.token_channel_mixer.m.1.bn.weight", "stages.2.5.block.token_channel_mixer.m.1.bn.bias", "stages.2.5.block.token_channel_mixer.m.3.bn.weight", "stages.2.5.block.token_channel_mixer.m.3.bn.bias", "stages.2.6.block.token_channel_mixer.m.0.bn.weight", "stages.2.6.block.token_channel_mixer.m.0.bn.bias", "stages.2.6.block.token_channel_mixer.m.1.bn.weight", "stages.2.6.block.token_channel_mixer.m.1.bn.bias", "stages.2.6.block.token_channel_mixer.m.3.bn.weight", "stages.2.6.block.token_channel_mixer.m.3.bn.bias", "stages.2.7.block.token_channel_mixer.m.0.bn.weight", "stages.2.7.block.token_channel_mixer.m.0.bn.bias", "stages.2.7.block.token_channel_mixer.m.1.bn.weight", "stages.2.7.block.token_channel_mixer.m.1.bn.bias", "stages.2.7.block.token_channel_mixer.m.3.bn.weight", "stages.2.7.block.token_channel_mixer.m.3.bn.bias", "stages.2.8.block.cpe.m.bn.weight", "stages.2.8.block.cpe.m.bn.bias", "stages.2.9.block.token_channel_mixer.m.q.bn.weight", "stages.2.9.block.token_channel_mixer.m.q.bn.bias", "stages.2.9.block.token_channel_mixer.m.k.bn.weight", "stages.2.9.block.token_channel_mixer.m.k.bn.bias", "stages.2.9.block.token_channel_mixer.m.proj.bn.weight", "stages.2.9.block.token_channel_mixer.m.proj.bn.bias", "stages.2.9.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.9.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.10.block.channel_mixer.m.0.bn.weight", "stages.2.10.block.channel_mixer.m.0.bn.bias", "stages.2.10.block.channel_mixer.m.2.bn.weight", "stages.2.10.block.channel_mixer.m.2.bn.bias", "stages.2.11.block.cpe.m.bn.weight", "stages.2.11.block.cpe.m.bn.bias", "stages.2.12.block.token_channel_mixer.m.q.bn.weight", "stages.2.12.block.token_channel_mixer.m.q.bn.bias", "stages.2.12.block.token_channel_mixer.m.k.bn.weight", "stages.2.12.block.token_channel_mixer.m.k.bn.bias", "stages.2.12.block.token_channel_mixer.m.proj.bn.weight", "stages.2.12.block.token_channel_mixer.m.proj.bn.bias", "stages.2.12.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.12.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.13.block.channel_mixer.m.0.bn.weight", "stages.2.13.block.channel_mixer.m.0.bn.bias", "stages.2.13.block.channel_mixer.m.2.bn.weight", "stages.2.13.block.channel_mixer.m.2.bn.bias", "stages.2.14.block.cpe.m.bn.weight", "stages.2.14.block.cpe.m.bn.bias", "stages.2.15.block.token_channel_mixer.m.q.bn.weight", "stages.2.15.block.token_channel_mixer.m.q.bn.bias", "stages.2.15.block.token_channel_mixer.m.k.bn.weight", "stages.2.15.block.token_channel_mixer.m.k.bn.bias", "stages.2.15.block.token_channel_mixer.m.proj.bn.weight", "stages.2.15.block.token_channel_mixer.m.proj.bn.bias", "stages.2.15.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.15.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.16.block.channel_mixer.m.0.bn.weight", "stages.2.16.block.channel_mixer.m.0.bn.bias", "stages.2.16.block.channel_mixer.m.2.bn.weight", "stages.2.16.block.channel_mixer.m.2.bn.bias", "stages.2.17.block.cpe.m.bn.weight", "stages.2.17.block.cpe.m.bn.bias", "stages.2.18.block.token_channel_mixer.m.q.bn.weight", "stages.2.18.block.token_channel_mixer.m.q.bn.bias", "stages.2.18.block.token_channel_mixer.m.k.bn.weight", "stages.2.18.block.token_channel_mixer.m.k.bn.bias", "stages.2.18.block.token_channel_mixer.m.proj.bn.weight", "stages.2.18.block.token_channel_mixer.m.proj.bn.bias", "stages.2.18.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.18.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.19.block.channel_mixer.m.0.bn.weight", "stages.2.19.block.channel_mixer.m.0.bn.bias", "stages.2.19.block.channel_mixer.m.2.bn.weight", "stages.2.19.block.channel_mixer.m.2.bn.bias", "stages.2.20.block.cpe.m.bn.weight", "stages.2.20.block.cpe.m.bn.bias", "stages.2.21.block.token_channel_mixer.m.q.bn.weight", "stages.2.21.block.token_channel_mixer.m.q.bn.bias", "stages.2.21.block.token_channel_mixer.m.k.bn.weight", "stages.2.21.block.token_channel_mixer.m.k.bn.bias", "stages.2.21.block.token_channel_mixer.m.proj.bn.weight", "stages.2.21.block.token_channel_mixer.m.proj.bn.bias", "stages.2.21.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.21.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.22.block.channel_mixer.m.0.bn.weight", "stages.2.22.block.channel_mixer.m.0.bn.bias", "stages.2.22.block.channel_mixer.m.2.bn.weight", "stages.2.22.block.channel_mixer.m.2.bn.bias", "stages.2.23.block.cpe.m.bn.weight", "stages.2.23.block.cpe.m.bn.bias", "stages.2.24.block.token_channel_mixer.m.q.bn.weight", "stages.2.24.block.token_channel_mixer.m.q.bn.bias", "stages.2.24.block.token_channel_mixer.m.k.bn.weight", "stages.2.24.block.token_channel_mixer.m.k.bn.bias", "stages.2.24.block.token_channel_mixer.m.proj.bn.weight", "stages.2.24.block.token_channel_mixer.m.proj.bn.bias", "stages.2.24.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.24.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.25.block.channel_mixer.m.0.bn.weight", "stages.2.25.block.channel_mixer.m.0.bn.bias", "stages.2.25.block.channel_mixer.m.2.bn.weight", "stages.2.25.block.channel_mixer.m.2.bn.bias", "stages.2.26.block.cpe.m.bn.weight", "stages.2.26.block.cpe.m.bn.bias", "stages.2.27.block.token_channel_mixer.m.q.bn.weight", "stages.2.27.block.token_channel_mixer.m.q.bn.bias", "stages.2.27.block.token_channel_mixer.m.k.bn.weight", "stages.2.27.block.token_channel_mixer.m.k.bn.bias", "stages.2.27.block.token_channel_mixer.m.proj.bn.weight", "stages.2.27.block.token_channel_mixer.m.proj.bn.bias", "stages.2.27.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.27.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.28.block.channel_mixer.m.0.bn.weight", "stages.2.28.block.channel_mixer.m.0.bn.bias", "stages.2.28.block.channel_mixer.m.2.bn.weight", "stages.2.28.block.channel_mixer.m.2.bn.bias", "stages.2.29.block.cpe.m.bn.weight", "stages.2.29.block.cpe.m.bn.bias", "stages.2.30.block.token_channel_mixer.m.q.bn.weight", "stages.2.30.block.token_channel_mixer.m.q.bn.bias", "stages.2.30.block.token_channel_mixer.m.k.bn.weight", "stages.2.30.block.token_channel_mixer.m.k.bn.bias", "stages.2.30.block.token_channel_mixer.m.proj.bn.weight", "stages.2.30.block.token_channel_mixer.m.proj.bn.bias", "stages.2.30.block.token_channel_mixer.m.v_gate.bn.weight", "stages.2.30.block.token_channel_mixer.m.v_gate.bn.bias", "stages.2.31.block.channel_mixer.m.0.bn.weight", "stages.2.31.block.channel_mixer.m.0.bn.bias", "stages.2.31.block.channel_mixer.m.2.bn.weight", "stages.2.31.block.channel_mixer.m.2.bn.bias", "stages.2.32.block.token_channel_mixer.m.0.bn.weight", "stages.2.32.block.token_channel_mixer.m.0.bn.bias", "stages.2.32.block.token_channel_mixer.m.1.bn.weight", "stages.2.32.block.token_channel_mixer.m.1.bn.bias", "stages.2.32.block.token_channel_mixer.m.3.bn.weight", "stages.2.32.block.token_channel_mixer.m.3.bn.bias", "stages.3.0.block.cpe.m.bn.weight", "stages.3.0.block.cpe.m.bn.bias", "stages.3.1.block.token_channel_mixer.m.q.bn.weight", "stages.3.1.block.token_channel_mixer.m.q.bn.bias", "stages.3.1.block.token_channel_mixer.m.k.bn.weight", "stages.3.1.block.token_channel_mixer.m.k.bn.bias", "stages.3.1.block.token_channel_mixer.m.proj.bn.weight", "stages.3.1.block.token_channel_mixer.m.proj.bn.bias", "stages.3.1.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.1.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.2.block.channel_mixer.m.0.bn.weight", "stages.3.2.block.channel_mixer.m.0.bn.bias", "stages.3.2.block.channel_mixer.m.2.bn.weight", "stages.3.2.block.channel_mixer.m.2.bn.bias", "stages.3.3.block.cpe.m.bn.weight", "stages.3.3.block.cpe.m.bn.bias", "stages.3.4.block.token_channel_mixer.m.q.bn.weight", "stages.3.4.block.token_channel_mixer.m.q.bn.bias", "stages.3.4.block.token_channel_mixer.m.k.bn.weight", "stages.3.4.block.token_channel_mixer.m.k.bn.bias", "stages.3.4.block.token_channel_mixer.m.proj.bn.weight", "stages.3.4.block.token_channel_mixer.m.proj.bn.bias", "stages.3.4.block.token_channel_mixer.m.v_gate.bn.weight", "stages.3.4.block.token_channel_mixer.m.v_gate.bn.bias", "stages.3.5.block.channel_mixer.m.0.bn.weight", "stages.3.5.block.channel_mixer.m.0.bn.bias", "stages.3.5.block.channel_mixer.m.2.bn.weight", "stages.3.5.block.channel_mixer.m.2.bn.bias", "classifier.classifier.bn.weight", "classifier.classifier.bn.bias", "classifier.classifier.l.bias" ], "lr_scale": 1.0 } } Use Cosine LR scheduler Set warmup steps = 3120 Set warmup steps = 0 Max WD = 0.0500000, Min WD = 0.0500000 criterion = SoftTargetCrossEntropy() Auto resume checkpoint: Start training for 300 epochs Epoch: [0] [ 0/156] eta: 2:39:57 lr: 0.000000 min_lr: 0.000000 loss: 6.9882 (6.9882) weight_decay: 0.0500 (0.0500) time: 61.5198 data: 18.0429 max mem: 55573 Epoch: [0] [ 10/156] eta: 0:17:51 lr: 0.000026 min_lr: 0.000026 loss: 6.9841 (6.9752) weight_decay: 0.0500 (0.0500) time: 7.3382 data: 1.6407 max mem: 55573 Epoch: [0] [ 20/156] eta: 0:09:20 lr: 0.000051 min_lr: 0.000051 loss: 6.9558 (6.9610) weight_decay: 0.0500 (0.0500) time: 1.2545 data: 0.0004 max mem: 55573 Epoch: [0] [ 30/156] eta: 0:06:15 lr: 0.000077 min_lr: 0.000077 loss: 6.9283 (6.9458) weight_decay: 0.0500 (0.0500) time: 0.5880 data: 0.0004 max mem: 55573 Epoch: [0] [ 40/156] eta: 0:04:38 lr: 0.000103 min_lr: 0.000103 loss: 6.9006 (6.9314) weight_decay: 0.0500 (0.0500) time: 0.5878 data: 0.0004 max mem: 55573 Epoch: [0] [ 50/156] eta: 0:03:36 lr: 0.000128 min_lr: 0.000128 loss: 6.8754 (6.9182) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [0] [ 60/156] eta: 0:02:59 lr: 0.000154 min_lr: 0.000154 loss: 6.8639 (6.9062) weight_decay: 0.0500 (0.0500) time: 0.7854 data: 0.0004 max mem: 55573 Epoch: [0] [ 70/156] eta: 0:02:26 lr: 0.000180 min_lr: 0.000180 loss: 6.8312 (6.8930) weight_decay: 0.0500 (0.0500) time: 0.8393 data: 0.0004 max mem: 55573 Epoch: [0] [ 80/156] eta: 0:02:05 lr: 0.000205 min_lr: 0.000205 loss: 6.8060 (6.8813) weight_decay: 0.0500 (0.0500) time: 1.0035 data: 0.0004 max mem: 55573 Epoch: [0] [ 90/156] eta: 0:01:45 lr: 0.000231 min_lr: 0.000231 loss: 6.7895 (6.8692) weight_decay: 0.0500 (0.0500) time: 1.2319 data: 0.0005 max mem: 55573 Epoch: [0] [100/156] eta: 0:01:25 lr: 0.000256 min_lr: 0.000256 loss: 6.7680 (6.8571) weight_decay: 0.0500 (0.0500) time: 0.9804 data: 0.0005 max mem: 55573 Epoch: [0] [110/156] eta: 0:01:08 lr: 0.000282 min_lr: 0.000282 loss: 6.7350 (6.8447) weight_decay: 0.0500 (0.0500) time: 1.0022 data: 0.0004 max mem: 55573 Epoch: [0] [120/156] eta: 0:00:51 lr: 0.000308 min_lr: 0.000308 loss: 6.7350 (6.8340) weight_decay: 0.0500 (0.0500) time: 0.9828 data: 0.0004 max mem: 55573 Epoch: [0] [130/156] eta: 0:00:37 lr: 0.000333 min_lr: 0.000333 loss: 6.7077 (6.8211) weight_decay: 0.0500 (0.0500) time: 1.0519 data: 0.0010 max mem: 55573 Epoch: [0] [140/156] eta: 0:00:22 lr: 0.000359 min_lr: 0.000359 loss: 6.6225 (6.8079) weight_decay: 0.0500 (0.0500) time: 1.2889 data: 0.0009 max mem: 55573 Epoch: [0] [150/156] eta: 0:00:08 lr: 0.000385 min_lr: 0.000385 loss: 6.6308 (6.7964) weight_decay: 0.0500 (0.0500) time: 0.9255 data: 0.0002 max mem: 55573 Epoch: [0] [155/156] eta: 0:00:01 lr: 0.000398 min_lr: 0.000398 loss: 6.6308 (6.7923) weight_decay: 0.0500 (0.0500) time: 0.9246 data: 0.0002 max mem: 55573 Epoch: [0] Total time: 0:03:29 (1.3419 s / it) Averaged stats: lr: 0.000398 min_lr: 0.000398 loss: 6.6308 (6.7902) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:01:00 loss: 6.0855 (6.0855) acc1: 1.5625 (1.5625) acc5: 6.3802 (6.3802) time: 12.0442 data: 7.0845 max mem: 55573 Test: [4/5] eta: 0:00:02 loss: 6.0897 (5.9683) acc1: 2.4740 (2.5920) acc5: 7.8125 (7.7120) time: 2.8886 data: 1.4170 max mem: 55573 Test: Total time: 0:00:14 (2.9156 s / it) * Acc@1 2.226 Acc@5 7.746 loss 6.005 Accuracy of the model on the 50000 test images: 2.2% Max accuracy: 2.23% Test: [0/5] eta: 0:00:36 loss: 6.9148 (6.9148) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.3080 data: 6.5321 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9148 (6.9127) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.6068 data: 1.3065 max mem: 55573 Test: Total time: 0:00:08 (1.6238 s / it) * Acc@1 0.098 Acc@5 0.494 loss 6.911 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.10% Epoch: [1] [ 0/156] eta: 1:06:57 lr: 0.000400 min_lr: 0.000400 loss: 6.6372 (6.6372) weight_decay: 0.0500 (0.0500) time: 25.7538 data: 10.8100 max mem: 55573 Epoch: [1] [ 10/156] eta: 0:08:29 lr: 0.000426 min_lr: 0.000426 loss: 6.6414 (6.6380) weight_decay: 0.0500 (0.0500) time: 3.4909 data: 0.9830 max mem: 55573 Epoch: [1] [ 20/156] eta: 0:05:30 lr: 0.000451 min_lr: 0.000451 loss: 6.6266 (6.6014) weight_decay: 0.0500 (0.0500) time: 1.2617 data: 0.0004 max mem: 55573 Epoch: [1] [ 30/156] eta: 0:04:03 lr: 0.000477 min_lr: 0.000477 loss: 6.5608 (6.5878) weight_decay: 0.0500 (0.0500) time: 1.0777 data: 0.0004 max mem: 55573 Epoch: [1] [ 40/156] eta: 0:03:06 lr: 0.000503 min_lr: 0.000503 loss: 6.5216 (6.5705) weight_decay: 0.0500 (0.0500) time: 0.7420 data: 0.0004 max mem: 55573 Epoch: [1] [ 50/156] eta: 0:02:29 lr: 0.000528 min_lr: 0.000528 loss: 6.5068 (6.5513) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0005 max mem: 55573 Epoch: [1] [ 60/156] eta: 0:02:02 lr: 0.000554 min_lr: 0.000554 loss: 6.5115 (6.5436) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [1] [ 70/156] eta: 0:01:41 lr: 0.000580 min_lr: 0.000580 loss: 6.5306 (6.5378) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0006 max mem: 55573 Epoch: [1] [ 80/156] eta: 0:01:23 lr: 0.000605 min_lr: 0.000605 loss: 6.4794 (6.5272) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [1] [ 90/156] eta: 0:01:09 lr: 0.000631 min_lr: 0.000631 loss: 6.5008 (6.5230) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [1] [100/156] eta: 0:00:56 lr: 0.000657 min_lr: 0.000657 loss: 6.3610 (6.5055) weight_decay: 0.0500 (0.0500) time: 0.6214 data: 0.0005 max mem: 55573 Epoch: [1] [110/156] eta: 0:00:45 lr: 0.000682 min_lr: 0.000682 loss: 6.3056 (6.4924) weight_decay: 0.0500 (0.0500) time: 0.7288 data: 0.0004 max mem: 55573 Epoch: [1] [120/156] eta: 0:00:34 lr: 0.000708 min_lr: 0.000708 loss: 6.4525 (6.4931) weight_decay: 0.0500 (0.0500) time: 0.7764 data: 0.0004 max mem: 55573 Epoch: [1] [130/156] eta: 0:00:24 lr: 0.000734 min_lr: 0.000734 loss: 6.5321 (6.4860) weight_decay: 0.0500 (0.0500) time: 0.7510 data: 0.0010 max mem: 55573 Epoch: [1] [140/156] eta: 0:00:14 lr: 0.000759 min_lr: 0.000759 loss: 6.3089 (6.4717) weight_decay: 0.0500 (0.0500) time: 0.7211 data: 0.0008 max mem: 55573 Epoch: [1] [150/156] eta: 0:00:05 lr: 0.000785 min_lr: 0.000785 loss: 6.3089 (6.4631) weight_decay: 0.0500 (0.0500) time: 0.6461 data: 0.0001 max mem: 55573 Epoch: [1] [155/156] eta: 0:00:00 lr: 0.000798 min_lr: 0.000798 loss: 6.3103 (6.4619) weight_decay: 0.0500 (0.0500) time: 0.6253 data: 0.0001 max mem: 55573 Epoch: [1] Total time: 0:02:21 (0.9093 s / it) Averaged stats: lr: 0.000798 min_lr: 0.000798 loss: 6.3103 (6.4843) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 5.1609 (5.1609) acc1: 6.7708 (6.7708) acc5: 20.4427 (20.4427) time: 7.1668 data: 6.9294 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 5.2887 (5.0925) acc1: 6.7708 (7.0720) acc5: 20.0521 (19.6160) time: 1.5799 data: 1.3860 max mem: 55573 Test: Total time: 0:00:08 (1.6146 s / it) * Acc@1 7.006 Acc@5 19.644 loss 5.148 Accuracy of the model on the 50000 test images: 7.0% Max accuracy: 7.01% Test: [0/5] eta: 0:00:34 loss: 6.9132 (6.9132) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8970 data: 6.6611 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9132 (6.9115) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5247 data: 1.3323 max mem: 55573 Test: Total time: 0:00:07 (1.5397 s / it) * Acc@1 0.096 Acc@5 0.498 loss 6.910 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [2] [ 0/156] eta: 0:34:50 lr: 0.000800 min_lr: 0.000800 loss: 6.5567 (6.5567) weight_decay: 0.0500 (0.0500) time: 13.4002 data: 9.3431 max mem: 55573 Epoch: [2] [ 10/156] eta: 0:04:19 lr: 0.000826 min_lr: 0.000826 loss: 6.4973 (6.4743) weight_decay: 0.0500 (0.0500) time: 1.7756 data: 0.8498 max mem: 55573 Epoch: [2] [ 20/156] eta: 0:02:44 lr: 0.000852 min_lr: 0.000852 loss: 6.4505 (6.3945) weight_decay: 0.0500 (0.0500) time: 0.6034 data: 0.0004 max mem: 55573 Epoch: [2] [ 30/156] eta: 0:02:07 lr: 0.000877 min_lr: 0.000877 loss: 6.3530 (6.3838) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [2] [ 40/156] eta: 0:01:45 lr: 0.000903 min_lr: 0.000903 loss: 6.3529 (6.3603) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [2] [ 50/156] eta: 0:01:29 lr: 0.000929 min_lr: 0.000929 loss: 6.3168 (6.3350) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [2] [ 60/156] eta: 0:01:17 lr: 0.000954 min_lr: 0.000954 loss: 6.2585 (6.3105) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [2] [ 70/156] eta: 0:01:06 lr: 0.000980 min_lr: 0.000980 loss: 6.2585 (6.2891) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [2] [ 80/156] eta: 0:00:57 lr: 0.001005 min_lr: 0.001005 loss: 6.1635 (6.2726) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [2] [ 90/156] eta: 0:00:48 lr: 0.001031 min_lr: 0.001031 loss: 6.3098 (6.2770) weight_decay: 0.0500 (0.0500) time: 0.5937 data: 0.0004 max mem: 55573 Epoch: [2] [100/156] eta: 0:00:40 lr: 0.001057 min_lr: 0.001057 loss: 6.2947 (6.2762) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0004 max mem: 55573 Epoch: [2] [110/156] eta: 0:00:32 lr: 0.001082 min_lr: 0.001082 loss: 6.1876 (6.2597) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [2] [120/156] eta: 0:00:25 lr: 0.001108 min_lr: 0.001108 loss: 6.2815 (6.2639) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [2] [130/156] eta: 0:00:17 lr: 0.001134 min_lr: 0.001134 loss: 6.2815 (6.2589) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0011 max mem: 55573 Epoch: [2] [140/156] eta: 0:00:10 lr: 0.001159 min_lr: 0.001159 loss: 6.1512 (6.2414) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0009 max mem: 55573 Epoch: [2] [150/156] eta: 0:00:04 lr: 0.001185 min_lr: 0.001185 loss: 5.9349 (6.2238) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0001 max mem: 55573 Epoch: [2] [155/156] eta: 0:00:00 lr: 0.001198 min_lr: 0.001198 loss: 5.9132 (6.2125) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0001 max mem: 55573 Epoch: [2] Total time: 0:01:46 (0.6816 s / it) Averaged stats: lr: 0.001198 min_lr: 0.001198 loss: 5.9132 (6.2210) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 4.5472 (4.5472) acc1: 12.7604 (12.7604) acc5: 33.7240 (33.7240) time: 6.7376 data: 6.5007 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 4.6459 (4.5515) acc1: 12.7604 (13.1520) acc5: 32.1615 (31.5840) time: 1.4928 data: 1.3002 max mem: 55573 Test: Total time: 0:00:07 (1.5156 s / it) * Acc@1 13.466 Acc@5 32.376 loss 4.588 Accuracy of the model on the 50000 test images: 13.5% Max accuracy: 13.47% Test: [0/5] eta: 0:00:34 loss: 6.9125 (6.9125) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.9821 data: 6.7459 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9125 (6.9112) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5416 data: 1.3493 max mem: 55573 Test: Total time: 0:00:07 (1.5586 s / it) * Acc@1 0.098 Acc@5 0.494 loss 6.909 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [3] [ 0/156] eta: 0:33:09 lr: 0.001200 min_lr: 0.001200 loss: 5.4995 (5.4995) weight_decay: 0.0500 (0.0500) time: 12.7523 data: 9.8772 max mem: 55573 Epoch: [3] [ 10/156] eta: 0:04:09 lr: 0.001226 min_lr: 0.001226 loss: 6.0262 (6.0100) weight_decay: 0.0500 (0.0500) time: 1.7092 data: 0.8983 max mem: 55573 Epoch: [3] [ 20/156] eta: 0:02:40 lr: 0.001252 min_lr: 0.001252 loss: 6.0837 (6.0797) weight_decay: 0.0500 (0.0500) time: 0.5978 data: 0.0004 max mem: 55573 Epoch: [3] [ 30/156] eta: 0:02:04 lr: 0.001277 min_lr: 0.001277 loss: 5.9958 (6.0507) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [3] [ 40/156] eta: 0:01:43 lr: 0.001303 min_lr: 0.001303 loss: 6.1164 (6.0677) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [3] [ 50/156] eta: 0:01:28 lr: 0.001329 min_lr: 0.001329 loss: 6.1642 (6.0808) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [3] [ 60/156] eta: 0:01:16 lr: 0.001354 min_lr: 0.001354 loss: 6.0965 (6.0342) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [3] [ 70/156] eta: 0:01:05 lr: 0.001380 min_lr: 0.001380 loss: 5.9905 (6.0262) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [3] [ 80/156] eta: 0:00:56 lr: 0.001406 min_lr: 0.001406 loss: 6.1164 (6.0223) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [3] [ 90/156] eta: 0:00:47 lr: 0.001431 min_lr: 0.001431 loss: 6.1260 (6.0286) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [3] [100/156] eta: 0:00:39 lr: 0.001457 min_lr: 0.001457 loss: 6.1993 (6.0315) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [3] [110/156] eta: 0:00:32 lr: 0.001483 min_lr: 0.001483 loss: 6.1564 (6.0311) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [3] [120/156] eta: 0:00:24 lr: 0.001508 min_lr: 0.001508 loss: 6.1283 (6.0259) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [3] [130/156] eta: 0:00:17 lr: 0.001534 min_lr: 0.001534 loss: 6.0463 (6.0195) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0010 max mem: 55573 Epoch: [3] [140/156] eta: 0:00:10 lr: 0.001559 min_lr: 0.001559 loss: 5.8850 (6.0068) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0008 max mem: 55573 Epoch: [3] [150/156] eta: 0:00:04 lr: 0.001585 min_lr: 0.001585 loss: 5.9052 (5.9991) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [3] [155/156] eta: 0:00:00 lr: 0.001598 min_lr: 0.001598 loss: 5.9807 (6.0035) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [3] Total time: 0:01:45 (0.6753 s / it) Averaged stats: lr: 0.001598 min_lr: 0.001598 loss: 5.9807 (5.9695) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 3.9729 (3.9729) acc1: 20.0521 (20.0521) acc5: 45.9635 (45.9635) time: 7.1147 data: 6.8773 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 4.0859 (3.9698) acc1: 20.0521 (19.2320) acc5: 43.7500 (40.8960) time: 1.5686 data: 1.3756 max mem: 55573 Test: Total time: 0:00:08 (1.6058 s / it) * Acc@1 19.768 Acc@5 41.254 loss 3.958 Accuracy of the model on the 50000 test images: 19.8% Max accuracy: 19.77% Test: [0/5] eta: 0:00:36 loss: 6.9120 (6.9120) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2487 data: 7.0127 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9120 (6.9114) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5950 data: 1.4026 max mem: 55573 Test: Total time: 0:00:08 (1.6137 s / it) * Acc@1 0.098 Acc@5 0.492 loss 6.909 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [4] [ 0/156] eta: 0:31:45 lr: 0.001601 min_lr: 0.001601 loss: 6.0239 (6.0239) weight_decay: 0.0500 (0.0500) time: 12.2145 data: 11.1444 max mem: 55573 Epoch: [4] [ 10/156] eta: 0:04:04 lr: 0.001626 min_lr: 0.001626 loss: 5.7407 (5.8413) weight_decay: 0.0500 (0.0500) time: 1.6738 data: 1.0135 max mem: 55573 Epoch: [4] [ 20/156] eta: 0:02:37 lr: 0.001652 min_lr: 0.001652 loss: 5.6432 (5.7308) weight_decay: 0.0500 (0.0500) time: 0.6059 data: 0.0005 max mem: 55573 Epoch: [4] [ 30/156] eta: 0:02:02 lr: 0.001677 min_lr: 0.001677 loss: 5.6773 (5.7710) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [4] [ 40/156] eta: 0:01:42 lr: 0.001703 min_lr: 0.001703 loss: 5.6765 (5.7286) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [4] [ 50/156] eta: 0:01:27 lr: 0.001729 min_lr: 0.001729 loss: 5.9544 (5.7858) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [4] [ 60/156] eta: 0:01:15 lr: 0.001754 min_lr: 0.001754 loss: 5.9651 (5.7807) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [4] [ 70/156] eta: 0:01:05 lr: 0.001780 min_lr: 0.001780 loss: 5.6288 (5.7597) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [4] [ 80/156] eta: 0:00:56 lr: 0.001806 min_lr: 0.001806 loss: 5.4777 (5.7458) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [4] [ 90/156] eta: 0:00:47 lr: 0.001831 min_lr: 0.001831 loss: 5.3874 (5.7143) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [4] [100/156] eta: 0:00:39 lr: 0.001857 min_lr: 0.001857 loss: 5.5030 (5.7062) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [4] [110/156] eta: 0:00:32 lr: 0.001883 min_lr: 0.001883 loss: 5.7600 (5.7179) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [4] [120/156] eta: 0:00:24 lr: 0.001908 min_lr: 0.001908 loss: 5.8973 (5.7232) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [4] [130/156] eta: 0:00:17 lr: 0.001934 min_lr: 0.001934 loss: 5.8772 (5.7087) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0011 max mem: 55573 Epoch: [4] [140/156] eta: 0:00:10 lr: 0.001960 min_lr: 0.001960 loss: 5.7799 (5.7196) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0010 max mem: 55573 Epoch: [4] [150/156] eta: 0:00:04 lr: 0.001985 min_lr: 0.001985 loss: 5.8654 (5.7217) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [4] [155/156] eta: 0:00:00 lr: 0.001998 min_lr: 0.001998 loss: 5.7799 (5.7202) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [4] Total time: 0:01:44 (0.6726 s / it) Averaged stats: lr: 0.001998 min_lr: 0.001998 loss: 5.7799 (5.7241) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 3.3159 (3.3159) acc1: 32.1615 (32.1615) acc5: 57.1615 (57.1615) time: 7.1629 data: 6.9250 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.5164 (3.4945) acc1: 30.2083 (28.1920) acc5: 52.6042 (50.6240) time: 1.5792 data: 1.3851 max mem: 55573 Test: Total time: 0:00:08 (1.6140 s / it) * Acc@1 27.444 Acc@5 51.622 loss 3.505 Accuracy of the model on the 50000 test images: 27.4% Max accuracy: 27.44% Test: [0/5] eta: 0:00:34 loss: 6.9118 (6.9118) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9262 data: 6.6902 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9118 (6.9117) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5305 data: 1.3381 max mem: 55573 Test: Total time: 0:00:07 (1.5466 s / it) * Acc@1 0.098 Acc@5 0.488 loss 6.910 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [5] [ 0/156] eta: 0:35:56 lr: 0.002001 min_lr: 0.002001 loss: 6.0397 (6.0397) weight_decay: 0.0500 (0.0500) time: 13.8261 data: 6.3486 max mem: 55573 Epoch: [5] [ 10/156] eta: 0:04:23 lr: 0.002026 min_lr: 0.002026 loss: 5.6057 (5.5751) weight_decay: 0.0500 (0.0500) time: 1.8032 data: 0.5777 max mem: 55573 Epoch: [5] [ 20/156] eta: 0:02:46 lr: 0.002052 min_lr: 0.002052 loss: 5.6515 (5.6107) weight_decay: 0.0500 (0.0500) time: 0.5968 data: 0.0005 max mem: 55573 Epoch: [5] [ 30/156] eta: 0:02:08 lr: 0.002078 min_lr: 0.002078 loss: 5.2684 (5.4593) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0004 max mem: 55573 Epoch: [5] [ 40/156] eta: 0:01:46 lr: 0.002103 min_lr: 0.002103 loss: 5.2684 (5.5243) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [5] [ 50/156] eta: 0:01:30 lr: 0.002129 min_lr: 0.002129 loss: 5.7081 (5.4807) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [5] [ 60/156] eta: 0:01:17 lr: 0.002155 min_lr: 0.002155 loss: 5.1724 (5.4796) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [5] [ 70/156] eta: 0:01:07 lr: 0.002180 min_lr: 0.002180 loss: 5.6109 (5.4798) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [5] [ 80/156] eta: 0:00:57 lr: 0.002206 min_lr: 0.002206 loss: 5.6766 (5.4824) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [5] [ 90/156] eta: 0:00:48 lr: 0.002231 min_lr: 0.002231 loss: 5.5143 (5.4879) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [5] [100/156] eta: 0:00:40 lr: 0.002257 min_lr: 0.002257 loss: 5.4080 (5.4700) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [5] [110/156] eta: 0:00:32 lr: 0.002283 min_lr: 0.002283 loss: 5.3709 (5.4618) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0005 max mem: 55573 Epoch: [5] [120/156] eta: 0:00:25 lr: 0.002308 min_lr: 0.002308 loss: 5.6112 (5.4626) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [5] [130/156] eta: 0:00:18 lr: 0.002334 min_lr: 0.002334 loss: 5.6644 (5.4629) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [5] [140/156] eta: 0:00:10 lr: 0.002360 min_lr: 0.002360 loss: 5.7219 (5.4711) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0009 max mem: 55573 Epoch: [5] [150/156] eta: 0:00:04 lr: 0.002385 min_lr: 0.002385 loss: 5.6716 (5.4863) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [5] [155/156] eta: 0:00:00 lr: 0.002398 min_lr: 0.002398 loss: 5.5963 (5.4720) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [5] Total time: 0:01:46 (0.6818 s / it) Averaged stats: lr: 0.002398 min_lr: 0.002398 loss: 5.5963 (5.5087) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 2.8956 (2.8956) acc1: 37.2396 (37.2396) acc5: 64.3229 (64.3229) time: 6.8871 data: 6.6498 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.0522 (2.9907) acc1: 35.4167 (32.6400) acc5: 63.5417 (58.3040) time: 1.5228 data: 1.3300 max mem: 55573 Test: Total time: 0:00:07 (1.5381 s / it) * Acc@1 33.658 Acc@5 59.640 loss 3.020 Accuracy of the model on the 50000 test images: 33.7% Max accuracy: 33.66% Test: [0/5] eta: 0:00:34 loss: 6.9117 (6.9117) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8973 data: 6.6612 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9117 (6.9123) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5248 data: 1.3324 max mem: 55573 Test: Total time: 0:00:07 (1.5427 s / it) * Acc@1 0.098 Acc@5 0.492 loss 6.910 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [6] [ 0/156] eta: 0:30:46 lr: 0.002401 min_lr: 0.002401 loss: 5.8381 (5.8381) weight_decay: 0.0500 (0.0500) time: 11.8366 data: 11.2461 max mem: 55573 Epoch: [6] [ 10/156] eta: 0:04:18 lr: 0.002426 min_lr: 0.002426 loss: 5.4912 (5.3132) weight_decay: 0.0500 (0.0500) time: 1.7704 data: 1.0227 max mem: 55573 Epoch: [6] [ 20/156] eta: 0:02:44 lr: 0.002452 min_lr: 0.002452 loss: 5.2879 (5.2546) weight_decay: 0.0500 (0.0500) time: 0.6771 data: 0.0004 max mem: 55573 Epoch: [6] [ 30/156] eta: 0:02:07 lr: 0.002478 min_lr: 0.002478 loss: 5.3311 (5.3310) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [6] [ 40/156] eta: 0:01:45 lr: 0.002503 min_lr: 0.002503 loss: 5.5733 (5.3727) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [6] [ 50/156] eta: 0:01:29 lr: 0.002529 min_lr: 0.002529 loss: 5.5733 (5.3655) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [6] [ 60/156] eta: 0:01:17 lr: 0.002555 min_lr: 0.002555 loss: 5.5075 (5.3485) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [6] [ 70/156] eta: 0:01:06 lr: 0.002580 min_lr: 0.002580 loss: 5.3876 (5.3456) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [6] [ 80/156] eta: 0:00:57 lr: 0.002606 min_lr: 0.002606 loss: 5.5036 (5.3662) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [6] [ 90/156] eta: 0:00:48 lr: 0.002632 min_lr: 0.002632 loss: 5.4555 (5.3300) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [6] [100/156] eta: 0:00:40 lr: 0.002657 min_lr: 0.002657 loss: 5.2874 (5.3323) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [6] [110/156] eta: 0:00:32 lr: 0.002683 min_lr: 0.002683 loss: 5.5990 (5.3370) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [6] [120/156] eta: 0:00:25 lr: 0.002709 min_lr: 0.002709 loss: 5.5856 (5.3435) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [6] [130/156] eta: 0:00:17 lr: 0.002734 min_lr: 0.002734 loss: 5.5202 (5.3409) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0010 max mem: 55573 Epoch: [6] [140/156] eta: 0:00:10 lr: 0.002760 min_lr: 0.002760 loss: 5.1448 (5.3225) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0008 max mem: 55573 Epoch: [6] [150/156] eta: 0:00:04 lr: 0.002786 min_lr: 0.002786 loss: 5.0486 (5.3137) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0002 max mem: 55573 Epoch: [6] [155/156] eta: 0:00:00 lr: 0.002798 min_lr: 0.002798 loss: 5.1384 (5.3067) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0002 max mem: 55573 Epoch: [6] Total time: 0:01:45 (0.6795 s / it) Averaged stats: lr: 0.002798 min_lr: 0.002798 loss: 5.1384 (5.3091) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 2.6750 (2.6750) acc1: 43.6198 (43.6198) acc5: 70.4427 (70.4427) time: 6.8400 data: 6.6028 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.7894 (2.7218) acc1: 41.0156 (38.3040) acc5: 68.3594 (63.9680) time: 1.5146 data: 1.3206 max mem: 55573 Test: Total time: 0:00:07 (1.5429 s / it) * Acc@1 38.372 Acc@5 64.556 loss 2.750 Accuracy of the model on the 50000 test images: 38.4% Max accuracy: 38.37% Test: [0/5] eta: 0:00:34 loss: 6.9116 (6.9116) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9757 data: 6.7396 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9116 (6.9131) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5405 data: 1.3480 max mem: 55573 Test: Total time: 0:00:07 (1.5556 s / it) * Acc@1 0.098 Acc@5 0.494 loss 6.911 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [7] [ 0/156] eta: 0:37:13 lr: 0.002801 min_lr: 0.002801 loss: 5.5152 (5.5152) weight_decay: 0.0500 (0.0500) time: 14.3186 data: 10.4473 max mem: 55573 Epoch: [7] [ 10/156] eta: 0:04:29 lr: 0.002827 min_lr: 0.002827 loss: 5.3660 (5.1670) weight_decay: 0.0500 (0.0500) time: 1.8480 data: 0.9501 max mem: 55573 Epoch: [7] [ 20/156] eta: 0:02:49 lr: 0.002852 min_lr: 0.002852 loss: 5.2283 (5.1960) weight_decay: 0.0500 (0.0500) time: 0.5956 data: 0.0004 max mem: 55573 Epoch: [7] [ 30/156] eta: 0:02:10 lr: 0.002878 min_lr: 0.002878 loss: 5.4239 (5.2517) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [7] [ 40/156] eta: 0:01:47 lr: 0.002903 min_lr: 0.002903 loss: 5.3792 (5.2436) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [7] [ 50/156] eta: 0:01:31 lr: 0.002929 min_lr: 0.002929 loss: 5.3339 (5.2421) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [7] [ 60/156] eta: 0:01:18 lr: 0.002955 min_lr: 0.002955 loss: 5.2355 (5.2310) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [7] [ 70/156] eta: 0:01:07 lr: 0.002980 min_lr: 0.002980 loss: 5.0080 (5.1935) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [7] [ 80/156] eta: 0:00:57 lr: 0.003006 min_lr: 0.003006 loss: 5.1562 (5.1975) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [7] [ 90/156] eta: 0:00:49 lr: 0.003032 min_lr: 0.003032 loss: 5.5073 (5.2387) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [7] [100/156] eta: 0:00:40 lr: 0.003057 min_lr: 0.003057 loss: 5.5048 (5.2380) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [7] [110/156] eta: 0:00:32 lr: 0.003083 min_lr: 0.003083 loss: 5.2972 (5.2413) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [7] [120/156] eta: 0:00:25 lr: 0.003109 min_lr: 0.003109 loss: 5.2620 (5.2350) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [7] [130/156] eta: 0:00:18 lr: 0.003134 min_lr: 0.003134 loss: 5.0427 (5.2177) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0011 max mem: 55573 Epoch: [7] [140/156] eta: 0:00:11 lr: 0.003160 min_lr: 0.003160 loss: 5.1017 (5.2059) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0009 max mem: 55573 Epoch: [7] [150/156] eta: 0:00:04 lr: 0.003186 min_lr: 0.003186 loss: 5.2094 (5.2031) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [7] [155/156] eta: 0:00:00 lr: 0.003198 min_lr: 0.003198 loss: 5.2094 (5.1929) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [7] Total time: 0:01:46 (0.6850 s / it) Averaged stats: lr: 0.003198 min_lr: 0.003198 loss: 5.2094 (5.1540) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 2.5308 (2.5308) acc1: 46.6146 (46.6146) acc5: 72.1354 (72.1354) time: 7.0156 data: 6.7783 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.5308 (2.5762) acc1: 46.6146 (43.1360) acc5: 72.1354 (67.5520) time: 1.5495 data: 1.3557 max mem: 55573 Test: Total time: 0:00:07 (1.5873 s / it) * Acc@1 43.032 Acc@5 69.178 loss 2.571 Accuracy of the model on the 50000 test images: 43.0% Max accuracy: 43.03% Test: [0/5] eta: 0:00:33 loss: 6.9113 (6.9113) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7438 data: 6.5079 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9113 (6.9141) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.4941 data: 1.3017 max mem: 55573 Test: Total time: 0:00:07 (1.5116 s / it) * Acc@1 0.096 Acc@5 0.492 loss 6.911 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [8] [ 0/156] eta: 0:36:31 lr: 0.003201 min_lr: 0.003201 loss: 3.9776 (3.9776) weight_decay: 0.0500 (0.0500) time: 14.0467 data: 10.3489 max mem: 55573 Epoch: [8] [ 10/156] eta: 0:04:28 lr: 0.003227 min_lr: 0.003227 loss: 5.2855 (4.9991) weight_decay: 0.0500 (0.0500) time: 1.8366 data: 0.9411 max mem: 55573 Epoch: [8] [ 20/156] eta: 0:02:49 lr: 0.003252 min_lr: 0.003252 loss: 5.2855 (5.1360) weight_decay: 0.0500 (0.0500) time: 0.6030 data: 0.0004 max mem: 55573 Epoch: [8] [ 30/156] eta: 0:02:10 lr: 0.003278 min_lr: 0.003278 loss: 5.3199 (5.1471) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [8] [ 40/156] eta: 0:01:47 lr: 0.003304 min_lr: 0.003304 loss: 5.3709 (5.1700) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [8] [ 50/156] eta: 0:01:31 lr: 0.003329 min_lr: 0.003329 loss: 5.2418 (5.1474) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [8] [ 60/156] eta: 0:01:18 lr: 0.003355 min_lr: 0.003355 loss: 5.0524 (5.1170) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [8] [ 70/156] eta: 0:01:07 lr: 0.003381 min_lr: 0.003381 loss: 4.7696 (5.0684) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [8] [ 80/156] eta: 0:00:57 lr: 0.003406 min_lr: 0.003406 loss: 4.7772 (5.0540) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [8] [ 90/156] eta: 0:00:48 lr: 0.003432 min_lr: 0.003432 loss: 5.1843 (5.0541) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [8] [100/156] eta: 0:00:40 lr: 0.003458 min_lr: 0.003458 loss: 5.1790 (5.0553) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [8] [110/156] eta: 0:00:32 lr: 0.003483 min_lr: 0.003483 loss: 5.1538 (5.0691) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [8] [120/156] eta: 0:00:25 lr: 0.003509 min_lr: 0.003509 loss: 5.2927 (5.0823) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [8] [130/156] eta: 0:00:18 lr: 0.003534 min_lr: 0.003534 loss: 5.2927 (5.0901) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0011 max mem: 55573 Epoch: [8] [140/156] eta: 0:00:10 lr: 0.003560 min_lr: 0.003560 loss: 5.3431 (5.0917) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0009 max mem: 55573 Epoch: [8] [150/156] eta: 0:00:04 lr: 0.003586 min_lr: 0.003586 loss: 5.1436 (5.0813) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [8] [155/156] eta: 0:00:00 lr: 0.003599 min_lr: 0.003599 loss: 5.2154 (5.0908) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [8] Total time: 0:01:46 (0.6841 s / it) Averaged stats: lr: 0.003599 min_lr: 0.003599 loss: 5.2154 (5.0060) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 2.3922 (2.3922) acc1: 50.0000 (50.0000) acc5: 76.4323 (76.4323) time: 7.0623 data: 6.8249 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.4245 (2.4973) acc1: 48.9583 (45.3120) acc5: 76.3021 (70.9120) time: 1.5588 data: 1.3651 max mem: 55573 Test: Total time: 0:00:08 (1.6021 s / it) * Acc@1 45.262 Acc@5 71.348 loss 2.529 Accuracy of the model on the 50000 test images: 45.3% Max accuracy: 45.26% Test: [0/5] eta: 0:00:33 loss: 6.9110 (6.9110) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7333 data: 6.4974 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9110 (6.9151) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.4920 data: 1.2996 max mem: 55573 Test: Total time: 0:00:07 (1.5077 s / it) * Acc@1 0.096 Acc@5 0.496 loss 6.912 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [9] [ 0/156] eta: 0:39:09 lr: 0.003601 min_lr: 0.003601 loss: 5.3790 (5.3790) weight_decay: 0.0500 (0.0500) time: 15.0603 data: 8.8702 max mem: 55573 Epoch: [9] [ 10/156] eta: 0:04:41 lr: 0.003627 min_lr: 0.003627 loss: 5.2249 (5.1180) weight_decay: 0.0500 (0.0500) time: 1.9259 data: 0.8068 max mem: 55573 Epoch: [9] [ 20/156] eta: 0:02:55 lr: 0.003652 min_lr: 0.003652 loss: 5.1351 (5.0197) weight_decay: 0.0500 (0.0500) time: 0.6031 data: 0.0004 max mem: 55573 Epoch: [9] [ 30/156] eta: 0:02:14 lr: 0.003678 min_lr: 0.003678 loss: 5.1196 (4.9613) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [9] [ 40/156] eta: 0:01:50 lr: 0.003704 min_lr: 0.003704 loss: 4.9508 (4.9587) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [9] [ 50/156] eta: 0:01:33 lr: 0.003729 min_lr: 0.003729 loss: 4.8207 (4.9487) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [9] [ 60/156] eta: 0:01:19 lr: 0.003755 min_lr: 0.003755 loss: 5.1701 (4.9918) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [9] [ 70/156] eta: 0:01:08 lr: 0.003781 min_lr: 0.003781 loss: 5.2281 (4.9582) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [9] [ 80/156] eta: 0:00:58 lr: 0.003806 min_lr: 0.003806 loss: 4.4322 (4.8905) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [9] [ 90/156] eta: 0:00:49 lr: 0.003832 min_lr: 0.003832 loss: 4.3604 (4.8560) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [9] [100/156] eta: 0:00:41 lr: 0.003858 min_lr: 0.003858 loss: 4.8846 (4.8675) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [9] [110/156] eta: 0:00:33 lr: 0.003883 min_lr: 0.003883 loss: 4.9952 (4.8802) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [9] [120/156] eta: 0:00:25 lr: 0.003909 min_lr: 0.003909 loss: 5.0464 (4.8972) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [9] [130/156] eta: 0:00:18 lr: 0.003935 min_lr: 0.003935 loss: 4.9664 (4.8771) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0010 max mem: 55573 Epoch: [9] [140/156] eta: 0:00:11 lr: 0.003960 min_lr: 0.003960 loss: 4.7380 (4.8683) weight_decay: 0.0500 (0.0500) time: 0.5871 data: 0.0009 max mem: 55573 Epoch: [9] [150/156] eta: 0:00:04 lr: 0.003986 min_lr: 0.003986 loss: 4.8673 (4.8644) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [9] [155/156] eta: 0:00:00 lr: 0.003999 min_lr: 0.003999 loss: 4.7244 (4.8625) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [9] Total time: 0:01:47 (0.6909 s / it) Averaged stats: lr: 0.003999 min_lr: 0.003999 loss: 4.7244 (4.8817) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 2.1465 (2.1465) acc1: 54.0365 (54.0365) acc5: 79.6875 (79.6875) time: 6.7304 data: 6.4934 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.1465 (2.2317) acc1: 53.3854 (49.5680) acc5: 79.6875 (75.0400) time: 1.4926 data: 1.2988 max mem: 55573 Test: Total time: 0:00:07 (1.5313 s / it) * Acc@1 50.574 Acc@5 75.910 loss 2.212 Accuracy of the model on the 50000 test images: 50.6% Max accuracy: 50.57% Test: [0/5] eta: 0:00:34 loss: 6.9105 (6.9105) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8871 data: 6.6510 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9105 (6.9165) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5228 data: 1.3303 max mem: 55573 Test: Total time: 0:00:07 (1.5417 s / it) * Acc@1 0.096 Acc@5 0.496 loss 6.913 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [10] [ 0/156] eta: 0:40:51 lr: 0.004001 min_lr: 0.004001 loss: 3.7319 (3.7319) weight_decay: 0.0500 (0.0500) time: 15.7133 data: 7.3530 max mem: 55573 Epoch: [10] [ 10/156] eta: 0:04:46 lr: 0.004027 min_lr: 0.004027 loss: 4.5041 (4.4067) weight_decay: 0.0500 (0.0500) time: 1.9618 data: 0.6688 max mem: 55573 Epoch: [10] [ 20/156] eta: 0:02:58 lr: 0.004053 min_lr: 0.004053 loss: 4.6282 (4.5596) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [10] [ 30/156] eta: 0:02:15 lr: 0.004078 min_lr: 0.004078 loss: 4.9352 (4.7115) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [10] [ 40/156] eta: 0:01:51 lr: 0.004104 min_lr: 0.004104 loss: 5.0999 (4.8077) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [10] [ 50/156] eta: 0:01:33 lr: 0.004130 min_lr: 0.004130 loss: 5.0864 (4.8378) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [10] [ 60/156] eta: 0:01:20 lr: 0.004155 min_lr: 0.004155 loss: 4.8141 (4.8192) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [10] [ 70/156] eta: 0:01:09 lr: 0.004181 min_lr: 0.004181 loss: 4.9645 (4.8279) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [10] [ 80/156] eta: 0:00:59 lr: 0.004206 min_lr: 0.004206 loss: 4.9801 (4.8249) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [10] [ 90/156] eta: 0:00:49 lr: 0.004232 min_lr: 0.004232 loss: 4.5474 (4.7633) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [10] [100/156] eta: 0:00:41 lr: 0.004258 min_lr: 0.004258 loss: 4.1438 (4.7214) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [10] [110/156] eta: 0:00:33 lr: 0.004283 min_lr: 0.004283 loss: 4.5830 (4.7268) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [10] [120/156] eta: 0:00:25 lr: 0.004309 min_lr: 0.004309 loss: 4.9860 (4.7291) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [10] [130/156] eta: 0:00:18 lr: 0.004335 min_lr: 0.004335 loss: 4.9665 (4.7275) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0011 max mem: 55573 Epoch: [10] [140/156] eta: 0:00:11 lr: 0.004360 min_lr: 0.004360 loss: 5.0809 (4.7555) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0008 max mem: 55573 Epoch: [10] [150/156] eta: 0:00:04 lr: 0.004386 min_lr: 0.004386 loss: 5.0596 (4.7486) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [10] [155/156] eta: 0:00:00 lr: 0.004399 min_lr: 0.004399 loss: 4.8895 (4.7478) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [10] Total time: 0:01:48 (0.6925 s / it) Averaged stats: lr: 0.004399 min_lr: 0.004399 loss: 4.8895 (4.7655) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.9734 (1.9734) acc1: 59.6354 (59.6354) acc5: 82.4219 (82.4219) time: 6.8791 data: 6.6420 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.0561 (2.0826) acc1: 56.3802 (53.5040) acc5: 82.4219 (78.2720) time: 1.5223 data: 1.3285 max mem: 55573 Test: Total time: 0:00:07 (1.5614 s / it) * Acc@1 53.688 Acc@5 78.540 loss 2.102 Accuracy of the model on the 50000 test images: 53.7% Max accuracy: 53.69% Test: [0/5] eta: 0:00:34 loss: 6.9102 (6.9102) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9843 data: 6.7478 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9102 (6.9179) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5424 data: 1.3497 max mem: 55573 Test: Total time: 0:00:07 (1.5612 s / it) * Acc@1 0.098 Acc@5 0.494 loss 6.914 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [11] [ 0/156] eta: 0:40:41 lr: 0.004401 min_lr: 0.004401 loss: 4.9920 (4.9920) weight_decay: 0.0500 (0.0500) time: 15.6527 data: 9.1926 max mem: 55573 Epoch: [11] [ 10/156] eta: 0:04:45 lr: 0.004427 min_lr: 0.004427 loss: 4.8404 (4.5700) weight_decay: 0.0500 (0.0500) time: 1.9581 data: 0.8360 max mem: 55573 Epoch: [11] [ 20/156] eta: 0:02:57 lr: 0.004453 min_lr: 0.004453 loss: 4.8404 (4.6481) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [11] [ 30/156] eta: 0:02:16 lr: 0.004478 min_lr: 0.004478 loss: 4.9524 (4.6359) weight_decay: 0.0500 (0.0500) time: 0.5977 data: 0.0005 max mem: 55573 Epoch: [11] [ 40/156] eta: 0:01:51 lr: 0.004504 min_lr: 0.004504 loss: 4.4032 (4.5749) weight_decay: 0.0500 (0.0500) time: 0.5981 data: 0.0005 max mem: 55573 Epoch: [11] [ 50/156] eta: 0:01:34 lr: 0.004530 min_lr: 0.004530 loss: 4.7041 (4.6350) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [11] [ 60/156] eta: 0:01:20 lr: 0.004555 min_lr: 0.004555 loss: 4.9571 (4.6724) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [11] [ 70/156] eta: 0:01:09 lr: 0.004581 min_lr: 0.004581 loss: 4.7652 (4.6406) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [11] [ 80/156] eta: 0:00:59 lr: 0.004607 min_lr: 0.004607 loss: 4.8883 (4.6929) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [11] [ 90/156] eta: 0:00:49 lr: 0.004632 min_lr: 0.004632 loss: 4.9231 (4.6835) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [11] [100/156] eta: 0:00:41 lr: 0.004658 min_lr: 0.004658 loss: 4.6043 (4.6779) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [11] [110/156] eta: 0:00:33 lr: 0.004684 min_lr: 0.004684 loss: 4.7918 (4.6857) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [11] [120/156] eta: 0:00:25 lr: 0.004709 min_lr: 0.004709 loss: 4.8434 (4.6950) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [11] [130/156] eta: 0:00:18 lr: 0.004735 min_lr: 0.004735 loss: 4.7659 (4.6716) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0011 max mem: 55573 Epoch: [11] [140/156] eta: 0:00:11 lr: 0.004761 min_lr: 0.004761 loss: 4.5506 (4.6578) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0010 max mem: 55573 Epoch: [11] [150/156] eta: 0:00:04 lr: 0.004786 min_lr: 0.004786 loss: 4.5711 (4.6456) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [11] [155/156] eta: 0:00:00 lr: 0.004799 min_lr: 0.004799 loss: 4.5506 (4.6395) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [11] Total time: 0:01:48 (0.6934 s / it) Averaged stats: lr: 0.004799 min_lr: 0.004799 loss: 4.5506 (4.6550) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.8946 (1.8946) acc1: 59.7656 (59.7656) acc5: 85.9375 (85.9375) time: 7.1672 data: 6.9298 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9879 (2.0284) acc1: 57.5521 (53.4720) acc5: 82.6823 (79.3280) time: 1.5799 data: 1.3860 max mem: 55573 Test: Total time: 0:00:08 (1.6224 s / it) * Acc@1 54.814 Acc@5 79.254 loss 2.030 Accuracy of the model on the 50000 test images: 54.8% Max accuracy: 54.81% Test: [0/5] eta: 0:00:34 loss: 6.9098 (6.9098) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9525 data: 6.7166 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9098 (6.9193) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5358 data: 1.3434 max mem: 55573 Test: Total time: 0:00:07 (1.5570 s / it) * Acc@1 0.100 Acc@5 0.496 loss 6.915 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.10% Epoch: [12] [ 0/156] eta: 0:32:24 lr: 0.004802 min_lr: 0.004802 loss: 5.4818 (5.4818) weight_decay: 0.0500 (0.0500) time: 12.4618 data: 7.7381 max mem: 55573 Epoch: [12] [ 10/156] eta: 0:04:04 lr: 0.004827 min_lr: 0.004827 loss: 5.0961 (4.8765) weight_decay: 0.0500 (0.0500) time: 1.6717 data: 0.7038 max mem: 55573 Epoch: [12] [ 20/156] eta: 0:02:37 lr: 0.004853 min_lr: 0.004853 loss: 4.8127 (4.7189) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [12] [ 30/156] eta: 0:02:02 lr: 0.004878 min_lr: 0.004878 loss: 4.6095 (4.6540) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [12] [ 40/156] eta: 0:01:42 lr: 0.004904 min_lr: 0.004904 loss: 4.6095 (4.6423) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [12] [ 50/156] eta: 0:01:27 lr: 0.004930 min_lr: 0.004930 loss: 4.3094 (4.5428) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0004 max mem: 55573 Epoch: [12] [ 60/156] eta: 0:01:15 lr: 0.004955 min_lr: 0.004955 loss: 4.2687 (4.5175) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [12] [ 70/156] eta: 0:01:05 lr: 0.004981 min_lr: 0.004981 loss: 4.5342 (4.5177) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [12] [ 80/156] eta: 0:00:56 lr: 0.005007 min_lr: 0.005007 loss: 4.5342 (4.4892) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [12] [ 90/156] eta: 0:00:47 lr: 0.005032 min_lr: 0.005032 loss: 4.5729 (4.4981) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [12] [100/156] eta: 0:00:39 lr: 0.005058 min_lr: 0.005058 loss: 4.6141 (4.4839) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [12] [110/156] eta: 0:00:32 lr: 0.005084 min_lr: 0.005084 loss: 4.4440 (4.4840) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [12] [120/156] eta: 0:00:24 lr: 0.005109 min_lr: 0.005109 loss: 4.5360 (4.4840) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [12] [130/156] eta: 0:00:17 lr: 0.005135 min_lr: 0.005135 loss: 4.6980 (4.4880) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0010 max mem: 55573 Epoch: [12] [140/156] eta: 0:00:10 lr: 0.005161 min_lr: 0.005161 loss: 4.7625 (4.5110) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.0009 max mem: 55573 Epoch: [12] [150/156] eta: 0:00:04 lr: 0.005186 min_lr: 0.005186 loss: 4.8243 (4.5148) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [12] [155/156] eta: 0:00:00 lr: 0.005199 min_lr: 0.005199 loss: 4.5914 (4.5176) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [12] Total time: 0:01:44 (0.6728 s / it) Averaged stats: lr: 0.005199 min_lr: 0.005199 loss: 4.5914 (4.5784) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.8941 (1.8941) acc1: 61.5885 (61.5885) acc5: 86.1979 (86.1979) time: 6.9301 data: 6.6929 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.0881 (2.1031) acc1: 57.4219 (55.7760) acc5: 82.5521 (79.7440) time: 1.5326 data: 1.3387 max mem: 55573 Test: Total time: 0:00:07 (1.5749 s / it) * Acc@1 55.166 Acc@5 79.774 loss 2.074 Accuracy of the model on the 50000 test images: 55.2% Max accuracy: 55.17% Test: [0/5] eta: 0:00:32 loss: 6.9092 (6.9092) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.5912 data: 6.3552 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9092 (6.9207) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.4636 data: 1.2712 max mem: 55573 Test: Total time: 0:00:07 (1.4794 s / it) * Acc@1 0.100 Acc@5 0.498 loss 6.915 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [13] [ 0/156] eta: 0:33:41 lr: 0.005202 min_lr: 0.005202 loss: 4.8295 (4.8295) weight_decay: 0.0500 (0.0500) time: 12.9583 data: 11.1965 max mem: 55573 Epoch: [13] [ 10/156] eta: 0:04:10 lr: 0.005227 min_lr: 0.005227 loss: 4.9281 (4.7366) weight_decay: 0.0500 (0.0500) time: 1.7173 data: 1.0182 max mem: 55573 Epoch: [13] [ 20/156] eta: 0:02:40 lr: 0.005253 min_lr: 0.005253 loss: 4.6482 (4.5791) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [13] [ 30/156] eta: 0:02:04 lr: 0.005279 min_lr: 0.005279 loss: 4.4575 (4.5306) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [13] [ 40/156] eta: 0:01:43 lr: 0.005304 min_lr: 0.005304 loss: 4.3990 (4.5171) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [13] [ 50/156] eta: 0:01:28 lr: 0.005330 min_lr: 0.005330 loss: 4.5699 (4.5514) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [13] [ 60/156] eta: 0:01:16 lr: 0.005356 min_lr: 0.005356 loss: 4.7034 (4.5576) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [13] [ 70/156] eta: 0:01:05 lr: 0.005381 min_lr: 0.005381 loss: 4.7335 (4.5455) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [13] [ 80/156] eta: 0:00:56 lr: 0.005407 min_lr: 0.005407 loss: 4.5970 (4.5395) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [13] [ 90/156] eta: 0:00:48 lr: 0.005433 min_lr: 0.005433 loss: 4.5970 (4.5359) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [13] [100/156] eta: 0:00:39 lr: 0.005458 min_lr: 0.005458 loss: 4.4146 (4.5277) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [13] [110/156] eta: 0:00:32 lr: 0.005484 min_lr: 0.005484 loss: 4.7969 (4.5557) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [13] [120/156] eta: 0:00:24 lr: 0.005509 min_lr: 0.005509 loss: 4.8554 (4.5487) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [13] [130/156] eta: 0:00:17 lr: 0.005535 min_lr: 0.005535 loss: 4.7270 (4.5568) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0010 max mem: 55573 Epoch: [13] [140/156] eta: 0:00:10 lr: 0.005561 min_lr: 0.005561 loss: 4.8001 (4.5562) weight_decay: 0.0500 (0.0500) time: 0.5881 data: 0.0008 max mem: 55573 Epoch: [13] [150/156] eta: 0:00:04 lr: 0.005586 min_lr: 0.005586 loss: 4.7131 (4.5506) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [13] [155/156] eta: 0:00:00 lr: 0.005599 min_lr: 0.005599 loss: 4.6612 (4.5415) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [13] Total time: 0:01:45 (0.6760 s / it) Averaged stats: lr: 0.005599 min_lr: 0.005599 loss: 4.6612 (4.5049) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.9438 (1.9438) acc1: 58.9844 (58.9844) acc5: 84.7656 (84.7656) time: 7.2876 data: 7.0502 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.0056 (2.0698) acc1: 55.5990 (53.4080) acc5: 81.6406 (78.6240) time: 1.6041 data: 1.4101 max mem: 55573 Test: Total time: 0:00:08 (1.6429 s / it) * Acc@1 54.470 Acc@5 79.108 loss 2.058 Accuracy of the model on the 50000 test images: 54.5% Max accuracy: 55.17% Test: [0/5] eta: 0:00:34 loss: 6.9084 (6.9084) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9898 data: 6.7538 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9084 (6.9223) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5433 data: 1.3508 max mem: 55573 Test: Total time: 0:00:07 (1.5593 s / it) * Acc@1 0.100 Acc@5 0.502 loss 6.916 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [14] [ 0/156] eta: 0:33:51 lr: 0.005602 min_lr: 0.005602 loss: 4.9298 (4.9298) weight_decay: 0.0500 (0.0500) time: 13.0222 data: 10.1353 max mem: 55573 Epoch: [14] [ 10/156] eta: 0:04:17 lr: 0.005627 min_lr: 0.005627 loss: 4.9298 (4.7823) weight_decay: 0.0500 (0.0500) time: 1.7662 data: 0.9218 max mem: 55573 Epoch: [14] [ 20/156] eta: 0:02:44 lr: 0.005653 min_lr: 0.005653 loss: 4.5458 (4.4764) weight_decay: 0.0500 (0.0500) time: 0.6157 data: 0.0004 max mem: 55573 Epoch: [14] [ 30/156] eta: 0:02:06 lr: 0.005679 min_lr: 0.005679 loss: 4.2501 (4.4756) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [14] [ 40/156] eta: 0:01:45 lr: 0.005704 min_lr: 0.005704 loss: 4.2094 (4.3705) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [14] [ 50/156] eta: 0:01:29 lr: 0.005730 min_lr: 0.005730 loss: 4.2094 (4.3555) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [14] [ 60/156] eta: 0:01:17 lr: 0.005756 min_lr: 0.005756 loss: 4.2787 (4.3528) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [14] [ 70/156] eta: 0:01:06 lr: 0.005781 min_lr: 0.005781 loss: 4.2403 (4.3534) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [14] [ 80/156] eta: 0:00:57 lr: 0.005807 min_lr: 0.005807 loss: 4.5974 (4.3884) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [14] [ 90/156] eta: 0:00:48 lr: 0.005833 min_lr: 0.005833 loss: 4.6873 (4.4109) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [14] [100/156] eta: 0:00:40 lr: 0.005858 min_lr: 0.005858 loss: 4.4425 (4.3954) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [14] [110/156] eta: 0:00:32 lr: 0.005884 min_lr: 0.005884 loss: 4.3789 (4.3957) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [14] [120/156] eta: 0:00:25 lr: 0.005910 min_lr: 0.005910 loss: 4.4347 (4.3918) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [14] [130/156] eta: 0:00:17 lr: 0.005935 min_lr: 0.005935 loss: 4.5331 (4.3972) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0011 max mem: 55573 Epoch: [14] [140/156] eta: 0:00:10 lr: 0.005961 min_lr: 0.005961 loss: 4.6499 (4.4161) weight_decay: 0.0500 (0.0500) time: 0.5872 data: 0.0009 max mem: 55573 Epoch: [14] [150/156] eta: 0:00:04 lr: 0.005987 min_lr: 0.005987 loss: 4.6891 (4.4279) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [14] [155/156] eta: 0:00:00 lr: 0.005999 min_lr: 0.005999 loss: 4.6891 (4.4232) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [14] Total time: 0:01:45 (0.6788 s / it) Averaged stats: lr: 0.005999 min_lr: 0.005999 loss: 4.6891 (4.4297) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.7202 (1.7202) acc1: 63.2812 (63.2812) acc5: 86.0677 (86.0677) time: 7.1056 data: 6.8685 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8811 (1.8745) acc1: 61.5885 (57.5680) acc5: 84.2448 (81.4400) time: 1.5675 data: 1.3738 max mem: 55573 Test: Total time: 0:00:08 (1.6045 s / it) * Acc@1 56.956 Acc@5 81.802 loss 1.895 Accuracy of the model on the 50000 test images: 57.0% Max accuracy: 56.96% Test: [0/5] eta: 0:00:34 loss: 6.9075 (6.9075) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.9895 data: 6.7530 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9075 (6.9239) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5436 data: 1.3507 max mem: 55573 Test: Total time: 0:00:07 (1.5649 s / it) * Acc@1 0.100 Acc@5 0.500 loss 6.917 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [15] [ 0/156] eta: 0:36:01 lr: 0.006002 min_lr: 0.006002 loss: 4.4531 (4.4531) weight_decay: 0.0500 (0.0500) time: 13.8545 data: 8.8830 max mem: 55573 Epoch: [15] [ 10/156] eta: 0:04:26 lr: 0.006028 min_lr: 0.006028 loss: 4.3697 (4.4018) weight_decay: 0.0500 (0.0500) time: 1.8235 data: 0.8078 max mem: 55573 Epoch: [15] [ 20/156] eta: 0:02:48 lr: 0.006053 min_lr: 0.006053 loss: 4.2625 (4.3161) weight_decay: 0.0500 (0.0500) time: 0.6074 data: 0.0004 max mem: 55573 Epoch: [15] [ 30/156] eta: 0:02:09 lr: 0.006079 min_lr: 0.006079 loss: 4.2625 (4.3178) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [15] [ 40/156] eta: 0:01:46 lr: 0.006105 min_lr: 0.006105 loss: 4.4810 (4.3690) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [15] [ 50/156] eta: 0:01:30 lr: 0.006130 min_lr: 0.006130 loss: 4.4962 (4.3360) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [15] [ 60/156] eta: 0:01:18 lr: 0.006156 min_lr: 0.006156 loss: 4.1339 (4.3345) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [15] [ 70/156] eta: 0:01:07 lr: 0.006181 min_lr: 0.006181 loss: 4.6899 (4.3772) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [15] [ 80/156] eta: 0:00:57 lr: 0.006207 min_lr: 0.006207 loss: 4.4615 (4.3560) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [15] [ 90/156] eta: 0:00:48 lr: 0.006233 min_lr: 0.006233 loss: 4.1649 (4.3670) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [15] [100/156] eta: 0:00:40 lr: 0.006258 min_lr: 0.006258 loss: 4.5674 (4.3777) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [15] [110/156] eta: 0:00:32 lr: 0.006284 min_lr: 0.006284 loss: 4.5674 (4.3975) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [15] [120/156] eta: 0:00:25 lr: 0.006310 min_lr: 0.006310 loss: 4.7581 (4.4063) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [15] [130/156] eta: 0:00:18 lr: 0.006335 min_lr: 0.006335 loss: 4.7581 (4.3999) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0010 max mem: 55573 Epoch: [15] [140/156] eta: 0:00:10 lr: 0.006361 min_lr: 0.006361 loss: 4.1564 (4.3877) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [15] [150/156] eta: 0:00:04 lr: 0.006387 min_lr: 0.006387 loss: 4.3894 (4.4038) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [15] [155/156] eta: 0:00:00 lr: 0.006399 min_lr: 0.006399 loss: 4.3894 (4.3927) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [15] Total time: 0:01:46 (0.6825 s / it) Averaged stats: lr: 0.006399 min_lr: 0.006399 loss: 4.3894 (4.4087) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.6790 (1.6790) acc1: 63.2812 (63.2812) acc5: 86.3281 (86.3281) time: 7.3148 data: 7.0773 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8268 (1.9031) acc1: 59.5052 (56.4160) acc5: 84.2448 (81.3440) time: 1.6095 data: 1.4155 max mem: 55573 Test: Total time: 0:00:08 (1.6484 s / it) * Acc@1 57.588 Acc@5 81.714 loss 1.864 Accuracy of the model on the 50000 test images: 57.6% Max accuracy: 57.59% Test: [0/5] eta: 0:00:34 loss: 6.9065 (6.9065) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.9628 data: 6.7262 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9065 (6.9255) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5382 data: 1.3454 max mem: 55573 Test: Total time: 0:00:07 (1.5564 s / it) * Acc@1 0.102 Acc@5 0.500 loss 6.918 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.10% Epoch: [16] [ 0/156] eta: 0:36:31 lr: 0.006402 min_lr: 0.006402 loss: 3.7989 (3.7989) weight_decay: 0.0500 (0.0500) time: 14.0510 data: 10.4540 max mem: 55573 Epoch: [16] [ 10/156] eta: 0:04:26 lr: 0.006428 min_lr: 0.006428 loss: 3.9863 (4.1661) weight_decay: 0.0500 (0.0500) time: 1.8251 data: 0.9508 max mem: 55573 Epoch: [16] [ 20/156] eta: 0:02:48 lr: 0.006453 min_lr: 0.006453 loss: 4.3949 (4.3415) weight_decay: 0.0500 (0.0500) time: 0.5971 data: 0.0004 max mem: 55573 Epoch: [16] [ 30/156] eta: 0:02:09 lr: 0.006479 min_lr: 0.006479 loss: 4.5623 (4.4157) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [16] [ 40/156] eta: 0:01:46 lr: 0.006505 min_lr: 0.006505 loss: 4.6544 (4.4226) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [16] [ 50/156] eta: 0:01:30 lr: 0.006530 min_lr: 0.006530 loss: 4.3384 (4.4027) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [16] [ 60/156] eta: 0:01:18 lr: 0.006556 min_lr: 0.006556 loss: 4.5961 (4.4418) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [16] [ 70/156] eta: 0:01:07 lr: 0.006582 min_lr: 0.006582 loss: 4.6327 (4.4690) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0006 max mem: 55573 Epoch: [16] [ 80/156] eta: 0:00:57 lr: 0.006607 min_lr: 0.006607 loss: 4.4882 (4.4607) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [16] [ 90/156] eta: 0:00:48 lr: 0.006633 min_lr: 0.006633 loss: 4.4786 (4.4594) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [16] [100/156] eta: 0:00:40 lr: 0.006659 min_lr: 0.006659 loss: 4.3934 (4.4387) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [16] [110/156] eta: 0:00:32 lr: 0.006684 min_lr: 0.006684 loss: 4.3934 (4.4399) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [16] [120/156] eta: 0:00:25 lr: 0.006710 min_lr: 0.006710 loss: 4.5651 (4.4230) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0004 max mem: 55573 Epoch: [16] [130/156] eta: 0:00:18 lr: 0.006735 min_lr: 0.006735 loss: 4.3591 (4.4200) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0010 max mem: 55573 Epoch: [16] [140/156] eta: 0:00:10 lr: 0.006761 min_lr: 0.006761 loss: 4.3591 (4.4197) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0008 max mem: 55573 Epoch: [16] [150/156] eta: 0:00:04 lr: 0.006787 min_lr: 0.006787 loss: 4.6138 (4.4370) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [16] [155/156] eta: 0:00:00 lr: 0.006800 min_lr: 0.006800 loss: 4.6472 (4.4190) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [16] Total time: 0:01:46 (0.6835 s / it) Averaged stats: lr: 0.006800 min_lr: 0.006800 loss: 4.6472 (4.3548) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.7706 (1.7706) acc1: 65.4948 (65.4948) acc5: 86.3281 (86.3281) time: 6.7398 data: 6.5027 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9889 (1.9403) acc1: 57.5521 (56.6720) acc5: 82.9427 (81.0240) time: 1.4932 data: 1.3006 max mem: 55573 Test: Total time: 0:00:07 (1.5098 s / it) * Acc@1 56.358 Acc@5 80.818 loss 1.954 Accuracy of the model on the 50000 test images: 56.4% Max accuracy: 57.59% Test: [0/5] eta: 0:00:35 loss: 6.9056 (6.9056) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.1035 data: 6.8670 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9063 (6.9270) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5661 data: 1.3735 max mem: 55573 Test: Total time: 0:00:07 (1.5888 s / it) * Acc@1 0.102 Acc@5 0.502 loss 6.919 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [17] [ 0/156] eta: 0:37:40 lr: 0.006802 min_lr: 0.006802 loss: 3.3034 (3.3034) weight_decay: 0.0500 (0.0500) time: 14.4913 data: 11.5777 max mem: 55573 Epoch: [17] [ 10/156] eta: 0:04:30 lr: 0.006828 min_lr: 0.006828 loss: 4.6115 (4.3792) weight_decay: 0.0500 (0.0500) time: 1.8546 data: 1.0528 max mem: 55573 Epoch: [17] [ 20/156] eta: 0:02:50 lr: 0.006853 min_lr: 0.006853 loss: 4.5655 (4.3761) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0003 max mem: 55573 Epoch: [17] [ 30/156] eta: 0:02:11 lr: 0.006879 min_lr: 0.006879 loss: 4.6254 (4.4206) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0003 max mem: 55573 Epoch: [17] [ 40/156] eta: 0:01:48 lr: 0.006905 min_lr: 0.006905 loss: 4.6500 (4.4193) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [17] [ 50/156] eta: 0:01:31 lr: 0.006930 min_lr: 0.006930 loss: 4.4969 (4.4496) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [17] [ 60/156] eta: 0:01:18 lr: 0.006956 min_lr: 0.006956 loss: 4.4966 (4.4727) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [17] [ 70/156] eta: 0:01:07 lr: 0.006982 min_lr: 0.006982 loss: 4.5452 (4.4453) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [17] [ 80/156] eta: 0:00:58 lr: 0.007007 min_lr: 0.007007 loss: 4.6041 (4.4731) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [17] [ 90/156] eta: 0:00:49 lr: 0.007033 min_lr: 0.007033 loss: 4.2574 (4.4108) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0005 max mem: 55573 Epoch: [17] [100/156] eta: 0:00:40 lr: 0.007059 min_lr: 0.007059 loss: 4.0771 (4.3906) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [17] [110/156] eta: 0:00:32 lr: 0.007084 min_lr: 0.007084 loss: 4.4569 (4.3821) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [17] [120/156] eta: 0:00:25 lr: 0.007110 min_lr: 0.007110 loss: 4.6497 (4.3957) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [17] [130/156] eta: 0:00:18 lr: 0.007136 min_lr: 0.007136 loss: 4.6227 (4.3841) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0010 max mem: 55573 Epoch: [17] [140/156] eta: 0:00:11 lr: 0.007161 min_lr: 0.007161 loss: 3.9257 (4.3530) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0009 max mem: 55573 Epoch: [17] [150/156] eta: 0:00:04 lr: 0.007187 min_lr: 0.007187 loss: 4.0566 (4.3473) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [17] [155/156] eta: 0:00:00 lr: 0.007200 min_lr: 0.007200 loss: 4.4000 (4.3475) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [17] Total time: 0:01:47 (0.6863 s / it) Averaged stats: lr: 0.007200 min_lr: 0.007200 loss: 4.4000 (4.3042) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.8214 (1.8214) acc1: 62.8906 (62.8906) acc5: 86.4583 (86.4583) time: 7.1516 data: 6.9132 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9293 (2.0369) acc1: 59.3750 (57.2800) acc5: 84.5052 (80.9920) time: 1.5768 data: 1.3827 max mem: 55573 Test: Total time: 0:00:08 (1.6139 s / it) * Acc@1 57.308 Acc@5 81.436 loss 2.021 Accuracy of the model on the 50000 test images: 57.3% Max accuracy: 57.59% Test: [0/5] eta: 0:00:35 loss: 6.9046 (6.9046) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.0647 data: 6.8282 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9073 (6.9285) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5584 data: 1.3657 max mem: 55573 Test: Total time: 0:00:07 (1.5772 s / it) * Acc@1 0.102 Acc@5 0.502 loss 6.920 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [18] [ 0/156] eta: 0:35:08 lr: 0.007202 min_lr: 0.007202 loss: 4.3485 (4.3485) weight_decay: 0.0500 (0.0500) time: 13.5171 data: 10.2658 max mem: 55573 Epoch: [18] [ 10/156] eta: 0:04:18 lr: 0.007228 min_lr: 0.007228 loss: 4.6377 (4.3115) weight_decay: 0.0500 (0.0500) time: 1.7689 data: 0.9336 max mem: 55573 Epoch: [18] [ 20/156] eta: 0:02:44 lr: 0.007254 min_lr: 0.007254 loss: 4.5255 (4.2825) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [18] [ 30/156] eta: 0:02:07 lr: 0.007279 min_lr: 0.007279 loss: 4.2014 (4.2508) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [18] [ 40/156] eta: 0:01:45 lr: 0.007305 min_lr: 0.007305 loss: 4.1171 (4.2020) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [18] [ 50/156] eta: 0:01:29 lr: 0.007331 min_lr: 0.007331 loss: 3.7395 (4.1226) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [18] [ 60/156] eta: 0:01:17 lr: 0.007356 min_lr: 0.007356 loss: 3.8157 (4.1117) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [18] [ 70/156] eta: 0:01:06 lr: 0.007382 min_lr: 0.007382 loss: 4.0458 (4.1072) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [18] [ 80/156] eta: 0:00:57 lr: 0.007408 min_lr: 0.007408 loss: 4.0472 (4.0937) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [18] [ 90/156] eta: 0:00:48 lr: 0.007433 min_lr: 0.007433 loss: 4.4677 (4.1452) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [18] [100/156] eta: 0:00:40 lr: 0.007459 min_lr: 0.007459 loss: 4.4824 (4.1368) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [18] [110/156] eta: 0:00:32 lr: 0.007484 min_lr: 0.007484 loss: 4.3811 (4.1580) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [18] [120/156] eta: 0:00:25 lr: 0.007510 min_lr: 0.007510 loss: 4.5194 (4.1741) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [18] [130/156] eta: 0:00:17 lr: 0.007536 min_lr: 0.007536 loss: 4.5667 (4.2034) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [18] [140/156] eta: 0:00:10 lr: 0.007561 min_lr: 0.007561 loss: 4.3559 (4.1907) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0009 max mem: 55573 Epoch: [18] [150/156] eta: 0:00:04 lr: 0.007587 min_lr: 0.007587 loss: 4.0900 (4.1884) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [18] [155/156] eta: 0:00:00 lr: 0.007600 min_lr: 0.007600 loss: 4.0900 (4.1893) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [18] Total time: 0:01:46 (0.6797 s / it) Averaged stats: lr: 0.007600 min_lr: 0.007600 loss: 4.0900 (4.2735) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.5740 (1.5740) acc1: 63.0208 (63.0208) acc5: 88.2812 (88.2812) time: 7.0190 data: 6.7816 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7820 (1.8589) acc1: 61.7188 (58.3680) acc5: 85.8073 (82.0480) time: 1.5504 data: 1.3564 max mem: 55573 Test: Total time: 0:00:07 (1.5894 s / it) * Acc@1 58.350 Acc@5 82.556 loss 1.874 Accuracy of the model on the 50000 test images: 58.4% Max accuracy: 58.35% Test: [0/5] eta: 0:00:32 loss: 6.9037 (6.9037) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.5158 data: 6.2791 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9084 (6.9300) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.4761 data: 1.2834 max mem: 55573 Test: Total time: 0:00:07 (1.4916 s / it) * Acc@1 0.104 Acc@5 0.502 loss 6.921 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.10% Epoch: [19] [ 0/156] eta: 0:32:58 lr: 0.007602 min_lr: 0.007602 loss: 3.7124 (3.7124) weight_decay: 0.0500 (0.0500) time: 12.6842 data: 11.5792 max mem: 55573 Epoch: [19] [ 10/156] eta: 0:04:12 lr: 0.007628 min_lr: 0.007628 loss: 4.5908 (4.3943) weight_decay: 0.0500 (0.0500) time: 1.7271 data: 1.0531 max mem: 55573 Epoch: [19] [ 20/156] eta: 0:02:41 lr: 0.007654 min_lr: 0.007654 loss: 4.3464 (4.1883) weight_decay: 0.0500 (0.0500) time: 0.6109 data: 0.0005 max mem: 55573 Epoch: [19] [ 30/156] eta: 0:02:05 lr: 0.007679 min_lr: 0.007679 loss: 4.3464 (4.2723) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [19] [ 40/156] eta: 0:01:43 lr: 0.007705 min_lr: 0.007705 loss: 4.4619 (4.2709) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [19] [ 50/156] eta: 0:01:28 lr: 0.007731 min_lr: 0.007731 loss: 4.2020 (4.2618) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [19] [ 60/156] eta: 0:01:16 lr: 0.007756 min_lr: 0.007756 loss: 4.2020 (4.2401) weight_decay: 0.0500 (0.0500) time: 0.5941 data: 0.0005 max mem: 55573 Epoch: [19] [ 70/156] eta: 0:01:06 lr: 0.007782 min_lr: 0.007782 loss: 4.3172 (4.2408) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [19] [ 80/156] eta: 0:00:56 lr: 0.007808 min_lr: 0.007808 loss: 4.3253 (4.2396) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [19] [ 90/156] eta: 0:00:48 lr: 0.007833 min_lr: 0.007833 loss: 4.4804 (4.2495) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [19] [100/156] eta: 0:00:40 lr: 0.007859 min_lr: 0.007859 loss: 4.2002 (4.2243) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [19] [110/156] eta: 0:00:32 lr: 0.007885 min_lr: 0.007885 loss: 4.2002 (4.2231) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [19] [120/156] eta: 0:00:25 lr: 0.007910 min_lr: 0.007910 loss: 4.2540 (4.2029) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [19] [130/156] eta: 0:00:17 lr: 0.007936 min_lr: 0.007936 loss: 4.3281 (4.2134) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0012 max mem: 55573 Epoch: [19] [140/156] eta: 0:00:10 lr: 0.007962 min_lr: 0.007962 loss: 4.3180 (4.1982) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0011 max mem: 55573 Epoch: [19] [150/156] eta: 0:00:04 lr: 0.007987 min_lr: 0.007987 loss: 4.1359 (4.2023) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [19] [155/156] eta: 0:00:00 lr: 0.008000 min_lr: 0.008000 loss: 4.1359 (4.2114) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [19] Total time: 0:01:45 (0.6769 s / it) Averaged stats: lr: 0.008000 min_lr: 0.008000 loss: 4.1359 (4.2360) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.8779 (1.8779) acc1: 59.8958 (59.8958) acc5: 85.0260 (85.0260) time: 7.1843 data: 6.9468 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9301 (1.9863) acc1: 59.5052 (55.9360) acc5: 85.0260 (80.6080) time: 1.5834 data: 1.3894 max mem: 55573 Test: Total time: 0:00:08 (1.6262 s / it) * Acc@1 56.212 Acc@5 80.678 loss 1.996 Accuracy of the model on the 50000 test images: 56.2% Max accuracy: 58.35% Test: [0/5] eta: 0:00:36 loss: 6.9032 (6.9032) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.3056 data: 7.0692 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9095 (6.9314) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.6067 data: 1.4139 max mem: 55573 Test: Total time: 0:00:08 (1.6292 s / it) * Acc@1 0.104 Acc@5 0.506 loss 6.922 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [20] [ 0/156] eta: 0:35:56 lr: 0.008000 min_lr: 0.008000 loss: 4.2855 (4.2855) weight_decay: 0.0500 (0.0500) time: 13.8216 data: 8.9870 max mem: 55573 Epoch: [20] [ 10/156] eta: 0:04:24 lr: 0.008000 min_lr: 0.008000 loss: 4.2176 (4.2147) weight_decay: 0.0500 (0.0500) time: 1.8127 data: 0.8174 max mem: 55573 Epoch: [20] [ 20/156] eta: 0:02:47 lr: 0.008000 min_lr: 0.008000 loss: 4.0041 (4.0595) weight_decay: 0.0500 (0.0500) time: 0.6004 data: 0.0004 max mem: 55573 Epoch: [20] [ 30/156] eta: 0:02:08 lr: 0.008000 min_lr: 0.008000 loss: 3.9495 (4.1045) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [20] [ 40/156] eta: 0:01:46 lr: 0.008000 min_lr: 0.008000 loss: 4.3076 (4.1069) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [20] [ 50/156] eta: 0:01:30 lr: 0.008000 min_lr: 0.008000 loss: 4.3551 (4.1390) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [20] [ 60/156] eta: 0:01:17 lr: 0.008000 min_lr: 0.008000 loss: 4.3631 (4.1593) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [20] [ 70/156] eta: 0:01:07 lr: 0.008000 min_lr: 0.008000 loss: 4.1741 (4.1359) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [20] [ 80/156] eta: 0:00:57 lr: 0.008000 min_lr: 0.008000 loss: 4.2377 (4.1637) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [20] [ 90/156] eta: 0:00:48 lr: 0.008000 min_lr: 0.008000 loss: 4.5812 (4.1745) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [20] [100/156] eta: 0:00:40 lr: 0.008000 min_lr: 0.008000 loss: 4.5812 (4.1909) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [20] [110/156] eta: 0:00:32 lr: 0.008000 min_lr: 0.008000 loss: 4.3684 (4.1981) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [20] [120/156] eta: 0:00:25 lr: 0.008000 min_lr: 0.008000 loss: 4.4961 (4.2331) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [20] [130/156] eta: 0:00:18 lr: 0.008000 min_lr: 0.008000 loss: 4.5009 (4.2348) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [20] [140/156] eta: 0:00:10 lr: 0.008000 min_lr: 0.008000 loss: 4.1065 (4.2371) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0008 max mem: 55573 Epoch: [20] [150/156] eta: 0:00:04 lr: 0.008000 min_lr: 0.008000 loss: 4.1065 (4.2297) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [20] [155/156] eta: 0:00:00 lr: 0.008000 min_lr: 0.008000 loss: 4.2666 (4.2296) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [20] Total time: 0:01:46 (0.6818 s / it) Averaged stats: lr: 0.008000 min_lr: 0.008000 loss: 4.2666 (4.2070) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.7322 (1.7322) acc1: 66.6667 (66.6667) acc5: 87.6302 (87.6302) time: 7.1410 data: 6.9040 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8599 (1.9082) acc1: 62.6302 (58.4960) acc5: 85.6771 (82.0480) time: 1.5747 data: 1.3809 max mem: 55573 Test: Total time: 0:00:08 (1.6212 s / it) * Acc@1 58.806 Acc@5 82.494 loss 1.897 Accuracy of the model on the 50000 test images: 58.8% Max accuracy: 58.81% Test: [0/5] eta: 0:00:33 loss: 6.9027 (6.9027) acc1: 0.3906 (0.3906) acc5: 0.3906 (0.3906) time: 6.6775 data: 6.4410 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9108 (6.9329) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.4812 data: 1.2883 max mem: 55573 Test: Total time: 0:00:07 (1.4988 s / it) * Acc@1 0.102 Acc@5 0.504 loss 6.922 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [21] [ 0/156] eta: 0:35:33 lr: 0.008000 min_lr: 0.008000 loss: 4.3258 (4.3258) weight_decay: 0.0500 (0.0500) time: 13.6772 data: 8.7018 max mem: 55573 Epoch: [21] [ 10/156] eta: 0:04:21 lr: 0.008000 min_lr: 0.008000 loss: 3.9713 (3.9941) weight_decay: 0.0500 (0.0500) time: 1.7931 data: 0.7915 max mem: 55573 Epoch: [21] [ 20/156] eta: 0:02:45 lr: 0.008000 min_lr: 0.008000 loss: 4.2187 (4.2220) weight_decay: 0.0500 (0.0500) time: 0.5971 data: 0.0004 max mem: 55573 Epoch: [21] [ 30/156] eta: 0:02:08 lr: 0.008000 min_lr: 0.008000 loss: 4.4579 (4.2390) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [21] [ 40/156] eta: 0:01:46 lr: 0.008000 min_lr: 0.008000 loss: 4.4579 (4.2353) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [21] [ 50/156] eta: 0:01:30 lr: 0.008000 min_lr: 0.008000 loss: 4.0083 (4.1594) weight_decay: 0.0500 (0.0500) time: 0.5984 data: 0.0004 max mem: 55573 Epoch: [21] [ 60/156] eta: 0:01:17 lr: 0.008000 min_lr: 0.008000 loss: 3.7893 (4.1196) weight_decay: 0.0500 (0.0500) time: 0.5953 data: 0.0004 max mem: 55573 Epoch: [21] [ 70/156] eta: 0:01:06 lr: 0.007999 min_lr: 0.007999 loss: 4.1699 (4.1304) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0005 max mem: 55573 Epoch: [21] [ 80/156] eta: 0:00:57 lr: 0.007999 min_lr: 0.007999 loss: 4.3223 (4.1286) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0005 max mem: 55573 Epoch: [21] [ 90/156] eta: 0:00:48 lr: 0.007999 min_lr: 0.007999 loss: 4.2963 (4.1334) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [21] [100/156] eta: 0:00:40 lr: 0.007999 min_lr: 0.007999 loss: 4.2127 (4.1405) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [21] [110/156] eta: 0:00:32 lr: 0.007999 min_lr: 0.007999 loss: 4.3516 (4.1584) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [21] [120/156] eta: 0:00:25 lr: 0.007999 min_lr: 0.007999 loss: 4.3058 (4.1540) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [21] [130/156] eta: 0:00:18 lr: 0.007999 min_lr: 0.007999 loss: 4.1344 (4.1614) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0011 max mem: 55573 Epoch: [21] [140/156] eta: 0:00:10 lr: 0.007999 min_lr: 0.007999 loss: 4.5062 (4.1620) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [21] [150/156] eta: 0:00:04 lr: 0.007999 min_lr: 0.007999 loss: 4.3090 (4.1549) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [21] [155/156] eta: 0:00:00 lr: 0.007999 min_lr: 0.007999 loss: 4.2480 (4.1457) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [21] Total time: 0:01:46 (0.6815 s / it) Averaged stats: lr: 0.007999 min_lr: 0.007999 loss: 4.2480 (4.1580) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.8084 (1.8084) acc1: 65.4948 (65.4948) acc5: 87.3698 (87.3698) time: 7.0090 data: 6.7717 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8084 (1.8273) acc1: 62.8906 (59.7120) acc5: 87.3698 (83.5520) time: 1.5482 data: 1.3544 max mem: 55573 Test: Total time: 0:00:07 (1.5913 s / it) * Acc@1 60.202 Acc@5 83.816 loss 1.806 Accuracy of the model on the 50000 test images: 60.2% Max accuracy: 60.20% Test: [0/5] eta: 0:00:34 loss: 6.9022 (6.9022) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.9197 data: 6.6837 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9122 (6.9344) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5292 data: 1.3368 max mem: 55573 Test: Total time: 0:00:07 (1.5455 s / it) * Acc@1 0.098 Acc@5 0.508 loss 6.923 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [22] [ 0/156] eta: 0:34:15 lr: 0.007999 min_lr: 0.007999 loss: 4.6287 (4.6287) weight_decay: 0.0500 (0.0500) time: 13.1733 data: 5.9656 max mem: 55573 Epoch: [22] [ 10/156] eta: 0:04:15 lr: 0.007999 min_lr: 0.007999 loss: 3.8238 (4.0234) weight_decay: 0.0500 (0.0500) time: 1.7469 data: 0.5428 max mem: 55573 Epoch: [22] [ 20/156] eta: 0:02:42 lr: 0.007999 min_lr: 0.007999 loss: 3.9197 (4.0546) weight_decay: 0.0500 (0.0500) time: 0.5994 data: 0.0005 max mem: 55573 Epoch: [22] [ 30/156] eta: 0:02:06 lr: 0.007999 min_lr: 0.007999 loss: 4.4368 (4.1254) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [22] [ 40/156] eta: 0:01:44 lr: 0.007999 min_lr: 0.007999 loss: 3.9143 (4.0404) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [22] [ 50/156] eta: 0:01:29 lr: 0.007999 min_lr: 0.007999 loss: 4.0047 (4.0525) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [22] [ 60/156] eta: 0:01:16 lr: 0.007999 min_lr: 0.007999 loss: 4.0534 (4.0778) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [22] [ 70/156] eta: 0:01:06 lr: 0.007998 min_lr: 0.007998 loss: 4.1056 (4.0724) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0005 max mem: 55573 Epoch: [22] [ 80/156] eta: 0:00:56 lr: 0.007998 min_lr: 0.007998 loss: 4.2066 (4.0876) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [22] [ 90/156] eta: 0:00:48 lr: 0.007998 min_lr: 0.007998 loss: 4.0582 (4.0715) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [22] [100/156] eta: 0:00:40 lr: 0.007998 min_lr: 0.007998 loss: 4.0774 (4.0821) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [22] [110/156] eta: 0:00:32 lr: 0.007998 min_lr: 0.007998 loss: 4.0774 (4.0627) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [22] [120/156] eta: 0:00:25 lr: 0.007998 min_lr: 0.007998 loss: 3.9630 (4.0656) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [22] [130/156] eta: 0:00:17 lr: 0.007998 min_lr: 0.007998 loss: 4.4412 (4.0925) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [22] [140/156] eta: 0:00:10 lr: 0.007998 min_lr: 0.007998 loss: 4.3553 (4.0950) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0009 max mem: 55573 Epoch: [22] [150/156] eta: 0:00:04 lr: 0.007998 min_lr: 0.007998 loss: 4.2830 (4.1076) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [22] [155/156] eta: 0:00:00 lr: 0.007998 min_lr: 0.007998 loss: 4.2904 (4.1214) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [22] Total time: 0:01:45 (0.6779 s / it) Averaged stats: lr: 0.007998 min_lr: 0.007998 loss: 4.2904 (4.1221) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.7081 (1.7081) acc1: 64.9740 (64.9740) acc5: 88.6719 (88.6719) time: 6.8550 data: 6.6180 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9066 (1.8631) acc1: 61.7188 (58.7200) acc5: 86.0677 (83.0080) time: 1.5166 data: 1.3237 max mem: 55573 Test: Total time: 0:00:07 (1.5560 s / it) * Acc@1 58.628 Acc@5 82.940 loss 1.879 Accuracy of the model on the 50000 test images: 58.6% Max accuracy: 60.20% Test: [0/5] eta: 0:00:36 loss: 6.9017 (6.9017) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.2160 data: 6.9801 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9138 (6.9358) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5886 data: 1.3961 max mem: 55573 Test: Total time: 0:00:08 (1.6074 s / it) * Acc@1 0.098 Acc@5 0.508 loss 6.924 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [23] [ 0/156] eta: 0:35:41 lr: 0.007998 min_lr: 0.007998 loss: 4.3032 (4.3032) weight_decay: 0.0500 (0.0500) time: 13.7304 data: 10.6293 max mem: 55573 Epoch: [23] [ 10/156] eta: 0:04:21 lr: 0.007998 min_lr: 0.007998 loss: 4.0780 (3.9850) weight_decay: 0.0500 (0.0500) time: 1.7882 data: 0.9667 max mem: 55573 Epoch: [23] [ 20/156] eta: 0:02:45 lr: 0.007998 min_lr: 0.007998 loss: 4.0780 (3.9727) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [23] [ 30/156] eta: 0:02:07 lr: 0.007997 min_lr: 0.007997 loss: 4.2574 (4.1122) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [23] [ 40/156] eta: 0:01:45 lr: 0.007997 min_lr: 0.007997 loss: 4.3475 (4.1755) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [23] [ 50/156] eta: 0:01:29 lr: 0.007997 min_lr: 0.007997 loss: 4.3193 (4.1739) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [23] [ 60/156] eta: 0:01:17 lr: 0.007997 min_lr: 0.007997 loss: 4.2670 (4.1839) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [23] [ 70/156] eta: 0:01:06 lr: 0.007997 min_lr: 0.007997 loss: 4.2737 (4.1622) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [23] [ 80/156] eta: 0:00:57 lr: 0.007997 min_lr: 0.007997 loss: 4.3859 (4.1767) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [23] [ 90/156] eta: 0:00:48 lr: 0.007997 min_lr: 0.007997 loss: 4.2502 (4.1653) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [23] [100/156] eta: 0:00:40 lr: 0.007997 min_lr: 0.007997 loss: 4.2137 (4.1710) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [23] [110/156] eta: 0:00:32 lr: 0.007997 min_lr: 0.007997 loss: 4.2376 (4.1516) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [23] [120/156] eta: 0:00:25 lr: 0.007996 min_lr: 0.007996 loss: 4.1558 (4.1536) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [23] [130/156] eta: 0:00:17 lr: 0.007996 min_lr: 0.007996 loss: 4.2273 (4.1592) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0007 max mem: 55573 Epoch: [23] [140/156] eta: 0:00:10 lr: 0.007996 min_lr: 0.007996 loss: 4.2729 (4.1576) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0006 max mem: 55573 Epoch: [23] [150/156] eta: 0:00:04 lr: 0.007996 min_lr: 0.007996 loss: 4.0856 (4.1563) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [23] [155/156] eta: 0:00:00 lr: 0.007996 min_lr: 0.007996 loss: 4.2562 (4.1634) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [23] Total time: 0:01:46 (0.6804 s / it) Averaged stats: lr: 0.007996 min_lr: 0.007996 loss: 4.2562 (4.0931) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.5829 (1.5829) acc1: 68.8802 (68.8802) acc5: 90.4948 (90.4948) time: 7.2406 data: 7.0021 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8808 (1.9530) acc1: 63.1510 (60.6400) acc5: 87.1094 (83.6160) time: 1.5946 data: 1.4005 max mem: 55573 Test: Total time: 0:00:08 (1.6534 s / it) * Acc@1 61.114 Acc@5 83.824 loss 1.938 Accuracy of the model on the 50000 test images: 61.1% Max accuracy: 61.11% Test: [0/5] eta: 0:00:35 loss: 6.9015 (6.9015) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.0309 data: 6.7949 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9150 (6.9374) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5515 data: 1.3591 max mem: 55573 Test: Total time: 0:00:07 (1.5711 s / it) * Acc@1 0.100 Acc@5 0.512 loss 6.925 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [24] [ 0/156] eta: 0:35:28 lr: 0.007996 min_lr: 0.007996 loss: 4.1489 (4.1489) weight_decay: 0.0500 (0.0500) time: 13.6419 data: 8.8803 max mem: 55573 Epoch: [24] [ 10/156] eta: 0:04:19 lr: 0.007996 min_lr: 0.007996 loss: 4.0709 (3.9564) weight_decay: 0.0500 (0.0500) time: 1.7800 data: 0.8077 max mem: 55573 Epoch: [24] [ 20/156] eta: 0:02:45 lr: 0.007996 min_lr: 0.007996 loss: 4.2811 (4.0681) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [24] [ 30/156] eta: 0:02:07 lr: 0.007996 min_lr: 0.007996 loss: 4.3503 (4.0745) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [24] [ 40/156] eta: 0:01:45 lr: 0.007995 min_lr: 0.007995 loss: 4.2438 (4.0781) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [24] [ 50/156] eta: 0:01:29 lr: 0.007995 min_lr: 0.007995 loss: 4.1994 (4.0832) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [24] [ 60/156] eta: 0:01:17 lr: 0.007995 min_lr: 0.007995 loss: 4.1260 (4.0832) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [24] [ 70/156] eta: 0:01:06 lr: 0.007995 min_lr: 0.007995 loss: 3.9801 (4.0424) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [24] [ 80/156] eta: 0:00:57 lr: 0.007995 min_lr: 0.007995 loss: 4.2234 (4.0695) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [24] [ 90/156] eta: 0:00:48 lr: 0.007995 min_lr: 0.007995 loss: 4.3290 (4.0741) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [24] [100/156] eta: 0:00:40 lr: 0.007995 min_lr: 0.007995 loss: 4.2313 (4.0565) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [24] [110/156] eta: 0:00:32 lr: 0.007994 min_lr: 0.007994 loss: 4.2490 (4.0657) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [24] [120/156] eta: 0:00:25 lr: 0.007994 min_lr: 0.007994 loss: 4.1944 (4.0493) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [24] [130/156] eta: 0:00:17 lr: 0.007994 min_lr: 0.007994 loss: 4.1944 (4.0534) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0010 max mem: 55573 Epoch: [24] [140/156] eta: 0:00:10 lr: 0.007994 min_lr: 0.007994 loss: 4.2200 (4.0427) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0009 max mem: 55573 Epoch: [24] [150/156] eta: 0:00:04 lr: 0.007994 min_lr: 0.007994 loss: 4.0806 (4.0399) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [24] [155/156] eta: 0:00:00 lr: 0.007994 min_lr: 0.007994 loss: 4.1172 (4.0270) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [24] Total time: 0:01:45 (0.6794 s / it) Averaged stats: lr: 0.007994 min_lr: 0.007994 loss: 4.1172 (4.0579) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.5592 (1.5592) acc1: 66.4062 (66.4062) acc5: 90.1042 (90.1042) time: 7.1981 data: 6.9606 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7756 (1.7966) acc1: 63.0208 (60.3840) acc5: 87.7604 (83.4560) time: 1.5863 data: 1.3922 max mem: 55573 Test: Total time: 0:00:08 (1.6357 s / it) * Acc@1 61.156 Acc@5 84.338 loss 1.779 Accuracy of the model on the 50000 test images: 61.2% Max accuracy: 61.16% Test: [0/5] eta: 0:00:34 loss: 6.9012 (6.9012) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.8748 data: 6.6388 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9165 (6.9390) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5203 data: 1.3279 max mem: 55573 Test: Total time: 0:00:07 (1.5370 s / it) * Acc@1 0.100 Acc@5 0.514 loss 6.925 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [25] [ 0/156] eta: 0:32:22 lr: 0.007994 min_lr: 0.007994 loss: 4.4994 (4.4994) weight_decay: 0.0500 (0.0500) time: 12.4514 data: 11.7960 max mem: 55573 Epoch: [25] [ 10/156] eta: 0:04:23 lr: 0.007994 min_lr: 0.007994 loss: 4.0514 (4.1087) weight_decay: 0.0500 (0.0500) time: 1.8014 data: 1.0727 max mem: 55573 Epoch: [25] [ 20/156] eta: 0:02:46 lr: 0.007993 min_lr: 0.007993 loss: 4.0514 (4.1224) weight_decay: 0.0500 (0.0500) time: 0.6634 data: 0.0004 max mem: 55573 Epoch: [25] [ 30/156] eta: 0:02:08 lr: 0.007993 min_lr: 0.007993 loss: 3.9340 (4.0304) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [25] [ 40/156] eta: 0:01:46 lr: 0.007993 min_lr: 0.007993 loss: 3.7872 (4.0008) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [25] [ 50/156] eta: 0:01:30 lr: 0.007993 min_lr: 0.007993 loss: 4.0677 (3.9764) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [25] [ 60/156] eta: 0:01:17 lr: 0.007993 min_lr: 0.007993 loss: 4.0677 (4.0053) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [25] [ 70/156] eta: 0:01:06 lr: 0.007993 min_lr: 0.007993 loss: 4.1872 (4.0557) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [25] [ 80/156] eta: 0:00:57 lr: 0.007992 min_lr: 0.007992 loss: 4.1424 (4.0607) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [25] [ 90/156] eta: 0:00:48 lr: 0.007992 min_lr: 0.007992 loss: 4.0127 (4.0650) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [25] [100/156] eta: 0:00:40 lr: 0.007992 min_lr: 0.007992 loss: 4.0127 (4.0601) weight_decay: 0.0500 (0.0500) time: 0.5987 data: 0.0005 max mem: 55573 Epoch: [25] [110/156] eta: 0:00:32 lr: 0.007992 min_lr: 0.007992 loss: 3.9260 (4.0493) weight_decay: 0.0500 (0.0500) time: 0.5975 data: 0.0005 max mem: 55573 Epoch: [25] [120/156] eta: 0:00:25 lr: 0.007992 min_lr: 0.007992 loss: 4.1455 (4.0563) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [25] [130/156] eta: 0:00:18 lr: 0.007991 min_lr: 0.007991 loss: 4.1455 (4.0550) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0010 max mem: 55573 Epoch: [25] [140/156] eta: 0:00:10 lr: 0.007991 min_lr: 0.007991 loss: 4.1892 (4.0667) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [25] [150/156] eta: 0:00:04 lr: 0.007991 min_lr: 0.007991 loss: 4.3140 (4.0868) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [25] [155/156] eta: 0:00:00 lr: 0.007991 min_lr: 0.007991 loss: 4.3140 (4.0954) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [25] Total time: 0:01:46 (0.6815 s / it) Averaged stats: lr: 0.007991 min_lr: 0.007991 loss: 4.3140 (4.0429) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.6137 (1.6137) acc1: 68.6198 (68.6198) acc5: 91.2760 (91.2760) time: 6.8377 data: 6.6001 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8071 (1.8191) acc1: 64.7135 (61.2800) acc5: 89.0625 (85.3760) time: 1.5140 data: 1.3201 max mem: 55573 Test: Total time: 0:00:07 (1.5580 s / it) * Acc@1 62.102 Acc@5 84.940 loss 1.812 Accuracy of the model on the 50000 test images: 62.1% Max accuracy: 62.10% Test: [0/5] eta: 0:00:35 loss: 6.9011 (6.9011) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.0668 data: 6.8305 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9179 (6.9403) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5589 data: 1.3663 max mem: 55573 Test: Total time: 0:00:07 (1.5748 s / it) * Acc@1 0.102 Acc@5 0.514 loss 6.926 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [26] [ 0/156] eta: 0:37:31 lr: 0.007991 min_lr: 0.007991 loss: 4.1827 (4.1827) weight_decay: 0.0500 (0.0500) time: 14.4336 data: 10.4241 max mem: 55573 Epoch: [26] [ 10/156] eta: 0:04:30 lr: 0.007991 min_lr: 0.007991 loss: 3.6017 (3.7311) weight_decay: 0.0500 (0.0500) time: 1.8540 data: 0.9480 max mem: 55573 Epoch: [26] [ 20/156] eta: 0:02:50 lr: 0.007991 min_lr: 0.007991 loss: 3.6017 (3.7997) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0003 max mem: 55573 Epoch: [26] [ 30/156] eta: 0:02:10 lr: 0.007990 min_lr: 0.007990 loss: 3.5642 (3.7310) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [26] [ 40/156] eta: 0:01:47 lr: 0.007990 min_lr: 0.007990 loss: 3.9808 (3.8342) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [26] [ 50/156] eta: 0:01:31 lr: 0.007990 min_lr: 0.007990 loss: 4.0943 (3.8017) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [26] [ 60/156] eta: 0:01:18 lr: 0.007990 min_lr: 0.007990 loss: 3.9701 (3.8550) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [26] [ 70/156] eta: 0:01:07 lr: 0.007990 min_lr: 0.007990 loss: 4.0191 (3.8829) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [26] [ 80/156] eta: 0:00:57 lr: 0.007989 min_lr: 0.007989 loss: 3.9947 (3.8781) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [26] [ 90/156] eta: 0:00:49 lr: 0.007989 min_lr: 0.007989 loss: 3.9606 (3.8818) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [26] [100/156] eta: 0:00:40 lr: 0.007989 min_lr: 0.007989 loss: 4.0368 (3.9146) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [26] [110/156] eta: 0:00:32 lr: 0.007989 min_lr: 0.007989 loss: 4.1901 (3.9314) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [26] [120/156] eta: 0:00:25 lr: 0.007988 min_lr: 0.007988 loss: 4.2891 (3.9575) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [26] [130/156] eta: 0:00:18 lr: 0.007988 min_lr: 0.007988 loss: 4.1969 (3.9571) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0010 max mem: 55573 Epoch: [26] [140/156] eta: 0:00:11 lr: 0.007988 min_lr: 0.007988 loss: 4.0673 (3.9585) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [26] [150/156] eta: 0:00:04 lr: 0.007988 min_lr: 0.007988 loss: 4.1353 (3.9609) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [26] [155/156] eta: 0:00:00 lr: 0.007988 min_lr: 0.007988 loss: 4.1353 (3.9667) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [26] Total time: 0:01:46 (0.6842 s / it) Averaged stats: lr: 0.007988 min_lr: 0.007988 loss: 4.1353 (4.0190) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.6240 (1.6240) acc1: 69.0104 (69.0104) acc5: 90.6250 (90.6250) time: 6.9646 data: 6.7276 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8326 (1.8489) acc1: 65.3646 (62.7200) acc5: 88.0208 (85.3760) time: 1.5392 data: 1.3456 max mem: 55573 Test: Total time: 0:00:07 (1.5795 s / it) * Acc@1 62.860 Acc@5 85.316 loss 1.847 Accuracy of the model on the 50000 test images: 62.9% Max accuracy: 62.86% Test: [0/5] eta: 0:00:33 loss: 6.9005 (6.9005) acc1: 0.0000 (0.0000) acc5: 0.7812 (0.7812) time: 6.7469 data: 6.5109 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9198 (6.9416) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.4951 data: 1.3023 max mem: 55573 Test: Total time: 0:00:07 (1.5134 s / it) * Acc@1 0.102 Acc@5 0.512 loss 6.927 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [27] [ 0/156] eta: 0:34:26 lr: 0.007988 min_lr: 0.007988 loss: 3.3325 (3.3325) weight_decay: 0.0500 (0.0500) time: 13.2439 data: 9.8337 max mem: 55573 Epoch: [27] [ 10/156] eta: 0:04:18 lr: 0.007987 min_lr: 0.007987 loss: 4.1658 (4.0630) weight_decay: 0.0500 (0.0500) time: 1.7681 data: 0.8945 max mem: 55573 Epoch: [27] [ 20/156] eta: 0:02:44 lr: 0.007987 min_lr: 0.007987 loss: 4.1248 (3.8720) weight_decay: 0.0500 (0.0500) time: 0.6054 data: 0.0005 max mem: 55573 Epoch: [27] [ 30/156] eta: 0:02:07 lr: 0.007987 min_lr: 0.007987 loss: 3.8229 (3.9291) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [27] [ 40/156] eta: 0:01:45 lr: 0.007987 min_lr: 0.007987 loss: 4.1415 (3.9703) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [27] [ 50/156] eta: 0:01:29 lr: 0.007987 min_lr: 0.007987 loss: 4.1752 (3.9732) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [27] [ 60/156] eta: 0:01:17 lr: 0.007986 min_lr: 0.007986 loss: 3.8146 (3.9468) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [27] [ 70/156] eta: 0:01:06 lr: 0.007986 min_lr: 0.007986 loss: 3.9548 (3.9472) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [27] [ 80/156] eta: 0:00:57 lr: 0.007986 min_lr: 0.007986 loss: 3.9548 (3.9479) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [27] [ 90/156] eta: 0:00:48 lr: 0.007986 min_lr: 0.007986 loss: 3.9363 (3.9263) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [27] [100/156] eta: 0:00:40 lr: 0.007985 min_lr: 0.007985 loss: 4.1559 (3.9604) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [27] [110/156] eta: 0:00:32 lr: 0.007985 min_lr: 0.007985 loss: 4.2598 (3.9713) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [27] [120/156] eta: 0:00:25 lr: 0.007985 min_lr: 0.007985 loss: 4.1621 (3.9666) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0005 max mem: 55573 Epoch: [27] [130/156] eta: 0:00:17 lr: 0.007985 min_lr: 0.007985 loss: 4.0009 (3.9503) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0010 max mem: 55573 Epoch: [27] [140/156] eta: 0:00:10 lr: 0.007984 min_lr: 0.007984 loss: 4.0009 (3.9501) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0008 max mem: 55573 Epoch: [27] [150/156] eta: 0:00:04 lr: 0.007984 min_lr: 0.007984 loss: 4.1844 (3.9539) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [27] [155/156] eta: 0:00:00 lr: 0.007984 min_lr: 0.007984 loss: 4.2664 (3.9554) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [27] Total time: 0:01:45 (0.6792 s / it) Averaged stats: lr: 0.007984 min_lr: 0.007984 loss: 4.2664 (4.0092) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.6575 (1.6575) acc1: 67.5781 (67.5781) acc5: 91.1458 (91.1458) time: 6.8897 data: 6.6525 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6827 (1.7312) acc1: 66.0156 (62.4960) acc5: 89.3229 (86.3040) time: 1.5243 data: 1.3306 max mem: 55573 Test: Total time: 0:00:07 (1.5569 s / it) * Acc@1 63.330 Acc@5 85.988 loss 1.726 Accuracy of the model on the 50000 test images: 63.3% Max accuracy: 63.33% Test: [0/5] eta: 0:00:35 loss: 6.9005 (6.9005) acc1: 0.0000 (0.0000) acc5: 0.7812 (0.7812) time: 7.0139 data: 6.7779 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9213 (6.9428) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5481 data: 1.3557 max mem: 55573 Test: Total time: 0:00:07 (1.5626 s / it) * Acc@1 0.102 Acc@5 0.514 loss 6.927 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [28] [ 0/156] eta: 0:31:47 lr: 0.007984 min_lr: 0.007984 loss: 4.0935 (4.0935) weight_decay: 0.0500 (0.0500) time: 12.2290 data: 9.6337 max mem: 55573 Epoch: [28] [ 10/156] eta: 0:04:16 lr: 0.007984 min_lr: 0.007984 loss: 4.0888 (3.9062) weight_decay: 0.0500 (0.0500) time: 1.7573 data: 0.8762 max mem: 55573 Epoch: [28] [ 20/156] eta: 0:02:43 lr: 0.007983 min_lr: 0.007983 loss: 4.0571 (3.9362) weight_decay: 0.0500 (0.0500) time: 0.6503 data: 0.0004 max mem: 55573 Epoch: [28] [ 30/156] eta: 0:02:06 lr: 0.007983 min_lr: 0.007983 loss: 4.1829 (4.0220) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0003 max mem: 55573 Epoch: [28] [ 40/156] eta: 0:01:44 lr: 0.007983 min_lr: 0.007983 loss: 4.1829 (4.0459) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [28] [ 50/156] eta: 0:01:29 lr: 0.007983 min_lr: 0.007983 loss: 4.0589 (3.9911) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [28] [ 60/156] eta: 0:01:16 lr: 0.007982 min_lr: 0.007982 loss: 3.9352 (3.9654) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [28] [ 70/156] eta: 0:01:06 lr: 0.007982 min_lr: 0.007982 loss: 4.1539 (3.9834) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [28] [ 80/156] eta: 0:00:56 lr: 0.007982 min_lr: 0.007982 loss: 4.2916 (3.9999) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [28] [ 90/156] eta: 0:00:48 lr: 0.007981 min_lr: 0.007981 loss: 4.2336 (3.9624) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [28] [100/156] eta: 0:00:40 lr: 0.007981 min_lr: 0.007981 loss: 4.1373 (3.9947) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [28] [110/156] eta: 0:00:32 lr: 0.007981 min_lr: 0.007981 loss: 4.3270 (4.0037) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [28] [120/156] eta: 0:00:25 lr: 0.007981 min_lr: 0.007981 loss: 4.1607 (3.9957) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [28] [130/156] eta: 0:00:17 lr: 0.007980 min_lr: 0.007980 loss: 4.0847 (3.9925) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0010 max mem: 55573 Epoch: [28] [140/156] eta: 0:00:10 lr: 0.007980 min_lr: 0.007980 loss: 4.1724 (3.9989) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0008 max mem: 55573 Epoch: [28] [150/156] eta: 0:00:04 lr: 0.007980 min_lr: 0.007980 loss: 4.2949 (3.9925) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [28] [155/156] eta: 0:00:00 lr: 0.007980 min_lr: 0.007980 loss: 4.2949 (3.9937) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [28] Total time: 0:01:45 (0.6777 s / it) Averaged stats: lr: 0.007980 min_lr: 0.007980 loss: 4.2949 (4.0063) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.5828 (1.5828) acc1: 66.1458 (66.1458) acc5: 89.9740 (89.9740) time: 7.1002 data: 6.8624 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7777 (1.7607) acc1: 63.0208 (60.4800) acc5: 87.7604 (84.2240) time: 1.5665 data: 1.3726 max mem: 55573 Test: Total time: 0:00:08 (1.6147 s / it) * Acc@1 61.036 Acc@5 84.250 loss 1.779 Accuracy of the model on the 50000 test images: 61.0% Max accuracy: 63.33% Test: [0/5] eta: 0:00:34 loss: 6.9007 (6.9007) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.9104 data: 6.6744 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9228 (6.9443) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5274 data: 1.3350 max mem: 55573 Test: Total time: 0:00:07 (1.5441 s / it) * Acc@1 0.102 Acc@5 0.514 loss 6.928 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [29] [ 0/156] eta: 0:32:14 lr: 0.007980 min_lr: 0.007980 loss: 2.8638 (2.8638) weight_decay: 0.0500 (0.0500) time: 12.3998 data: 8.4643 max mem: 55573 Epoch: [29] [ 10/156] eta: 0:04:16 lr: 0.007979 min_lr: 0.007979 loss: 4.2574 (4.0782) weight_decay: 0.0500 (0.0500) time: 1.7585 data: 0.7699 max mem: 55573 Epoch: [29] [ 20/156] eta: 0:02:43 lr: 0.007979 min_lr: 0.007979 loss: 4.1295 (3.9222) weight_decay: 0.0500 (0.0500) time: 0.6430 data: 0.0004 max mem: 55573 Epoch: [29] [ 30/156] eta: 0:02:06 lr: 0.007979 min_lr: 0.007979 loss: 4.0121 (4.0040) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [29] [ 40/156] eta: 0:01:44 lr: 0.007978 min_lr: 0.007978 loss: 4.2753 (4.0268) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [29] [ 50/156] eta: 0:01:29 lr: 0.007978 min_lr: 0.007978 loss: 4.1676 (3.9948) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [29] [ 60/156] eta: 0:01:16 lr: 0.007978 min_lr: 0.007978 loss: 4.1818 (4.0319) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [29] [ 70/156] eta: 0:01:06 lr: 0.007978 min_lr: 0.007978 loss: 4.1818 (4.0289) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [29] [ 80/156] eta: 0:00:56 lr: 0.007977 min_lr: 0.007977 loss: 4.0584 (4.0314) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [29] [ 90/156] eta: 0:00:48 lr: 0.007977 min_lr: 0.007977 loss: 4.0538 (4.0211) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [29] [100/156] eta: 0:00:40 lr: 0.007977 min_lr: 0.007977 loss: 3.8530 (4.0079) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [29] [110/156] eta: 0:00:32 lr: 0.007976 min_lr: 0.007976 loss: 4.1211 (4.0151) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [29] [120/156] eta: 0:00:25 lr: 0.007976 min_lr: 0.007976 loss: 4.1335 (4.0208) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [29] [130/156] eta: 0:00:17 lr: 0.007976 min_lr: 0.007976 loss: 4.1085 (4.0257) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0011 max mem: 55573 Epoch: [29] [140/156] eta: 0:00:10 lr: 0.007975 min_lr: 0.007975 loss: 4.1844 (4.0324) weight_decay: 0.0500 (0.0500) time: 0.5854 data: 0.0009 max mem: 55573 Epoch: [29] [150/156] eta: 0:00:04 lr: 0.007975 min_lr: 0.007975 loss: 4.1844 (4.0425) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [29] [155/156] eta: 0:00:00 lr: 0.007975 min_lr: 0.007975 loss: 4.0908 (4.0388) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [29] Total time: 0:01:45 (0.6782 s / it) Averaged stats: lr: 0.007975 min_lr: 0.007975 loss: 4.0908 (3.9740) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.6218 (1.6218) acc1: 69.9219 (69.9219) acc5: 91.1458 (91.1458) time: 6.8469 data: 6.6100 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7838 (1.8727) acc1: 67.8385 (64.3840) acc5: 88.9323 (86.1120) time: 1.5158 data: 1.3221 max mem: 55573 Test: Total time: 0:00:07 (1.5632 s / it) * Acc@1 63.440 Acc@5 85.954 loss 1.905 Accuracy of the model on the 50000 test images: 63.4% Max accuracy: 63.44% Test: [0/5] eta: 0:00:35 loss: 6.9012 (6.9012) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.1200 data: 6.8840 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9248 (6.9460) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5692 data: 1.3769 max mem: 55573 Test: Total time: 0:00:07 (1.5860 s / it) * Acc@1 0.102 Acc@5 0.510 loss 6.929 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [30] [ 0/156] eta: 0:35:36 lr: 0.007975 min_lr: 0.007975 loss: 4.0529 (4.0529) weight_decay: 0.0500 (0.0500) time: 13.6966 data: 11.5007 max mem: 55573 Epoch: [30] [ 10/156] eta: 0:04:20 lr: 0.007975 min_lr: 0.007975 loss: 4.1267 (4.0341) weight_decay: 0.0500 (0.0500) time: 1.7836 data: 1.0459 max mem: 55573 Epoch: [30] [ 20/156] eta: 0:02:45 lr: 0.007974 min_lr: 0.007974 loss: 4.1267 (4.1404) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [30] [ 30/156] eta: 0:02:07 lr: 0.007974 min_lr: 0.007974 loss: 4.0204 (4.0563) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [30] [ 40/156] eta: 0:01:45 lr: 0.007974 min_lr: 0.007974 loss: 4.0764 (4.0546) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [30] [ 50/156] eta: 0:01:29 lr: 0.007973 min_lr: 0.007973 loss: 4.0764 (3.9942) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [30] [ 60/156] eta: 0:01:17 lr: 0.007973 min_lr: 0.007973 loss: 4.0317 (4.0177) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [30] [ 70/156] eta: 0:01:06 lr: 0.007973 min_lr: 0.007973 loss: 4.1959 (4.0045) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [30] [ 80/156] eta: 0:00:57 lr: 0.007972 min_lr: 0.007972 loss: 4.1156 (3.9785) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [30] [ 90/156] eta: 0:00:48 lr: 0.007972 min_lr: 0.007972 loss: 3.9550 (3.9730) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [30] [100/156] eta: 0:00:40 lr: 0.007972 min_lr: 0.007972 loss: 3.9598 (3.9710) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [30] [110/156] eta: 0:00:32 lr: 0.007971 min_lr: 0.007971 loss: 4.0934 (3.9747) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [30] [120/156] eta: 0:00:25 lr: 0.007971 min_lr: 0.007971 loss: 4.1383 (3.9703) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [30] [130/156] eta: 0:00:17 lr: 0.007970 min_lr: 0.007970 loss: 4.0658 (3.9722) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0015 max mem: 55573 Epoch: [30] [140/156] eta: 0:00:10 lr: 0.007970 min_lr: 0.007970 loss: 4.1089 (3.9720) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0013 max mem: 55573 Epoch: [30] [150/156] eta: 0:00:04 lr: 0.007970 min_lr: 0.007970 loss: 3.9966 (3.9596) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [30] [155/156] eta: 0:00:00 lr: 0.007970 min_lr: 0.007970 loss: 3.9966 (3.9674) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [30] Total time: 0:01:46 (0.6797 s / it) Averaged stats: lr: 0.007970 min_lr: 0.007970 loss: 3.9966 (3.9523) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.5745 (1.5745) acc1: 67.9688 (67.9688) acc5: 89.8438 (89.8438) time: 7.1304 data: 6.8933 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6442 (1.7811) acc1: 66.4062 (62.0160) acc5: 89.8438 (84.8640) time: 1.5723 data: 1.3787 max mem: 55573 Test: Total time: 0:00:08 (1.6146 s / it) * Acc@1 62.216 Acc@5 85.046 loss 1.791 Accuracy of the model on the 50000 test images: 62.2% Max accuracy: 63.44% Test: [0/5] eta: 0:00:34 loss: 6.9019 (6.9019) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.9832 data: 6.7471 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9263 (6.9476) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5420 data: 1.3495 max mem: 55573 Test: Total time: 0:00:07 (1.5613 s / it) * Acc@1 0.102 Acc@5 0.510 loss 6.929 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [31] [ 0/156] eta: 0:33:38 lr: 0.007970 min_lr: 0.007970 loss: 4.7284 (4.7284) weight_decay: 0.0500 (0.0500) time: 12.9390 data: 11.1059 max mem: 55573 Epoch: [31] [ 10/156] eta: 0:04:14 lr: 0.007969 min_lr: 0.007969 loss: 4.1389 (3.9806) weight_decay: 0.0500 (0.0500) time: 1.7437 data: 1.0100 max mem: 55573 Epoch: [31] [ 20/156] eta: 0:02:42 lr: 0.007969 min_lr: 0.007969 loss: 4.0278 (3.8391) weight_decay: 0.0500 (0.0500) time: 0.6070 data: 0.0004 max mem: 55573 Epoch: [31] [ 30/156] eta: 0:02:05 lr: 0.007969 min_lr: 0.007969 loss: 3.4589 (3.7237) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [31] [ 40/156] eta: 0:01:44 lr: 0.007968 min_lr: 0.007968 loss: 3.6847 (3.7563) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [31] [ 50/156] eta: 0:01:28 lr: 0.007968 min_lr: 0.007968 loss: 3.9102 (3.7863) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [31] [ 60/156] eta: 0:01:16 lr: 0.007967 min_lr: 0.007967 loss: 3.8521 (3.7736) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [31] [ 70/156] eta: 0:01:06 lr: 0.007967 min_lr: 0.007967 loss: 3.8994 (3.8178) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [31] [ 80/156] eta: 0:00:56 lr: 0.007967 min_lr: 0.007967 loss: 4.1076 (3.8242) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [31] [ 90/156] eta: 0:00:48 lr: 0.007966 min_lr: 0.007966 loss: 3.9521 (3.8311) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [31] [100/156] eta: 0:00:40 lr: 0.007966 min_lr: 0.007966 loss: 4.1410 (3.8707) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [31] [110/156] eta: 0:00:32 lr: 0.007966 min_lr: 0.007966 loss: 4.2060 (3.8832) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [31] [120/156] eta: 0:00:25 lr: 0.007965 min_lr: 0.007965 loss: 4.1603 (3.8879) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [31] [130/156] eta: 0:00:17 lr: 0.007965 min_lr: 0.007965 loss: 3.8358 (3.8804) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0010 max mem: 55573 Epoch: [31] [140/156] eta: 0:00:10 lr: 0.007964 min_lr: 0.007964 loss: 4.0148 (3.8964) weight_decay: 0.0500 (0.0500) time: 0.5876 data: 0.0008 max mem: 55573 Epoch: [31] [150/156] eta: 0:00:04 lr: 0.007964 min_lr: 0.007964 loss: 4.2039 (3.9153) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [31] [155/156] eta: 0:00:00 lr: 0.007964 min_lr: 0.007964 loss: 4.0914 (3.9100) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [31] Total time: 0:01:45 (0.6774 s / it) Averaged stats: lr: 0.007964 min_lr: 0.007964 loss: 4.0914 (3.9297) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3286 (1.3286) acc1: 71.7448 (71.7448) acc5: 91.0156 (91.0156) time: 6.9226 data: 6.6855 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4694 (1.5508) acc1: 67.7083 (63.7440) acc5: 91.0156 (86.4320) time: 1.5299 data: 1.3372 max mem: 55573 Test: Total time: 0:00:07 (1.5454 s / it) * Acc@1 64.440 Acc@5 86.498 loss 1.569 Accuracy of the model on the 50000 test images: 64.4% Max accuracy: 64.44% Test: [0/5] eta: 0:00:33 loss: 6.9027 (6.9027) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.6443 data: 6.4077 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9280 (6.9492) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.4760 data: 1.2832 max mem: 55573 Test: Total time: 0:00:07 (1.4937 s / it) * Acc@1 0.102 Acc@5 0.514 loss 6.930 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [32] [ 0/156] eta: 0:32:50 lr: 0.007964 min_lr: 0.007964 loss: 3.8302 (3.8302) weight_decay: 0.0500 (0.0500) time: 12.6286 data: 11.3443 max mem: 55573 Epoch: [32] [ 10/156] eta: 0:04:11 lr: 0.007963 min_lr: 0.007963 loss: 4.1203 (4.0125) weight_decay: 0.0500 (0.0500) time: 1.7224 data: 1.0318 max mem: 55573 Epoch: [32] [ 20/156] eta: 0:02:40 lr: 0.007963 min_lr: 0.007963 loss: 4.1203 (4.0098) weight_decay: 0.0500 (0.0500) time: 0.6111 data: 0.0005 max mem: 55573 Epoch: [32] [ 30/156] eta: 0:02:04 lr: 0.007963 min_lr: 0.007963 loss: 3.9810 (3.9538) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [32] [ 40/156] eta: 0:01:43 lr: 0.007962 min_lr: 0.007962 loss: 3.8368 (3.9084) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [32] [ 50/156] eta: 0:01:28 lr: 0.007962 min_lr: 0.007962 loss: 3.9830 (3.9042) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [32] [ 60/156] eta: 0:01:16 lr: 0.007961 min_lr: 0.007961 loss: 3.9989 (3.8935) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [32] [ 70/156] eta: 0:01:05 lr: 0.007961 min_lr: 0.007961 loss: 4.1450 (3.9369) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [32] [ 80/156] eta: 0:00:56 lr: 0.007961 min_lr: 0.007961 loss: 4.1725 (3.9116) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [32] [ 90/156] eta: 0:00:47 lr: 0.007960 min_lr: 0.007960 loss: 4.0564 (3.9229) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [32] [100/156] eta: 0:00:39 lr: 0.007960 min_lr: 0.007960 loss: 3.9707 (3.9001) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [32] [110/156] eta: 0:00:32 lr: 0.007959 min_lr: 0.007959 loss: 3.7249 (3.9028) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [32] [120/156] eta: 0:00:24 lr: 0.007959 min_lr: 0.007959 loss: 3.8354 (3.9063) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [32] [130/156] eta: 0:00:17 lr: 0.007959 min_lr: 0.007959 loss: 3.8354 (3.8996) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0010 max mem: 55573 Epoch: [32] [140/156] eta: 0:00:10 lr: 0.007958 min_lr: 0.007958 loss: 4.0086 (3.9164) weight_decay: 0.0500 (0.0500) time: 0.5879 data: 0.0009 max mem: 55573 Epoch: [32] [150/156] eta: 0:00:04 lr: 0.007958 min_lr: 0.007958 loss: 4.2393 (3.9316) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [32] [155/156] eta: 0:00:00 lr: 0.007958 min_lr: 0.007958 loss: 4.1997 (3.9189) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [32] Total time: 0:01:45 (0.6754 s / it) Averaged stats: lr: 0.007958 min_lr: 0.007958 loss: 4.1997 (3.9383) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.5006 (1.5006) acc1: 70.9635 (70.9635) acc5: 90.1042 (90.1042) time: 7.2652 data: 7.0278 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5962 (1.6794) acc1: 64.9740 (63.2960) acc5: 89.4531 (86.0800) time: 1.5995 data: 1.4056 max mem: 55573 Test: Total time: 0:00:08 (1.6575 s / it) * Acc@1 63.570 Acc@5 86.108 loss 1.686 Accuracy of the model on the 50000 test images: 63.6% Max accuracy: 64.44% Test: [0/5] eta: 0:00:36 loss: 6.9029 (6.9029) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.2051 data: 6.9693 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9301 (6.9510) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5863 data: 1.3940 max mem: 55573 Test: Total time: 0:00:08 (1.6052 s / it) * Acc@1 0.100 Acc@5 0.514 loss 6.931 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [33] [ 0/156] eta: 0:32:20 lr: 0.007958 min_lr: 0.007958 loss: 4.5243 (4.5243) weight_decay: 0.0500 (0.0500) time: 12.4375 data: 8.7119 max mem: 55573 Epoch: [33] [ 10/156] eta: 0:04:13 lr: 0.007957 min_lr: 0.007957 loss: 4.1227 (4.0923) weight_decay: 0.0500 (0.0500) time: 1.7332 data: 0.7924 max mem: 55573 Epoch: [33] [ 20/156] eta: 0:02:41 lr: 0.007957 min_lr: 0.007957 loss: 4.0931 (4.0945) weight_decay: 0.0500 (0.0500) time: 0.6270 data: 0.0004 max mem: 55573 Epoch: [33] [ 30/156] eta: 0:02:05 lr: 0.007956 min_lr: 0.007956 loss: 4.0766 (4.0107) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [33] [ 40/156] eta: 0:01:44 lr: 0.007956 min_lr: 0.007956 loss: 3.9179 (3.9766) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [33] [ 50/156] eta: 0:01:28 lr: 0.007955 min_lr: 0.007955 loss: 3.8618 (3.9539) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [33] [ 60/156] eta: 0:01:16 lr: 0.007955 min_lr: 0.007955 loss: 4.1914 (3.9846) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [33] [ 70/156] eta: 0:01:05 lr: 0.007955 min_lr: 0.007955 loss: 4.1528 (4.0042) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [33] [ 80/156] eta: 0:00:56 lr: 0.007954 min_lr: 0.007954 loss: 3.9981 (3.9654) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [33] [ 90/156] eta: 0:00:48 lr: 0.007954 min_lr: 0.007954 loss: 4.1180 (3.9983) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [33] [100/156] eta: 0:00:40 lr: 0.007953 min_lr: 0.007953 loss: 4.2165 (3.9850) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [33] [110/156] eta: 0:00:32 lr: 0.007953 min_lr: 0.007953 loss: 4.1041 (3.9938) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [33] [120/156] eta: 0:00:24 lr: 0.007952 min_lr: 0.007952 loss: 4.1489 (3.9872) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [33] [130/156] eta: 0:00:17 lr: 0.007952 min_lr: 0.007952 loss: 3.9516 (3.9865) weight_decay: 0.0500 (0.0500) time: 0.5881 data: 0.0010 max mem: 55573 Epoch: [33] [140/156] eta: 0:00:10 lr: 0.007951 min_lr: 0.007951 loss: 4.1491 (3.9963) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0008 max mem: 55573 Epoch: [33] [150/156] eta: 0:00:04 lr: 0.007951 min_lr: 0.007951 loss: 4.1722 (3.9998) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [33] [155/156] eta: 0:00:00 lr: 0.007951 min_lr: 0.007951 loss: 4.1764 (4.0051) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [33] Total time: 0:01:45 (0.6757 s / it) Averaged stats: lr: 0.007951 min_lr: 0.007951 loss: 4.1764 (3.9170) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.4096 (1.4096) acc1: 69.6615 (69.6615) acc5: 90.4948 (90.4948) time: 6.8967 data: 6.6595 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6456 (1.6381) acc1: 65.8854 (62.9760) acc5: 88.8021 (86.3360) time: 1.5258 data: 1.3320 max mem: 55573 Test: Total time: 0:00:07 (1.5694 s / it) * Acc@1 64.274 Acc@5 86.562 loss 1.614 Accuracy of the model on the 50000 test images: 64.3% Max accuracy: 64.44% Test: [0/5] eta: 0:00:36 loss: 6.9028 (6.9028) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.3576 data: 7.1211 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9323 (6.9530) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.6174 data: 1.4243 max mem: 55573 Test: Total time: 0:00:08 (1.6356 s / it) * Acc@1 0.100 Acc@5 0.508 loss 6.933 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [34] [ 0/156] eta: 0:30:39 lr: 0.007951 min_lr: 0.007951 loss: 3.7036 (3.7036) weight_decay: 0.0500 (0.0500) time: 11.7933 data: 9.4561 max mem: 55573 Epoch: [34] [ 10/156] eta: 0:04:18 lr: 0.007950 min_lr: 0.007950 loss: 3.7036 (3.6902) weight_decay: 0.0500 (0.0500) time: 1.7734 data: 0.8601 max mem: 55573 Epoch: [34] [ 20/156] eta: 0:02:44 lr: 0.007950 min_lr: 0.007950 loss: 3.8188 (3.7520) weight_decay: 0.0500 (0.0500) time: 0.6807 data: 0.0005 max mem: 55573 Epoch: [34] [ 30/156] eta: 0:02:07 lr: 0.007949 min_lr: 0.007949 loss: 3.8188 (3.7398) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [34] [ 40/156] eta: 0:01:45 lr: 0.007949 min_lr: 0.007949 loss: 4.0815 (3.8101) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [34] [ 50/156] eta: 0:01:29 lr: 0.007948 min_lr: 0.007948 loss: 4.1681 (3.8586) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [34] [ 60/156] eta: 0:01:17 lr: 0.007948 min_lr: 0.007948 loss: 3.9746 (3.8518) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0005 max mem: 55573 Epoch: [34] [ 70/156] eta: 0:01:06 lr: 0.007948 min_lr: 0.007948 loss: 3.7215 (3.8419) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0005 max mem: 55573 Epoch: [34] [ 80/156] eta: 0:00:57 lr: 0.007947 min_lr: 0.007947 loss: 3.9190 (3.8360) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [34] [ 90/156] eta: 0:00:48 lr: 0.007947 min_lr: 0.007947 loss: 3.9190 (3.8383) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [34] [100/156] eta: 0:00:40 lr: 0.007946 min_lr: 0.007946 loss: 4.0646 (3.8632) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [34] [110/156] eta: 0:00:32 lr: 0.007946 min_lr: 0.007946 loss: 4.1066 (3.8711) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [34] [120/156] eta: 0:00:25 lr: 0.007945 min_lr: 0.007945 loss: 3.9027 (3.8476) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [34] [130/156] eta: 0:00:17 lr: 0.007945 min_lr: 0.007945 loss: 3.9121 (3.8491) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0010 max mem: 55573 Epoch: [34] [140/156] eta: 0:00:10 lr: 0.007944 min_lr: 0.007944 loss: 4.1295 (3.8505) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0009 max mem: 55573 Epoch: [34] [150/156] eta: 0:00:04 lr: 0.007944 min_lr: 0.007944 loss: 3.9719 (3.8579) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [34] [155/156] eta: 0:00:00 lr: 0.007944 min_lr: 0.007944 loss: 4.1492 (3.8588) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [34] Total time: 0:01:45 (0.6787 s / it) Averaged stats: lr: 0.007944 min_lr: 0.007944 loss: 4.1492 (3.9114) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.5540 (1.5540) acc1: 70.5729 (70.5729) acc5: 90.7552 (90.7552) time: 6.7660 data: 6.5281 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6123 (1.6995) acc1: 66.7969 (63.9040) acc5: 89.7135 (86.0800) time: 1.4998 data: 1.3057 max mem: 55573 Test: Total time: 0:00:07 (1.5436 s / it) * Acc@1 64.036 Acc@5 86.398 loss 1.683 Accuracy of the model on the 50000 test images: 64.0% Max accuracy: 64.44% Test: [0/5] eta: 0:00:35 loss: 6.9027 (6.9027) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.0745 data: 6.8386 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9350 (6.9551) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5601 data: 1.3678 max mem: 55573 Test: Total time: 0:00:07 (1.5793 s / it) * Acc@1 0.100 Acc@5 0.506 loss 6.934 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [35] [ 0/156] eta: 0:33:20 lr: 0.007943 min_lr: 0.007943 loss: 4.3675 (4.3675) weight_decay: 0.0500 (0.0500) time: 12.8215 data: 10.3399 max mem: 55573 Epoch: [35] [ 10/156] eta: 0:04:26 lr: 0.007943 min_lr: 0.007943 loss: 4.1266 (4.0783) weight_decay: 0.0500 (0.0500) time: 1.8232 data: 0.9404 max mem: 55573 Epoch: [35] [ 20/156] eta: 0:02:48 lr: 0.007943 min_lr: 0.007943 loss: 4.0234 (3.9470) weight_decay: 0.0500 (0.0500) time: 0.6564 data: 0.0003 max mem: 55573 Epoch: [35] [ 30/156] eta: 0:02:09 lr: 0.007942 min_lr: 0.007942 loss: 3.8776 (3.8948) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0003 max mem: 55573 Epoch: [35] [ 40/156] eta: 0:01:46 lr: 0.007942 min_lr: 0.007942 loss: 3.8776 (3.8647) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [35] [ 50/156] eta: 0:01:30 lr: 0.007941 min_lr: 0.007941 loss: 4.0763 (3.9005) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [35] [ 60/156] eta: 0:01:17 lr: 0.007941 min_lr: 0.007941 loss: 4.0591 (3.8974) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [35] [ 70/156] eta: 0:01:07 lr: 0.007940 min_lr: 0.007940 loss: 3.8981 (3.8903) weight_decay: 0.0500 (0.0500) time: 0.5881 data: 0.0005 max mem: 55573 Epoch: [35] [ 80/156] eta: 0:00:57 lr: 0.007940 min_lr: 0.007940 loss: 3.8008 (3.8741) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [35] [ 90/156] eta: 0:00:48 lr: 0.007939 min_lr: 0.007939 loss: 3.7913 (3.8355) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [35] [100/156] eta: 0:00:40 lr: 0.007939 min_lr: 0.007939 loss: 3.8418 (3.8276) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [35] [110/156] eta: 0:00:32 lr: 0.007938 min_lr: 0.007938 loss: 3.9976 (3.8530) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [35] [120/156] eta: 0:00:25 lr: 0.007938 min_lr: 0.007938 loss: 4.0409 (3.8464) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [35] [130/156] eta: 0:00:18 lr: 0.007937 min_lr: 0.007937 loss: 4.0739 (3.8653) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0011 max mem: 55573 Epoch: [35] [140/156] eta: 0:00:10 lr: 0.007937 min_lr: 0.007937 loss: 4.0575 (3.8620) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0009 max mem: 55573 Epoch: [35] [150/156] eta: 0:00:04 lr: 0.007936 min_lr: 0.007936 loss: 4.0010 (3.8606) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [35] [155/156] eta: 0:00:00 lr: 0.007936 min_lr: 0.007936 loss: 4.0852 (3.8594) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [35] Total time: 0:01:46 (0.6822 s / it) Averaged stats: lr: 0.007936 min_lr: 0.007936 loss: 4.0852 (3.8772) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.4541 (1.4541) acc1: 68.7500 (68.7500) acc5: 91.2760 (91.2760) time: 7.1459 data: 6.9085 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6289 (1.7222) acc1: 66.4062 (63.5200) acc5: 88.6792 (86.5600) time: 1.5755 data: 1.3818 max mem: 55573 Test: Total time: 0:00:08 (1.6173 s / it) * Acc@1 63.878 Acc@5 86.618 loss 1.719 Accuracy of the model on the 50000 test images: 63.9% Max accuracy: 64.44% Test: [0/5] eta: 0:00:36 loss: 6.9032 (6.9032) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.3762 data: 7.1402 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9375 (6.9580) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.6205 data: 1.4282 max mem: 55573 Test: Total time: 0:00:08 (1.6398 s / it) * Acc@1 0.098 Acc@5 0.512 loss 6.936 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [36] [ 0/156] eta: 0:39:26 lr: 0.007936 min_lr: 0.007936 loss: 3.1379 (3.1379) weight_decay: 0.0500 (0.0500) time: 15.1713 data: 9.4588 max mem: 55573 Epoch: [36] [ 10/156] eta: 0:04:41 lr: 0.007935 min_lr: 0.007935 loss: 4.1412 (4.0353) weight_decay: 0.0500 (0.0500) time: 1.9300 data: 0.8603 max mem: 55573 Epoch: [36] [ 20/156] eta: 0:02:55 lr: 0.007935 min_lr: 0.007935 loss: 4.1412 (4.0850) weight_decay: 0.0500 (0.0500) time: 0.5987 data: 0.0004 max mem: 55573 Epoch: [36] [ 30/156] eta: 0:02:14 lr: 0.007934 min_lr: 0.007934 loss: 4.1199 (3.9811) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [36] [ 40/156] eta: 0:01:50 lr: 0.007934 min_lr: 0.007934 loss: 3.8677 (3.9185) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [36] [ 50/156] eta: 0:01:33 lr: 0.007933 min_lr: 0.007933 loss: 3.8486 (3.9306) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [36] [ 60/156] eta: 0:01:19 lr: 0.007933 min_lr: 0.007933 loss: 3.8431 (3.9116) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [36] [ 70/156] eta: 0:01:08 lr: 0.007932 min_lr: 0.007932 loss: 4.0550 (3.9217) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [36] [ 80/156] eta: 0:00:58 lr: 0.007932 min_lr: 0.007932 loss: 4.0596 (3.9311) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [36] [ 90/156] eta: 0:00:49 lr: 0.007931 min_lr: 0.007931 loss: 4.1431 (3.9288) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [36] [100/156] eta: 0:00:41 lr: 0.007930 min_lr: 0.007930 loss: 3.9484 (3.9044) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [36] [110/156] eta: 0:00:33 lr: 0.007930 min_lr: 0.007930 loss: 3.7784 (3.8993) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [36] [120/156] eta: 0:00:25 lr: 0.007929 min_lr: 0.007929 loss: 3.7784 (3.8796) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [36] [130/156] eta: 0:00:18 lr: 0.007929 min_lr: 0.007929 loss: 3.9321 (3.8808) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0010 max mem: 55573 Epoch: [36] [140/156] eta: 0:00:11 lr: 0.007928 min_lr: 0.007928 loss: 4.0445 (3.8880) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0009 max mem: 55573 Epoch: [36] [150/156] eta: 0:00:04 lr: 0.007928 min_lr: 0.007928 loss: 4.0653 (3.8918) weight_decay: 0.0500 (0.0500) time: 0.5808 data: 0.0001 max mem: 55573 Epoch: [36] [155/156] eta: 0:00:00 lr: 0.007928 min_lr: 0.007928 loss: 3.9706 (3.8935) weight_decay: 0.0500 (0.0500) time: 0.5811 data: 0.0001 max mem: 55573 Epoch: [36] Total time: 0:01:47 (0.6909 s / it) Averaged stats: lr: 0.007928 min_lr: 0.007928 loss: 3.9706 (3.8850) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.6926 (1.6926) acc1: 69.6615 (69.6615) acc5: 90.8854 (90.8854) time: 7.2076 data: 6.9701 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9794 (2.0492) acc1: 67.0573 (63.1040) acc5: 88.9323 (85.4080) time: 1.5877 data: 1.3941 max mem: 55573 Test: Total time: 0:00:08 (1.6266 s / it) * Acc@1 63.326 Acc@5 85.734 loss 2.063 Accuracy of the model on the 50000 test images: 63.3% Max accuracy: 64.44% Test: [0/5] eta: 0:00:32 loss: 6.9048 (6.9048) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 6.5594 data: 6.3234 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9398 (6.9601) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.4612 data: 1.2688 max mem: 55573 Test: Total time: 0:00:07 (1.4804 s / it) * Acc@1 0.100 Acc@5 0.516 loss 6.937 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [37] [ 0/156] eta: 0:36:02 lr: 0.007927 min_lr: 0.007927 loss: 2.9370 (2.9370) weight_decay: 0.0500 (0.0500) time: 13.8621 data: 9.5656 max mem: 55573 Epoch: [37] [ 10/156] eta: 0:04:25 lr: 0.007927 min_lr: 0.007927 loss: 3.6975 (3.7159) weight_decay: 0.0500 (0.0500) time: 1.8167 data: 0.8700 max mem: 55573 Epoch: [37] [ 20/156] eta: 0:02:47 lr: 0.007926 min_lr: 0.007926 loss: 3.8209 (3.7205) weight_decay: 0.0500 (0.0500) time: 0.6013 data: 0.0004 max mem: 55573 Epoch: [37] [ 30/156] eta: 0:02:09 lr: 0.007926 min_lr: 0.007926 loss: 3.7957 (3.7932) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [37] [ 40/156] eta: 0:01:46 lr: 0.007925 min_lr: 0.007925 loss: 3.7957 (3.8185) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [37] [ 50/156] eta: 0:01:30 lr: 0.007925 min_lr: 0.007925 loss: 4.1329 (3.8423) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [37] [ 60/156] eta: 0:01:17 lr: 0.007924 min_lr: 0.007924 loss: 4.1590 (3.8739) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [37] [ 70/156] eta: 0:01:07 lr: 0.007924 min_lr: 0.007924 loss: 3.9903 (3.8530) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [37] [ 80/156] eta: 0:00:57 lr: 0.007923 min_lr: 0.007923 loss: 3.8599 (3.8454) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0006 max mem: 55573 Epoch: [37] [ 90/156] eta: 0:00:48 lr: 0.007922 min_lr: 0.007922 loss: 4.0552 (3.8810) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [37] [100/156] eta: 0:00:40 lr: 0.007922 min_lr: 0.007922 loss: 4.2634 (3.9202) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [37] [110/156] eta: 0:00:32 lr: 0.007921 min_lr: 0.007921 loss: 4.1435 (3.9271) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [37] [120/156] eta: 0:00:25 lr: 0.007921 min_lr: 0.007921 loss: 4.1397 (3.9294) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [37] [130/156] eta: 0:00:18 lr: 0.007920 min_lr: 0.007920 loss: 3.8708 (3.8953) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0013 max mem: 55573 Epoch: [37] [140/156] eta: 0:00:10 lr: 0.007920 min_lr: 0.007920 loss: 3.8708 (3.9098) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0011 max mem: 55573 Epoch: [37] [150/156] eta: 0:00:04 lr: 0.007919 min_lr: 0.007919 loss: 4.2078 (3.9152) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [37] [155/156] eta: 0:00:00 lr: 0.007919 min_lr: 0.007919 loss: 4.1963 (3.9083) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [37] Total time: 0:01:46 (0.6820 s / it) Averaged stats: lr: 0.007919 min_lr: 0.007919 loss: 4.1963 (3.8795) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.5819 (1.5819) acc1: 71.4844 (71.4844) acc5: 89.0625 (89.0625) time: 6.9539 data: 6.7165 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5819 (1.5995) acc1: 68.8802 (65.7920) acc5: 89.0625 (87.3600) time: 1.5374 data: 1.3434 max mem: 55573 Test: Total time: 0:00:07 (1.5841 s / it) * Acc@1 65.640 Acc@5 87.394 loss 1.605 Accuracy of the model on the 50000 test images: 65.6% Max accuracy: 65.64% Test: [0/5] eta: 0:00:35 loss: 6.9058 (6.9058) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.0229 data: 6.7868 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9419 (6.9621) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5498 data: 1.3575 max mem: 55573 Test: Total time: 0:00:07 (1.5639 s / it) * Acc@1 0.098 Acc@5 0.512 loss 6.938 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [38] [ 0/156] eta: 0:33:13 lr: 0.007919 min_lr: 0.007919 loss: 4.1979 (4.1979) weight_decay: 0.0500 (0.0500) time: 12.7773 data: 10.9820 max mem: 55573 Epoch: [38] [ 10/156] eta: 0:04:10 lr: 0.007918 min_lr: 0.007918 loss: 4.1669 (4.0397) weight_decay: 0.0500 (0.0500) time: 1.7128 data: 0.9989 max mem: 55573 Epoch: [38] [ 20/156] eta: 0:02:40 lr: 0.007918 min_lr: 0.007918 loss: 4.0312 (3.9746) weight_decay: 0.0500 (0.0500) time: 0.5988 data: 0.0005 max mem: 55573 Epoch: [38] [ 30/156] eta: 0:02:04 lr: 0.007917 min_lr: 0.007917 loss: 3.9099 (3.8376) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [38] [ 40/156] eta: 0:01:43 lr: 0.007916 min_lr: 0.007916 loss: 4.0551 (3.8841) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [38] [ 50/156] eta: 0:01:28 lr: 0.007916 min_lr: 0.007916 loss: 4.1520 (3.8890) weight_decay: 0.0500 (0.0500) time: 0.5964 data: 0.0004 max mem: 55573 Epoch: [38] [ 60/156] eta: 0:01:16 lr: 0.007915 min_lr: 0.007915 loss: 4.0162 (3.9116) weight_decay: 0.0500 (0.0500) time: 0.5959 data: 0.0005 max mem: 55573 Epoch: [38] [ 70/156] eta: 0:01:05 lr: 0.007915 min_lr: 0.007915 loss: 3.9426 (3.9023) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [38] [ 80/156] eta: 0:00:56 lr: 0.007914 min_lr: 0.007914 loss: 4.0145 (3.9088) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [38] [ 90/156] eta: 0:00:48 lr: 0.007913 min_lr: 0.007913 loss: 4.0887 (3.9140) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [38] [100/156] eta: 0:00:40 lr: 0.007913 min_lr: 0.007913 loss: 3.9455 (3.8951) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [38] [110/156] eta: 0:00:32 lr: 0.007912 min_lr: 0.007912 loss: 3.7328 (3.8746) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [38] [120/156] eta: 0:00:24 lr: 0.007912 min_lr: 0.007912 loss: 3.7328 (3.8632) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [38] [130/156] eta: 0:00:17 lr: 0.007911 min_lr: 0.007911 loss: 3.7605 (3.8621) weight_decay: 0.0500 (0.0500) time: 0.5879 data: 0.0010 max mem: 55573 Epoch: [38] [140/156] eta: 0:00:10 lr: 0.007910 min_lr: 0.007910 loss: 4.1293 (3.8866) weight_decay: 0.0500 (0.0500) time: 0.5847 data: 0.0008 max mem: 55573 Epoch: [38] [150/156] eta: 0:00:04 lr: 0.007910 min_lr: 0.007910 loss: 4.1898 (3.8950) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [38] [155/156] eta: 0:00:00 lr: 0.007910 min_lr: 0.007910 loss: 4.1293 (3.8927) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [38] Total time: 0:01:45 (0.6757 s / it) Averaged stats: lr: 0.007910 min_lr: 0.007910 loss: 4.1293 (3.8546) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.5498 (1.5498) acc1: 69.0104 (69.0104) acc5: 91.4062 (91.4062) time: 6.8416 data: 6.6043 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6334 (1.6642) acc1: 66.9271 (64.0640) acc5: 90.6250 (87.4240) time: 1.5148 data: 1.3209 max mem: 55573 Test: Total time: 0:00:07 (1.5559 s / it) * Acc@1 64.618 Acc@5 87.266 loss 1.667 Accuracy of the model on the 50000 test images: 64.6% Max accuracy: 65.64% Test: [0/5] eta: 0:00:36 loss: 6.9070 (6.9070) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.3092 data: 7.0723 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9439 (6.9640) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.6078 data: 1.4146 max mem: 55573 Test: Total time: 0:00:08 (1.6283 s / it) * Acc@1 0.096 Acc@5 0.508 loss 6.939 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [39] [ 0/156] eta: 0:37:34 lr: 0.007909 min_lr: 0.007909 loss: 3.3552 (3.3552) weight_decay: 0.0500 (0.0500) time: 14.4490 data: 9.5486 max mem: 55573 Epoch: [39] [ 10/156] eta: 0:04:30 lr: 0.007909 min_lr: 0.007909 loss: 4.0679 (3.8349) weight_decay: 0.0500 (0.0500) time: 1.8534 data: 0.8685 max mem: 55573 Epoch: [39] [ 20/156] eta: 0:02:50 lr: 0.007908 min_lr: 0.007908 loss: 3.8955 (3.8275) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0005 max mem: 55573 Epoch: [39] [ 30/156] eta: 0:02:11 lr: 0.007908 min_lr: 0.007908 loss: 3.5375 (3.7250) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [39] [ 40/156] eta: 0:01:47 lr: 0.007907 min_lr: 0.007907 loss: 3.5756 (3.7484) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [39] [ 50/156] eta: 0:01:31 lr: 0.007906 min_lr: 0.007906 loss: 3.9392 (3.7852) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [39] [ 60/156] eta: 0:01:18 lr: 0.007906 min_lr: 0.007906 loss: 3.6848 (3.7381) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [39] [ 70/156] eta: 0:01:07 lr: 0.007905 min_lr: 0.007905 loss: 3.6622 (3.7603) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [39] [ 80/156] eta: 0:00:57 lr: 0.007905 min_lr: 0.007905 loss: 3.9881 (3.7718) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [39] [ 90/156] eta: 0:00:49 lr: 0.007904 min_lr: 0.007904 loss: 4.0309 (3.8003) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [39] [100/156] eta: 0:00:40 lr: 0.007903 min_lr: 0.007903 loss: 3.9594 (3.7999) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [39] [110/156] eta: 0:00:32 lr: 0.007903 min_lr: 0.007903 loss: 4.0513 (3.8117) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [39] [120/156] eta: 0:00:25 lr: 0.007902 min_lr: 0.007902 loss: 4.0791 (3.8292) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [39] [130/156] eta: 0:00:18 lr: 0.007901 min_lr: 0.007901 loss: 3.6313 (3.7943) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [39] [140/156] eta: 0:00:11 lr: 0.007901 min_lr: 0.007901 loss: 3.7127 (3.8032) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [39] [150/156] eta: 0:00:04 lr: 0.007900 min_lr: 0.007900 loss: 3.8434 (3.7917) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [39] [155/156] eta: 0:00:00 lr: 0.007900 min_lr: 0.007900 loss: 3.9437 (3.8049) weight_decay: 0.0500 (0.0500) time: 0.5811 data: 0.0001 max mem: 55573 Epoch: [39] Total time: 0:01:46 (0.6853 s / it) Averaged stats: lr: 0.007900 min_lr: 0.007900 loss: 3.9437 (3.8457) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.5038 (1.5038) acc1: 72.2656 (72.2656) acc5: 91.5365 (91.5365) time: 6.8858 data: 6.6469 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5626 (1.6040) acc1: 67.4479 (65.3120) acc5: 90.4948 (87.3920) time: 1.5238 data: 1.3295 max mem: 55573 Test: Total time: 0:00:07 (1.5738 s / it) * Acc@1 66.260 Acc@5 87.454 loss 1.582 Accuracy of the model on the 50000 test images: 66.3% Max accuracy: 66.26% Test: [0/5] eta: 0:00:35 loss: 6.9077 (6.9077) acc1: 0.0000 (0.0000) acc5: 0.3906 (0.3906) time: 7.1500 data: 6.9141 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9471 (6.9661) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5752 data: 1.3829 max mem: 55573 Test: Total time: 0:00:07 (1.5918 s / it) * Acc@1 0.096 Acc@5 0.504 loss 6.940 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [40] [ 0/156] eta: 0:35:27 lr: 0.007900 min_lr: 0.007900 loss: 3.8804 (3.8804) weight_decay: 0.0500 (0.0500) time: 13.6359 data: 11.0192 max mem: 55573 Epoch: [40] [ 10/156] eta: 0:04:19 lr: 0.007899 min_lr: 0.007899 loss: 4.2174 (4.1050) weight_decay: 0.0500 (0.0500) time: 1.7796 data: 1.0022 max mem: 55573 Epoch: [40] [ 20/156] eta: 0:02:45 lr: 0.007898 min_lr: 0.007898 loss: 4.0518 (4.0012) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [40] [ 30/156] eta: 0:02:07 lr: 0.007898 min_lr: 0.007898 loss: 4.0356 (4.0520) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [40] [ 40/156] eta: 0:01:45 lr: 0.007897 min_lr: 0.007897 loss: 4.0488 (3.9868) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [40] [ 50/156] eta: 0:01:29 lr: 0.007896 min_lr: 0.007896 loss: 3.9284 (3.9848) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [40] [ 60/156] eta: 0:01:17 lr: 0.007896 min_lr: 0.007896 loss: 4.1112 (4.0125) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [40] [ 70/156] eta: 0:01:06 lr: 0.007895 min_lr: 0.007895 loss: 4.2110 (4.0452) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [40] [ 80/156] eta: 0:00:57 lr: 0.007895 min_lr: 0.007895 loss: 4.0877 (4.0027) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [40] [ 90/156] eta: 0:00:48 lr: 0.007894 min_lr: 0.007894 loss: 3.9907 (3.9923) weight_decay: 0.0500 (0.0500) time: 0.5884 data: 0.0005 max mem: 55573 Epoch: [40] [100/156] eta: 0:00:40 lr: 0.007893 min_lr: 0.007893 loss: 3.9640 (3.9788) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [40] [110/156] eta: 0:00:32 lr: 0.007893 min_lr: 0.007893 loss: 4.0080 (3.9641) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [40] [120/156] eta: 0:00:25 lr: 0.007892 min_lr: 0.007892 loss: 4.0566 (3.9525) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [40] [130/156] eta: 0:00:17 lr: 0.007891 min_lr: 0.007891 loss: 4.0878 (3.9516) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0010 max mem: 55573 Epoch: [40] [140/156] eta: 0:00:10 lr: 0.007891 min_lr: 0.007891 loss: 4.0928 (3.9599) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [40] [150/156] eta: 0:00:04 lr: 0.007890 min_lr: 0.007890 loss: 4.0776 (3.9483) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [40] [155/156] eta: 0:00:00 lr: 0.007890 min_lr: 0.007890 loss: 3.7692 (3.9492) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [40] Total time: 0:01:46 (0.6797 s / it) Averaged stats: lr: 0.007890 min_lr: 0.007890 loss: 3.7692 (3.8451) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.6127 (1.6127) acc1: 70.7031 (70.7031) acc5: 91.0156 (91.0156) time: 6.8432 data: 6.6053 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7863 (1.8060) acc1: 66.4062 (63.7760) acc5: 88.4115 (85.9520) time: 1.5151 data: 1.3212 max mem: 55573 Test: Total time: 0:00:07 (1.5579 s / it) * Acc@1 64.718 Acc@5 86.814 loss 1.807 Accuracy of the model on the 50000 test images: 64.7% Max accuracy: 66.26% Test: [0/5] eta: 0:00:35 loss: 6.9092 (6.9092) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.1908 data: 6.9549 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9507 (6.9685) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5835 data: 1.3911 max mem: 55573 Test: Total time: 0:00:08 (1.6063 s / it) * Acc@1 0.096 Acc@5 0.508 loss 6.942 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [41] [ 0/156] eta: 0:37:09 lr: 0.007889 min_lr: 0.007889 loss: 4.1249 (4.1249) weight_decay: 0.0500 (0.0500) time: 14.2924 data: 12.0192 max mem: 55573 Epoch: [41] [ 10/156] eta: 0:04:30 lr: 0.007889 min_lr: 0.007889 loss: 3.8268 (3.8527) weight_decay: 0.0500 (0.0500) time: 1.8549 data: 1.0930 max mem: 55573 Epoch: [41] [ 20/156] eta: 0:02:50 lr: 0.007888 min_lr: 0.007888 loss: 3.7609 (3.8465) weight_decay: 0.0500 (0.0500) time: 0.6016 data: 0.0004 max mem: 55573 Epoch: [41] [ 30/156] eta: 0:02:11 lr: 0.007887 min_lr: 0.007887 loss: 4.0573 (3.9043) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [41] [ 40/156] eta: 0:01:47 lr: 0.007887 min_lr: 0.007887 loss: 3.7810 (3.8127) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [41] [ 50/156] eta: 0:01:31 lr: 0.007886 min_lr: 0.007886 loss: 3.6182 (3.7700) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [41] [ 60/156] eta: 0:01:18 lr: 0.007885 min_lr: 0.007885 loss: 3.9982 (3.7722) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [41] [ 70/156] eta: 0:01:07 lr: 0.007885 min_lr: 0.007885 loss: 3.8091 (3.7366) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [41] [ 80/156] eta: 0:00:57 lr: 0.007884 min_lr: 0.007884 loss: 3.4140 (3.7205) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [41] [ 90/156] eta: 0:00:49 lr: 0.007883 min_lr: 0.007883 loss: 3.8200 (3.7380) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [41] [100/156] eta: 0:00:40 lr: 0.007883 min_lr: 0.007883 loss: 3.9871 (3.7432) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [41] [110/156] eta: 0:00:32 lr: 0.007882 min_lr: 0.007882 loss: 3.9133 (3.7457) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [41] [120/156] eta: 0:00:25 lr: 0.007881 min_lr: 0.007881 loss: 3.9396 (3.7603) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [41] [130/156] eta: 0:00:18 lr: 0.007881 min_lr: 0.007881 loss: 3.9396 (3.7575) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0010 max mem: 55573 Epoch: [41] [140/156] eta: 0:00:11 lr: 0.007880 min_lr: 0.007880 loss: 4.0687 (3.7703) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [41] [150/156] eta: 0:00:04 lr: 0.007879 min_lr: 0.007879 loss: 4.0816 (3.7832) weight_decay: 0.0500 (0.0500) time: 0.5831 data: 0.0001 max mem: 55573 Epoch: [41] [155/156] eta: 0:00:00 lr: 0.007879 min_lr: 0.007879 loss: 4.1181 (3.7949) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [41] Total time: 0:01:46 (0.6848 s / it) Averaged stats: lr: 0.007879 min_lr: 0.007879 loss: 4.1181 (3.8207) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.6496 (1.6496) acc1: 69.6615 (69.6615) acc5: 90.4948 (90.4948) time: 7.1259 data: 6.8889 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7219 (1.7574) acc1: 67.3177 (63.9360) acc5: 90.1042 (87.0080) time: 1.5715 data: 1.3779 max mem: 55573 Test: Total time: 0:00:08 (1.6225 s / it) * Acc@1 65.092 Acc@5 86.582 loss 1.736 Accuracy of the model on the 50000 test images: 65.1% Max accuracy: 66.26% Test: [0/5] eta: 0:00:35 loss: 6.9092 (6.9092) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.1588 data: 6.9228 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9546 (6.9709) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5770 data: 1.3847 max mem: 55573 Test: Total time: 0:00:07 (1.5947 s / it) * Acc@1 0.094 Acc@5 0.506 loss 6.943 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [42] [ 0/156] eta: 0:32:21 lr: 0.007879 min_lr: 0.007879 loss: 3.3599 (3.3599) weight_decay: 0.0500 (0.0500) time: 12.4469 data: 10.7423 max mem: 55573 Epoch: [42] [ 10/156] eta: 0:04:08 lr: 0.007878 min_lr: 0.007878 loss: 3.4847 (3.7164) weight_decay: 0.0500 (0.0500) time: 1.7049 data: 0.9770 max mem: 55573 Epoch: [42] [ 20/156] eta: 0:02:40 lr: 0.007877 min_lr: 0.007877 loss: 3.8078 (3.9080) weight_decay: 0.0500 (0.0500) time: 0.6144 data: 0.0006 max mem: 55573 Epoch: [42] [ 30/156] eta: 0:02:04 lr: 0.007877 min_lr: 0.007877 loss: 3.9619 (3.8031) weight_decay: 0.0500 (0.0500) time: 0.5946 data: 0.0005 max mem: 55573 Epoch: [42] [ 40/156] eta: 0:01:43 lr: 0.007876 min_lr: 0.007876 loss: 3.9197 (3.8539) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [42] [ 50/156] eta: 0:01:28 lr: 0.007875 min_lr: 0.007875 loss: 3.9707 (3.8455) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [42] [ 60/156] eta: 0:01:16 lr: 0.007875 min_lr: 0.007875 loss: 3.6706 (3.8037) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [42] [ 70/156] eta: 0:01:05 lr: 0.007874 min_lr: 0.007874 loss: 3.9559 (3.8376) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [42] [ 80/156] eta: 0:00:56 lr: 0.007873 min_lr: 0.007873 loss: 3.9559 (3.8100) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [42] [ 90/156] eta: 0:00:47 lr: 0.007872 min_lr: 0.007872 loss: 3.9370 (3.8197) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [42] [100/156] eta: 0:00:39 lr: 0.007872 min_lr: 0.007872 loss: 3.8672 (3.8194) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [42] [110/156] eta: 0:00:32 lr: 0.007871 min_lr: 0.007871 loss: 3.8865 (3.8132) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [42] [120/156] eta: 0:00:24 lr: 0.007870 min_lr: 0.007870 loss: 3.8865 (3.7990) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [42] [130/156] eta: 0:00:17 lr: 0.007869 min_lr: 0.007869 loss: 3.7478 (3.7853) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0013 max mem: 55573 Epoch: [42] [140/156] eta: 0:00:10 lr: 0.007869 min_lr: 0.007869 loss: 3.6228 (3.7706) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0011 max mem: 55573 Epoch: [42] [150/156] eta: 0:00:04 lr: 0.007868 min_lr: 0.007868 loss: 3.8377 (3.7904) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [42] [155/156] eta: 0:00:00 lr: 0.007868 min_lr: 0.007868 loss: 4.0923 (3.7989) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [42] Total time: 0:01:45 (0.6753 s / it) Averaged stats: lr: 0.007868 min_lr: 0.007868 loss: 4.0923 (3.8069) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.4277 (1.4277) acc1: 72.1354 (72.1354) acc5: 92.0573 (92.0573) time: 7.0471 data: 6.8097 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6825 (1.6366) acc1: 66.5365 (65.1840) acc5: 90.1042 (88.1280) time: 1.5560 data: 1.3620 max mem: 55573 Test: Total time: 0:00:08 (1.6043 s / it) * Acc@1 65.844 Acc@5 87.834 loss 1.634 Accuracy of the model on the 50000 test images: 65.8% Max accuracy: 66.26% Test: [0/5] eta: 0:00:35 loss: 6.9090 (6.9090) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.0537 data: 6.8177 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9590 (6.9734) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5561 data: 1.3636 max mem: 55573 Test: Total time: 0:00:07 (1.5722 s / it) * Acc@1 0.094 Acc@5 0.504 loss 6.945 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [43] [ 0/156] eta: 0:36:56 lr: 0.007868 min_lr: 0.007868 loss: 3.6134 (3.6134) weight_decay: 0.0500 (0.0500) time: 14.2053 data: 11.3915 max mem: 55573 Epoch: [43] [ 10/156] eta: 0:04:29 lr: 0.007867 min_lr: 0.007867 loss: 3.9085 (3.9599) weight_decay: 0.0500 (0.0500) time: 1.8437 data: 1.0359 max mem: 55573 Epoch: [43] [ 20/156] eta: 0:02:49 lr: 0.007866 min_lr: 0.007866 loss: 3.9973 (3.9403) weight_decay: 0.0500 (0.0500) time: 0.5993 data: 0.0004 max mem: 55573 Epoch: [43] [ 30/156] eta: 0:02:10 lr: 0.007865 min_lr: 0.007865 loss: 3.9030 (3.8935) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [43] [ 40/156] eta: 0:01:47 lr: 0.007865 min_lr: 0.007865 loss: 3.3381 (3.7191) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [43] [ 50/156] eta: 0:01:31 lr: 0.007864 min_lr: 0.007864 loss: 3.3260 (3.7419) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [43] [ 60/156] eta: 0:01:18 lr: 0.007863 min_lr: 0.007863 loss: 3.7889 (3.7235) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [43] [ 70/156] eta: 0:01:07 lr: 0.007862 min_lr: 0.007862 loss: 3.8261 (3.7666) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [43] [ 80/156] eta: 0:00:57 lr: 0.007862 min_lr: 0.007862 loss: 4.1444 (3.7996) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [43] [ 90/156] eta: 0:00:48 lr: 0.007861 min_lr: 0.007861 loss: 4.1319 (3.8055) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [43] [100/156] eta: 0:00:40 lr: 0.007860 min_lr: 0.007860 loss: 4.0954 (3.8022) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [43] [110/156] eta: 0:00:32 lr: 0.007859 min_lr: 0.007859 loss: 3.8918 (3.8038) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [43] [120/156] eta: 0:00:25 lr: 0.007859 min_lr: 0.007859 loss: 3.7965 (3.8094) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [43] [130/156] eta: 0:00:18 lr: 0.007858 min_lr: 0.007858 loss: 3.7810 (3.7959) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [43] [140/156] eta: 0:00:11 lr: 0.007857 min_lr: 0.007857 loss: 3.7810 (3.7975) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0009 max mem: 55573 Epoch: [43] [150/156] eta: 0:00:04 lr: 0.007856 min_lr: 0.007856 loss: 3.5130 (3.7754) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [43] [155/156] eta: 0:00:00 lr: 0.007856 min_lr: 0.007856 loss: 3.4231 (3.7791) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [43] Total time: 0:01:46 (0.6843 s / it) Averaged stats: lr: 0.007856 min_lr: 0.007856 loss: 3.4231 (3.7971) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.5428 (1.5428) acc1: 71.4844 (71.4844) acc5: 91.2760 (91.2760) time: 7.2828 data: 7.0454 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5598 (1.6263) acc1: 69.6615 (65.4400) acc5: 89.8438 (87.4240) time: 1.6031 data: 1.4092 max mem: 55573 Test: Total time: 0:00:08 (1.6587 s / it) * Acc@1 65.914 Acc@5 87.544 loss 1.654 Accuracy of the model on the 50000 test images: 65.9% Max accuracy: 66.26% Test: [0/5] eta: 0:00:35 loss: 6.9091 (6.9091) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.0492 data: 6.8133 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9645 (6.9763) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5551 data: 1.3628 max mem: 55573 Test: Total time: 0:00:07 (1.5720 s / it) * Acc@1 0.098 Acc@5 0.506 loss 6.947 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [44] [ 0/156] eta: 0:34:14 lr: 0.007856 min_lr: 0.007856 loss: 3.4522 (3.4522) weight_decay: 0.0500 (0.0500) time: 13.1678 data: 10.4710 max mem: 55573 Epoch: [44] [ 10/156] eta: 0:04:15 lr: 0.007855 min_lr: 0.007855 loss: 3.8639 (3.8579) weight_decay: 0.0500 (0.0500) time: 1.7519 data: 0.9523 max mem: 55573 Epoch: [44] [ 20/156] eta: 0:02:43 lr: 0.007854 min_lr: 0.007854 loss: 3.7823 (3.7451) weight_decay: 0.0500 (0.0500) time: 0.6005 data: 0.0005 max mem: 55573 Epoch: [44] [ 30/156] eta: 0:02:06 lr: 0.007854 min_lr: 0.007854 loss: 3.8285 (3.7687) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [44] [ 40/156] eta: 0:01:44 lr: 0.007853 min_lr: 0.007853 loss: 3.8285 (3.7248) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [44] [ 50/156] eta: 0:01:29 lr: 0.007852 min_lr: 0.007852 loss: 3.7053 (3.7326) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [44] [ 60/156] eta: 0:01:17 lr: 0.007851 min_lr: 0.007851 loss: 3.6318 (3.7254) weight_decay: 0.0500 (0.0500) time: 0.5985 data: 0.0004 max mem: 55573 Epoch: [44] [ 70/156] eta: 0:01:06 lr: 0.007850 min_lr: 0.007850 loss: 3.7081 (3.7544) weight_decay: 0.0500 (0.0500) time: 0.5980 data: 0.0004 max mem: 55573 Epoch: [44] [ 80/156] eta: 0:00:56 lr: 0.007850 min_lr: 0.007850 loss: 3.7085 (3.7320) weight_decay: 0.0500 (0.0500) time: 0.5878 data: 0.0004 max mem: 55573 Epoch: [44] [ 90/156] eta: 0:00:48 lr: 0.007849 min_lr: 0.007849 loss: 3.7085 (3.7114) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [44] [100/156] eta: 0:00:40 lr: 0.007848 min_lr: 0.007848 loss: 3.8692 (3.7306) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [44] [110/156] eta: 0:00:32 lr: 0.007847 min_lr: 0.007847 loss: 3.8375 (3.7190) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [44] [120/156] eta: 0:00:25 lr: 0.007847 min_lr: 0.007847 loss: 3.7289 (3.7286) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [44] [130/156] eta: 0:00:17 lr: 0.007846 min_lr: 0.007846 loss: 3.8847 (3.7273) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0010 max mem: 55573 Epoch: [44] [140/156] eta: 0:00:10 lr: 0.007845 min_lr: 0.007845 loss: 3.4793 (3.7026) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0008 max mem: 55573 Epoch: [44] [150/156] eta: 0:00:04 lr: 0.007844 min_lr: 0.007844 loss: 3.5604 (3.7078) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [44] [155/156] eta: 0:00:00 lr: 0.007844 min_lr: 0.007844 loss: 3.7624 (3.7120) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [44] Total time: 0:01:45 (0.6782 s / it) Averaged stats: lr: 0.007844 min_lr: 0.007844 loss: 3.7624 (3.7888) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2774 (1.2774) acc1: 73.3073 (73.3073) acc5: 93.6198 (93.6198) time: 7.1497 data: 6.9122 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5526 (1.5871) acc1: 68.2292 (66.7200) acc5: 89.8438 (87.8080) time: 1.5765 data: 1.3825 max mem: 55573 Test: Total time: 0:00:08 (1.6173 s / it) * Acc@1 66.512 Acc@5 87.896 loss 1.589 Accuracy of the model on the 50000 test images: 66.5% Max accuracy: 66.51% Test: [0/5] eta: 0:00:33 loss: 6.9095 (6.9095) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7734 data: 6.5373 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9705 (6.9799) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5000 data: 1.3076 max mem: 55573 Test: Total time: 0:00:07 (1.5201 s / it) * Acc@1 0.100 Acc@5 0.508 loss 6.949 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [45] [ 0/156] eta: 0:37:15 lr: 0.007844 min_lr: 0.007844 loss: 3.4263 (3.4263) weight_decay: 0.0500 (0.0500) time: 14.3271 data: 10.5767 max mem: 55573 Epoch: [45] [ 10/156] eta: 0:04:28 lr: 0.007843 min_lr: 0.007843 loss: 4.0582 (3.7897) weight_decay: 0.0500 (0.0500) time: 1.8414 data: 0.9619 max mem: 55573 Epoch: [45] [ 20/156] eta: 0:02:49 lr: 0.007842 min_lr: 0.007842 loss: 3.7817 (3.7275) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [45] [ 30/156] eta: 0:02:10 lr: 0.007841 min_lr: 0.007841 loss: 3.9123 (3.7163) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [45] [ 40/156] eta: 0:01:47 lr: 0.007840 min_lr: 0.007840 loss: 4.0048 (3.7358) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [45] [ 50/156] eta: 0:01:31 lr: 0.007840 min_lr: 0.007840 loss: 3.9427 (3.7600) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [45] [ 60/156] eta: 0:01:18 lr: 0.007839 min_lr: 0.007839 loss: 3.7522 (3.6987) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [45] [ 70/156] eta: 0:01:07 lr: 0.007838 min_lr: 0.007838 loss: 3.6746 (3.7187) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [45] [ 80/156] eta: 0:00:57 lr: 0.007837 min_lr: 0.007837 loss: 3.8787 (3.7462) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [45] [ 90/156] eta: 0:00:48 lr: 0.007836 min_lr: 0.007836 loss: 3.5823 (3.7095) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [45] [100/156] eta: 0:00:40 lr: 0.007836 min_lr: 0.007836 loss: 3.4573 (3.7256) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [45] [110/156] eta: 0:00:32 lr: 0.007835 min_lr: 0.007835 loss: 4.0694 (3.7364) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [45] [120/156] eta: 0:00:25 lr: 0.007834 min_lr: 0.007834 loss: 3.8887 (3.7336) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [45] [130/156] eta: 0:00:18 lr: 0.007833 min_lr: 0.007833 loss: 3.9425 (3.7609) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0012 max mem: 55573 Epoch: [45] [140/156] eta: 0:00:10 lr: 0.007832 min_lr: 0.007832 loss: 4.3430 (3.8006) weight_decay: 0.0500 (0.0500) time: 0.5850 data: 0.0010 max mem: 55573 Epoch: [45] [150/156] eta: 0:00:04 lr: 0.007832 min_lr: 0.007832 loss: 4.0825 (3.8142) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [45] [155/156] eta: 0:00:00 lr: 0.007831 min_lr: 0.007831 loss: 4.0524 (3.8110) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [45] Total time: 0:01:46 (0.6838 s / it) Averaged stats: lr: 0.007831 min_lr: 0.007831 loss: 4.0524 (3.7865) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.4144 (1.4144) acc1: 72.6562 (72.6562) acc5: 91.9271 (91.9271) time: 6.8671 data: 6.6297 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6038 (1.6222) acc1: 68.8802 (66.4320) acc5: 91.9271 (88.8320) time: 1.5188 data: 1.3260 max mem: 55573 Test: Total time: 0:00:07 (1.5443 s / it) * Acc@1 67.310 Acc@5 88.576 loss 1.622 Accuracy of the model on the 50000 test images: 67.3% Max accuracy: 67.31% Test: [0/5] eta: 0:00:33 loss: 6.9115 (6.9115) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.6219 data: 6.3858 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9759 (6.9846) acc1: 0.0000 (0.0960) acc5: 0.1302 (0.5440) time: 1.4697 data: 1.2773 max mem: 55573 Test: Total time: 0:00:07 (1.4874 s / it) * Acc@1 0.100 Acc@5 0.510 loss 6.952 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [46] [ 0/156] eta: 0:33:18 lr: 0.007831 min_lr: 0.007831 loss: 3.8008 (3.8008) weight_decay: 0.0500 (0.0500) time: 12.8108 data: 9.4432 max mem: 55573 Epoch: [46] [ 10/156] eta: 0:04:11 lr: 0.007830 min_lr: 0.007830 loss: 3.5282 (3.3861) weight_decay: 0.0500 (0.0500) time: 1.7250 data: 0.8590 max mem: 55573 Epoch: [46] [ 20/156] eta: 0:02:41 lr: 0.007829 min_lr: 0.007829 loss: 3.8772 (3.6498) weight_decay: 0.0500 (0.0500) time: 0.6026 data: 0.0006 max mem: 55573 Epoch: [46] [ 30/156] eta: 0:02:05 lr: 0.007829 min_lr: 0.007829 loss: 3.8772 (3.6380) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [46] [ 40/156] eta: 0:01:43 lr: 0.007828 min_lr: 0.007828 loss: 3.7414 (3.6873) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [46] [ 50/156] eta: 0:01:28 lr: 0.007827 min_lr: 0.007827 loss: 3.8049 (3.7028) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [46] [ 60/156] eta: 0:01:16 lr: 0.007826 min_lr: 0.007826 loss: 3.9236 (3.7272) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [46] [ 70/156] eta: 0:01:05 lr: 0.007825 min_lr: 0.007825 loss: 3.8498 (3.7362) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [46] [ 80/156] eta: 0:00:56 lr: 0.007824 min_lr: 0.007824 loss: 3.8740 (3.7547) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [46] [ 90/156] eta: 0:00:48 lr: 0.007823 min_lr: 0.007823 loss: 4.0488 (3.7945) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [46] [100/156] eta: 0:00:39 lr: 0.007823 min_lr: 0.007823 loss: 4.0488 (3.8006) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [46] [110/156] eta: 0:00:32 lr: 0.007822 min_lr: 0.007822 loss: 3.5977 (3.7634) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [46] [120/156] eta: 0:00:24 lr: 0.007821 min_lr: 0.007821 loss: 3.8292 (3.7690) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0004 max mem: 55573 Epoch: [46] [130/156] eta: 0:00:17 lr: 0.007820 min_lr: 0.007820 loss: 3.8490 (3.7756) weight_decay: 0.0500 (0.0500) time: 0.5884 data: 0.0010 max mem: 55573 Epoch: [46] [140/156] eta: 0:00:10 lr: 0.007819 min_lr: 0.007819 loss: 3.8978 (3.7867) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0008 max mem: 55573 Epoch: [46] [150/156] eta: 0:00:04 lr: 0.007818 min_lr: 0.007818 loss: 3.6464 (3.7801) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [46] [155/156] eta: 0:00:00 lr: 0.007818 min_lr: 0.007818 loss: 3.6114 (3.7781) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [46] Total time: 0:01:45 (0.6752 s / it) Averaged stats: lr: 0.007818 min_lr: 0.007818 loss: 3.6114 (3.7811) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.4988 (1.4988) acc1: 70.7031 (70.7031) acc5: 92.3177 (92.3177) time: 7.1342 data: 6.8967 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5418 (1.6788) acc1: 69.1406 (65.1840) acc5: 90.5660 (88.6400) time: 1.5732 data: 1.3794 max mem: 55573 Test: Total time: 0:00:08 (1.6137 s / it) * Acc@1 66.422 Acc@5 88.210 loss 1.644 Accuracy of the model on the 50000 test images: 66.4% Max accuracy: 67.31% Test: [0/5] eta: 0:00:34 loss: 6.9129 (6.9129) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9317 data: 6.6957 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9826 (6.9900) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.5316 data: 1.3392 max mem: 55573 Test: Total time: 0:00:07 (1.5556 s / it) * Acc@1 0.100 Acc@5 0.514 loss 6.957 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [47] [ 0/156] eta: 0:37:14 lr: 0.007818 min_lr: 0.007818 loss: 3.8641 (3.8641) weight_decay: 0.0500 (0.0500) time: 14.3239 data: 7.8135 max mem: 55573 Epoch: [47] [ 10/156] eta: 0:04:28 lr: 0.007817 min_lr: 0.007817 loss: 3.9462 (3.8788) weight_decay: 0.0500 (0.0500) time: 1.8390 data: 0.7107 max mem: 55573 Epoch: [47] [ 20/156] eta: 0:02:49 lr: 0.007816 min_lr: 0.007816 loss: 3.8319 (3.8624) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [47] [ 30/156] eta: 0:02:10 lr: 0.007815 min_lr: 0.007815 loss: 3.8741 (3.8476) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [47] [ 40/156] eta: 0:01:47 lr: 0.007814 min_lr: 0.007814 loss: 3.8054 (3.7832) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [47] [ 50/156] eta: 0:01:31 lr: 0.007814 min_lr: 0.007814 loss: 3.7891 (3.8088) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [47] [ 60/156] eta: 0:01:18 lr: 0.007813 min_lr: 0.007813 loss: 3.9941 (3.8268) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [47] [ 70/156] eta: 0:01:07 lr: 0.007812 min_lr: 0.007812 loss: 3.7265 (3.7547) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [47] [ 80/156] eta: 0:00:57 lr: 0.007811 min_lr: 0.007811 loss: 3.7265 (3.7706) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [47] [ 90/156] eta: 0:00:48 lr: 0.007810 min_lr: 0.007810 loss: 3.8841 (3.7701) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [47] [100/156] eta: 0:00:40 lr: 0.007809 min_lr: 0.007809 loss: 3.9108 (3.7765) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [47] [110/156] eta: 0:00:32 lr: 0.007808 min_lr: 0.007808 loss: 3.6263 (3.7491) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [47] [120/156] eta: 0:00:25 lr: 0.007807 min_lr: 0.007807 loss: 3.5549 (3.7386) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [47] [130/156] eta: 0:00:18 lr: 0.007807 min_lr: 0.007807 loss: 3.9562 (3.7520) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0011 max mem: 55573 Epoch: [47] [140/156] eta: 0:00:10 lr: 0.007806 min_lr: 0.007806 loss: 4.0011 (3.7645) weight_decay: 0.0500 (0.0500) time: 0.5839 data: 0.0009 max mem: 55573 Epoch: [47] [150/156] eta: 0:00:04 lr: 0.007805 min_lr: 0.007805 loss: 3.8608 (3.7725) weight_decay: 0.0500 (0.0500) time: 0.5810 data: 0.0001 max mem: 55573 Epoch: [47] [155/156] eta: 0:00:00 lr: 0.007804 min_lr: 0.007804 loss: 3.7052 (3.7593) weight_decay: 0.0500 (0.0500) time: 0.5810 data: 0.0001 max mem: 55573 Epoch: [47] Total time: 0:01:46 (0.6835 s / it) Averaged stats: lr: 0.007804 min_lr: 0.007804 loss: 3.7052 (3.7670) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.4720 (1.4720) acc1: 71.0938 (71.0938) acc5: 92.0573 (92.0573) time: 7.3096 data: 7.0722 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5652 (1.6448) acc1: 68.4896 (65.0880) acc5: 90.2344 (86.8800) time: 1.6083 data: 1.4145 max mem: 55573 Test: Total time: 0:00:08 (1.6513 s / it) * Acc@1 65.680 Acc@5 87.350 loss 1.645 Accuracy of the model on the 50000 test images: 65.7% Max accuracy: 67.31% Test: [0/5] eta: 0:00:33 loss: 6.9139 (6.9139) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.6981 data: 6.4622 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9910 (6.9948) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.5120) time: 1.4849 data: 1.2926 max mem: 55573 Test: Total time: 0:00:07 (1.5021 s / it) * Acc@1 0.100 Acc@5 0.510 loss 6.960 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [48] [ 0/156] eta: 0:37:51 lr: 0.007804 min_lr: 0.007804 loss: 4.0850 (4.0850) weight_decay: 0.0500 (0.0500) time: 14.5591 data: 10.2769 max mem: 55573 Epoch: [48] [ 10/156] eta: 0:04:31 lr: 0.007803 min_lr: 0.007803 loss: 4.0375 (3.7508) weight_decay: 0.0500 (0.0500) time: 1.8593 data: 0.9346 max mem: 55573 Epoch: [48] [ 20/156] eta: 0:02:50 lr: 0.007802 min_lr: 0.007802 loss: 3.7175 (3.6487) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [48] [ 30/156] eta: 0:02:11 lr: 0.007802 min_lr: 0.007802 loss: 3.6595 (3.6747) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [48] [ 40/156] eta: 0:01:47 lr: 0.007801 min_lr: 0.007801 loss: 3.8061 (3.6805) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [48] [ 50/156] eta: 0:01:31 lr: 0.007800 min_lr: 0.007800 loss: 3.6735 (3.6651) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [48] [ 60/156] eta: 0:01:18 lr: 0.007799 min_lr: 0.007799 loss: 3.8399 (3.7137) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [48] [ 70/156] eta: 0:01:07 lr: 0.007798 min_lr: 0.007798 loss: 3.8399 (3.6751) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [48] [ 80/156] eta: 0:00:57 lr: 0.007797 min_lr: 0.007797 loss: 3.5175 (3.6447) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [48] [ 90/156] eta: 0:00:49 lr: 0.007796 min_lr: 0.007796 loss: 3.6889 (3.6637) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [48] [100/156] eta: 0:00:40 lr: 0.007795 min_lr: 0.007795 loss: 3.9456 (3.6833) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [48] [110/156] eta: 0:00:32 lr: 0.007794 min_lr: 0.007794 loss: 3.8398 (3.6797) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [48] [120/156] eta: 0:00:25 lr: 0.007793 min_lr: 0.007793 loss: 3.8298 (3.6938) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [48] [130/156] eta: 0:00:18 lr: 0.007793 min_lr: 0.007793 loss: 4.0864 (3.6933) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0012 max mem: 55573 Epoch: [48] [140/156] eta: 0:00:11 lr: 0.007792 min_lr: 0.007792 loss: 4.0130 (3.7031) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0010 max mem: 55573 Epoch: [48] [150/156] eta: 0:00:04 lr: 0.007791 min_lr: 0.007791 loss: 4.0055 (3.7038) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [48] [155/156] eta: 0:00:00 lr: 0.007790 min_lr: 0.007790 loss: 3.8419 (3.7031) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [48] Total time: 0:01:46 (0.6853 s / it) Averaged stats: lr: 0.007790 min_lr: 0.007790 loss: 3.8419 (3.7460) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2155 (1.2155) acc1: 73.0469 (73.0469) acc5: 93.8802 (93.8802) time: 7.0381 data: 6.8009 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3920 (1.4498) acc1: 68.0990 (66.3680) acc5: 89.7135 (88.0320) time: 1.5540 data: 1.3603 max mem: 55573 Test: Total time: 0:00:07 (1.5941 s / it) * Acc@1 67.460 Acc@5 88.456 loss 1.427 Accuracy of the model on the 50000 test images: 67.5% Max accuracy: 67.46% Test: [0/5] eta: 0:00:34 loss: 6.9149 (6.9149) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9216 data: 6.6856 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9999 (7.0006) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.5296 data: 1.3372 max mem: 55573 Test: Total time: 0:00:07 (1.5460 s / it) * Acc@1 0.100 Acc@5 0.524 loss 6.965 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [49] [ 0/156] eta: 0:34:54 lr: 0.007790 min_lr: 0.007790 loss: 3.5800 (3.5800) weight_decay: 0.0500 (0.0500) time: 13.4266 data: 10.6837 max mem: 55573 Epoch: [49] [ 10/156] eta: 0:04:18 lr: 0.007789 min_lr: 0.007789 loss: 3.8326 (3.7204) weight_decay: 0.0500 (0.0500) time: 1.7710 data: 0.9716 max mem: 55573 Epoch: [49] [ 20/156] eta: 0:02:44 lr: 0.007788 min_lr: 0.007788 loss: 3.9382 (3.8628) weight_decay: 0.0500 (0.0500) time: 0.5976 data: 0.0004 max mem: 55573 Epoch: [49] [ 30/156] eta: 0:02:07 lr: 0.007787 min_lr: 0.007787 loss: 4.0245 (3.8156) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [49] [ 40/156] eta: 0:01:45 lr: 0.007786 min_lr: 0.007786 loss: 3.6625 (3.7688) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [49] [ 50/156] eta: 0:01:29 lr: 0.007786 min_lr: 0.007786 loss: 3.7027 (3.7883) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [49] [ 60/156] eta: 0:01:17 lr: 0.007785 min_lr: 0.007785 loss: 4.0535 (3.8229) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [49] [ 70/156] eta: 0:01:06 lr: 0.007784 min_lr: 0.007784 loss: 3.8744 (3.7919) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [49] [ 80/156] eta: 0:00:57 lr: 0.007783 min_lr: 0.007783 loss: 3.3248 (3.7187) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [49] [ 90/156] eta: 0:00:48 lr: 0.007782 min_lr: 0.007782 loss: 3.5709 (3.7284) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [49] [100/156] eta: 0:00:40 lr: 0.007781 min_lr: 0.007781 loss: 3.6890 (3.7161) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [49] [110/156] eta: 0:00:32 lr: 0.007780 min_lr: 0.007780 loss: 3.6257 (3.7279) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [49] [120/156] eta: 0:00:25 lr: 0.007779 min_lr: 0.007779 loss: 3.8114 (3.7207) weight_decay: 0.0500 (0.0500) time: 0.5882 data: 0.0004 max mem: 55573 Epoch: [49] [130/156] eta: 0:00:17 lr: 0.007778 min_lr: 0.007778 loss: 3.8041 (3.7178) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0010 max mem: 55573 Epoch: [49] [140/156] eta: 0:00:10 lr: 0.007777 min_lr: 0.007777 loss: 3.6001 (3.7242) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0008 max mem: 55573 Epoch: [49] [150/156] eta: 0:00:04 lr: 0.007776 min_lr: 0.007776 loss: 3.8585 (3.7329) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [49] [155/156] eta: 0:00:00 lr: 0.007776 min_lr: 0.007776 loss: 3.6475 (3.7203) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [49] Total time: 0:01:45 (0.6785 s / it) Averaged stats: lr: 0.007776 min_lr: 0.007776 loss: 3.6475 (3.7720) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3519 (1.3519) acc1: 73.9583 (73.9583) acc5: 92.4479 (92.4479) time: 6.9075 data: 6.6701 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5439 (1.6046) acc1: 70.3125 (67.2000) acc5: 90.8854 (88.2880) time: 1.5280 data: 1.3341 max mem: 55573 Test: Total time: 0:00:07 (1.5750 s / it) * Acc@1 67.566 Acc@5 88.680 loss 1.598 Accuracy of the model on the 50000 test images: 67.6% Max accuracy: 67.57% Test: [0/5] eta: 0:00:34 loss: 6.9164 (6.9164) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9047 data: 6.6687 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0096 (7.0062) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.5263 data: 1.3339 max mem: 55573 Test: Total time: 0:00:07 (1.5424 s / it) * Acc@1 0.100 Acc@5 0.508 loss 6.969 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [50] [ 0/156] eta: 0:37:30 lr: 0.007776 min_lr: 0.007776 loss: 4.6127 (4.6127) weight_decay: 0.0500 (0.0500) time: 14.4256 data: 9.2316 max mem: 55573 Epoch: [50] [ 10/156] eta: 0:04:30 lr: 0.007775 min_lr: 0.007775 loss: 3.8223 (3.8859) weight_decay: 0.0500 (0.0500) time: 1.8512 data: 0.8397 max mem: 55573 Epoch: [50] [ 20/156] eta: 0:02:50 lr: 0.007774 min_lr: 0.007774 loss: 3.8213 (3.8609) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [50] [ 30/156] eta: 0:02:10 lr: 0.007773 min_lr: 0.007773 loss: 3.7164 (3.7635) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [50] [ 40/156] eta: 0:01:47 lr: 0.007772 min_lr: 0.007772 loss: 3.7053 (3.7645) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [50] [ 50/156] eta: 0:01:31 lr: 0.007771 min_lr: 0.007771 loss: 3.9955 (3.7873) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [50] [ 60/156] eta: 0:01:18 lr: 0.007770 min_lr: 0.007770 loss: 3.8697 (3.7503) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [50] [ 70/156] eta: 0:01:07 lr: 0.007769 min_lr: 0.007769 loss: 3.8661 (3.7884) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [50] [ 80/156] eta: 0:00:57 lr: 0.007768 min_lr: 0.007768 loss: 3.8500 (3.7805) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [50] [ 90/156] eta: 0:00:49 lr: 0.007767 min_lr: 0.007767 loss: 3.7884 (3.7755) weight_decay: 0.0500 (0.0500) time: 0.5950 data: 0.0004 max mem: 55573 Epoch: [50] [100/156] eta: 0:00:40 lr: 0.007766 min_lr: 0.007766 loss: 3.9329 (3.7853) weight_decay: 0.0500 (0.0500) time: 0.5946 data: 0.0004 max mem: 55573 Epoch: [50] [110/156] eta: 0:00:32 lr: 0.007765 min_lr: 0.007765 loss: 3.9307 (3.8009) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [50] [120/156] eta: 0:00:25 lr: 0.007764 min_lr: 0.007764 loss: 3.6411 (3.7786) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [50] [130/156] eta: 0:00:18 lr: 0.007763 min_lr: 0.007763 loss: 3.6210 (3.7751) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0027 max mem: 55573 Epoch: [50] [140/156] eta: 0:00:11 lr: 0.007762 min_lr: 0.007762 loss: 3.9260 (3.7854) weight_decay: 0.0500 (0.0500) time: 0.5882 data: 0.0026 max mem: 55573 Epoch: [50] [150/156] eta: 0:00:04 lr: 0.007761 min_lr: 0.007761 loss: 3.9552 (3.7896) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [50] [155/156] eta: 0:00:00 lr: 0.007761 min_lr: 0.007761 loss: 3.8189 (3.7708) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [50] Total time: 0:01:46 (0.6853 s / it) Averaged stats: lr: 0.007761 min_lr: 0.007761 loss: 3.8189 (3.7638) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3404 (1.3404) acc1: 73.4375 (73.4375) acc5: 92.0573 (92.0573) time: 6.8163 data: 6.5776 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4329 (1.4717) acc1: 69.4010 (67.1040) acc5: 90.8854 (88.2560) time: 1.5100 data: 1.3156 max mem: 55573 Test: Total time: 0:00:07 (1.5539 s / it) * Acc@1 67.128 Acc@5 88.424 loss 1.461 Accuracy of the model on the 50000 test images: 67.1% Max accuracy: 67.57% Test: [0/5] eta: 0:00:36 loss: 6.9191 (6.9191) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2899 data: 7.0534 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0187 (7.0134) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.6036 data: 1.4108 max mem: 55573 Test: Total time: 0:00:08 (1.6198 s / it) * Acc@1 0.100 Acc@5 0.508 loss 6.975 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [51] [ 0/156] eta: 0:37:49 lr: 0.007761 min_lr: 0.007761 loss: 4.0468 (4.0468) weight_decay: 0.0500 (0.0500) time: 14.5503 data: 8.0450 max mem: 55573 Epoch: [51] [ 10/156] eta: 0:04:46 lr: 0.007760 min_lr: 0.007760 loss: 3.8744 (3.5760) weight_decay: 0.0500 (0.0500) time: 1.9655 data: 0.7318 max mem: 55573 Epoch: [51] [ 20/156] eta: 0:02:58 lr: 0.007759 min_lr: 0.007759 loss: 3.7807 (3.6358) weight_decay: 0.0500 (0.0500) time: 0.6487 data: 0.0005 max mem: 55573 Epoch: [51] [ 30/156] eta: 0:02:15 lr: 0.007758 min_lr: 0.007758 loss: 3.6712 (3.6425) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [51] [ 40/156] eta: 0:01:51 lr: 0.007757 min_lr: 0.007757 loss: 3.6712 (3.6403) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [51] [ 50/156] eta: 0:01:34 lr: 0.007756 min_lr: 0.007756 loss: 3.8292 (3.6697) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [51] [ 60/156] eta: 0:01:20 lr: 0.007755 min_lr: 0.007755 loss: 3.7276 (3.6595) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0004 max mem: 55573 Epoch: [51] [ 70/156] eta: 0:01:09 lr: 0.007754 min_lr: 0.007754 loss: 3.4917 (3.6383) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [51] [ 80/156] eta: 0:00:59 lr: 0.007753 min_lr: 0.007753 loss: 3.7645 (3.6627) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [51] [ 90/156] eta: 0:00:49 lr: 0.007752 min_lr: 0.007752 loss: 3.8903 (3.6579) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [51] [100/156] eta: 0:00:41 lr: 0.007751 min_lr: 0.007751 loss: 3.4514 (3.6232) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [51] [110/156] eta: 0:00:33 lr: 0.007750 min_lr: 0.007750 loss: 3.7006 (3.6373) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [51] [120/156] eta: 0:00:25 lr: 0.007749 min_lr: 0.007749 loss: 4.0665 (3.6670) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [51] [130/156] eta: 0:00:18 lr: 0.007748 min_lr: 0.007748 loss: 4.0310 (3.6687) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0010 max mem: 55573 Epoch: [51] [140/156] eta: 0:00:11 lr: 0.007747 min_lr: 0.007747 loss: 3.9264 (3.6974) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0008 max mem: 55573 Epoch: [51] [150/156] eta: 0:00:04 lr: 0.007746 min_lr: 0.007746 loss: 3.8878 (3.6822) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [51] [155/156] eta: 0:00:00 lr: 0.007745 min_lr: 0.007745 loss: 3.6592 (3.6831) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [51] Total time: 0:01:48 (0.6930 s / it) Averaged stats: lr: 0.007745 min_lr: 0.007745 loss: 3.6592 (3.7518) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2718 (1.2718) acc1: 74.0885 (74.0885) acc5: 93.2292 (93.2292) time: 7.1008 data: 6.8634 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3920 (1.4053) acc1: 70.5729 (67.8080) acc5: 90.8854 (88.9600) time: 1.5666 data: 1.3728 max mem: 55573 Test: Total time: 0:00:08 (1.6055 s / it) * Acc@1 68.890 Acc@5 89.144 loss 1.375 Accuracy of the model on the 50000 test images: 68.9% Max accuracy: 68.89% Test: [0/5] eta: 0:00:34 loss: 6.9215 (6.9215) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9034 data: 6.6671 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0290 (7.0218) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.5260 data: 1.3335 max mem: 55573 Test: Total time: 0:00:07 (1.5435 s / it) * Acc@1 0.100 Acc@5 0.502 loss 6.981 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [52] [ 0/156] eta: 0:37:14 lr: 0.007745 min_lr: 0.007745 loss: 4.0576 (4.0576) weight_decay: 0.0500 (0.0500) time: 14.3255 data: 9.7790 max mem: 55573 Epoch: [52] [ 10/156] eta: 0:04:28 lr: 0.007744 min_lr: 0.007744 loss: 3.8367 (3.7163) weight_decay: 0.0500 (0.0500) time: 1.8393 data: 0.8894 max mem: 55573 Epoch: [52] [ 20/156] eta: 0:02:49 lr: 0.007743 min_lr: 0.007743 loss: 3.6809 (3.7139) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [52] [ 30/156] eta: 0:02:10 lr: 0.007742 min_lr: 0.007742 loss: 3.6742 (3.7192) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [52] [ 40/156] eta: 0:01:47 lr: 0.007741 min_lr: 0.007741 loss: 3.6742 (3.7018) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [52] [ 50/156] eta: 0:01:31 lr: 0.007740 min_lr: 0.007740 loss: 3.6067 (3.6874) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [52] [ 60/156] eta: 0:01:18 lr: 0.007739 min_lr: 0.007739 loss: 3.8046 (3.7105) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [52] [ 70/156] eta: 0:01:07 lr: 0.007738 min_lr: 0.007738 loss: 3.8683 (3.7180) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [52] [ 80/156] eta: 0:00:57 lr: 0.007737 min_lr: 0.007737 loss: 3.8963 (3.7201) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [52] [ 90/156] eta: 0:00:48 lr: 0.007736 min_lr: 0.007736 loss: 3.7743 (3.7195) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [52] [100/156] eta: 0:00:40 lr: 0.007735 min_lr: 0.007735 loss: 3.7703 (3.7171) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [52] [110/156] eta: 0:00:32 lr: 0.007734 min_lr: 0.007734 loss: 3.8111 (3.7145) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [52] [120/156] eta: 0:00:25 lr: 0.007733 min_lr: 0.007733 loss: 3.3198 (3.6909) weight_decay: 0.0500 (0.0500) time: 0.5970 data: 0.0004 max mem: 55573 Epoch: [52] [130/156] eta: 0:00:18 lr: 0.007732 min_lr: 0.007732 loss: 3.4583 (3.6901) weight_decay: 0.0500 (0.0500) time: 0.5975 data: 0.0010 max mem: 55573 Epoch: [52] [140/156] eta: 0:00:11 lr: 0.007731 min_lr: 0.007731 loss: 3.8577 (3.6984) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0008 max mem: 55573 Epoch: [52] [150/156] eta: 0:00:04 lr: 0.007730 min_lr: 0.007730 loss: 3.9935 (3.7149) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [52] [155/156] eta: 0:00:00 lr: 0.007729 min_lr: 0.007729 loss: 4.0777 (3.7263) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [52] Total time: 0:01:46 (0.6850 s / it) Averaged stats: lr: 0.007729 min_lr: 0.007729 loss: 4.0777 (3.7284) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3455 (1.3455) acc1: 75.3906 (75.3906) acc5: 92.9688 (92.9688) time: 6.8814 data: 6.6441 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4775 (1.5077) acc1: 69.9219 (68.8320) acc5: 92.0573 (89.4720) time: 1.5228 data: 1.3289 max mem: 55573 Test: Total time: 0:00:07 (1.5665 s / it) * Acc@1 68.324 Acc@5 89.152 loss 1.519 Accuracy of the model on the 50000 test images: 68.3% Max accuracy: 68.89% Test: [0/5] eta: 0:00:34 loss: 6.9246 (6.9246) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9216 data: 6.6856 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0399 (7.0301) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5296 data: 1.3372 max mem: 55573 Test: Total time: 0:00:07 (1.5495 s / it) * Acc@1 0.100 Acc@5 0.516 loss 6.988 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [53] [ 0/156] eta: 0:34:11 lr: 0.007729 min_lr: 0.007729 loss: 3.9868 (3.9868) weight_decay: 0.0500 (0.0500) time: 13.1500 data: 10.7853 max mem: 55573 Epoch: [53] [ 10/156] eta: 0:04:26 lr: 0.007728 min_lr: 0.007728 loss: 3.8338 (3.7809) weight_decay: 0.0500 (0.0500) time: 1.8220 data: 1.0617 max mem: 55573 Epoch: [53] [ 20/156] eta: 0:02:47 lr: 0.007727 min_lr: 0.007727 loss: 3.8257 (3.7988) weight_decay: 0.0500 (0.0500) time: 0.6393 data: 0.0449 max mem: 55573 Epoch: [53] [ 30/156] eta: 0:02:09 lr: 0.007726 min_lr: 0.007726 loss: 3.7013 (3.7460) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0006 max mem: 55573 Epoch: [53] [ 40/156] eta: 0:01:46 lr: 0.007725 min_lr: 0.007725 loss: 3.6286 (3.7341) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0007 max mem: 55573 Epoch: [53] [ 50/156] eta: 0:01:30 lr: 0.007724 min_lr: 0.007724 loss: 3.6286 (3.7113) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0007 max mem: 55573 Epoch: [53] [ 60/156] eta: 0:01:17 lr: 0.007723 min_lr: 0.007723 loss: 3.8822 (3.7320) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0006 max mem: 55573 Epoch: [53] [ 70/156] eta: 0:01:07 lr: 0.007722 min_lr: 0.007722 loss: 3.8778 (3.7378) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [53] [ 80/156] eta: 0:00:57 lr: 0.007721 min_lr: 0.007721 loss: 3.7225 (3.7009) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0006 max mem: 55573 Epoch: [53] [ 90/156] eta: 0:00:48 lr: 0.007720 min_lr: 0.007720 loss: 3.7271 (3.7167) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0006 max mem: 55573 Epoch: [53] [100/156] eta: 0:00:40 lr: 0.007718 min_lr: 0.007718 loss: 3.9819 (3.7137) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0006 max mem: 55573 Epoch: [53] [110/156] eta: 0:00:32 lr: 0.007717 min_lr: 0.007717 loss: 4.0150 (3.7328) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [53] [120/156] eta: 0:00:25 lr: 0.007716 min_lr: 0.007716 loss: 3.9610 (3.7470) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0006 max mem: 55573 Epoch: [53] [130/156] eta: 0:00:18 lr: 0.007715 min_lr: 0.007715 loss: 3.8053 (3.7474) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0024 max mem: 55573 Epoch: [53] [140/156] eta: 0:00:10 lr: 0.007714 min_lr: 0.007714 loss: 3.7151 (3.7559) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0022 max mem: 55573 Epoch: [53] [150/156] eta: 0:00:04 lr: 0.007713 min_lr: 0.007713 loss: 3.7151 (3.7355) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [53] [155/156] eta: 0:00:00 lr: 0.007713 min_lr: 0.007713 loss: 3.7265 (3.7375) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [53] Total time: 0:01:46 (0.6831 s / it) Averaged stats: lr: 0.007713 min_lr: 0.007713 loss: 3.7265 (3.7332) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3811 (1.3811) acc1: 73.9583 (73.9583) acc5: 92.7083 (92.7083) time: 7.0324 data: 6.7951 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5518 (1.6278) acc1: 69.9219 (66.6240) acc5: 90.7552 (87.7440) time: 1.5529 data: 1.3591 max mem: 55573 Test: Total time: 0:00:07 (1.5969 s / it) * Acc@1 67.006 Acc@5 87.944 loss 1.625 Accuracy of the model on the 50000 test images: 67.0% Max accuracy: 68.89% Test: [0/5] eta: 0:00:36 loss: 6.9278 (6.9278) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2127 data: 6.9767 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0514 (7.0388) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.5878 data: 1.3954 max mem: 55573 Test: Total time: 0:00:08 (1.6097 s / it) * Acc@1 0.100 Acc@5 0.520 loss 6.995 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [54] [ 0/156] eta: 0:33:45 lr: 0.007712 min_lr: 0.007712 loss: 3.6926 (3.6926) weight_decay: 0.0500 (0.0500) time: 12.9869 data: 12.3963 max mem: 55573 Epoch: [54] [ 10/156] eta: 0:04:21 lr: 0.007711 min_lr: 0.007711 loss: 3.6926 (3.5525) weight_decay: 0.0500 (0.0500) time: 1.7919 data: 1.1273 max mem: 55573 Epoch: [54] [ 20/156] eta: 0:02:45 lr: 0.007710 min_lr: 0.007710 loss: 3.9273 (3.7717) weight_decay: 0.0500 (0.0500) time: 0.6314 data: 0.0004 max mem: 55573 Epoch: [54] [ 30/156] eta: 0:02:08 lr: 0.007709 min_lr: 0.007709 loss: 3.9388 (3.8103) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [54] [ 40/156] eta: 0:01:45 lr: 0.007708 min_lr: 0.007708 loss: 3.9225 (3.8083) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [54] [ 50/156] eta: 0:01:30 lr: 0.007707 min_lr: 0.007707 loss: 3.6822 (3.7456) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [54] [ 60/156] eta: 0:01:17 lr: 0.007706 min_lr: 0.007706 loss: 3.4983 (3.7105) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [54] [ 70/156] eta: 0:01:06 lr: 0.007705 min_lr: 0.007705 loss: 3.7253 (3.7469) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [54] [ 80/156] eta: 0:00:57 lr: 0.007704 min_lr: 0.007704 loss: 3.9519 (3.7616) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [54] [ 90/156] eta: 0:00:48 lr: 0.007703 min_lr: 0.007703 loss: 3.8644 (3.7654) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [54] [100/156] eta: 0:00:40 lr: 0.007702 min_lr: 0.007702 loss: 3.8384 (3.7621) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [54] [110/156] eta: 0:00:32 lr: 0.007701 min_lr: 0.007701 loss: 3.8299 (3.7613) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [54] [120/156] eta: 0:00:25 lr: 0.007700 min_lr: 0.007700 loss: 3.8878 (3.7743) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [54] [130/156] eta: 0:00:17 lr: 0.007698 min_lr: 0.007698 loss: 4.0217 (3.7870) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0010 max mem: 55573 Epoch: [54] [140/156] eta: 0:00:10 lr: 0.007697 min_lr: 0.007697 loss: 4.0314 (3.7874) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [54] [150/156] eta: 0:00:04 lr: 0.007696 min_lr: 0.007696 loss: 3.8666 (3.7846) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [54] [155/156] eta: 0:00:00 lr: 0.007696 min_lr: 0.007696 loss: 3.8498 (3.7736) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [54] Total time: 0:01:46 (0.6814 s / it) Averaged stats: lr: 0.007696 min_lr: 0.007696 loss: 3.8498 (3.7331) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.5150 (1.5150) acc1: 72.6562 (72.6562) acc5: 91.7969 (91.7969) time: 7.1821 data: 6.9446 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5150 (1.6194) acc1: 70.7031 (67.2960) acc5: 91.7969 (88.6080) time: 1.5830 data: 1.3890 max mem: 55573 Test: Total time: 0:00:08 (1.6242 s / it) * Acc@1 67.616 Acc@5 88.606 loss 1.596 Accuracy of the model on the 50000 test images: 67.6% Max accuracy: 68.89% Test: [0/5] eta: 0:00:36 loss: 6.9322 (6.9322) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.3162 data: 7.0802 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0640 (7.0486) acc1: 0.0000 (0.0960) acc5: 0.3906 (0.4800) time: 1.6086 data: 1.4161 max mem: 55573 Test: Total time: 0:00:08 (1.6320 s / it) * Acc@1 0.100 Acc@5 0.510 loss 7.003 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [55] [ 0/156] eta: 0:36:26 lr: 0.007696 min_lr: 0.007696 loss: 4.0795 (4.0795) weight_decay: 0.0500 (0.0500) time: 14.0135 data: 9.5808 max mem: 55573 Epoch: [55] [ 10/156] eta: 0:04:25 lr: 0.007694 min_lr: 0.007694 loss: 4.0007 (3.7109) weight_decay: 0.0500 (0.0500) time: 1.8218 data: 0.8714 max mem: 55573 Epoch: [55] [ 20/156] eta: 0:02:48 lr: 0.007693 min_lr: 0.007693 loss: 3.9110 (3.8183) weight_decay: 0.0500 (0.0500) time: 0.5978 data: 0.0005 max mem: 55573 Epoch: [55] [ 30/156] eta: 0:02:09 lr: 0.007692 min_lr: 0.007692 loss: 3.9110 (3.8331) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [55] [ 40/156] eta: 0:01:46 lr: 0.007691 min_lr: 0.007691 loss: 3.8815 (3.8385) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [55] [ 50/156] eta: 0:01:30 lr: 0.007690 min_lr: 0.007690 loss: 3.7799 (3.8273) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [55] [ 60/156] eta: 0:01:18 lr: 0.007689 min_lr: 0.007689 loss: 3.7263 (3.7930) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [55] [ 70/156] eta: 0:01:07 lr: 0.007688 min_lr: 0.007688 loss: 3.7263 (3.7801) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [55] [ 80/156] eta: 0:00:57 lr: 0.007687 min_lr: 0.007687 loss: 3.7158 (3.7445) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [55] [ 90/156] eta: 0:00:48 lr: 0.007686 min_lr: 0.007686 loss: 3.7158 (3.7407) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [55] [100/156] eta: 0:00:40 lr: 0.007684 min_lr: 0.007684 loss: 3.8344 (3.7300) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [55] [110/156] eta: 0:00:32 lr: 0.007683 min_lr: 0.007683 loss: 3.5782 (3.7128) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [55] [120/156] eta: 0:00:25 lr: 0.007682 min_lr: 0.007682 loss: 3.5782 (3.7103) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [55] [130/156] eta: 0:00:18 lr: 0.007681 min_lr: 0.007681 loss: 3.8252 (3.7188) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0011 max mem: 55573 Epoch: [55] [140/156] eta: 0:00:10 lr: 0.007680 min_lr: 0.007680 loss: 3.7439 (3.7176) weight_decay: 0.0500 (0.0500) time: 0.5879 data: 0.0009 max mem: 55573 Epoch: [55] [150/156] eta: 0:00:04 lr: 0.007679 min_lr: 0.007679 loss: 3.6881 (3.7149) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [55] [155/156] eta: 0:00:00 lr: 0.007678 min_lr: 0.007678 loss: 3.8751 (3.7200) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [55] Total time: 0:01:46 (0.6831 s / it) Averaged stats: lr: 0.007678 min_lr: 0.007678 loss: 3.8751 (3.7153) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3520 (1.3520) acc1: 73.0469 (73.0469) acc5: 92.9688 (92.9688) time: 6.9387 data: 6.7013 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5345 (1.4971) acc1: 68.3594 (67.0720) acc5: 90.5660 (88.3520) time: 1.5340 data: 1.3403 max mem: 55573 Test: Total time: 0:00:07 (1.5812 s / it) * Acc@1 67.012 Acc@5 87.896 loss 1.509 Accuracy of the model on the 50000 test images: 67.0% Max accuracy: 68.89% Test: [0/5] eta: 0:00:34 loss: 6.9363 (6.9363) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8797 data: 6.6435 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0784 (7.0601) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5213 data: 1.3289 max mem: 55573 Test: Total time: 0:00:07 (1.5417 s / it) * Acc@1 0.100 Acc@5 0.506 loss 7.012 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [56] [ 0/156] eta: 0:37:17 lr: 0.007678 min_lr: 0.007678 loss: 4.4975 (4.4975) weight_decay: 0.0500 (0.0500) time: 14.3407 data: 9.5337 max mem: 55573 Epoch: [56] [ 10/156] eta: 0:04:28 lr: 0.007677 min_lr: 0.007677 loss: 3.9072 (3.9071) weight_decay: 0.0500 (0.0500) time: 1.8404 data: 0.8671 max mem: 55573 Epoch: [56] [ 20/156] eta: 0:02:49 lr: 0.007676 min_lr: 0.007676 loss: 3.9072 (3.9027) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [56] [ 30/156] eta: 0:02:10 lr: 0.007675 min_lr: 0.007675 loss: 3.9330 (3.8739) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [56] [ 40/156] eta: 0:01:47 lr: 0.007674 min_lr: 0.007674 loss: 3.8306 (3.8539) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [56] [ 50/156] eta: 0:01:31 lr: 0.007672 min_lr: 0.007672 loss: 3.8584 (3.8388) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [56] [ 60/156] eta: 0:01:18 lr: 0.007671 min_lr: 0.007671 loss: 3.9664 (3.8353) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [56] [ 70/156] eta: 0:01:07 lr: 0.007670 min_lr: 0.007670 loss: 3.7292 (3.7910) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [56] [ 80/156] eta: 0:00:57 lr: 0.007669 min_lr: 0.007669 loss: 3.5945 (3.7775) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [56] [ 90/156] eta: 0:00:48 lr: 0.007668 min_lr: 0.007668 loss: 3.7132 (3.7760) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [56] [100/156] eta: 0:00:40 lr: 0.007667 min_lr: 0.007667 loss: 3.8055 (3.7761) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [56] [110/156] eta: 0:00:32 lr: 0.007666 min_lr: 0.007666 loss: 3.8833 (3.7875) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [56] [120/156] eta: 0:00:25 lr: 0.007664 min_lr: 0.007664 loss: 3.8639 (3.7742) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [56] [130/156] eta: 0:00:18 lr: 0.007663 min_lr: 0.007663 loss: 3.6378 (3.7646) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [56] [140/156] eta: 0:00:10 lr: 0.007662 min_lr: 0.007662 loss: 3.8659 (3.7622) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0009 max mem: 55573 Epoch: [56] [150/156] eta: 0:00:04 lr: 0.007661 min_lr: 0.007661 loss: 3.8659 (3.7583) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [56] [155/156] eta: 0:00:00 lr: 0.007660 min_lr: 0.007660 loss: 3.8649 (3.7504) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [56] Total time: 0:01:46 (0.6839 s / it) Averaged stats: lr: 0.007660 min_lr: 0.007660 loss: 3.8649 (3.7154) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 1.2266 (1.2266) acc1: 74.8698 (74.8698) acc5: 93.7500 (93.7500) time: 6.5501 data: 6.3128 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3855 (1.4537) acc1: 71.0938 (67.8400) acc5: 90.4948 (89.1840) time: 1.4567 data: 1.2627 max mem: 55573 Test: Total time: 0:00:07 (1.5031 s / it) * Acc@1 68.682 Acc@5 89.216 loss 1.458 Accuracy of the model on the 50000 test images: 68.7% Max accuracy: 68.89% Test: [0/5] eta: 0:00:36 loss: 6.9410 (6.9410) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2304 data: 6.9943 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0957 (7.0730) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5914 data: 1.3989 max mem: 55573 Test: Total time: 0:00:08 (1.6102 s / it) * Acc@1 0.100 Acc@5 0.514 loss 7.023 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [57] [ 0/156] eta: 0:35:53 lr: 0.007660 min_lr: 0.007660 loss: 3.0118 (3.0118) weight_decay: 0.0500 (0.0500) time: 13.8035 data: 9.7881 max mem: 55573 Epoch: [57] [ 10/156] eta: 0:04:21 lr: 0.007659 min_lr: 0.007659 loss: 3.7405 (3.5113) weight_decay: 0.0500 (0.0500) time: 1.7923 data: 0.8902 max mem: 55573 Epoch: [57] [ 20/156] eta: 0:02:45 lr: 0.007658 min_lr: 0.007658 loss: 3.7873 (3.6860) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [57] [ 30/156] eta: 0:02:08 lr: 0.007657 min_lr: 0.007657 loss: 3.7823 (3.6923) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [57] [ 40/156] eta: 0:01:45 lr: 0.007656 min_lr: 0.007656 loss: 3.7575 (3.7388) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [57] [ 50/156] eta: 0:01:30 lr: 0.007654 min_lr: 0.007654 loss: 3.8775 (3.7320) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [57] [ 60/156] eta: 0:01:17 lr: 0.007653 min_lr: 0.007653 loss: 3.5811 (3.7006) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [57] [ 70/156] eta: 0:01:06 lr: 0.007652 min_lr: 0.007652 loss: 3.9129 (3.7410) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [57] [ 80/156] eta: 0:00:57 lr: 0.007651 min_lr: 0.007651 loss: 4.0346 (3.7744) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [57] [ 90/156] eta: 0:00:48 lr: 0.007650 min_lr: 0.007650 loss: 3.9268 (3.7374) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [57] [100/156] eta: 0:00:40 lr: 0.007649 min_lr: 0.007649 loss: 3.6933 (3.7469) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [57] [110/156] eta: 0:00:32 lr: 0.007647 min_lr: 0.007647 loss: 3.6657 (3.7028) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [57] [120/156] eta: 0:00:25 lr: 0.007646 min_lr: 0.007646 loss: 3.4639 (3.6761) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [57] [130/156] eta: 0:00:17 lr: 0.007645 min_lr: 0.007645 loss: 3.6886 (3.6972) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0010 max mem: 55573 Epoch: [57] [140/156] eta: 0:00:10 lr: 0.007644 min_lr: 0.007644 loss: 3.9501 (3.6998) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0009 max mem: 55573 Epoch: [57] [150/156] eta: 0:00:04 lr: 0.007643 min_lr: 0.007643 loss: 3.6747 (3.6988) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [57] [155/156] eta: 0:00:00 lr: 0.007642 min_lr: 0.007642 loss: 3.5376 (3.6857) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [57] Total time: 0:01:46 (0.6805 s / it) Averaged stats: lr: 0.007642 min_lr: 0.007642 loss: 3.5376 (3.6825) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1860 (1.1860) acc1: 73.5677 (73.5677) acc5: 93.7500 (93.7500) time: 7.1287 data: 6.8857 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3865 (1.4332) acc1: 69.4010 (67.7760) acc5: 90.7552 (89.7600) time: 1.5720 data: 1.3772 max mem: 55573 Test: Total time: 0:00:08 (1.6107 s / it) * Acc@1 68.888 Acc@5 89.346 loss 1.419 Accuracy of the model on the 50000 test images: 68.9% Max accuracy: 68.89% Test: [0/5] eta: 0:00:33 loss: 6.9470 (6.9470) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7973 data: 6.5614 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.1124 (7.0864) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5049 data: 1.3124 max mem: 55573 Test: Total time: 0:00:07 (1.5279 s / it) * Acc@1 0.100 Acc@5 0.506 loss 7.034 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [58] [ 0/156] eta: 0:33:14 lr: 0.007642 min_lr: 0.007642 loss: 3.0269 (3.0269) weight_decay: 0.0500 (0.0500) time: 12.7858 data: 10.1727 max mem: 55573 Epoch: [58] [ 10/156] eta: 0:04:18 lr: 0.007641 min_lr: 0.007641 loss: 3.7025 (3.6045) weight_decay: 0.0500 (0.0500) time: 1.7707 data: 0.9253 max mem: 55573 Epoch: [58] [ 20/156] eta: 0:02:44 lr: 0.007640 min_lr: 0.007640 loss: 3.7035 (3.5973) weight_decay: 0.0500 (0.0500) time: 0.6312 data: 0.0005 max mem: 55573 Epoch: [58] [ 30/156] eta: 0:02:07 lr: 0.007638 min_lr: 0.007638 loss: 3.7759 (3.6597) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [58] [ 40/156] eta: 0:01:45 lr: 0.007637 min_lr: 0.007637 loss: 3.8012 (3.6791) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [58] [ 50/156] eta: 0:01:29 lr: 0.007636 min_lr: 0.007636 loss: 3.6738 (3.6878) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [58] [ 60/156] eta: 0:01:17 lr: 0.007635 min_lr: 0.007635 loss: 3.9464 (3.7349) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [58] [ 70/156] eta: 0:01:06 lr: 0.007634 min_lr: 0.007634 loss: 3.7090 (3.6894) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [58] [ 80/156] eta: 0:00:57 lr: 0.007632 min_lr: 0.007632 loss: 3.4980 (3.6846) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [58] [ 90/156] eta: 0:00:48 lr: 0.007631 min_lr: 0.007631 loss: 3.7213 (3.6783) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [58] [100/156] eta: 0:00:40 lr: 0.007630 min_lr: 0.007630 loss: 3.7950 (3.6751) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0004 max mem: 55573 Epoch: [58] [110/156] eta: 0:00:32 lr: 0.007629 min_lr: 0.007629 loss: 3.4505 (3.6581) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [58] [120/156] eta: 0:00:25 lr: 0.007628 min_lr: 0.007628 loss: 3.6453 (3.6701) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [58] [130/156] eta: 0:00:17 lr: 0.007626 min_lr: 0.007626 loss: 3.6938 (3.6522) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0010 max mem: 55573 Epoch: [58] [140/156] eta: 0:00:10 lr: 0.007625 min_lr: 0.007625 loss: 3.5870 (3.6500) weight_decay: 0.0500 (0.0500) time: 0.5871 data: 0.0009 max mem: 55573 Epoch: [58] [150/156] eta: 0:00:04 lr: 0.007624 min_lr: 0.007624 loss: 3.5870 (3.6564) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [58] [155/156] eta: 0:00:00 lr: 0.007623 min_lr: 0.007623 loss: 3.5870 (3.6532) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [58] Total time: 0:01:45 (0.6795 s / it) Averaged stats: lr: 0.007623 min_lr: 0.007623 loss: 3.5870 (3.7024) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.5003 (1.5003) acc1: 72.3958 (72.3958) acc5: 92.8385 (92.8385) time: 6.6232 data: 6.3860 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6075 (1.6421) acc1: 69.2708 (66.3680) acc5: 90.3646 (88.7680) time: 1.4714 data: 1.2773 max mem: 55573 Test: Total time: 0:00:07 (1.5100 s / it) * Acc@1 67.854 Acc@5 88.758 loss 1.633 Accuracy of the model on the 50000 test images: 67.9% Max accuracy: 68.89% Test: [0/5] eta: 0:00:34 loss: 6.9543 (6.9543) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9774 data: 6.7414 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.1303 (7.1009) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.5409 data: 1.3484 max mem: 55573 Test: Total time: 0:00:07 (1.5638 s / it) * Acc@1 0.100 Acc@5 0.500 loss 7.046 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [59] [ 0/156] eta: 0:34:04 lr: 0.007623 min_lr: 0.007623 loss: 4.1156 (4.1156) weight_decay: 0.0500 (0.0500) time: 13.1050 data: 11.7190 max mem: 55573 Epoch: [59] [ 10/156] eta: 0:04:18 lr: 0.007622 min_lr: 0.007622 loss: 3.9806 (3.7451) weight_decay: 0.0500 (0.0500) time: 1.7674 data: 1.0657 max mem: 55573 Epoch: [59] [ 20/156] eta: 0:02:44 lr: 0.007621 min_lr: 0.007621 loss: 3.9806 (3.8477) weight_decay: 0.0500 (0.0500) time: 0.6121 data: 0.0004 max mem: 55573 Epoch: [59] [ 30/156] eta: 0:02:06 lr: 0.007620 min_lr: 0.007620 loss: 3.6613 (3.7408) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [59] [ 40/156] eta: 0:01:45 lr: 0.007618 min_lr: 0.007618 loss: 3.5123 (3.6933) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [59] [ 50/156] eta: 0:01:29 lr: 0.007617 min_lr: 0.007617 loss: 3.6875 (3.7062) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [59] [ 60/156] eta: 0:01:17 lr: 0.007616 min_lr: 0.007616 loss: 3.8577 (3.7077) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [59] [ 70/156] eta: 0:01:06 lr: 0.007615 min_lr: 0.007615 loss: 3.8129 (3.7082) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [59] [ 80/156] eta: 0:00:56 lr: 0.007613 min_lr: 0.007613 loss: 3.8129 (3.7114) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [59] [ 90/156] eta: 0:00:48 lr: 0.007612 min_lr: 0.007612 loss: 3.8544 (3.7151) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [59] [100/156] eta: 0:00:40 lr: 0.007611 min_lr: 0.007611 loss: 3.8544 (3.6982) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [59] [110/156] eta: 0:00:32 lr: 0.007610 min_lr: 0.007610 loss: 3.7537 (3.6926) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [59] [120/156] eta: 0:00:25 lr: 0.007608 min_lr: 0.007608 loss: 3.8078 (3.6955) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [59] [130/156] eta: 0:00:17 lr: 0.007607 min_lr: 0.007607 loss: 3.6207 (3.6708) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0024 max mem: 55573 Epoch: [59] [140/156] eta: 0:00:10 lr: 0.007606 min_lr: 0.007606 loss: 3.5501 (3.6693) weight_decay: 0.0500 (0.0500) time: 0.5881 data: 0.0023 max mem: 55573 Epoch: [59] [150/156] eta: 0:00:04 lr: 0.007605 min_lr: 0.007605 loss: 3.6687 (3.6663) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [59] [155/156] eta: 0:00:00 lr: 0.007604 min_lr: 0.007604 loss: 3.6687 (3.6734) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [59] Total time: 0:01:45 (0.6789 s / it) Averaged stats: lr: 0.007604 min_lr: 0.007604 loss: 3.6687 (3.6728) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.3541 (1.3541) acc1: 75.0000 (75.0000) acc5: 93.0990 (93.0990) time: 6.7037 data: 6.4631 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5027 (1.5563) acc1: 69.4010 (66.8480) acc5: 91.2760 (88.7040) time: 1.5172 data: 1.3238 max mem: 55573 Test: Total time: 0:00:07 (1.5683 s / it) * Acc@1 67.868 Acc@5 88.620 loss 1.543 Accuracy of the model on the 50000 test images: 67.9% Max accuracy: 68.89% Test: [0/5] eta: 0:00:34 loss: 6.9614 (6.9614) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9314 data: 6.6949 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.1494 (7.1161) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5317 data: 1.3391 max mem: 55573 Test: Total time: 0:00:07 (1.5496 s / it) * Acc@1 0.100 Acc@5 0.502 loss 7.058 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [60] [ 0/156] eta: 0:36:47 lr: 0.007604 min_lr: 0.007604 loss: 4.3507 (4.3507) weight_decay: 0.0500 (0.0500) time: 14.1534 data: 11.1545 max mem: 55573 Epoch: [60] [ 10/156] eta: 0:04:26 lr: 0.007603 min_lr: 0.007603 loss: 3.9603 (3.8837) weight_decay: 0.0500 (0.0500) time: 1.8237 data: 1.0144 max mem: 55573 Epoch: [60] [ 20/156] eta: 0:02:48 lr: 0.007601 min_lr: 0.007601 loss: 3.8238 (3.7444) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [60] [ 30/156] eta: 0:02:09 lr: 0.007600 min_lr: 0.007600 loss: 3.7112 (3.7437) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [60] [ 40/156] eta: 0:01:46 lr: 0.007599 min_lr: 0.007599 loss: 3.7964 (3.7323) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [60] [ 50/156] eta: 0:01:30 lr: 0.007598 min_lr: 0.007598 loss: 3.9687 (3.7668) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [60] [ 60/156] eta: 0:01:18 lr: 0.007596 min_lr: 0.007596 loss: 4.0583 (3.7931) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [60] [ 70/156] eta: 0:01:07 lr: 0.007595 min_lr: 0.007595 loss: 4.0225 (3.8111) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [60] [ 80/156] eta: 0:00:57 lr: 0.007594 min_lr: 0.007594 loss: 3.8357 (3.7956) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [60] [ 90/156] eta: 0:00:48 lr: 0.007593 min_lr: 0.007593 loss: 3.7966 (3.7933) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [60] [100/156] eta: 0:00:40 lr: 0.007591 min_lr: 0.007591 loss: 3.6093 (3.7655) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [60] [110/156] eta: 0:00:32 lr: 0.007590 min_lr: 0.007590 loss: 3.7508 (3.7750) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [60] [120/156] eta: 0:00:25 lr: 0.007589 min_lr: 0.007589 loss: 3.8868 (3.7841) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [60] [130/156] eta: 0:00:18 lr: 0.007588 min_lr: 0.007588 loss: 3.8366 (3.7838) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0011 max mem: 55573 Epoch: [60] [140/156] eta: 0:00:10 lr: 0.007586 min_lr: 0.007586 loss: 3.7266 (3.7797) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0009 max mem: 55573 Epoch: [60] [150/156] eta: 0:00:04 lr: 0.007585 min_lr: 0.007585 loss: 3.2387 (3.7362) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [60] [155/156] eta: 0:00:00 lr: 0.007584 min_lr: 0.007584 loss: 3.1213 (3.7204) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [60] Total time: 0:01:46 (0.6829 s / it) Averaged stats: lr: 0.007584 min_lr: 0.007584 loss: 3.1213 (3.6824) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3568 (1.3568) acc1: 72.2656 (72.2656) acc5: 92.4479 (92.4479) time: 7.1658 data: 6.9284 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4452 (1.5126) acc1: 69.2708 (66.4960) acc5: 91.5365 (88.7040) time: 1.5795 data: 1.3858 max mem: 55573 Test: Total time: 0:00:08 (1.6224 s / it) * Acc@1 67.788 Acc@5 88.844 loss 1.497 Accuracy of the model on the 50000 test images: 67.8% Max accuracy: 68.89% Test: [0/5] eta: 0:00:34 loss: 6.9702 (6.9702) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9776 data: 6.7411 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.1697 (7.1320) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5411 data: 1.3483 max mem: 55573 Test: Total time: 0:00:07 (1.5605 s / it) * Acc@1 0.100 Acc@5 0.500 loss 7.071 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [61] [ 0/156] eta: 0:30:31 lr: 0.007584 min_lr: 0.007584 loss: 4.1524 (4.1524) weight_decay: 0.0500 (0.0500) time: 11.7399 data: 9.7822 max mem: 55573 Epoch: [61] [ 10/156] eta: 0:04:10 lr: 0.007583 min_lr: 0.007583 loss: 3.8777 (3.7792) weight_decay: 0.0500 (0.0500) time: 1.7157 data: 0.8897 max mem: 55573 Epoch: [61] [ 20/156] eta: 0:02:40 lr: 0.007582 min_lr: 0.007582 loss: 3.8694 (3.8482) weight_decay: 0.0500 (0.0500) time: 0.6522 data: 0.0005 max mem: 55573 Epoch: [61] [ 30/156] eta: 0:02:04 lr: 0.007580 min_lr: 0.007580 loss: 3.6890 (3.7863) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [61] [ 40/156] eta: 0:01:43 lr: 0.007579 min_lr: 0.007579 loss: 3.6324 (3.7514) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [61] [ 50/156] eta: 0:01:28 lr: 0.007578 min_lr: 0.007578 loss: 3.6929 (3.7537) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [61] [ 60/156] eta: 0:01:16 lr: 0.007577 min_lr: 0.007577 loss: 3.6152 (3.7457) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [61] [ 70/156] eta: 0:01:05 lr: 0.007575 min_lr: 0.007575 loss: 3.7065 (3.7334) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [61] [ 80/156] eta: 0:00:56 lr: 0.007574 min_lr: 0.007574 loss: 3.6994 (3.6965) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [61] [ 90/156] eta: 0:00:47 lr: 0.007573 min_lr: 0.007573 loss: 3.5574 (3.6962) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [61] [100/156] eta: 0:00:39 lr: 0.007571 min_lr: 0.007571 loss: 3.5735 (3.6710) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [61] [110/156] eta: 0:00:32 lr: 0.007570 min_lr: 0.007570 loss: 3.7532 (3.6979) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [61] [120/156] eta: 0:00:24 lr: 0.007569 min_lr: 0.007569 loss: 3.7510 (3.6804) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [61] [130/156] eta: 0:00:17 lr: 0.007567 min_lr: 0.007567 loss: 3.5191 (3.6611) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0010 max mem: 55573 Epoch: [61] [140/156] eta: 0:00:10 lr: 0.007566 min_lr: 0.007566 loss: 3.7105 (3.6813) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0008 max mem: 55573 Epoch: [61] [150/156] eta: 0:00:04 lr: 0.007565 min_lr: 0.007565 loss: 3.9118 (3.6898) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [61] [155/156] eta: 0:00:00 lr: 0.007564 min_lr: 0.007564 loss: 3.8992 (3.7023) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [61] Total time: 0:01:45 (0.6747 s / it) Averaged stats: lr: 0.007564 min_lr: 0.007564 loss: 3.8992 (3.6765) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.4131 (1.4131) acc1: 75.0000 (75.0000) acc5: 93.0990 (93.0990) time: 6.9415 data: 6.7041 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5090 (1.5322) acc1: 70.7031 (68.8640) acc5: 91.0156 (89.0880) time: 1.5345 data: 1.3409 max mem: 55573 Test: Total time: 0:00:07 (1.5783 s / it) * Acc@1 69.228 Acc@5 89.492 loss 1.548 Accuracy of the model on the 50000 test images: 69.2% Max accuracy: 69.23% Test: [0/5] eta: 0:00:33 loss: 6.9825 (6.9825) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.6625 data: 6.4259 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.1899 (7.1504) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.4781 data: 1.2853 max mem: 55573 Test: Total time: 0:00:07 (1.4940 s / it) * Acc@1 0.100 Acc@5 0.502 loss 7.087 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [62] [ 0/156] eta: 0:32:30 lr: 0.007564 min_lr: 0.007564 loss: 3.3726 (3.3726) weight_decay: 0.0500 (0.0500) time: 12.5006 data: 8.4870 max mem: 55573 Epoch: [62] [ 10/156] eta: 0:04:05 lr: 0.007563 min_lr: 0.007563 loss: 3.8785 (3.8624) weight_decay: 0.0500 (0.0500) time: 1.6815 data: 0.7719 max mem: 55573 Epoch: [62] [ 20/156] eta: 0:02:38 lr: 0.007561 min_lr: 0.007561 loss: 3.8785 (3.8054) weight_decay: 0.0500 (0.0500) time: 0.5980 data: 0.0004 max mem: 55573 Epoch: [62] [ 30/156] eta: 0:02:03 lr: 0.007560 min_lr: 0.007560 loss: 3.7243 (3.7936) weight_decay: 0.0500 (0.0500) time: 0.5941 data: 0.0004 max mem: 55573 Epoch: [62] [ 40/156] eta: 0:01:42 lr: 0.007559 min_lr: 0.007559 loss: 3.8300 (3.8102) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [62] [ 50/156] eta: 0:01:27 lr: 0.007558 min_lr: 0.007558 loss: 3.8520 (3.7419) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [62] [ 60/156] eta: 0:01:15 lr: 0.007556 min_lr: 0.007556 loss: 3.8488 (3.7564) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [62] [ 70/156] eta: 0:01:05 lr: 0.007555 min_lr: 0.007555 loss: 3.8602 (3.7422) weight_decay: 0.0500 (0.0500) time: 0.5883 data: 0.0004 max mem: 55573 Epoch: [62] [ 80/156] eta: 0:00:56 lr: 0.007554 min_lr: 0.007554 loss: 3.6109 (3.7188) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [62] [ 90/156] eta: 0:00:47 lr: 0.007552 min_lr: 0.007552 loss: 3.7314 (3.7192) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [62] [100/156] eta: 0:00:39 lr: 0.007551 min_lr: 0.007551 loss: 3.6121 (3.6913) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [62] [110/156] eta: 0:00:32 lr: 0.007550 min_lr: 0.007550 loss: 3.6121 (3.6853) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [62] [120/156] eta: 0:00:24 lr: 0.007548 min_lr: 0.007548 loss: 3.7891 (3.6880) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [62] [130/156] eta: 0:00:17 lr: 0.007547 min_lr: 0.007547 loss: 3.6984 (3.6842) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0010 max mem: 55573 Epoch: [62] [140/156] eta: 0:00:10 lr: 0.007546 min_lr: 0.007546 loss: 3.6942 (3.6773) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0008 max mem: 55573 Epoch: [62] [150/156] eta: 0:00:04 lr: 0.007544 min_lr: 0.007544 loss: 3.8642 (3.6778) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [62] [155/156] eta: 0:00:00 lr: 0.007544 min_lr: 0.007544 loss: 3.8642 (3.6850) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [62] Total time: 0:01:45 (0.6735 s / it) Averaged stats: lr: 0.007544 min_lr: 0.007544 loss: 3.8642 (3.6696) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.4726 (1.4726) acc1: 72.9167 (72.9167) acc5: 92.4479 (92.4479) time: 7.2107 data: 6.9732 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5584 (1.6441) acc1: 70.3125 (67.2640) acc5: 92.4479 (89.1520) time: 1.5886 data: 1.3947 max mem: 55573 Test: Total time: 0:00:08 (1.6336 s / it) * Acc@1 68.172 Acc@5 88.904 loss 1.644 Accuracy of the model on the 50000 test images: 68.2% Max accuracy: 69.23% Test: [0/5] eta: 0:00:34 loss: 6.9964 (6.9964) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9454 data: 6.7094 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.2121 (7.1699) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5343 data: 1.3420 max mem: 55573 Test: Total time: 0:00:07 (1.5563 s / it) * Acc@1 0.100 Acc@5 0.510 loss 7.103 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [63] [ 0/156] eta: 0:35:17 lr: 0.007543 min_lr: 0.007543 loss: 2.6704 (2.6704) weight_decay: 0.0500 (0.0500) time: 13.5762 data: 10.6600 max mem: 55573 Epoch: [63] [ 10/156] eta: 0:04:26 lr: 0.007542 min_lr: 0.007542 loss: 3.4398 (3.5437) weight_decay: 0.0500 (0.0500) time: 1.8262 data: 0.9695 max mem: 55573 Epoch: [63] [ 20/156] eta: 0:02:48 lr: 0.007541 min_lr: 0.007541 loss: 3.5610 (3.5539) weight_decay: 0.0500 (0.0500) time: 0.6210 data: 0.0005 max mem: 55573 Epoch: [63] [ 30/156] eta: 0:02:09 lr: 0.007539 min_lr: 0.007539 loss: 3.5610 (3.5082) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [63] [ 40/156] eta: 0:01:47 lr: 0.007538 min_lr: 0.007538 loss: 3.6408 (3.5547) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [63] [ 50/156] eta: 0:01:30 lr: 0.007537 min_lr: 0.007537 loss: 3.6408 (3.5628) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [63] [ 60/156] eta: 0:01:18 lr: 0.007535 min_lr: 0.007535 loss: 3.8152 (3.5758) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [63] [ 70/156] eta: 0:01:07 lr: 0.007534 min_lr: 0.007534 loss: 3.8073 (3.5782) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [63] [ 80/156] eta: 0:00:57 lr: 0.007533 min_lr: 0.007533 loss: 3.6082 (3.5827) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [63] [ 90/156] eta: 0:00:48 lr: 0.007531 min_lr: 0.007531 loss: 3.8815 (3.5979) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0005 max mem: 55573 Epoch: [63] [100/156] eta: 0:00:40 lr: 0.007530 min_lr: 0.007530 loss: 3.8225 (3.5946) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [63] [110/156] eta: 0:00:32 lr: 0.007529 min_lr: 0.007529 loss: 3.8133 (3.6033) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [63] [120/156] eta: 0:00:25 lr: 0.007527 min_lr: 0.007527 loss: 3.9610 (3.6280) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [63] [130/156] eta: 0:00:18 lr: 0.007526 min_lr: 0.007526 loss: 3.7798 (3.6096) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0010 max mem: 55573 Epoch: [63] [140/156] eta: 0:00:10 lr: 0.007525 min_lr: 0.007525 loss: 3.5145 (3.6145) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0009 max mem: 55573 Epoch: [63] [150/156] eta: 0:00:04 lr: 0.007523 min_lr: 0.007523 loss: 3.6314 (3.6135) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [63] [155/156] eta: 0:00:00 lr: 0.007523 min_lr: 0.007523 loss: 3.8886 (3.6223) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [63] Total time: 0:01:46 (0.6830 s / it) Averaged stats: lr: 0.007523 min_lr: 0.007523 loss: 3.8886 (3.6536) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.3702 (1.3702) acc1: 73.1771 (73.1771) acc5: 92.5781 (92.5781) time: 7.2350 data: 6.9976 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5157 (1.5342) acc1: 69.0104 (67.3280) acc5: 91.5365 (88.9600) time: 1.5935 data: 1.3996 max mem: 55573 Test: Total time: 0:00:08 (1.6381 s / it) * Acc@1 68.352 Acc@5 89.252 loss 1.526 Accuracy of the model on the 50000 test images: 68.4% Max accuracy: 69.23% Test: [0/5] eta: 0:00:36 loss: 7.0108 (7.0108) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.3432 data: 7.1073 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.2357 (7.1907) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.6141 data: 1.4216 max mem: 55573 Test: Total time: 0:00:08 (1.6363 s / it) * Acc@1 0.100 Acc@5 0.512 loss 7.121 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [64] [ 0/156] eta: 0:37:05 lr: 0.007522 min_lr: 0.007522 loss: 3.2708 (3.2708) weight_decay: 0.0500 (0.0500) time: 14.2668 data: 12.6012 max mem: 55573 Epoch: [64] [ 10/156] eta: 0:04:27 lr: 0.007521 min_lr: 0.007521 loss: 3.6016 (3.5085) weight_decay: 0.0500 (0.0500) time: 1.8344 data: 1.1460 max mem: 55573 Epoch: [64] [ 20/156] eta: 0:02:49 lr: 0.007520 min_lr: 0.007520 loss: 3.6194 (3.5699) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [64] [ 30/156] eta: 0:02:10 lr: 0.007518 min_lr: 0.007518 loss: 3.7709 (3.6633) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [64] [ 40/156] eta: 0:01:47 lr: 0.007517 min_lr: 0.007517 loss: 3.8483 (3.6895) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [64] [ 50/156] eta: 0:01:31 lr: 0.007516 min_lr: 0.007516 loss: 4.0223 (3.7458) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [64] [ 60/156] eta: 0:01:18 lr: 0.007514 min_lr: 0.007514 loss: 4.0301 (3.7446) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [64] [ 70/156] eta: 0:01:07 lr: 0.007513 min_lr: 0.007513 loss: 3.7700 (3.7338) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [64] [ 80/156] eta: 0:00:57 lr: 0.007511 min_lr: 0.007511 loss: 3.8461 (3.7574) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [64] [ 90/156] eta: 0:00:48 lr: 0.007510 min_lr: 0.007510 loss: 3.8792 (3.7560) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [64] [100/156] eta: 0:00:40 lr: 0.007509 min_lr: 0.007509 loss: 3.7884 (3.7636) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [64] [110/156] eta: 0:00:32 lr: 0.007507 min_lr: 0.007507 loss: 3.8517 (3.7545) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [64] [120/156] eta: 0:00:25 lr: 0.007506 min_lr: 0.007506 loss: 3.8094 (3.7342) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [64] [130/156] eta: 0:00:18 lr: 0.007505 min_lr: 0.007505 loss: 3.7600 (3.7419) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0010 max mem: 55573 Epoch: [64] [140/156] eta: 0:00:11 lr: 0.007503 min_lr: 0.007503 loss: 3.7813 (3.7423) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0008 max mem: 55573 Epoch: [64] [150/156] eta: 0:00:04 lr: 0.007502 min_lr: 0.007502 loss: 3.6702 (3.7330) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [64] [155/156] eta: 0:00:00 lr: 0.007501 min_lr: 0.007501 loss: 3.6702 (3.7290) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [64] Total time: 0:01:46 (0.6842 s / it) Averaged stats: lr: 0.007501 min_lr: 0.007501 loss: 3.6702 (3.6503) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:37 loss: 1.3487 (1.3487) acc1: 73.4375 (73.4375) acc5: 92.7083 (92.7083) time: 7.5023 data: 7.2649 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4762 (1.4940) acc1: 70.7031 (68.1280) acc5: 91.1458 (89.0880) time: 1.6471 data: 1.4531 max mem: 55573 Test: Total time: 0:00:08 (1.6990 s / it) * Acc@1 68.598 Acc@5 89.282 loss 1.515 Accuracy of the model on the 50000 test images: 68.6% Max accuracy: 69.23% Test: [0/5] eta: 0:00:35 loss: 7.0252 (7.0252) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.0609 data: 6.8247 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.2637 (7.2150) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5575 data: 1.3651 max mem: 55573 Test: Total time: 0:00:07 (1.5793 s / it) * Acc@1 0.100 Acc@5 0.512 loss 7.142 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [65] [ 0/156] eta: 0:34:08 lr: 0.007501 min_lr: 0.007501 loss: 4.0552 (4.0552) weight_decay: 0.0500 (0.0500) time: 13.1284 data: 8.6262 max mem: 55573 Epoch: [65] [ 10/156] eta: 0:04:17 lr: 0.007500 min_lr: 0.007500 loss: 3.4529 (3.5803) weight_decay: 0.0500 (0.0500) time: 1.7665 data: 0.7853 max mem: 55573 Epoch: [65] [ 20/156] eta: 0:02:44 lr: 0.007498 min_lr: 0.007498 loss: 3.4430 (3.5408) weight_decay: 0.0500 (0.0500) time: 0.6111 data: 0.0011 max mem: 55573 Epoch: [65] [ 30/156] eta: 0:02:07 lr: 0.007497 min_lr: 0.007497 loss: 3.7263 (3.6366) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0009 max mem: 55573 Epoch: [65] [ 40/156] eta: 0:01:45 lr: 0.007495 min_lr: 0.007495 loss: 3.9839 (3.7017) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0009 max mem: 55573 Epoch: [65] [ 50/156] eta: 0:01:29 lr: 0.007494 min_lr: 0.007494 loss: 3.8426 (3.6549) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0009 max mem: 55573 Epoch: [65] [ 60/156] eta: 0:01:17 lr: 0.007493 min_lr: 0.007493 loss: 3.7749 (3.6841) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0009 max mem: 55573 Epoch: [65] [ 70/156] eta: 0:01:06 lr: 0.007491 min_lr: 0.007491 loss: 3.9036 (3.6874) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0009 max mem: 55573 Epoch: [65] [ 80/156] eta: 0:00:57 lr: 0.007490 min_lr: 0.007490 loss: 3.6815 (3.6574) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0009 max mem: 55573 Epoch: [65] [ 90/156] eta: 0:00:48 lr: 0.007488 min_lr: 0.007488 loss: 3.4978 (3.6405) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0009 max mem: 55573 Epoch: [65] [100/156] eta: 0:00:40 lr: 0.007487 min_lr: 0.007487 loss: 3.4978 (3.6271) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0009 max mem: 55573 Epoch: [65] [110/156] eta: 0:00:32 lr: 0.007486 min_lr: 0.007486 loss: 3.9659 (3.6748) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0009 max mem: 55573 Epoch: [65] [120/156] eta: 0:00:25 lr: 0.007484 min_lr: 0.007484 loss: 4.0678 (3.6956) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0009 max mem: 55573 Epoch: [65] [130/156] eta: 0:00:17 lr: 0.007483 min_lr: 0.007483 loss: 3.9444 (3.7007) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0020 max mem: 55573 Epoch: [65] [140/156] eta: 0:00:10 lr: 0.007481 min_lr: 0.007481 loss: 3.8485 (3.6940) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0016 max mem: 55573 Epoch: [65] [150/156] eta: 0:00:04 lr: 0.007480 min_lr: 0.007480 loss: 3.8485 (3.6983) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [65] [155/156] eta: 0:00:00 lr: 0.007479 min_lr: 0.007479 loss: 3.8897 (3.6958) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [65] Total time: 0:01:45 (0.6791 s / it) Averaged stats: lr: 0.007479 min_lr: 0.007479 loss: 3.8897 (3.6613) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3896 (1.3896) acc1: 72.9167 (72.9167) acc5: 92.0573 (92.0573) time: 7.0447 data: 6.8068 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4506 (1.4701) acc1: 70.1823 (67.7440) acc5: 92.0573 (89.4080) time: 1.5557 data: 1.3616 max mem: 55573 Test: Total time: 0:00:07 (1.5974 s / it) * Acc@1 69.020 Acc@5 89.550 loss 1.461 Accuracy of the model on the 50000 test images: 69.0% Max accuracy: 69.23% Test: [0/5] eta: 0:00:35 loss: 7.0433 (7.0433) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.1731 data: 6.9370 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.2927 (7.2418) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5799 data: 1.3875 max mem: 55573 Test: Total time: 0:00:07 (1.5987 s / it) * Acc@1 0.100 Acc@5 0.514 loss 7.165 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [66] [ 0/156] eta: 0:37:50 lr: 0.007479 min_lr: 0.007479 loss: 3.4993 (3.4993) weight_decay: 0.0500 (0.0500) time: 14.5570 data: 10.3143 max mem: 55573 Epoch: [66] [ 10/156] eta: 0:04:34 lr: 0.007478 min_lr: 0.007478 loss: 4.0905 (3.9692) weight_decay: 0.0500 (0.0500) time: 1.8817 data: 0.9381 max mem: 55573 Epoch: [66] [ 20/156] eta: 0:02:52 lr: 0.007476 min_lr: 0.007476 loss: 3.7041 (3.6499) weight_decay: 0.0500 (0.0500) time: 0.6023 data: 0.0004 max mem: 55573 Epoch: [66] [ 30/156] eta: 0:02:12 lr: 0.007475 min_lr: 0.007475 loss: 3.4754 (3.6424) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [66] [ 40/156] eta: 0:01:48 lr: 0.007473 min_lr: 0.007473 loss: 3.6056 (3.6548) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [66] [ 50/156] eta: 0:01:32 lr: 0.007472 min_lr: 0.007472 loss: 3.7541 (3.6970) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [66] [ 60/156] eta: 0:01:19 lr: 0.007470 min_lr: 0.007470 loss: 3.7883 (3.7106) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [66] [ 70/156] eta: 0:01:08 lr: 0.007469 min_lr: 0.007469 loss: 3.7688 (3.7141) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [66] [ 80/156] eta: 0:00:58 lr: 0.007468 min_lr: 0.007468 loss: 3.6345 (3.6799) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [66] [ 90/156] eta: 0:00:49 lr: 0.007466 min_lr: 0.007466 loss: 3.2086 (3.6527) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [66] [100/156] eta: 0:00:40 lr: 0.007465 min_lr: 0.007465 loss: 3.7385 (3.6620) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [66] [110/156] eta: 0:00:33 lr: 0.007463 min_lr: 0.007463 loss: 3.6709 (3.6339) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [66] [120/156] eta: 0:00:25 lr: 0.007462 min_lr: 0.007462 loss: 3.3792 (3.6451) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [66] [130/156] eta: 0:00:18 lr: 0.007460 min_lr: 0.007460 loss: 3.7873 (3.6419) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0010 max mem: 55573 Epoch: [66] [140/156] eta: 0:00:11 lr: 0.007459 min_lr: 0.007459 loss: 3.5415 (3.6321) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [66] [150/156] eta: 0:00:04 lr: 0.007458 min_lr: 0.007458 loss: 3.5415 (3.6274) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [66] [155/156] eta: 0:00:00 lr: 0.007457 min_lr: 0.007457 loss: 3.6145 (3.6327) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [66] Total time: 0:01:47 (0.6876 s / it) Averaged stats: lr: 0.007457 min_lr: 0.007457 loss: 3.6145 (3.6554) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2986 (1.2986) acc1: 73.1771 (73.1771) acc5: 91.9271 (91.9271) time: 6.8549 data: 6.6175 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3871 (1.4808) acc1: 72.7865 (67.9040) acc5: 91.6667 (88.3840) time: 1.5175 data: 1.3236 max mem: 55573 Test: Total time: 0:00:07 (1.5615 s / it) * Acc@1 68.326 Acc@5 88.936 loss 1.462 Accuracy of the model on the 50000 test images: 68.3% Max accuracy: 69.23% Test: [0/5] eta: 0:00:36 loss: 7.0625 (7.0625) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2123 data: 6.9762 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.3189 (7.2669) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.5878 data: 1.3953 max mem: 55573 Test: Total time: 0:00:08 (1.6059 s / it) * Acc@1 0.100 Acc@5 0.516 loss 7.187 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [67] [ 0/156] eta: 0:39:05 lr: 0.007457 min_lr: 0.007457 loss: 3.4193 (3.4193) weight_decay: 0.0500 (0.0500) time: 15.0324 data: 10.1103 max mem: 55573 Epoch: [67] [ 10/156] eta: 0:04:37 lr: 0.007455 min_lr: 0.007455 loss: 3.4514 (3.4561) weight_decay: 0.0500 (0.0500) time: 1.9028 data: 0.9194 max mem: 55573 Epoch: [67] [ 20/156] eta: 0:02:53 lr: 0.007454 min_lr: 0.007454 loss: 3.8548 (3.6051) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0003 max mem: 55573 Epoch: [67] [ 30/156] eta: 0:02:13 lr: 0.007452 min_lr: 0.007452 loss: 3.8660 (3.6337) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0003 max mem: 55573 Epoch: [67] [ 40/156] eta: 0:01:49 lr: 0.007451 min_lr: 0.007451 loss: 3.8660 (3.6701) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [67] [ 50/156] eta: 0:01:32 lr: 0.007449 min_lr: 0.007449 loss: 3.8242 (3.6475) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [67] [ 60/156] eta: 0:01:19 lr: 0.007448 min_lr: 0.007448 loss: 3.8290 (3.6792) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [67] [ 70/156] eta: 0:01:08 lr: 0.007446 min_lr: 0.007446 loss: 3.7197 (3.6522) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [67] [ 80/156] eta: 0:00:58 lr: 0.007445 min_lr: 0.007445 loss: 3.4640 (3.6679) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [67] [ 90/156] eta: 0:00:49 lr: 0.007444 min_lr: 0.007444 loss: 3.6125 (3.6507) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [67] [100/156] eta: 0:00:41 lr: 0.007442 min_lr: 0.007442 loss: 3.8278 (3.6561) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [67] [110/156] eta: 0:00:33 lr: 0.007441 min_lr: 0.007441 loss: 3.3134 (3.6231) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [67] [120/156] eta: 0:00:25 lr: 0.007439 min_lr: 0.007439 loss: 3.2840 (3.6078) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [67] [130/156] eta: 0:00:18 lr: 0.007438 min_lr: 0.007438 loss: 3.7564 (3.6281) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0010 max mem: 55573 Epoch: [67] [140/156] eta: 0:00:11 lr: 0.007436 min_lr: 0.007436 loss: 3.6696 (3.6153) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0008 max mem: 55573 Epoch: [67] [150/156] eta: 0:00:04 lr: 0.007435 min_lr: 0.007435 loss: 3.5409 (3.6044) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [67] [155/156] eta: 0:00:00 lr: 0.007434 min_lr: 0.007434 loss: 3.5409 (3.6029) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [67] Total time: 0:01:47 (0.6884 s / it) Averaged stats: lr: 0.007434 min_lr: 0.007434 loss: 3.5409 (3.6533) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2583 (1.2583) acc1: 72.9167 (72.9167) acc5: 92.5781 (92.5781) time: 7.1167 data: 6.8793 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3720 (1.3659) acc1: 70.0521 (67.8720) acc5: 91.2760 (89.7280) time: 1.5701 data: 1.3760 max mem: 55573 Test: Total time: 0:00:08 (1.6109 s / it) * Acc@1 69.668 Acc@5 89.988 loss 1.356 Accuracy of the model on the 50000 test images: 69.7% Max accuracy: 69.67% Test: [0/5] eta: 0:00:34 loss: 7.0829 (7.0829) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9900 data: 6.7522 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.3412 (7.2931) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5440) time: 1.5434 data: 1.3506 max mem: 55573 Test: Total time: 0:00:07 (1.5600 s / it) * Acc@1 0.100 Acc@5 0.518 loss 7.211 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [68] [ 0/156] eta: 0:34:07 lr: 0.007434 min_lr: 0.007434 loss: 3.9372 (3.9372) weight_decay: 0.0500 (0.0500) time: 13.1248 data: 11.8134 max mem: 55573 Epoch: [68] [ 10/156] eta: 0:04:17 lr: 0.007432 min_lr: 0.007432 loss: 3.5956 (3.5638) weight_decay: 0.0500 (0.0500) time: 1.7654 data: 1.0743 max mem: 55573 Epoch: [68] [ 20/156] eta: 0:02:44 lr: 0.007431 min_lr: 0.007431 loss: 3.5734 (3.5503) weight_decay: 0.0500 (0.0500) time: 0.6101 data: 0.0004 max mem: 55573 Epoch: [68] [ 30/156] eta: 0:02:06 lr: 0.007429 min_lr: 0.007429 loss: 3.8187 (3.7066) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [68] [ 40/156] eta: 0:01:45 lr: 0.007428 min_lr: 0.007428 loss: 3.8716 (3.6616) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [68] [ 50/156] eta: 0:01:29 lr: 0.007426 min_lr: 0.007426 loss: 3.5019 (3.6326) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [68] [ 60/156] eta: 0:01:17 lr: 0.007425 min_lr: 0.007425 loss: 3.5904 (3.6390) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [68] [ 70/156] eta: 0:01:06 lr: 0.007423 min_lr: 0.007423 loss: 3.7320 (3.6400) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [68] [ 80/156] eta: 0:00:57 lr: 0.007422 min_lr: 0.007422 loss: 3.8265 (3.6770) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [68] [ 90/156] eta: 0:00:48 lr: 0.007421 min_lr: 0.007421 loss: 3.7936 (3.6753) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [68] [100/156] eta: 0:00:40 lr: 0.007419 min_lr: 0.007419 loss: 3.6263 (3.6810) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [68] [110/156] eta: 0:00:32 lr: 0.007418 min_lr: 0.007418 loss: 3.8203 (3.6671) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [68] [120/156] eta: 0:00:25 lr: 0.007416 min_lr: 0.007416 loss: 3.8180 (3.6582) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [68] [130/156] eta: 0:00:17 lr: 0.007415 min_lr: 0.007415 loss: 3.8515 (3.6748) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0010 max mem: 55573 Epoch: [68] [140/156] eta: 0:00:10 lr: 0.007413 min_lr: 0.007413 loss: 3.9212 (3.6764) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.0009 max mem: 55573 Epoch: [68] [150/156] eta: 0:00:04 lr: 0.007412 min_lr: 0.007412 loss: 3.6560 (3.6777) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [68] [155/156] eta: 0:00:00 lr: 0.007411 min_lr: 0.007411 loss: 3.5081 (3.6681) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [68] Total time: 0:01:46 (0.6796 s / it) Averaged stats: lr: 0.007411 min_lr: 0.007411 loss: 3.5081 (3.6588) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1781 (1.1781) acc1: 74.4792 (74.4792) acc5: 93.7500 (93.7500) time: 6.8750 data: 6.6375 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3242 (1.3553) acc1: 73.0469 (69.7600) acc5: 92.4479 (90.1120) time: 1.5216 data: 1.3276 max mem: 55573 Test: Total time: 0:00:07 (1.5688 s / it) * Acc@1 70.164 Acc@5 90.088 loss 1.345 Accuracy of the model on the 50000 test images: 70.2% Max accuracy: 70.16% Test: [0/5] eta: 0:00:36 loss: 7.1014 (7.1014) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2202 data: 6.9836 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.3647 (7.3195) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.5896 data: 1.3968 max mem: 55573 Test: Total time: 0:00:08 (1.6095 s / it) * Acc@1 0.100 Acc@5 0.518 loss 7.235 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [69] [ 0/156] eta: 0:30:56 lr: 0.007411 min_lr: 0.007411 loss: 3.3690 (3.3690) weight_decay: 0.0500 (0.0500) time: 11.9022 data: 10.5920 max mem: 55573 Epoch: [69] [ 10/156] eta: 0:04:18 lr: 0.007409 min_lr: 0.007409 loss: 3.7891 (3.7014) weight_decay: 0.0500 (0.0500) time: 1.7718 data: 0.9633 max mem: 55573 Epoch: [69] [ 20/156] eta: 0:02:44 lr: 0.007408 min_lr: 0.007408 loss: 3.7549 (3.5930) weight_decay: 0.0500 (0.0500) time: 0.6760 data: 0.0004 max mem: 55573 Epoch: [69] [ 30/156] eta: 0:02:07 lr: 0.007406 min_lr: 0.007406 loss: 3.7549 (3.6275) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [69] [ 40/156] eta: 0:01:45 lr: 0.007405 min_lr: 0.007405 loss: 3.7486 (3.5899) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [69] [ 50/156] eta: 0:01:29 lr: 0.007403 min_lr: 0.007403 loss: 3.6994 (3.6072) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [69] [ 60/156] eta: 0:01:17 lr: 0.007402 min_lr: 0.007402 loss: 3.6570 (3.5762) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [69] [ 70/156] eta: 0:01:06 lr: 0.007400 min_lr: 0.007400 loss: 3.6656 (3.5755) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [69] [ 80/156] eta: 0:00:57 lr: 0.007399 min_lr: 0.007399 loss: 3.8029 (3.6001) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [69] [ 90/156] eta: 0:00:48 lr: 0.007397 min_lr: 0.007397 loss: 3.9576 (3.6323) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [69] [100/156] eta: 0:00:40 lr: 0.007396 min_lr: 0.007396 loss: 3.8362 (3.6478) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [69] [110/156] eta: 0:00:32 lr: 0.007394 min_lr: 0.007394 loss: 3.7992 (3.6568) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [69] [120/156] eta: 0:00:25 lr: 0.007392 min_lr: 0.007392 loss: 3.8058 (3.6567) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [69] [130/156] eta: 0:00:17 lr: 0.007391 min_lr: 0.007391 loss: 3.7921 (3.6518) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [69] [140/156] eta: 0:00:10 lr: 0.007389 min_lr: 0.007389 loss: 3.7412 (3.6411) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [69] [150/156] eta: 0:00:04 lr: 0.007388 min_lr: 0.007388 loss: 3.7412 (3.6421) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [69] [155/156] eta: 0:00:00 lr: 0.007387 min_lr: 0.007387 loss: 3.7412 (3.6272) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [69] Total time: 0:01:45 (0.6792 s / it) Averaged stats: lr: 0.007387 min_lr: 0.007387 loss: 3.7412 (3.6601) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.2034 (1.2034) acc1: 74.7396 (74.7396) acc5: 93.2292 (93.2292) time: 7.2372 data: 6.9998 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3396 (1.4097) acc1: 72.1354 (68.8960) acc5: 91.2760 (89.8240) time: 1.5939 data: 1.4000 max mem: 55573 Test: Total time: 0:00:08 (1.6325 s / it) * Acc@1 69.762 Acc@5 89.714 loss 1.402 Accuracy of the model on the 50000 test images: 69.8% Max accuracy: 70.16% Test: [0/5] eta: 0:00:34 loss: 7.1240 (7.1240) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9874 data: 6.7512 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.3882 (7.3462) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5428 data: 1.3503 max mem: 55573 Test: Total time: 0:00:07 (1.5670 s / it) * Acc@1 0.100 Acc@5 0.510 loss 7.260 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [70] [ 0/156] eta: 0:35:43 lr: 0.007387 min_lr: 0.007387 loss: 3.1363 (3.1363) weight_decay: 0.0500 (0.0500) time: 13.7389 data: 8.8863 max mem: 55573 Epoch: [70] [ 10/156] eta: 0:04:20 lr: 0.007385 min_lr: 0.007385 loss: 3.3972 (3.4653) weight_decay: 0.0500 (0.0500) time: 1.7868 data: 0.8082 max mem: 55573 Epoch: [70] [ 20/156] eta: 0:02:45 lr: 0.007384 min_lr: 0.007384 loss: 3.4557 (3.5029) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [70] [ 30/156] eta: 0:02:07 lr: 0.007382 min_lr: 0.007382 loss: 3.7200 (3.6215) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0003 max mem: 55573 Epoch: [70] [ 40/156] eta: 0:01:45 lr: 0.007381 min_lr: 0.007381 loss: 3.7495 (3.6051) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [70] [ 50/156] eta: 0:01:29 lr: 0.007379 min_lr: 0.007379 loss: 3.7495 (3.6309) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [70] [ 60/156] eta: 0:01:17 lr: 0.007378 min_lr: 0.007378 loss: 3.8747 (3.6595) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [70] [ 70/156] eta: 0:01:06 lr: 0.007376 min_lr: 0.007376 loss: 3.6878 (3.6230) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [70] [ 80/156] eta: 0:00:57 lr: 0.007375 min_lr: 0.007375 loss: 3.6878 (3.6365) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [70] [ 90/156] eta: 0:00:48 lr: 0.007373 min_lr: 0.007373 loss: 3.7876 (3.6618) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [70] [100/156] eta: 0:00:40 lr: 0.007372 min_lr: 0.007372 loss: 3.8509 (3.6805) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [70] [110/156] eta: 0:00:32 lr: 0.007370 min_lr: 0.007370 loss: 3.7404 (3.6832) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [70] [120/156] eta: 0:00:25 lr: 0.007368 min_lr: 0.007368 loss: 3.7003 (3.6728) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [70] [130/156] eta: 0:00:17 lr: 0.007367 min_lr: 0.007367 loss: 3.5681 (3.6745) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0010 max mem: 55573 Epoch: [70] [140/156] eta: 0:00:10 lr: 0.007365 min_lr: 0.007365 loss: 3.7107 (3.6642) weight_decay: 0.0500 (0.0500) time: 0.5848 data: 0.0008 max mem: 55573 Epoch: [70] [150/156] eta: 0:00:04 lr: 0.007364 min_lr: 0.007364 loss: 3.8821 (3.6754) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [70] [155/156] eta: 0:00:00 lr: 0.007363 min_lr: 0.007363 loss: 3.8346 (3.6632) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [70] Total time: 0:01:46 (0.6807 s / it) Averaged stats: lr: 0.007363 min_lr: 0.007363 loss: 3.8346 (3.6258) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2324 (1.2324) acc1: 74.7396 (74.7396) acc5: 92.1875 (92.1875) time: 7.0085 data: 6.7714 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4449 (1.4308) acc1: 70.5729 (68.2560) acc5: 90.7552 (89.1840) time: 1.5482 data: 1.3544 max mem: 55573 Test: Total time: 0:00:07 (1.5890 s / it) * Acc@1 69.432 Acc@5 89.758 loss 1.428 Accuracy of the model on the 50000 test images: 69.4% Max accuracy: 70.16% Test: [0/5] eta: 0:00:37 loss: 7.1504 (7.1504) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.4552 data: 7.2192 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4115 (7.3761) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.6364 data: 1.4439 max mem: 55573 Test: Total time: 0:00:08 (1.6559 s / it) * Acc@1 0.100 Acc@5 0.508 loss 7.288 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [71] [ 0/156] eta: 0:34:39 lr: 0.007363 min_lr: 0.007363 loss: 3.9375 (3.9375) weight_decay: 0.0500 (0.0500) time: 13.3291 data: 11.2247 max mem: 55573 Epoch: [71] [ 10/156] eta: 0:04:22 lr: 0.007361 min_lr: 0.007361 loss: 3.7503 (3.5207) weight_decay: 0.0500 (0.0500) time: 1.8002 data: 1.0208 max mem: 55573 Epoch: [71] [ 20/156] eta: 0:02:46 lr: 0.007360 min_lr: 0.007360 loss: 3.7820 (3.6316) weight_decay: 0.0500 (0.0500) time: 0.6192 data: 0.0003 max mem: 55573 Epoch: [71] [ 30/156] eta: 0:02:08 lr: 0.007358 min_lr: 0.007358 loss: 3.7172 (3.5813) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0003 max mem: 55573 Epoch: [71] [ 40/156] eta: 0:01:46 lr: 0.007357 min_lr: 0.007357 loss: 3.6378 (3.6118) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [71] [ 50/156] eta: 0:01:30 lr: 0.007355 min_lr: 0.007355 loss: 3.7129 (3.6034) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [71] [ 60/156] eta: 0:01:17 lr: 0.007354 min_lr: 0.007354 loss: 3.7168 (3.6139) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [71] [ 70/156] eta: 0:01:06 lr: 0.007352 min_lr: 0.007352 loss: 3.8591 (3.6512) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [71] [ 80/156] eta: 0:00:57 lr: 0.007350 min_lr: 0.007350 loss: 3.6917 (3.6457) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [71] [ 90/156] eta: 0:00:48 lr: 0.007349 min_lr: 0.007349 loss: 3.5241 (3.6484) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [71] [100/156] eta: 0:00:40 lr: 0.007347 min_lr: 0.007347 loss: 3.6010 (3.6259) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [71] [110/156] eta: 0:00:32 lr: 0.007346 min_lr: 0.007346 loss: 3.6010 (3.6200) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [71] [120/156] eta: 0:00:25 lr: 0.007344 min_lr: 0.007344 loss: 3.6326 (3.6164) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [71] [130/156] eta: 0:00:17 lr: 0.007342 min_lr: 0.007342 loss: 3.7745 (3.6081) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0010 max mem: 55573 Epoch: [71] [140/156] eta: 0:00:10 lr: 0.007341 min_lr: 0.007341 loss: 3.7977 (3.6190) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0009 max mem: 55573 Epoch: [71] [150/156] eta: 0:00:04 lr: 0.007339 min_lr: 0.007339 loss: 3.6708 (3.6113) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [71] [155/156] eta: 0:00:00 lr: 0.007339 min_lr: 0.007339 loss: 3.8617 (3.6232) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [71] Total time: 0:01:46 (0.6812 s / it) Averaged stats: lr: 0.007339 min_lr: 0.007339 loss: 3.8617 (3.6101) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.5641 (1.5641) acc1: 72.9167 (72.9167) acc5: 91.4062 (91.4062) time: 7.0367 data: 6.7962 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5641 (1.6381) acc1: 71.0938 (68.0640) acc5: 91.4062 (88.9920) time: 1.5527 data: 1.3593 max mem: 55573 Test: Total time: 0:00:08 (1.6014 s / it) * Acc@1 69.288 Acc@5 89.550 loss 1.610 Accuracy of the model on the 50000 test images: 69.3% Max accuracy: 70.16% Test: [0/5] eta: 0:00:36 loss: 7.1715 (7.1715) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2719 data: 7.0360 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4349 (7.4035) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.4800) time: 1.5997 data: 1.4073 max mem: 55573 Test: Total time: 0:00:08 (1.6176 s / it) * Acc@1 0.100 Acc@5 0.514 loss 7.314 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [72] [ 0/156] eta: 0:37:53 lr: 0.007338 min_lr: 0.007338 loss: 3.9429 (3.9429) weight_decay: 0.0500 (0.0500) time: 14.5738 data: 10.0827 max mem: 55573 Epoch: [72] [ 10/156] eta: 0:04:32 lr: 0.007337 min_lr: 0.007337 loss: 3.6694 (3.4890) weight_decay: 0.0500 (0.0500) time: 1.8632 data: 0.9170 max mem: 55573 Epoch: [72] [ 20/156] eta: 0:02:50 lr: 0.007335 min_lr: 0.007335 loss: 3.4105 (3.4939) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [72] [ 30/156] eta: 0:02:11 lr: 0.007334 min_lr: 0.007334 loss: 3.5831 (3.4913) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [72] [ 40/156] eta: 0:01:48 lr: 0.007332 min_lr: 0.007332 loss: 3.6773 (3.5289) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0005 max mem: 55573 Epoch: [72] [ 50/156] eta: 0:01:31 lr: 0.007330 min_lr: 0.007330 loss: 3.6025 (3.4939) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0004 max mem: 55573 Epoch: [72] [ 60/156] eta: 0:01:18 lr: 0.007329 min_lr: 0.007329 loss: 3.6729 (3.5421) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [72] [ 70/156] eta: 0:01:07 lr: 0.007327 min_lr: 0.007327 loss: 3.8317 (3.5678) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [72] [ 80/156] eta: 0:00:58 lr: 0.007326 min_lr: 0.007326 loss: 3.8079 (3.5795) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [72] [ 90/156] eta: 0:00:49 lr: 0.007324 min_lr: 0.007324 loss: 3.7313 (3.5868) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [72] [100/156] eta: 0:00:40 lr: 0.007322 min_lr: 0.007322 loss: 3.6661 (3.5895) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [72] [110/156] eta: 0:00:32 lr: 0.007321 min_lr: 0.007321 loss: 3.8003 (3.6130) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [72] [120/156] eta: 0:00:25 lr: 0.007319 min_lr: 0.007319 loss: 3.8706 (3.6334) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [72] [130/156] eta: 0:00:18 lr: 0.007318 min_lr: 0.007318 loss: 3.8425 (3.6292) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0021 max mem: 55573 Epoch: [72] [140/156] eta: 0:00:11 lr: 0.007316 min_lr: 0.007316 loss: 3.8425 (3.6357) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0020 max mem: 55573 Epoch: [72] [150/156] eta: 0:00:04 lr: 0.007314 min_lr: 0.007314 loss: 3.8448 (3.6357) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [72] [155/156] eta: 0:00:00 lr: 0.007314 min_lr: 0.007314 loss: 3.6331 (3.6335) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [72] Total time: 0:01:47 (0.6861 s / it) Averaged stats: lr: 0.007314 min_lr: 0.007314 loss: 3.6331 (3.6244) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2095 (1.2095) acc1: 76.0417 (76.0417) acc5: 93.6198 (93.6198) time: 7.1556 data: 6.9182 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2989 (1.3752) acc1: 72.2656 (70.4640) acc5: 93.0990 (90.6240) time: 1.5776 data: 1.3837 max mem: 55573 Test: Total time: 0:00:08 (1.6237 s / it) * Acc@1 70.368 Acc@5 90.410 loss 1.376 Accuracy of the model on the 50000 test images: 70.4% Max accuracy: 70.37% Test: [0/5] eta: 0:00:34 loss: 7.1984 (7.1984) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8916 data: 6.6555 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4578 (7.4325) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.5236 data: 1.3312 max mem: 55573 Test: Total time: 0:00:07 (1.5441 s / it) * Acc@1 0.114 Acc@5 0.522 loss 7.342 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.11% Epoch: [73] [ 0/156] eta: 0:32:14 lr: 0.007313 min_lr: 0.007313 loss: 3.7661 (3.7661) weight_decay: 0.0500 (0.0500) time: 12.4000 data: 11.1616 max mem: 55573 Epoch: [73] [ 10/156] eta: 0:04:06 lr: 0.007312 min_lr: 0.007312 loss: 3.7661 (3.6184) weight_decay: 0.0500 (0.0500) time: 1.6896 data: 1.0151 max mem: 55573 Epoch: [73] [ 20/156] eta: 0:02:38 lr: 0.007310 min_lr: 0.007310 loss: 3.6157 (3.5254) weight_decay: 0.0500 (0.0500) time: 0.6053 data: 0.0004 max mem: 55573 Epoch: [73] [ 30/156] eta: 0:02:03 lr: 0.007309 min_lr: 0.007309 loss: 3.8138 (3.6614) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [73] [ 40/156] eta: 0:01:42 lr: 0.007307 min_lr: 0.007307 loss: 3.8138 (3.6243) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [73] [ 50/156] eta: 0:01:27 lr: 0.007305 min_lr: 0.007305 loss: 3.7385 (3.6529) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [73] [ 60/156] eta: 0:01:15 lr: 0.007304 min_lr: 0.007304 loss: 3.7737 (3.6417) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [73] [ 70/156] eta: 0:01:05 lr: 0.007302 min_lr: 0.007302 loss: 3.8307 (3.6504) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [73] [ 80/156] eta: 0:00:56 lr: 0.007300 min_lr: 0.007300 loss: 3.8164 (3.6637) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0004 max mem: 55573 Epoch: [73] [ 90/156] eta: 0:00:47 lr: 0.007299 min_lr: 0.007299 loss: 3.7101 (3.6507) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [73] [100/156] eta: 0:00:39 lr: 0.007297 min_lr: 0.007297 loss: 3.9606 (3.6804) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [73] [110/156] eta: 0:00:32 lr: 0.007296 min_lr: 0.007296 loss: 3.9916 (3.7047) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [73] [120/156] eta: 0:00:24 lr: 0.007294 min_lr: 0.007294 loss: 3.9677 (3.7217) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [73] [130/156] eta: 0:00:17 lr: 0.007292 min_lr: 0.007292 loss: 3.8582 (3.7159) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [73] [140/156] eta: 0:00:10 lr: 0.007291 min_lr: 0.007291 loss: 3.5597 (3.6933) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0009 max mem: 55573 Epoch: [73] [150/156] eta: 0:00:04 lr: 0.007289 min_lr: 0.007289 loss: 3.5597 (3.6868) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [73] [155/156] eta: 0:00:00 lr: 0.007288 min_lr: 0.007288 loss: 3.5597 (3.6838) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [73] Total time: 0:01:44 (0.6730 s / it) Averaged stats: lr: 0.007288 min_lr: 0.007288 loss: 3.5597 (3.6356) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2743 (1.2743) acc1: 75.2604 (75.2604) acc5: 94.0104 (94.0104) time: 6.8011 data: 6.5636 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3073 (1.3654) acc1: 73.1771 (70.6240) acc5: 93.8802 (90.8480) time: 1.5067 data: 1.3128 max mem: 55573 Test: Total time: 0:00:07 (1.5455 s / it) * Acc@1 70.196 Acc@5 90.460 loss 1.376 Accuracy of the model on the 50000 test images: 70.2% Max accuracy: 70.37% Test: [0/5] eta: 0:00:35 loss: 7.2225 (7.2225) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.1891 data: 6.9531 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4778 (7.4600) acc1: 0.0000 (0.1600) acc5: 0.0000 (0.5440) time: 1.5831 data: 1.3907 max mem: 55573 Test: Total time: 0:00:07 (1.5993 s / it) * Acc@1 0.110 Acc@5 0.524 loss 7.369 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [74] [ 0/156] eta: 0:38:19 lr: 0.007288 min_lr: 0.007288 loss: 4.3539 (4.3539) weight_decay: 0.0500 (0.0500) time: 14.7412 data: 9.8608 max mem: 55573 Epoch: [74] [ 10/156] eta: 0:04:37 lr: 0.007286 min_lr: 0.007286 loss: 3.9037 (3.8512) weight_decay: 0.0500 (0.0500) time: 1.9011 data: 0.8969 max mem: 55573 Epoch: [74] [ 20/156] eta: 0:02:53 lr: 0.007285 min_lr: 0.007285 loss: 3.6509 (3.6553) weight_decay: 0.0500 (0.0500) time: 0.6041 data: 0.0004 max mem: 55573 Epoch: [74] [ 30/156] eta: 0:02:13 lr: 0.007283 min_lr: 0.007283 loss: 3.4550 (3.6216) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [74] [ 40/156] eta: 0:01:49 lr: 0.007282 min_lr: 0.007282 loss: 3.4550 (3.5391) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [74] [ 50/156] eta: 0:01:32 lr: 0.007280 min_lr: 0.007280 loss: 3.5199 (3.5373) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [74] [ 60/156] eta: 0:01:19 lr: 0.007278 min_lr: 0.007278 loss: 3.6701 (3.5641) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [74] [ 70/156] eta: 0:01:08 lr: 0.007277 min_lr: 0.007277 loss: 3.7158 (3.5687) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [74] [ 80/156] eta: 0:00:58 lr: 0.007275 min_lr: 0.007275 loss: 3.4557 (3.5432) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [74] [ 90/156] eta: 0:00:49 lr: 0.007273 min_lr: 0.007273 loss: 3.4374 (3.5478) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0005 max mem: 55573 Epoch: [74] [100/156] eta: 0:00:41 lr: 0.007272 min_lr: 0.007272 loss: 3.6476 (3.5615) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [74] [110/156] eta: 0:00:33 lr: 0.007270 min_lr: 0.007270 loss: 3.8412 (3.5546) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [74] [120/156] eta: 0:00:25 lr: 0.007268 min_lr: 0.007268 loss: 3.8706 (3.5879) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [74] [130/156] eta: 0:00:18 lr: 0.007267 min_lr: 0.007267 loss: 3.6029 (3.5750) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0010 max mem: 55573 Epoch: [74] [140/156] eta: 0:00:11 lr: 0.007265 min_lr: 0.007265 loss: 3.4204 (3.5747) weight_decay: 0.0500 (0.0500) time: 0.5876 data: 0.0009 max mem: 55573 Epoch: [74] [150/156] eta: 0:00:04 lr: 0.007263 min_lr: 0.007263 loss: 3.7218 (3.5857) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [74] [155/156] eta: 0:00:00 lr: 0.007263 min_lr: 0.007263 loss: 3.6792 (3.5907) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [74] Total time: 0:01:47 (0.6890 s / it) Averaged stats: lr: 0.007263 min_lr: 0.007263 loss: 3.6792 (3.6126) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2545 (1.2545) acc1: 73.4375 (73.4375) acc5: 92.3177 (92.3177) time: 7.1047 data: 6.8673 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3370 (1.4115) acc1: 73.1771 (68.2560) acc5: 92.3177 (89.9840) time: 1.5676 data: 1.3736 max mem: 55573 Test: Total time: 0:00:08 (1.6092 s / it) * Acc@1 69.432 Acc@5 89.966 loss 1.399 Accuracy of the model on the 50000 test images: 69.4% Max accuracy: 70.37% Test: [0/5] eta: 0:00:33 loss: 7.2387 (7.2387) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.6109 data: 6.3748 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4961 (7.4810) acc1: 0.0000 (0.1280) acc5: 0.0000 (0.5760) time: 1.4675 data: 1.2751 max mem: 55573 Test: Total time: 0:00:07 (1.4862 s / it) * Acc@1 0.114 Acc@5 0.534 loss 7.391 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [75] [ 0/156] eta: 0:36:45 lr: 0.007262 min_lr: 0.007262 loss: 3.9622 (3.9622) weight_decay: 0.0500 (0.0500) time: 14.1351 data: 10.3581 max mem: 55573 Epoch: [75] [ 10/156] eta: 0:04:30 lr: 0.007261 min_lr: 0.007261 loss: 3.8577 (3.6639) weight_decay: 0.0500 (0.0500) time: 1.8517 data: 0.9419 max mem: 55573 Epoch: [75] [ 20/156] eta: 0:02:50 lr: 0.007259 min_lr: 0.007259 loss: 3.4439 (3.4239) weight_decay: 0.0500 (0.0500) time: 0.6071 data: 0.0003 max mem: 55573 Epoch: [75] [ 30/156] eta: 0:02:10 lr: 0.007257 min_lr: 0.007257 loss: 3.2928 (3.4288) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [75] [ 40/156] eta: 0:01:47 lr: 0.007256 min_lr: 0.007256 loss: 3.5321 (3.4534) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [75] [ 50/156] eta: 0:01:31 lr: 0.007254 min_lr: 0.007254 loss: 3.6503 (3.4501) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [75] [ 60/156] eta: 0:01:18 lr: 0.007252 min_lr: 0.007252 loss: 3.6503 (3.4709) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [75] [ 70/156] eta: 0:01:07 lr: 0.007251 min_lr: 0.007251 loss: 3.6665 (3.4936) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [75] [ 80/156] eta: 0:00:57 lr: 0.007249 min_lr: 0.007249 loss: 3.7852 (3.5256) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [75] [ 90/156] eta: 0:00:49 lr: 0.007247 min_lr: 0.007247 loss: 3.6147 (3.5284) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [75] [100/156] eta: 0:00:40 lr: 0.007246 min_lr: 0.007246 loss: 3.5612 (3.5237) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0005 max mem: 55573 Epoch: [75] [110/156] eta: 0:00:32 lr: 0.007244 min_lr: 0.007244 loss: 3.5958 (3.5556) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0005 max mem: 55573 Epoch: [75] [120/156] eta: 0:00:25 lr: 0.007242 min_lr: 0.007242 loss: 3.9157 (3.5614) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [75] [130/156] eta: 0:00:18 lr: 0.007241 min_lr: 0.007241 loss: 3.6506 (3.5600) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0011 max mem: 55573 Epoch: [75] [140/156] eta: 0:00:11 lr: 0.007239 min_lr: 0.007239 loss: 3.5971 (3.5629) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0009 max mem: 55573 Epoch: [75] [150/156] eta: 0:00:04 lr: 0.007237 min_lr: 0.007237 loss: 3.8299 (3.5812) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [75] [155/156] eta: 0:00:00 lr: 0.007236 min_lr: 0.007236 loss: 3.8299 (3.5788) weight_decay: 0.0500 (0.0500) time: 0.5811 data: 0.0001 max mem: 55573 Epoch: [75] Total time: 0:01:46 (0.6844 s / it) Averaged stats: lr: 0.007236 min_lr: 0.007236 loss: 3.8299 (3.6207) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.2819 (1.2819) acc1: 75.2604 (75.2604) acc5: 93.0990 (93.0990) time: 6.7442 data: 6.5067 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4129 (1.3855) acc1: 71.7448 (70.0480) acc5: 91.6667 (90.1440) time: 1.4943 data: 1.3015 max mem: 55573 Test: Total time: 0:00:07 (1.5123 s / it) * Acc@1 70.660 Acc@5 90.268 loss 1.390 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 70.66% Test: [0/5] eta: 0:00:33 loss: 7.2599 (7.2599) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7269 data: 6.4908 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5158 (7.5049) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.4909 data: 1.2983 max mem: 55573 Test: Total time: 0:00:07 (1.5085 s / it) * Acc@1 0.114 Acc@5 0.546 loss 7.415 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [76] [ 0/156] eta: 0:37:33 lr: 0.007236 min_lr: 0.007236 loss: 2.8566 (2.8566) weight_decay: 0.0500 (0.0500) time: 14.4473 data: 8.4237 max mem: 55573 Epoch: [76] [ 10/156] eta: 0:04:32 lr: 0.007234 min_lr: 0.007234 loss: 3.4879 (3.3928) weight_decay: 0.0500 (0.0500) time: 1.8685 data: 0.7662 max mem: 55573 Epoch: [76] [ 20/156] eta: 0:02:51 lr: 0.007233 min_lr: 0.007233 loss: 3.6725 (3.5007) weight_decay: 0.0500 (0.0500) time: 0.6001 data: 0.0004 max mem: 55573 Epoch: [76] [ 30/156] eta: 0:02:11 lr: 0.007231 min_lr: 0.007231 loss: 3.7497 (3.4912) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [76] [ 40/156] eta: 0:01:48 lr: 0.007229 min_lr: 0.007229 loss: 3.8281 (3.5341) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [76] [ 50/156] eta: 0:01:31 lr: 0.007228 min_lr: 0.007228 loss: 3.7870 (3.5726) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [76] [ 60/156] eta: 0:01:18 lr: 0.007226 min_lr: 0.007226 loss: 3.7561 (3.5729) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [76] [ 70/156] eta: 0:01:07 lr: 0.007224 min_lr: 0.007224 loss: 3.7091 (3.5945) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [76] [ 80/156] eta: 0:00:58 lr: 0.007223 min_lr: 0.007223 loss: 3.4858 (3.5650) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [76] [ 90/156] eta: 0:00:49 lr: 0.007221 min_lr: 0.007221 loss: 3.7231 (3.5790) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [76] [100/156] eta: 0:00:40 lr: 0.007219 min_lr: 0.007219 loss: 3.7884 (3.5766) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [76] [110/156] eta: 0:00:33 lr: 0.007217 min_lr: 0.007217 loss: 3.8052 (3.5894) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [76] [120/156] eta: 0:00:25 lr: 0.007216 min_lr: 0.007216 loss: 3.8052 (3.5956) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [76] [130/156] eta: 0:00:18 lr: 0.007214 min_lr: 0.007214 loss: 3.5218 (3.5821) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0010 max mem: 55573 Epoch: [76] [140/156] eta: 0:00:11 lr: 0.007212 min_lr: 0.007212 loss: 3.5218 (3.5830) weight_decay: 0.0500 (0.0500) time: 0.5879 data: 0.0008 max mem: 55573 Epoch: [76] [150/156] eta: 0:00:04 lr: 0.007211 min_lr: 0.007211 loss: 3.8880 (3.5962) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0001 max mem: 55573 Epoch: [76] [155/156] eta: 0:00:00 lr: 0.007210 min_lr: 0.007210 loss: 3.8493 (3.5976) weight_decay: 0.0500 (0.0500) time: 0.5848 data: 0.0001 max mem: 55573 Epoch: [76] Total time: 0:01:47 (0.6864 s / it) Averaged stats: lr: 0.007210 min_lr: 0.007210 loss: 3.8493 (3.6160) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2860 (1.2860) acc1: 73.6979 (73.6979) acc5: 93.7500 (93.7500) time: 7.0630 data: 6.8256 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3774 (1.4006) acc1: 72.9167 (69.7920) acc5: 90.5660 (90.7520) time: 1.5590 data: 1.3652 max mem: 55573 Test: Total time: 0:00:08 (1.6009 s / it) * Acc@1 70.708 Acc@5 90.448 loss 1.380 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 70.71% Test: [0/5] eta: 0:00:34 loss: 7.2767 (7.2767) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8262 data: 6.5897 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5326 (7.5257) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.5107 data: 1.3180 max mem: 55573 Test: Total time: 0:00:07 (1.5281 s / it) * Acc@1 0.106 Acc@5 0.556 loss 7.438 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [77] [ 0/156] eta: 0:37:05 lr: 0.007210 min_lr: 0.007210 loss: 3.3552 (3.3552) weight_decay: 0.0500 (0.0500) time: 14.2688 data: 13.6136 max mem: 55573 Epoch: [77] [ 10/156] eta: 0:04:29 lr: 0.007208 min_lr: 0.007208 loss: 3.4367 (3.4936) weight_decay: 0.0500 (0.0500) time: 1.8425 data: 1.2380 max mem: 55573 Epoch: [77] [ 20/156] eta: 0:02:49 lr: 0.007206 min_lr: 0.007206 loss: 3.6484 (3.5472) weight_decay: 0.0500 (0.0500) time: 0.5945 data: 0.0004 max mem: 55573 Epoch: [77] [ 30/156] eta: 0:02:10 lr: 0.007204 min_lr: 0.007204 loss: 3.7936 (3.6539) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [77] [ 40/156] eta: 0:01:47 lr: 0.007203 min_lr: 0.007203 loss: 3.8917 (3.6245) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [77] [ 50/156] eta: 0:01:31 lr: 0.007201 min_lr: 0.007201 loss: 3.6951 (3.6533) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0004 max mem: 55573 Epoch: [77] [ 60/156] eta: 0:01:18 lr: 0.007199 min_lr: 0.007199 loss: 3.7767 (3.6663) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [77] [ 70/156] eta: 0:01:07 lr: 0.007198 min_lr: 0.007198 loss: 3.7298 (3.6443) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [77] [ 80/156] eta: 0:00:57 lr: 0.007196 min_lr: 0.007196 loss: 3.4840 (3.6240) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [77] [ 90/156] eta: 0:00:48 lr: 0.007194 min_lr: 0.007194 loss: 3.3895 (3.5856) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [77] [100/156] eta: 0:00:40 lr: 0.007192 min_lr: 0.007192 loss: 3.4160 (3.5816) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [77] [110/156] eta: 0:00:32 lr: 0.007191 min_lr: 0.007191 loss: 3.5165 (3.5800) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [77] [120/156] eta: 0:00:25 lr: 0.007189 min_lr: 0.007189 loss: 3.5165 (3.5741) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [77] [130/156] eta: 0:00:18 lr: 0.007187 min_lr: 0.007187 loss: 3.4085 (3.5577) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0012 max mem: 55573 Epoch: [77] [140/156] eta: 0:00:11 lr: 0.007185 min_lr: 0.007185 loss: 3.3815 (3.5594) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0010 max mem: 55573 Epoch: [77] [150/156] eta: 0:00:04 lr: 0.007184 min_lr: 0.007184 loss: 3.6415 (3.5638) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [77] [155/156] eta: 0:00:00 lr: 0.007183 min_lr: 0.007183 loss: 3.7290 (3.5637) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [77] Total time: 0:01:46 (0.6844 s / it) Averaged stats: lr: 0.007183 min_lr: 0.007183 loss: 3.7290 (3.5776) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2571 (1.2571) acc1: 75.6510 (75.6510) acc5: 93.6198 (93.6198) time: 7.1803 data: 6.9424 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3528 (1.3939) acc1: 71.2240 (69.3440) acc5: 91.7969 (89.9200) time: 1.5825 data: 1.3886 max mem: 55573 Test: Total time: 0:00:08 (1.6222 s / it) * Acc@1 70.202 Acc@5 90.226 loss 1.372 Accuracy of the model on the 50000 test images: 70.2% Max accuracy: 70.71% Test: [0/5] eta: 0:00:34 loss: 7.2941 (7.2941) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8884 data: 6.6524 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5474 (7.5449) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.5231 data: 1.3306 max mem: 55573 Test: Total time: 0:00:07 (1.5476 s / it) * Acc@1 0.106 Acc@5 0.546 loss 7.459 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [78] [ 0/156] eta: 0:35:24 lr: 0.007183 min_lr: 0.007183 loss: 3.9012 (3.9012) weight_decay: 0.0500 (0.0500) time: 13.6159 data: 10.2409 max mem: 55573 Epoch: [78] [ 10/156] eta: 0:04:20 lr: 0.007181 min_lr: 0.007181 loss: 3.2908 (3.4612) weight_decay: 0.0500 (0.0500) time: 1.7851 data: 0.9314 max mem: 55573 Epoch: [78] [ 20/156] eta: 0:02:45 lr: 0.007179 min_lr: 0.007179 loss: 3.2866 (3.3928) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.0004 max mem: 55573 Epoch: [78] [ 30/156] eta: 0:02:07 lr: 0.007177 min_lr: 0.007177 loss: 3.2866 (3.4010) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [78] [ 40/156] eta: 0:01:45 lr: 0.007176 min_lr: 0.007176 loss: 3.3999 (3.4047) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [78] [ 50/156] eta: 0:01:29 lr: 0.007174 min_lr: 0.007174 loss: 3.6096 (3.4392) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [78] [ 60/156] eta: 0:01:17 lr: 0.007172 min_lr: 0.007172 loss: 3.4942 (3.4568) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [78] [ 70/156] eta: 0:01:06 lr: 0.007170 min_lr: 0.007170 loss: 3.6304 (3.4810) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [78] [ 80/156] eta: 0:00:57 lr: 0.007169 min_lr: 0.007169 loss: 3.7949 (3.4958) weight_decay: 0.0500 (0.0500) time: 0.5883 data: 0.0004 max mem: 55573 Epoch: [78] [ 90/156] eta: 0:00:48 lr: 0.007167 min_lr: 0.007167 loss: 3.6249 (3.5184) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [78] [100/156] eta: 0:00:40 lr: 0.007165 min_lr: 0.007165 loss: 3.8634 (3.5410) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [78] [110/156] eta: 0:00:32 lr: 0.007163 min_lr: 0.007163 loss: 3.5090 (3.5220) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [78] [120/156] eta: 0:00:25 lr: 0.007162 min_lr: 0.007162 loss: 3.5090 (3.5466) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [78] [130/156] eta: 0:00:17 lr: 0.007160 min_lr: 0.007160 loss: 3.7808 (3.5575) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0010 max mem: 55573 Epoch: [78] [140/156] eta: 0:00:10 lr: 0.007158 min_lr: 0.007158 loss: 3.6996 (3.5641) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0008 max mem: 55573 Epoch: [78] [150/156] eta: 0:00:04 lr: 0.007156 min_lr: 0.007156 loss: 3.6847 (3.5683) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [78] [155/156] eta: 0:00:00 lr: 0.007155 min_lr: 0.007155 loss: 3.6847 (3.5779) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [78] Total time: 0:01:46 (0.6799 s / it) Averaged stats: lr: 0.007155 min_lr: 0.007155 loss: 3.6847 (3.6017) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:37 loss: 1.2768 (1.2768) acc1: 73.9583 (73.9583) acc5: 92.4479 (92.4479) time: 7.4706 data: 7.2325 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3649 (1.3977) acc1: 71.6146 (69.4080) acc5: 90.8854 (88.8960) time: 1.6408 data: 1.4466 max mem: 55573 Test: Total time: 0:00:08 (1.6847 s / it) * Acc@1 69.934 Acc@5 89.812 loss 1.390 Accuracy of the model on the 50000 test images: 69.9% Max accuracy: 70.71% Test: [0/5] eta: 0:00:34 loss: 7.3109 (7.3109) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.8926 data: 6.6561 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5596 (7.5619) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.5636 data: 1.3709 max mem: 55573 Test: Total time: 0:00:07 (1.5805 s / it) * Acc@1 0.106 Acc@5 0.540 loss 7.479 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [79] [ 0/156] eta: 0:33:49 lr: 0.007155 min_lr: 0.007155 loss: 3.8152 (3.8152) weight_decay: 0.0500 (0.0500) time: 13.0109 data: 8.5316 max mem: 55573 Epoch: [79] [ 10/156] eta: 0:04:14 lr: 0.007153 min_lr: 0.007153 loss: 3.7850 (3.3369) weight_decay: 0.0500 (0.0500) time: 1.7462 data: 0.7761 max mem: 55573 Epoch: [79] [ 20/156] eta: 0:02:42 lr: 0.007152 min_lr: 0.007152 loss: 3.8502 (3.5834) weight_decay: 0.0500 (0.0500) time: 0.6049 data: 0.0005 max mem: 55573 Epoch: [79] [ 30/156] eta: 0:02:06 lr: 0.007150 min_lr: 0.007150 loss: 3.6924 (3.5269) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [79] [ 40/156] eta: 0:01:44 lr: 0.007148 min_lr: 0.007148 loss: 3.3924 (3.4867) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [79] [ 50/156] eta: 0:01:28 lr: 0.007146 min_lr: 0.007146 loss: 3.2189 (3.4399) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [79] [ 60/156] eta: 0:01:16 lr: 0.007145 min_lr: 0.007145 loss: 3.5730 (3.4738) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [79] [ 70/156] eta: 0:01:06 lr: 0.007143 min_lr: 0.007143 loss: 3.7176 (3.4730) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0004 max mem: 55573 Epoch: [79] [ 80/156] eta: 0:00:56 lr: 0.007141 min_lr: 0.007141 loss: 3.5448 (3.4746) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [79] [ 90/156] eta: 0:00:48 lr: 0.007139 min_lr: 0.007139 loss: 3.6530 (3.4950) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [79] [100/156] eta: 0:00:40 lr: 0.007137 min_lr: 0.007137 loss: 3.6530 (3.4901) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [79] [110/156] eta: 0:00:32 lr: 0.007136 min_lr: 0.007136 loss: 3.2663 (3.4809) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [79] [120/156] eta: 0:00:25 lr: 0.007134 min_lr: 0.007134 loss: 3.4419 (3.4769) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [79] [130/156] eta: 0:00:17 lr: 0.007132 min_lr: 0.007132 loss: 3.4998 (3.4692) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [79] [140/156] eta: 0:00:10 lr: 0.007130 min_lr: 0.007130 loss: 3.8141 (3.4926) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [79] [150/156] eta: 0:00:04 lr: 0.007129 min_lr: 0.007129 loss: 3.7091 (3.4979) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [79] [155/156] eta: 0:00:00 lr: 0.007128 min_lr: 0.007128 loss: 3.6486 (3.5034) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [79] Total time: 0:01:45 (0.6775 s / it) Averaged stats: lr: 0.007128 min_lr: 0.007128 loss: 3.6486 (3.5952) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3429 (1.3429) acc1: 75.2604 (75.2604) acc5: 92.9688 (92.9688) time: 7.0685 data: 6.8312 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4741 (1.5447) acc1: 72.7865 (69.0240) acc5: 92.3177 (89.4400) time: 1.5600 data: 1.3663 max mem: 55573 Test: Total time: 0:00:08 (1.6088 s / it) * Acc@1 69.834 Acc@5 89.776 loss 1.563 Accuracy of the model on the 50000 test images: 69.8% Max accuracy: 70.71% Test: [0/5] eta: 0:00:35 loss: 7.3227 (7.3227) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.1454 data: 6.9087 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5677 (7.5742) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.5746 data: 1.3818 max mem: 55573 Test: Total time: 0:00:07 (1.5977 s / it) * Acc@1 0.110 Acc@5 0.548 loss 7.494 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [80] [ 0/156] eta: 0:35:06 lr: 0.007127 min_lr: 0.007127 loss: 3.3273 (3.3273) weight_decay: 0.0500 (0.0500) time: 13.5045 data: 12.0208 max mem: 55573 Epoch: [80] [ 10/156] eta: 0:04:19 lr: 0.007126 min_lr: 0.007126 loss: 3.5671 (3.4123) weight_decay: 0.0500 (0.0500) time: 1.7786 data: 1.0932 max mem: 55573 Epoch: [80] [ 20/156] eta: 0:02:44 lr: 0.007124 min_lr: 0.007124 loss: 3.5671 (3.5006) weight_decay: 0.0500 (0.0500) time: 0.5976 data: 0.0004 max mem: 55573 Epoch: [80] [ 30/156] eta: 0:02:07 lr: 0.007122 min_lr: 0.007122 loss: 3.7568 (3.5738) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [80] [ 40/156] eta: 0:01:45 lr: 0.007120 min_lr: 0.007120 loss: 3.7568 (3.6239) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [80] [ 50/156] eta: 0:01:29 lr: 0.007118 min_lr: 0.007118 loss: 3.7337 (3.6306) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [80] [ 60/156] eta: 0:01:17 lr: 0.007117 min_lr: 0.007117 loss: 3.6221 (3.6014) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [80] [ 70/156] eta: 0:01:06 lr: 0.007115 min_lr: 0.007115 loss: 3.5315 (3.6011) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [80] [ 80/156] eta: 0:00:57 lr: 0.007113 min_lr: 0.007113 loss: 3.6112 (3.5872) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [80] [ 90/156] eta: 0:00:48 lr: 0.007111 min_lr: 0.007111 loss: 3.5790 (3.5726) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [80] [100/156] eta: 0:00:40 lr: 0.007109 min_lr: 0.007109 loss: 3.6918 (3.5952) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [80] [110/156] eta: 0:00:32 lr: 0.007108 min_lr: 0.007108 loss: 3.9052 (3.6124) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [80] [120/156] eta: 0:00:25 lr: 0.007106 min_lr: 0.007106 loss: 3.6389 (3.6039) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [80] [130/156] eta: 0:00:17 lr: 0.007104 min_lr: 0.007104 loss: 3.4302 (3.5941) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [80] [140/156] eta: 0:00:10 lr: 0.007102 min_lr: 0.007102 loss: 3.8058 (3.5969) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0009 max mem: 55573 Epoch: [80] [150/156] eta: 0:00:04 lr: 0.007100 min_lr: 0.007100 loss: 3.8058 (3.5993) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [80] [155/156] eta: 0:00:00 lr: 0.007099 min_lr: 0.007099 loss: 3.7216 (3.5952) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [80] Total time: 0:01:45 (0.6792 s / it) Averaged stats: lr: 0.007099 min_lr: 0.007099 loss: 3.7216 (3.5985) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2343 (1.2343) acc1: 73.5677 (73.5677) acc5: 92.5781 (92.5781) time: 7.1086 data: 6.8711 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3173 (1.4009) acc1: 73.5677 (69.3120) acc5: 92.5781 (89.7920) time: 1.5681 data: 1.3743 max mem: 55573 Test: Total time: 0:00:08 (1.6098 s / it) * Acc@1 70.106 Acc@5 89.996 loss 1.387 Accuracy of the model on the 50000 test images: 70.1% Max accuracy: 70.71% Test: [0/5] eta: 0:00:36 loss: 7.3325 (7.3325) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.3303 data: 7.0939 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5746 (7.5852) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.6115 data: 1.4189 max mem: 55573 Test: Total time: 0:00:08 (1.6438 s / it) * Acc@1 0.112 Acc@5 0.554 loss 7.508 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [81] [ 0/156] eta: 0:39:25 lr: 0.007099 min_lr: 0.007099 loss: 2.8104 (2.8104) weight_decay: 0.0500 (0.0500) time: 15.1660 data: 7.8838 max mem: 55573 Epoch: [81] [ 10/156] eta: 0:04:40 lr: 0.007097 min_lr: 0.007097 loss: 3.8179 (3.5600) weight_decay: 0.0500 (0.0500) time: 1.9183 data: 0.7172 max mem: 55573 Epoch: [81] [ 20/156] eta: 0:02:54 lr: 0.007096 min_lr: 0.007096 loss: 3.8179 (3.6088) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [81] [ 30/156] eta: 0:02:13 lr: 0.007094 min_lr: 0.007094 loss: 3.6875 (3.6065) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [81] [ 40/156] eta: 0:01:49 lr: 0.007092 min_lr: 0.007092 loss: 3.7121 (3.6479) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [81] [ 50/156] eta: 0:01:32 lr: 0.007090 min_lr: 0.007090 loss: 3.7782 (3.6497) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [81] [ 60/156] eta: 0:01:19 lr: 0.007088 min_lr: 0.007088 loss: 3.7880 (3.6488) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [81] [ 70/156] eta: 0:01:08 lr: 0.007086 min_lr: 0.007086 loss: 3.8081 (3.6526) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [81] [ 80/156] eta: 0:00:58 lr: 0.007085 min_lr: 0.007085 loss: 3.7272 (3.6563) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [81] [ 90/156] eta: 0:00:49 lr: 0.007083 min_lr: 0.007083 loss: 3.8729 (3.6679) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [81] [100/156] eta: 0:00:41 lr: 0.007081 min_lr: 0.007081 loss: 3.8519 (3.6537) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [81] [110/156] eta: 0:00:33 lr: 0.007079 min_lr: 0.007079 loss: 3.4186 (3.6345) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [81] [120/156] eta: 0:00:25 lr: 0.007077 min_lr: 0.007077 loss: 3.6972 (3.6351) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [81] [130/156] eta: 0:00:18 lr: 0.007075 min_lr: 0.007075 loss: 3.7662 (3.6429) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0026 max mem: 55573 Epoch: [81] [140/156] eta: 0:00:11 lr: 0.007074 min_lr: 0.007074 loss: 3.8053 (3.6346) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0025 max mem: 55573 Epoch: [81] [150/156] eta: 0:00:04 lr: 0.007072 min_lr: 0.007072 loss: 3.3396 (3.6231) weight_decay: 0.0500 (0.0500) time: 0.5831 data: 0.0001 max mem: 55573 Epoch: [81] [155/156] eta: 0:00:00 lr: 0.007071 min_lr: 0.007071 loss: 3.3396 (3.6184) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [81] Total time: 0:01:47 (0.6899 s / it) Averaged stats: lr: 0.007071 min_lr: 0.007071 loss: 3.3396 (3.5925) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2966 (1.2966) acc1: 76.3021 (76.3021) acc5: 93.3594 (93.3594) time: 6.9791 data: 6.7422 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3943 (1.4441) acc1: 71.3542 (69.4720) acc5: 92.1875 (90.2080) time: 1.5422 data: 1.3485 max mem: 55573 Test: Total time: 0:00:07 (1.5838 s / it) * Acc@1 70.110 Acc@5 90.102 loss 1.435 Accuracy of the model on the 50000 test images: 70.1% Max accuracy: 70.71% Test: [0/5] eta: 0:00:35 loss: 7.3442 (7.3442) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.1132 data: 6.8773 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5828 (7.5981) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5760) time: 1.5679 data: 1.3755 max mem: 55573 Test: Total time: 0:00:07 (1.5864 s / it) * Acc@1 0.106 Acc@5 0.558 loss 7.524 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [82] [ 0/156] eta: 0:39:56 lr: 0.007071 min_lr: 0.007071 loss: 2.9143 (2.9143) weight_decay: 0.0500 (0.0500) time: 15.3601 data: 12.1954 max mem: 55573 Epoch: [82] [ 10/156] eta: 0:04:41 lr: 0.007069 min_lr: 0.007069 loss: 3.0157 (3.3353) weight_decay: 0.0500 (0.0500) time: 1.9307 data: 1.1090 max mem: 55573 Epoch: [82] [ 20/156] eta: 0:02:55 lr: 0.007067 min_lr: 0.007067 loss: 3.6908 (3.5114) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [82] [ 30/156] eta: 0:02:14 lr: 0.007065 min_lr: 0.007065 loss: 3.8300 (3.5313) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [82] [ 40/156] eta: 0:01:50 lr: 0.007063 min_lr: 0.007063 loss: 3.7589 (3.5957) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [82] [ 50/156] eta: 0:01:33 lr: 0.007061 min_lr: 0.007061 loss: 3.7031 (3.5564) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [82] [ 60/156] eta: 0:01:19 lr: 0.007060 min_lr: 0.007060 loss: 3.6339 (3.5997) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [82] [ 70/156] eta: 0:01:08 lr: 0.007058 min_lr: 0.007058 loss: 3.4668 (3.5536) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [82] [ 80/156] eta: 0:00:58 lr: 0.007056 min_lr: 0.007056 loss: 3.5331 (3.5845) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [82] [ 90/156] eta: 0:00:49 lr: 0.007054 min_lr: 0.007054 loss: 3.7231 (3.5876) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [82] [100/156] eta: 0:00:41 lr: 0.007052 min_lr: 0.007052 loss: 3.8312 (3.5913) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [82] [110/156] eta: 0:00:33 lr: 0.007050 min_lr: 0.007050 loss: 3.7936 (3.6040) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [82] [120/156] eta: 0:00:25 lr: 0.007048 min_lr: 0.007048 loss: 3.7936 (3.6148) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [82] [130/156] eta: 0:00:18 lr: 0.007047 min_lr: 0.007047 loss: 3.8286 (3.6306) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0010 max mem: 55573 Epoch: [82] [140/156] eta: 0:00:11 lr: 0.007045 min_lr: 0.007045 loss: 3.5965 (3.6115) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0009 max mem: 55573 Epoch: [82] [150/156] eta: 0:00:04 lr: 0.007043 min_lr: 0.007043 loss: 3.4767 (3.6161) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [82] [155/156] eta: 0:00:00 lr: 0.007042 min_lr: 0.007042 loss: 3.5698 (3.6154) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [82] Total time: 0:01:47 (0.6904 s / it) Averaged stats: lr: 0.007042 min_lr: 0.007042 loss: 3.5698 (3.5701) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3095 (1.3095) acc1: 75.6510 (75.6510) acc5: 93.3594 (93.3594) time: 6.8435 data: 6.6062 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3980 (1.4860) acc1: 72.9167 (69.6640) acc5: 90.5660 (89.3440) time: 1.5140 data: 1.3213 max mem: 55573 Test: Total time: 0:00:07 (1.5565 s / it) * Acc@1 70.558 Acc@5 90.048 loss 1.475 Accuracy of the model on the 50000 test images: 70.6% Max accuracy: 70.71% Test: [0/5] eta: 0:00:37 loss: 7.3551 (7.3551) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.4588 data: 7.2223 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5864 (7.6086) acc1: 0.0000 (0.1280) acc5: 0.0000 (0.5120) time: 1.6374 data: 1.4446 max mem: 55573 Test: Total time: 0:00:08 (1.6543 s / it) * Acc@1 0.108 Acc@5 0.560 loss 7.538 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [83] [ 0/156] eta: 0:35:04 lr: 0.007042 min_lr: 0.007042 loss: 2.4782 (2.4782) weight_decay: 0.0500 (0.0500) time: 13.4921 data: 10.3025 max mem: 55573 Epoch: [83] [ 10/156] eta: 0:04:21 lr: 0.007040 min_lr: 0.007040 loss: 3.6784 (3.5960) weight_decay: 0.0500 (0.0500) time: 1.7884 data: 0.9371 max mem: 55573 Epoch: [83] [ 20/156] eta: 0:02:45 lr: 0.007038 min_lr: 0.007038 loss: 3.7208 (3.6701) weight_decay: 0.0500 (0.0500) time: 0.6041 data: 0.0005 max mem: 55573 Epoch: [83] [ 30/156] eta: 0:02:07 lr: 0.007036 min_lr: 0.007036 loss: 3.7342 (3.6105) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [83] [ 40/156] eta: 0:01:45 lr: 0.007034 min_lr: 0.007034 loss: 3.3953 (3.5200) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [83] [ 50/156] eta: 0:01:29 lr: 0.007032 min_lr: 0.007032 loss: 3.3452 (3.5051) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [83] [ 60/156] eta: 0:01:17 lr: 0.007031 min_lr: 0.007031 loss: 3.6523 (3.5273) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [83] [ 70/156] eta: 0:01:06 lr: 0.007029 min_lr: 0.007029 loss: 3.8700 (3.5759) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [83] [ 80/156] eta: 0:00:57 lr: 0.007027 min_lr: 0.007027 loss: 3.8541 (3.6059) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [83] [ 90/156] eta: 0:00:48 lr: 0.007025 min_lr: 0.007025 loss: 3.7959 (3.6003) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [83] [100/156] eta: 0:00:40 lr: 0.007023 min_lr: 0.007023 loss: 3.8262 (3.6193) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [83] [110/156] eta: 0:00:32 lr: 0.007021 min_lr: 0.007021 loss: 3.8033 (3.6150) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [83] [120/156] eta: 0:00:25 lr: 0.007019 min_lr: 0.007019 loss: 3.5518 (3.5993) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [83] [130/156] eta: 0:00:17 lr: 0.007017 min_lr: 0.007017 loss: 3.3693 (3.5752) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0010 max mem: 55573 Epoch: [83] [140/156] eta: 0:00:10 lr: 0.007015 min_lr: 0.007015 loss: 3.7645 (3.5842) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0009 max mem: 55573 Epoch: [83] [150/156] eta: 0:00:04 lr: 0.007014 min_lr: 0.007014 loss: 3.7934 (3.5840) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [83] [155/156] eta: 0:00:00 lr: 0.007013 min_lr: 0.007013 loss: 3.8201 (3.6029) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [83] Total time: 0:01:46 (0.6797 s / it) Averaged stats: lr: 0.007013 min_lr: 0.007013 loss: 3.8201 (3.5816) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3522 (1.3522) acc1: 75.5208 (75.5208) acc5: 93.3594 (93.3594) time: 7.0476 data: 6.8102 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5040 (1.5640) acc1: 69.7917 (68.7360) acc5: 91.1458 (89.3120) time: 1.5559 data: 1.3621 max mem: 55573 Test: Total time: 0:00:08 (1.6007 s / it) * Acc@1 70.186 Acc@5 89.770 loss 1.548 Accuracy of the model on the 50000 test images: 70.2% Max accuracy: 70.71% Test: [0/5] eta: 0:00:36 loss: 7.3696 (7.3696) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2607 data: 7.0247 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5918 (7.6214) acc1: 0.0000 (0.1280) acc5: 0.0000 (0.5120) time: 1.5975 data: 1.4050 max mem: 55573 Test: Total time: 0:00:08 (1.6160 s / it) * Acc@1 0.112 Acc@5 0.554 loss 7.555 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [84] [ 0/156] eta: 0:35:36 lr: 0.007012 min_lr: 0.007012 loss: 4.2180 (4.2180) weight_decay: 0.0500 (0.0500) time: 13.6953 data: 9.7590 max mem: 55573 Epoch: [84] [ 10/156] eta: 0:04:21 lr: 0.007011 min_lr: 0.007011 loss: 3.4610 (3.4506) weight_decay: 0.0500 (0.0500) time: 1.7890 data: 0.8876 max mem: 55573 Epoch: [84] [ 20/156] eta: 0:02:45 lr: 0.007009 min_lr: 0.007009 loss: 3.4610 (3.5247) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0004 max mem: 55573 Epoch: [84] [ 30/156] eta: 0:02:07 lr: 0.007007 min_lr: 0.007007 loss: 3.6279 (3.5073) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [84] [ 40/156] eta: 0:01:45 lr: 0.007005 min_lr: 0.007005 loss: 3.6994 (3.5386) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [84] [ 50/156] eta: 0:01:29 lr: 0.007003 min_lr: 0.007003 loss: 3.8320 (3.6145) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [84] [ 60/156] eta: 0:01:17 lr: 0.007001 min_lr: 0.007001 loss: 3.6832 (3.5677) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [84] [ 70/156] eta: 0:01:06 lr: 0.006999 min_lr: 0.006999 loss: 3.4123 (3.5602) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [84] [ 80/156] eta: 0:00:57 lr: 0.006997 min_lr: 0.006997 loss: 3.7485 (3.5840) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [84] [ 90/156] eta: 0:00:48 lr: 0.006995 min_lr: 0.006995 loss: 3.5918 (3.5630) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [84] [100/156] eta: 0:00:40 lr: 0.006993 min_lr: 0.006993 loss: 3.4900 (3.5590) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [84] [110/156] eta: 0:00:32 lr: 0.006991 min_lr: 0.006991 loss: 3.7950 (3.6001) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [84] [120/156] eta: 0:00:25 lr: 0.006990 min_lr: 0.006990 loss: 3.7787 (3.6057) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [84] [130/156] eta: 0:00:17 lr: 0.006988 min_lr: 0.006988 loss: 3.6092 (3.6053) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0010 max mem: 55573 Epoch: [84] [140/156] eta: 0:00:10 lr: 0.006986 min_lr: 0.006986 loss: 3.3904 (3.5785) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0009 max mem: 55573 Epoch: [84] [150/156] eta: 0:00:04 lr: 0.006984 min_lr: 0.006984 loss: 3.4917 (3.5805) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [84] [155/156] eta: 0:00:00 lr: 0.006983 min_lr: 0.006983 loss: 3.4917 (3.5750) weight_decay: 0.0500 (0.0500) time: 0.5831 data: 0.0001 max mem: 55573 Epoch: [84] Total time: 0:01:46 (0.6808 s / it) Averaged stats: lr: 0.006983 min_lr: 0.006983 loss: 3.4917 (3.5881) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2621 (1.2621) acc1: 72.7865 (72.7865) acc5: 92.8385 (92.8385) time: 7.0437 data: 6.8033 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2990 (1.3808) acc1: 70.5729 (68.2560) acc5: 90.5660 (89.4720) time: 1.5551 data: 1.3608 max mem: 55573 Test: Total time: 0:00:07 (1.5889 s / it) * Acc@1 69.326 Acc@5 89.684 loss 1.347 Accuracy of the model on the 50000 test images: 69.3% Max accuracy: 70.71% Test: [0/5] eta: 0:00:37 loss: 7.3839 (7.3839) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.4826 data: 7.2465 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5968 (7.6310) acc1: 0.0000 (0.0960) acc5: 0.0000 (0.5120) time: 1.6419 data: 1.4494 max mem: 55573 Test: Total time: 0:00:08 (1.6638 s / it) * Acc@1 0.120 Acc@5 0.552 loss 7.567 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.12% Epoch: [85] [ 0/156] eta: 0:35:22 lr: 0.006983 min_lr: 0.006983 loss: 3.4805 (3.4805) weight_decay: 0.0500 (0.0500) time: 13.6059 data: 13.0191 max mem: 55573 Epoch: [85] [ 10/156] eta: 0:04:19 lr: 0.006981 min_lr: 0.006981 loss: 3.3862 (3.4345) weight_decay: 0.0500 (0.0500) time: 1.7799 data: 1.1841 max mem: 55573 Epoch: [85] [ 20/156] eta: 0:02:45 lr: 0.006979 min_lr: 0.006979 loss: 3.3989 (3.4581) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0006 max mem: 55573 Epoch: [85] [ 30/156] eta: 0:02:07 lr: 0.006977 min_lr: 0.006977 loss: 3.6327 (3.5587) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0007 max mem: 55573 Epoch: [85] [ 40/156] eta: 0:01:45 lr: 0.006975 min_lr: 0.006975 loss: 3.6717 (3.5392) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0007 max mem: 55573 Epoch: [85] [ 50/156] eta: 0:01:29 lr: 0.006973 min_lr: 0.006973 loss: 3.4184 (3.4487) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0007 max mem: 55573 Epoch: [85] [ 60/156] eta: 0:01:17 lr: 0.006971 min_lr: 0.006971 loss: 3.1355 (3.4553) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [85] [ 70/156] eta: 0:01:06 lr: 0.006969 min_lr: 0.006969 loss: 3.6218 (3.4762) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0006 max mem: 55573 Epoch: [85] [ 80/156] eta: 0:00:57 lr: 0.006967 min_lr: 0.006967 loss: 3.6218 (3.4882) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0006 max mem: 55573 Epoch: [85] [ 90/156] eta: 0:00:48 lr: 0.006965 min_lr: 0.006965 loss: 3.6279 (3.5031) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [85] [100/156] eta: 0:00:40 lr: 0.006963 min_lr: 0.006963 loss: 3.6279 (3.5078) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0006 max mem: 55573 Epoch: [85] [110/156] eta: 0:00:32 lr: 0.006962 min_lr: 0.006962 loss: 3.8645 (3.5342) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0007 max mem: 55573 Epoch: [85] [120/156] eta: 0:00:25 lr: 0.006960 min_lr: 0.006960 loss: 3.8981 (3.5638) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0006 max mem: 55573 Epoch: [85] [130/156] eta: 0:00:17 lr: 0.006958 min_lr: 0.006958 loss: 3.6927 (3.5602) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0012 max mem: 55573 Epoch: [85] [140/156] eta: 0:00:10 lr: 0.006956 min_lr: 0.006956 loss: 3.2311 (3.5311) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0010 max mem: 55573 Epoch: [85] [150/156] eta: 0:00:04 lr: 0.006954 min_lr: 0.006954 loss: 2.9387 (3.5027) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0002 max mem: 55573 Epoch: [85] [155/156] eta: 0:00:00 lr: 0.006953 min_lr: 0.006953 loss: 3.2304 (3.5141) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0002 max mem: 55573 Epoch: [85] Total time: 0:01:45 (0.6793 s / it) Averaged stats: lr: 0.006953 min_lr: 0.006953 loss: 3.2304 (3.5832) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3300 (1.3300) acc1: 74.3490 (74.3490) acc5: 91.9271 (91.9271) time: 6.9800 data: 6.7419 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3694 (1.4101) acc1: 72.1354 (70.2720) acc5: 91.9271 (89.9840) time: 1.5427 data: 1.3485 max mem: 55573 Test: Total time: 0:00:07 (1.5863 s / it) * Acc@1 70.622 Acc@5 90.216 loss 1.402 Accuracy of the model on the 50000 test images: 70.6% Max accuracy: 70.71% Test: [0/5] eta: 0:00:37 loss: 7.3914 (7.3914) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.4768 data: 7.2408 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5949 (7.6333) acc1: 0.0000 (0.1280) acc5: 0.0000 (0.5120) time: 1.6407 data: 1.4483 max mem: 55573 Test: Total time: 0:00:08 (1.6608 s / it) * Acc@1 0.120 Acc@5 0.556 loss 7.572 Accuracy of the model EMA on 50000 test images: 0.1% Epoch: [86] [ 0/156] eta: 0:34:21 lr: 0.006953 min_lr: 0.006953 loss: 3.7692 (3.7692) weight_decay: 0.0500 (0.0500) time: 13.2171 data: 9.4016 max mem: 55573 Epoch: [86] [ 10/156] eta: 0:04:17 lr: 0.006951 min_lr: 0.006951 loss: 3.5025 (3.4605) weight_decay: 0.0500 (0.0500) time: 1.7634 data: 0.8554 max mem: 55573 Epoch: [86] [ 20/156] eta: 0:02:43 lr: 0.006949 min_lr: 0.006949 loss: 3.6880 (3.6719) weight_decay: 0.0500 (0.0500) time: 0.6038 data: 0.0007 max mem: 55573 Epoch: [86] [ 30/156] eta: 0:02:06 lr: 0.006947 min_lr: 0.006947 loss: 3.8077 (3.7063) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0006 max mem: 55573 Epoch: [86] [ 40/156] eta: 0:01:44 lr: 0.006945 min_lr: 0.006945 loss: 3.5712 (3.6630) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0007 max mem: 55573 Epoch: [86] [ 50/156] eta: 0:01:29 lr: 0.006943 min_lr: 0.006943 loss: 3.8027 (3.6763) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0007 max mem: 55573 Epoch: [86] [ 60/156] eta: 0:01:17 lr: 0.006941 min_lr: 0.006941 loss: 3.7202 (3.6410) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0006 max mem: 55573 Epoch: [86] [ 70/156] eta: 0:01:06 lr: 0.006939 min_lr: 0.006939 loss: 3.3164 (3.6057) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0008 max mem: 55573 Epoch: [86] [ 80/156] eta: 0:00:57 lr: 0.006937 min_lr: 0.006937 loss: 3.6195 (3.6217) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0008 max mem: 55573 Epoch: [86] [ 90/156] eta: 0:00:48 lr: 0.006935 min_lr: 0.006935 loss: 3.7546 (3.6137) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0007 max mem: 55573 Epoch: [86] [100/156] eta: 0:00:40 lr: 0.006933 min_lr: 0.006933 loss: 3.6067 (3.6081) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0006 max mem: 55573 Epoch: [86] [110/156] eta: 0:00:32 lr: 0.006931 min_lr: 0.006931 loss: 3.2387 (3.5563) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0007 max mem: 55573 Epoch: [86] [120/156] eta: 0:00:25 lr: 0.006929 min_lr: 0.006929 loss: 3.4942 (3.5679) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0007 max mem: 55573 Epoch: [86] [130/156] eta: 0:00:17 lr: 0.006927 min_lr: 0.006927 loss: 3.5239 (3.5545) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0016 max mem: 55573 Epoch: [86] [140/156] eta: 0:00:10 lr: 0.006925 min_lr: 0.006925 loss: 3.5774 (3.5588) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0013 max mem: 55573 Epoch: [86] [150/156] eta: 0:00:04 lr: 0.006923 min_lr: 0.006923 loss: 3.5788 (3.5498) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0002 max mem: 55573 Epoch: [86] [155/156] eta: 0:00:00 lr: 0.006922 min_lr: 0.006922 loss: 3.4837 (3.5408) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0002 max mem: 55573 Epoch: [86] Total time: 0:01:45 (0.6787 s / it) Averaged stats: lr: 0.006922 min_lr: 0.006922 loss: 3.4837 (3.5565) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2330 (1.2330) acc1: 77.0833 (77.0833) acc5: 93.2292 (93.2292) time: 7.1223 data: 6.8850 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3083 (1.3568) acc1: 75.5208 (70.7840) acc5: 92.9688 (90.0800) time: 1.5708 data: 1.3771 max mem: 55573 Test: Total time: 0:00:08 (1.6152 s / it) * Acc@1 70.666 Acc@5 90.204 loss 1.359 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 70.71% Test: [0/5] eta: 0:00:33 loss: 7.4025 (7.4025) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7088 data: 6.4727 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5963 (7.6394) acc1: 0.0000 (0.1280) acc5: 0.0000 (0.5120) time: 1.5031 data: 1.3105 max mem: 55573 Test: Total time: 0:00:07 (1.5193 s / it) * Acc@1 0.132 Acc@5 0.554 loss 7.581 Accuracy of the model EMA on 50000 test images: 0.1% Max EMA accuracy: 0.13% Epoch: [87] [ 0/156] eta: 0:32:37 lr: 0.006922 min_lr: 0.006922 loss: 4.1604 (4.1604) weight_decay: 0.0500 (0.0500) time: 12.5472 data: 7.6935 max mem: 55573 Epoch: [87] [ 10/156] eta: 0:04:05 lr: 0.006920 min_lr: 0.006920 loss: 3.7208 (3.6360) weight_decay: 0.0500 (0.0500) time: 1.6797 data: 0.7000 max mem: 55573 Epoch: [87] [ 20/156] eta: 0:02:37 lr: 0.006918 min_lr: 0.006918 loss: 3.5311 (3.4910) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0007 max mem: 55573 Epoch: [87] [ 30/156] eta: 0:02:03 lr: 0.006916 min_lr: 0.006916 loss: 3.3975 (3.4988) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [87] [ 40/156] eta: 0:01:42 lr: 0.006914 min_lr: 0.006914 loss: 3.5704 (3.4728) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0007 max mem: 55573 Epoch: [87] [ 50/156] eta: 0:01:27 lr: 0.006912 min_lr: 0.006912 loss: 3.6792 (3.5052) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0007 max mem: 55573 Epoch: [87] [ 60/156] eta: 0:01:15 lr: 0.006910 min_lr: 0.006910 loss: 3.8001 (3.5273) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0006 max mem: 55573 Epoch: [87] [ 70/156] eta: 0:01:05 lr: 0.006908 min_lr: 0.006908 loss: 3.3892 (3.4774) weight_decay: 0.0500 (0.0500) time: 0.5958 data: 0.0006 max mem: 55573 Epoch: [87] [ 80/156] eta: 0:00:56 lr: 0.006906 min_lr: 0.006906 loss: 3.0165 (3.4477) weight_decay: 0.0500 (0.0500) time: 0.5972 data: 0.0006 max mem: 55573 Epoch: [87] [ 90/156] eta: 0:00:47 lr: 0.006904 min_lr: 0.006904 loss: 3.3302 (3.4450) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0006 max mem: 55573 Epoch: [87] [100/156] eta: 0:00:39 lr: 0.006902 min_lr: 0.006902 loss: 3.7197 (3.4669) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0006 max mem: 55573 Epoch: [87] [110/156] eta: 0:00:32 lr: 0.006900 min_lr: 0.006900 loss: 3.8585 (3.4923) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0007 max mem: 55573 Epoch: [87] [120/156] eta: 0:00:24 lr: 0.006898 min_lr: 0.006898 loss: 3.7808 (3.4834) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0006 max mem: 55573 Epoch: [87] [130/156] eta: 0:00:17 lr: 0.006896 min_lr: 0.006896 loss: 3.4484 (3.4757) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0015 max mem: 55573 Epoch: [87] [140/156] eta: 0:00:10 lr: 0.006894 min_lr: 0.006894 loss: 3.6752 (3.4951) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0013 max mem: 55573 Epoch: [87] [150/156] eta: 0:00:04 lr: 0.006893 min_lr: 0.006893 loss: 3.7252 (3.5065) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0002 max mem: 55573 Epoch: [87] [155/156] eta: 0:00:00 lr: 0.006892 min_lr: 0.006892 loss: 3.7204 (3.5011) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0002 max mem: 55573 Epoch: [87] Total time: 0:01:45 (0.6738 s / it) Averaged stats: lr: 0.006892 min_lr: 0.006892 loss: 3.7204 (3.5477) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1353 (1.1353) acc1: 76.9531 (76.9531) acc5: 94.1406 (94.1406) time: 7.3052 data: 7.0675 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2179 (1.2225) acc1: 72.5260 (70.7520) acc5: 92.4479 (91.1360) time: 1.6073 data: 1.4136 max mem: 55573 Test: Total time: 0:00:08 (1.6501 s / it) * Acc@1 71.598 Acc@5 90.960 loss 1.238 Accuracy of the model on the 50000 test images: 71.6% Max accuracy: 71.60% Test: [0/5] eta: 0:00:35 loss: 7.4056 (7.4056) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.0927 data: 6.8564 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5936 (7.6399) acc1: 0.0000 (0.1600) acc5: 0.0000 (0.5120) time: 1.5639 data: 1.3714 max mem: 55573 Test: Total time: 0:00:07 (1.5808 s / it) * Acc@1 0.150 Acc@5 0.552 loss 7.584 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.15% Epoch: [88] [ 0/156] eta: 0:37:10 lr: 0.006891 min_lr: 0.006891 loss: 3.7348 (3.7348) weight_decay: 0.0500 (0.0500) time: 14.2981 data: 10.2054 max mem: 55573 Epoch: [88] [ 10/156] eta: 0:04:27 lr: 0.006889 min_lr: 0.006889 loss: 3.5443 (3.5204) weight_decay: 0.0500 (0.0500) time: 1.8336 data: 0.9284 max mem: 55573 Epoch: [88] [ 20/156] eta: 0:02:48 lr: 0.006887 min_lr: 0.006887 loss: 3.6154 (3.5992) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0007 max mem: 55573 Epoch: [88] [ 30/156] eta: 0:02:09 lr: 0.006885 min_lr: 0.006885 loss: 3.6416 (3.6113) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0007 max mem: 55573 Epoch: [88] [ 40/156] eta: 0:01:47 lr: 0.006883 min_lr: 0.006883 loss: 3.5867 (3.6242) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [88] [ 50/156] eta: 0:01:30 lr: 0.006881 min_lr: 0.006881 loss: 3.8090 (3.6712) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0006 max mem: 55573 Epoch: [88] [ 60/156] eta: 0:01:18 lr: 0.006879 min_lr: 0.006879 loss: 3.8090 (3.6477) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0006 max mem: 55573 Epoch: [88] [ 70/156] eta: 0:01:07 lr: 0.006877 min_lr: 0.006877 loss: 3.5176 (3.6416) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0006 max mem: 55573 Epoch: [88] [ 80/156] eta: 0:00:57 lr: 0.006875 min_lr: 0.006875 loss: 3.3873 (3.5998) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0007 max mem: 55573 Epoch: [88] [ 90/156] eta: 0:00:48 lr: 0.006873 min_lr: 0.006873 loss: 3.4234 (3.5824) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0007 max mem: 55573 Epoch: [88] [100/156] eta: 0:00:40 lr: 0.006871 min_lr: 0.006871 loss: 3.5660 (3.5685) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0006 max mem: 55573 Epoch: [88] [110/156] eta: 0:00:32 lr: 0.006869 min_lr: 0.006869 loss: 3.6002 (3.5659) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0007 max mem: 55573 Epoch: [88] [120/156] eta: 0:00:25 lr: 0.006867 min_lr: 0.006867 loss: 3.7216 (3.5660) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0006 max mem: 55573 Epoch: [88] [130/156] eta: 0:00:18 lr: 0.006865 min_lr: 0.006865 loss: 3.7254 (3.5844) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0015 max mem: 55573 Epoch: [88] [140/156] eta: 0:00:11 lr: 0.006863 min_lr: 0.006863 loss: 3.6985 (3.5807) weight_decay: 0.0500 (0.0500) time: 0.5878 data: 0.0014 max mem: 55573 Epoch: [88] [150/156] eta: 0:00:04 lr: 0.006861 min_lr: 0.006861 loss: 3.6370 (3.5765) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [88] [155/156] eta: 0:00:00 lr: 0.006860 min_lr: 0.006860 loss: 3.6542 (3.5715) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [88] Total time: 0:01:46 (0.6843 s / it) Averaged stats: lr: 0.006860 min_lr: 0.006860 loss: 3.6542 (3.5642) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2736 (1.2736) acc1: 75.2604 (75.2604) acc5: 93.2292 (93.2292) time: 7.0917 data: 6.8537 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3377 (1.3797) acc1: 73.1771 (70.1120) acc5: 92.0573 (90.0800) time: 1.5649 data: 1.3708 max mem: 55573 Test: Total time: 0:00:08 (1.6164 s / it) * Acc@1 70.428 Acc@5 90.272 loss 1.395 Accuracy of the model on the 50000 test images: 70.4% Max accuracy: 71.60% Test: [0/5] eta: 0:00:34 loss: 7.4049 (7.4049) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9622 data: 6.7261 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5837 (7.6340) acc1: 0.0000 (0.1600) acc5: 0.0000 (0.5120) time: 1.5377 data: 1.3453 max mem: 55573 Test: Total time: 0:00:07 (1.5585 s / it) * Acc@1 0.158 Acc@5 0.542 loss 7.581 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.16% Epoch: [89] [ 0/156] eta: 0:30:33 lr: 0.006860 min_lr: 0.006860 loss: 2.6832 (2.6832) weight_decay: 0.0500 (0.0500) time: 11.7552 data: 11.0447 max mem: 55573 Epoch: [89] [ 10/156] eta: 0:04:09 lr: 0.006858 min_lr: 0.006858 loss: 3.8554 (3.6095) weight_decay: 0.0500 (0.0500) time: 1.7072 data: 1.0605 max mem: 55573 Epoch: [89] [ 20/156] eta: 0:02:39 lr: 0.006856 min_lr: 0.006856 loss: 3.6512 (3.5461) weight_decay: 0.0500 (0.0500) time: 0.6473 data: 0.0313 max mem: 55573 Epoch: [89] [ 30/156] eta: 0:02:05 lr: 0.006854 min_lr: 0.006854 loss: 3.6331 (3.5763) weight_decay: 0.0500 (0.0500) time: 0.5991 data: 0.0006 max mem: 55573 Epoch: [89] [ 40/156] eta: 0:01:43 lr: 0.006852 min_lr: 0.006852 loss: 3.6193 (3.5006) weight_decay: 0.0500 (0.0500) time: 0.5976 data: 0.0006 max mem: 55573 Epoch: [89] [ 50/156] eta: 0:01:28 lr: 0.006850 min_lr: 0.006850 loss: 3.4308 (3.5058) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0006 max mem: 55573 Epoch: [89] [ 60/156] eta: 0:01:16 lr: 0.006848 min_lr: 0.006848 loss: 3.6337 (3.5288) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0007 max mem: 55573 Epoch: [89] [ 70/156] eta: 0:01:05 lr: 0.006846 min_lr: 0.006846 loss: 3.6867 (3.5101) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0007 max mem: 55573 Epoch: [89] [ 80/156] eta: 0:00:56 lr: 0.006844 min_lr: 0.006844 loss: 3.3618 (3.4745) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0006 max mem: 55573 Epoch: [89] [ 90/156] eta: 0:00:47 lr: 0.006842 min_lr: 0.006842 loss: 3.4297 (3.4980) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0006 max mem: 55573 Epoch: [89] [100/156] eta: 0:00:39 lr: 0.006840 min_lr: 0.006840 loss: 3.7841 (3.5032) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0007 max mem: 55573 Epoch: [89] [110/156] eta: 0:00:32 lr: 0.006838 min_lr: 0.006838 loss: 3.8193 (3.5329) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0008 max mem: 55573 Epoch: [89] [120/156] eta: 0:00:24 lr: 0.006836 min_lr: 0.006836 loss: 3.8853 (3.5573) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0007 max mem: 55573 Epoch: [89] [130/156] eta: 0:00:17 lr: 0.006834 min_lr: 0.006834 loss: 3.7809 (3.5551) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0019 max mem: 55573 Epoch: [89] [140/156] eta: 0:00:10 lr: 0.006832 min_lr: 0.006832 loss: 3.6426 (3.5673) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0017 max mem: 55573 Epoch: [89] [150/156] eta: 0:00:04 lr: 0.006830 min_lr: 0.006830 loss: 3.5979 (3.5665) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0002 max mem: 55573 Epoch: [89] [155/156] eta: 0:00:00 lr: 0.006829 min_lr: 0.006829 loss: 3.5979 (3.5702) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0002 max mem: 55573 Epoch: [89] Total time: 0:01:45 (0.6759 s / it) Averaged stats: lr: 0.006829 min_lr: 0.006829 loss: 3.5979 (3.5462) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.3525 (1.3525) acc1: 74.7396 (74.7396) acc5: 92.9688 (92.9688) time: 6.7035 data: 6.4655 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4314 (1.4609) acc1: 72.9167 (69.6000) acc5: 92.9688 (90.0800) time: 1.4873 data: 1.2932 max mem: 55573 Test: Total time: 0:00:07 (1.5282 s / it) * Acc@1 70.438 Acc@5 90.220 loss 1.436 Accuracy of the model on the 50000 test images: 70.4% Max accuracy: 71.60% Test: [0/5] eta: 0:00:37 loss: 7.3976 (7.3976) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.4066 data: 7.1707 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5713 (7.6237) acc1: 0.0000 (0.1600) acc5: 0.0000 (0.5120) time: 1.6266 data: 1.4342 max mem: 55573 Test: Total time: 0:00:08 (1.6449 s / it) * Acc@1 0.174 Acc@5 0.540 loss 7.574 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.17% Epoch: [90] [ 0/156] eta: 0:35:54 lr: 0.006829 min_lr: 0.006829 loss: 2.6781 (2.6781) weight_decay: 0.0500 (0.0500) time: 13.8110 data: 12.6045 max mem: 55573 Epoch: [90] [ 10/156] eta: 0:04:25 lr: 0.006827 min_lr: 0.006827 loss: 3.5112 (3.5169) weight_decay: 0.0500 (0.0500) time: 1.8194 data: 1.1465 max mem: 55573 Epoch: [90] [ 20/156] eta: 0:02:47 lr: 0.006825 min_lr: 0.006825 loss: 3.4810 (3.4534) weight_decay: 0.0500 (0.0500) time: 0.6059 data: 0.0006 max mem: 55573 Epoch: [90] [ 30/156] eta: 0:02:09 lr: 0.006822 min_lr: 0.006822 loss: 3.5843 (3.5752) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0006 max mem: 55573 Epoch: [90] [ 40/156] eta: 0:01:46 lr: 0.006820 min_lr: 0.006820 loss: 3.9058 (3.6186) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0007 max mem: 55573 Epoch: [90] [ 50/156] eta: 0:01:30 lr: 0.006818 min_lr: 0.006818 loss: 3.7393 (3.6642) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0006 max mem: 55573 Epoch: [90] [ 60/156] eta: 0:01:17 lr: 0.006816 min_lr: 0.006816 loss: 3.6988 (3.6426) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0007 max mem: 55573 Epoch: [90] [ 70/156] eta: 0:01:07 lr: 0.006814 min_lr: 0.006814 loss: 3.4116 (3.6093) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0009 max mem: 55573 Epoch: [90] [ 80/156] eta: 0:00:57 lr: 0.006812 min_lr: 0.006812 loss: 3.3385 (3.5901) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0008 max mem: 55573 Epoch: [90] [ 90/156] eta: 0:00:48 lr: 0.006810 min_lr: 0.006810 loss: 3.6255 (3.5972) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0008 max mem: 55573 Epoch: [90] [100/156] eta: 0:00:40 lr: 0.006808 min_lr: 0.006808 loss: 3.7996 (3.5953) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0007 max mem: 55573 Epoch: [90] [110/156] eta: 0:00:32 lr: 0.006806 min_lr: 0.006806 loss: 3.8426 (3.6135) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0006 max mem: 55573 Epoch: [90] [120/156] eta: 0:00:25 lr: 0.006804 min_lr: 0.006804 loss: 3.7834 (3.6066) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0007 max mem: 55573 Epoch: [90] [130/156] eta: 0:00:18 lr: 0.006802 min_lr: 0.006802 loss: 3.5808 (3.5946) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0021 max mem: 55573 Epoch: [90] [140/156] eta: 0:00:10 lr: 0.006800 min_lr: 0.006800 loss: 3.3426 (3.5832) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0019 max mem: 55573 Epoch: [90] [150/156] eta: 0:00:04 lr: 0.006798 min_lr: 0.006798 loss: 3.7988 (3.6004) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0002 max mem: 55573 Epoch: [90] [155/156] eta: 0:00:00 lr: 0.006797 min_lr: 0.006797 loss: 3.8259 (3.6061) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [90] Total time: 0:01:46 (0.6826 s / it) Averaged stats: lr: 0.006797 min_lr: 0.006797 loss: 3.8259 (3.5612) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3589 (1.3589) acc1: 74.8698 (74.8698) acc5: 93.0990 (93.0990) time: 7.1502 data: 6.9125 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4793 (1.4890) acc1: 72.0052 (69.8880) acc5: 92.4479 (90.2720) time: 1.5764 data: 1.3826 max mem: 55573 Test: Total time: 0:00:08 (1.6121 s / it) * Acc@1 71.134 Acc@5 90.718 loss 1.494 Accuracy of the model on the 50000 test images: 71.1% Max accuracy: 71.60% Test: [0/5] eta: 0:00:35 loss: 7.3910 (7.3910) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.0097 data: 6.7736 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5621 (7.6157) acc1: 0.0000 (0.2240) acc5: 0.0000 (0.5120) time: 1.5473 data: 1.3548 max mem: 55573 Test: Total time: 0:00:07 (1.5665 s / it) * Acc@1 0.178 Acc@5 0.536 loss 7.569 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.18% Epoch: [91] [ 0/156] eta: 0:26:27 lr: 0.006797 min_lr: 0.006797 loss: 3.7694 (3.7694) weight_decay: 0.0500 (0.0500) time: 10.1738 data: 8.4450 max mem: 55573 Epoch: [91] [ 10/156] eta: 0:03:58 lr: 0.006795 min_lr: 0.006795 loss: 3.5228 (3.4325) weight_decay: 0.0500 (0.0500) time: 1.6340 data: 0.8495 max mem: 55573 Epoch: [91] [ 20/156] eta: 0:02:34 lr: 0.006793 min_lr: 0.006793 loss: 3.3679 (3.3587) weight_decay: 0.0500 (0.0500) time: 0.6845 data: 0.0452 max mem: 55573 Epoch: [91] [ 30/156] eta: 0:02:01 lr: 0.006790 min_lr: 0.006790 loss: 3.4854 (3.3546) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [91] [ 40/156] eta: 0:01:40 lr: 0.006788 min_lr: 0.006788 loss: 3.6781 (3.4061) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [91] [ 50/156] eta: 0:01:26 lr: 0.006786 min_lr: 0.006786 loss: 3.7155 (3.4558) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0007 max mem: 55573 Epoch: [91] [ 60/156] eta: 0:01:14 lr: 0.006784 min_lr: 0.006784 loss: 3.6401 (3.4683) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0006 max mem: 55573 Epoch: [91] [ 70/156] eta: 0:01:04 lr: 0.006782 min_lr: 0.006782 loss: 3.5889 (3.4672) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [91] [ 80/156] eta: 0:00:55 lr: 0.006780 min_lr: 0.006780 loss: 3.5567 (3.4878) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0006 max mem: 55573 Epoch: [91] [ 90/156] eta: 0:00:47 lr: 0.006778 min_lr: 0.006778 loss: 3.5511 (3.4883) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0006 max mem: 55573 Epoch: [91] [100/156] eta: 0:00:39 lr: 0.006776 min_lr: 0.006776 loss: 3.5153 (3.4985) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0005 max mem: 55573 Epoch: [91] [110/156] eta: 0:00:31 lr: 0.006774 min_lr: 0.006774 loss: 3.4458 (3.5021) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [91] [120/156] eta: 0:00:24 lr: 0.006772 min_lr: 0.006772 loss: 3.4617 (3.4931) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [91] [130/156] eta: 0:00:17 lr: 0.006770 min_lr: 0.006770 loss: 3.6449 (3.5075) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0012 max mem: 55573 Epoch: [91] [140/156] eta: 0:00:10 lr: 0.006768 min_lr: 0.006768 loss: 3.5981 (3.5068) weight_decay: 0.0500 (0.0500) time: 0.5878 data: 0.0010 max mem: 55573 Epoch: [91] [150/156] eta: 0:00:03 lr: 0.006766 min_lr: 0.006766 loss: 3.5736 (3.5158) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [91] [155/156] eta: 0:00:00 lr: 0.006765 min_lr: 0.006765 loss: 3.5842 (3.5230) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [91] Total time: 0:01:44 (0.6691 s / it) Averaged stats: lr: 0.006765 min_lr: 0.006765 loss: 3.5842 (3.5523) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2748 (1.2748) acc1: 75.6510 (75.6510) acc5: 94.6615 (94.6615) time: 6.8877 data: 6.6507 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3994 (1.3732) acc1: 72.9167 (70.4320) acc5: 92.1875 (90.7520) time: 1.5241 data: 1.3302 max mem: 55573 Test: Total time: 0:00:07 (1.5615 s / it) * Acc@1 70.704 Acc@5 90.284 loss 1.375 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 71.60% Test: [0/5] eta: 0:00:34 loss: 7.3851 (7.3851) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9765 data: 6.7404 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5510 (7.6048) acc1: 0.0000 (0.2560) acc5: 0.0000 (0.5120) time: 1.5589 data: 1.3665 max mem: 55573 Test: Total time: 0:00:07 (1.5772 s / it) * Acc@1 0.190 Acc@5 0.534 loss 7.560 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.19% Epoch: [92] [ 0/156] eta: 0:31:28 lr: 0.006764 min_lr: 0.006764 loss: 3.4494 (3.4494) weight_decay: 0.0500 (0.0500) time: 12.1064 data: 8.0062 max mem: 55573 Epoch: [92] [ 10/156] eta: 0:04:14 lr: 0.006762 min_lr: 0.006762 loss: 3.7564 (3.4418) weight_decay: 0.0500 (0.0500) time: 1.7447 data: 0.7285 max mem: 55573 Epoch: [92] [ 20/156] eta: 0:02:42 lr: 0.006760 min_lr: 0.006760 loss: 3.7564 (3.5407) weight_decay: 0.0500 (0.0500) time: 0.6496 data: 0.0007 max mem: 55573 Epoch: [92] [ 30/156] eta: 0:02:06 lr: 0.006758 min_lr: 0.006758 loss: 3.8301 (3.5772) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0006 max mem: 55573 Epoch: [92] [ 40/156] eta: 0:01:44 lr: 0.006756 min_lr: 0.006756 loss: 3.7953 (3.5741) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0006 max mem: 55573 Epoch: [92] [ 50/156] eta: 0:01:29 lr: 0.006754 min_lr: 0.006754 loss: 3.6429 (3.6185) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [92] [ 60/156] eta: 0:01:16 lr: 0.006752 min_lr: 0.006752 loss: 3.7564 (3.6310) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0006 max mem: 55573 Epoch: [92] [ 70/156] eta: 0:01:06 lr: 0.006750 min_lr: 0.006750 loss: 3.7588 (3.6155) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [92] [ 80/156] eta: 0:00:56 lr: 0.006748 min_lr: 0.006748 loss: 3.7322 (3.6318) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [92] [ 90/156] eta: 0:00:48 lr: 0.006746 min_lr: 0.006746 loss: 3.7350 (3.6166) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [92] [100/156] eta: 0:00:40 lr: 0.006744 min_lr: 0.006744 loss: 3.7553 (3.6328) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0006 max mem: 55573 Epoch: [92] [110/156] eta: 0:00:32 lr: 0.006741 min_lr: 0.006741 loss: 3.8622 (3.6342) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0006 max mem: 55573 Epoch: [92] [120/156] eta: 0:00:25 lr: 0.006739 min_lr: 0.006739 loss: 3.8806 (3.6630) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0006 max mem: 55573 Epoch: [92] [130/156] eta: 0:00:17 lr: 0.006737 min_lr: 0.006737 loss: 3.7881 (3.6492) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0011 max mem: 55573 Epoch: [92] [140/156] eta: 0:00:10 lr: 0.006735 min_lr: 0.006735 loss: 3.6620 (3.6594) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [92] [150/156] eta: 0:00:04 lr: 0.006733 min_lr: 0.006733 loss: 3.6620 (3.6506) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0002 max mem: 55573 Epoch: [92] [155/156] eta: 0:00:00 lr: 0.006732 min_lr: 0.006732 loss: 3.5981 (3.6465) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0002 max mem: 55573 Epoch: [92] Total time: 0:01:45 (0.6774 s / it) Averaged stats: lr: 0.006732 min_lr: 0.006732 loss: 3.5981 (3.5494) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3255 (1.3255) acc1: 74.8698 (74.8698) acc5: 92.7083 (92.7083) time: 7.0276 data: 6.7903 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3632 (1.4150) acc1: 70.7031 (69.6000) acc5: 92.0573 (89.9200) time: 1.5521 data: 1.3581 max mem: 55573 Test: Total time: 0:00:07 (1.5890 s / it) * Acc@1 70.390 Acc@5 90.460 loss 1.417 Accuracy of the model on the 50000 test images: 70.4% Max accuracy: 71.60% Test: [0/5] eta: 0:00:34 loss: 7.3721 (7.3721) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9443 data: 6.7081 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5366 (7.5913) acc1: 0.0000 (0.2880) acc5: 0.0000 (0.5440) time: 1.5341 data: 1.3417 max mem: 55573 Test: Total time: 0:00:07 (1.5653 s / it) * Acc@1 0.200 Acc@5 0.542 loss 7.549 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.20% Epoch: [93] [ 0/156] eta: 0:31:52 lr: 0.006732 min_lr: 0.006732 loss: 3.3785 (3.3785) weight_decay: 0.0500 (0.0500) time: 12.2613 data: 11.6415 max mem: 55573 Epoch: [93] [ 10/156] eta: 0:04:12 lr: 0.006730 min_lr: 0.006730 loss: 3.7472 (3.6943) weight_decay: 0.0500 (0.0500) time: 1.7322 data: 1.0794 max mem: 55573 Epoch: [93] [ 20/156] eta: 0:02:41 lr: 0.006728 min_lr: 0.006728 loss: 3.7472 (3.6036) weight_decay: 0.0500 (0.0500) time: 0.6345 data: 0.0119 max mem: 55573 Epoch: [93] [ 30/156] eta: 0:02:05 lr: 0.006725 min_lr: 0.006725 loss: 3.7886 (3.6322) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [93] [ 40/156] eta: 0:01:44 lr: 0.006723 min_lr: 0.006723 loss: 3.5720 (3.5584) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0006 max mem: 55573 Epoch: [93] [ 50/156] eta: 0:01:28 lr: 0.006721 min_lr: 0.006721 loss: 3.5720 (3.5961) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0007 max mem: 55573 Epoch: [93] [ 60/156] eta: 0:01:16 lr: 0.006719 min_lr: 0.006719 loss: 3.6537 (3.5690) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0006 max mem: 55573 Epoch: [93] [ 70/156] eta: 0:01:06 lr: 0.006717 min_lr: 0.006717 loss: 3.6537 (3.5666) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0006 max mem: 55573 Epoch: [93] [ 80/156] eta: 0:00:56 lr: 0.006715 min_lr: 0.006715 loss: 3.6671 (3.5694) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0006 max mem: 55573 Epoch: [93] [ 90/156] eta: 0:00:48 lr: 0.006713 min_lr: 0.006713 loss: 3.7406 (3.5760) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [93] [100/156] eta: 0:00:40 lr: 0.006711 min_lr: 0.006711 loss: 3.7946 (3.5964) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0006 max mem: 55573 Epoch: [93] [110/156] eta: 0:00:32 lr: 0.006709 min_lr: 0.006709 loss: 3.8819 (3.6207) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [93] [120/156] eta: 0:00:25 lr: 0.006706 min_lr: 0.006706 loss: 3.7618 (3.6134) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0006 max mem: 55573 Epoch: [93] [130/156] eta: 0:00:17 lr: 0.006704 min_lr: 0.006704 loss: 3.5684 (3.5941) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0012 max mem: 55573 Epoch: [93] [140/156] eta: 0:00:10 lr: 0.006702 min_lr: 0.006702 loss: 3.5684 (3.5797) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0010 max mem: 55573 Epoch: [93] [150/156] eta: 0:00:04 lr: 0.006700 min_lr: 0.006700 loss: 3.6782 (3.5904) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0002 max mem: 55573 Epoch: [93] [155/156] eta: 0:00:00 lr: 0.006699 min_lr: 0.006699 loss: 3.6632 (3.5788) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0002 max mem: 55573 Epoch: [93] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.006699 min_lr: 0.006699 loss: 3.6632 (3.5508) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1788 (1.1788) acc1: 77.2135 (77.2135) acc5: 94.6615 (94.6615) time: 6.9852 data: 6.7479 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3453 (1.3834) acc1: 72.7865 (70.8800) acc5: 91.1458 (90.0480) time: 1.5431 data: 1.3498 max mem: 55573 Test: Total time: 0:00:07 (1.5843 s / it) * Acc@1 71.428 Acc@5 90.442 loss 1.345 Accuracy of the model on the 50000 test images: 71.4% Max accuracy: 71.60% Test: [0/5] eta: 0:00:37 loss: 7.3660 (7.3660) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.5785 data: 7.3420 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5250 (7.5813) acc1: 0.0000 (0.2880) acc5: 0.0000 (0.5440) time: 1.6612 data: 1.4685 max mem: 55573 Test: Total time: 0:00:08 (1.6795 s / it) * Acc@1 0.206 Acc@5 0.542 loss 7.541 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.21% Epoch: [94] [ 0/156] eta: 0:30:15 lr: 0.006699 min_lr: 0.006699 loss: 3.7628 (3.7628) weight_decay: 0.0500 (0.0500) time: 11.6410 data: 10.7380 max mem: 55573 Epoch: [94] [ 10/156] eta: 0:04:08 lr: 0.006697 min_lr: 0.006697 loss: 3.6545 (3.4709) weight_decay: 0.0500 (0.0500) time: 1.6992 data: 0.9768 max mem: 55573 Epoch: [94] [ 20/156] eta: 0:02:39 lr: 0.006695 min_lr: 0.006695 loss: 3.5358 (3.3901) weight_decay: 0.0500 (0.0500) time: 0.6477 data: 0.0006 max mem: 55573 Epoch: [94] [ 30/156] eta: 0:02:03 lr: 0.006692 min_lr: 0.006692 loss: 3.1403 (3.2794) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0007 max mem: 55573 Epoch: [94] [ 40/156] eta: 0:01:43 lr: 0.006690 min_lr: 0.006690 loss: 3.3206 (3.3132) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0007 max mem: 55573 Epoch: [94] [ 50/156] eta: 0:01:27 lr: 0.006688 min_lr: 0.006688 loss: 3.5670 (3.3481) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0007 max mem: 55573 Epoch: [94] [ 60/156] eta: 0:01:15 lr: 0.006686 min_lr: 0.006686 loss: 3.5670 (3.3506) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0006 max mem: 55573 Epoch: [94] [ 70/156] eta: 0:01:05 lr: 0.006684 min_lr: 0.006684 loss: 3.5819 (3.3711) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [94] [ 80/156] eta: 0:00:56 lr: 0.006682 min_lr: 0.006682 loss: 3.6485 (3.3876) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0007 max mem: 55573 Epoch: [94] [ 90/156] eta: 0:00:47 lr: 0.006680 min_lr: 0.006680 loss: 3.6168 (3.4083) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0008 max mem: 55573 Epoch: [94] [100/156] eta: 0:00:39 lr: 0.006678 min_lr: 0.006678 loss: 3.6828 (3.4104) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0008 max mem: 55573 Epoch: [94] [110/156] eta: 0:00:32 lr: 0.006675 min_lr: 0.006675 loss: 3.3354 (3.4019) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0008 max mem: 55573 Epoch: [94] [120/156] eta: 0:00:24 lr: 0.006673 min_lr: 0.006673 loss: 3.4742 (3.4084) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0007 max mem: 55573 Epoch: [94] [130/156] eta: 0:00:17 lr: 0.006671 min_lr: 0.006671 loss: 3.6243 (3.4362) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0022 max mem: 55573 Epoch: [94] [140/156] eta: 0:00:10 lr: 0.006669 min_lr: 0.006669 loss: 3.7145 (3.4506) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0020 max mem: 55573 Epoch: [94] [150/156] eta: 0:00:04 lr: 0.006667 min_lr: 0.006667 loss: 3.6419 (3.4567) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0002 max mem: 55573 Epoch: [94] [155/156] eta: 0:00:00 lr: 0.006666 min_lr: 0.006666 loss: 3.5381 (3.4563) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [94] Total time: 0:01:45 (0.6749 s / it) Averaged stats: lr: 0.006666 min_lr: 0.006666 loss: 3.5381 (3.5447) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1710 (1.1710) acc1: 75.0000 (75.0000) acc5: 94.1406 (94.1406) time: 7.3282 data: 7.0908 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2986 (1.2826) acc1: 72.2656 (70.2080) acc5: 92.9688 (91.0720) time: 1.6119 data: 1.4182 max mem: 55573 Test: Total time: 0:00:08 (1.6572 s / it) * Acc@1 71.878 Acc@5 91.128 loss 1.237 Accuracy of the model on the 50000 test images: 71.9% Max accuracy: 71.88% Test: [0/5] eta: 0:00:35 loss: 7.3611 (7.3611) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.0480 data: 6.8113 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5166 (7.5743) acc1: 0.0000 (0.2880) acc5: 0.0000 (0.6080) time: 1.5552 data: 1.3624 max mem: 55573 Test: Total time: 0:00:07 (1.5747 s / it) * Acc@1 0.212 Acc@5 0.558 loss 7.535 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.21% Epoch: [95] [ 0/156] eta: 0:35:39 lr: 0.006666 min_lr: 0.006666 loss: 2.8651 (2.8651) weight_decay: 0.0500 (0.0500) time: 13.7125 data: 13.1282 max mem: 55573 Epoch: [95] [ 10/156] eta: 0:04:20 lr: 0.006663 min_lr: 0.006663 loss: 3.4265 (3.3282) weight_decay: 0.0500 (0.0500) time: 1.7813 data: 1.1938 max mem: 55573 Epoch: [95] [ 20/156] eta: 0:02:45 lr: 0.006661 min_lr: 0.006661 loss: 3.5377 (3.4726) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [95] [ 30/156] eta: 0:02:07 lr: 0.006659 min_lr: 0.006659 loss: 3.8204 (3.5590) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [95] [ 40/156] eta: 0:01:45 lr: 0.006657 min_lr: 0.006657 loss: 3.5810 (3.5320) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0007 max mem: 55573 Epoch: [95] [ 50/156] eta: 0:01:29 lr: 0.006655 min_lr: 0.006655 loss: 3.4604 (3.5467) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0008 max mem: 55573 Epoch: [95] [ 60/156] eta: 0:01:17 lr: 0.006653 min_lr: 0.006653 loss: 3.7159 (3.5469) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0007 max mem: 55573 Epoch: [95] [ 70/156] eta: 0:01:06 lr: 0.006651 min_lr: 0.006651 loss: 3.7194 (3.5446) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0006 max mem: 55573 Epoch: [95] [ 80/156] eta: 0:00:57 lr: 0.006648 min_lr: 0.006648 loss: 3.7712 (3.5438) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0008 max mem: 55573 Epoch: [95] [ 90/156] eta: 0:00:48 lr: 0.006646 min_lr: 0.006646 loss: 3.7712 (3.5482) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0007 max mem: 55573 Epoch: [95] [100/156] eta: 0:00:40 lr: 0.006644 min_lr: 0.006644 loss: 3.8091 (3.5563) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0007 max mem: 55573 Epoch: [95] [110/156] eta: 0:00:32 lr: 0.006642 min_lr: 0.006642 loss: 3.6754 (3.5345) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0008 max mem: 55573 Epoch: [95] [120/156] eta: 0:00:25 lr: 0.006640 min_lr: 0.006640 loss: 3.7165 (3.5491) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0006 max mem: 55573 Epoch: [95] [130/156] eta: 0:00:17 lr: 0.006638 min_lr: 0.006638 loss: 3.7470 (3.5476) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0013 max mem: 55573 Epoch: [95] [140/156] eta: 0:00:10 lr: 0.006635 min_lr: 0.006635 loss: 3.5032 (3.5490) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0011 max mem: 55573 Epoch: [95] [150/156] eta: 0:00:04 lr: 0.006633 min_lr: 0.006633 loss: 3.4795 (3.5421) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0002 max mem: 55573 Epoch: [95] [155/156] eta: 0:00:00 lr: 0.006632 min_lr: 0.006632 loss: 3.4169 (3.5390) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0002 max mem: 55573 Epoch: [95] Total time: 0:01:46 (0.6801 s / it) Averaged stats: lr: 0.006632 min_lr: 0.006632 loss: 3.4169 (3.5240) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.2612 (1.2612) acc1: 75.1302 (75.1302) acc5: 93.4896 (93.4896) time: 7.3837 data: 7.1463 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3669 (1.3644) acc1: 71.6146 (69.7280) acc5: 91.5365 (90.2080) time: 1.6232 data: 1.4293 max mem: 55573 Test: Total time: 0:00:08 (1.6664 s / it) * Acc@1 71.016 Acc@5 90.426 loss 1.352 Accuracy of the model on the 50000 test images: 71.0% Max accuracy: 71.88% Test: [0/5] eta: 0:00:36 loss: 7.3564 (7.3564) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2543 data: 7.0177 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.5076 (7.5667) acc1: 0.0000 (0.2880) acc5: 0.0000 (0.6080) time: 1.5964 data: 1.4037 max mem: 55573 Test: Total time: 0:00:08 (1.6113 s / it) * Acc@1 0.216 Acc@5 0.568 loss 7.530 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.22% Epoch: [96] [ 0/156] eta: 0:38:32 lr: 0.006632 min_lr: 0.006632 loss: 3.3143 (3.3143) weight_decay: 0.0500 (0.0500) time: 14.8245 data: 14.2398 max mem: 55573 Epoch: [96] [ 10/156] eta: 0:04:35 lr: 0.006630 min_lr: 0.006630 loss: 3.6872 (3.5248) weight_decay: 0.0500 (0.0500) time: 1.8900 data: 1.2950 max mem: 55573 Epoch: [96] [ 20/156] eta: 0:02:52 lr: 0.006628 min_lr: 0.006628 loss: 3.7639 (3.5456) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0005 max mem: 55573 Epoch: [96] [ 30/156] eta: 0:02:12 lr: 0.006625 min_lr: 0.006625 loss: 3.7944 (3.6075) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [96] [ 40/156] eta: 0:01:48 lr: 0.006623 min_lr: 0.006623 loss: 3.7231 (3.5828) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [96] [ 50/156] eta: 0:01:32 lr: 0.006621 min_lr: 0.006621 loss: 3.4952 (3.5425) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0006 max mem: 55573 Epoch: [96] [ 60/156] eta: 0:01:19 lr: 0.006619 min_lr: 0.006619 loss: 3.5816 (3.5482) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0006 max mem: 55573 Epoch: [96] [ 70/156] eta: 0:01:08 lr: 0.006617 min_lr: 0.006617 loss: 3.7043 (3.5333) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [96] [ 80/156] eta: 0:00:58 lr: 0.006615 min_lr: 0.006615 loss: 3.4610 (3.5233) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0006 max mem: 55573 Epoch: [96] [ 90/156] eta: 0:00:49 lr: 0.006612 min_lr: 0.006612 loss: 3.4278 (3.4950) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [96] [100/156] eta: 0:00:41 lr: 0.006610 min_lr: 0.006610 loss: 3.6417 (3.5075) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [96] [110/156] eta: 0:00:33 lr: 0.006608 min_lr: 0.006608 loss: 3.6760 (3.5007) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [96] [120/156] eta: 0:00:25 lr: 0.006606 min_lr: 0.006606 loss: 3.7193 (3.5236) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [96] [130/156] eta: 0:00:18 lr: 0.006604 min_lr: 0.006604 loss: 3.7551 (3.5036) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0011 max mem: 55573 Epoch: [96] [140/156] eta: 0:00:11 lr: 0.006601 min_lr: 0.006601 loss: 3.7366 (3.5269) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [96] [150/156] eta: 0:00:04 lr: 0.006599 min_lr: 0.006599 loss: 3.8236 (3.5247) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [96] [155/156] eta: 0:00:00 lr: 0.006598 min_lr: 0.006598 loss: 3.4593 (3.5086) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [96] Total time: 0:01:47 (0.6879 s / it) Averaged stats: lr: 0.006598 min_lr: 0.006598 loss: 3.4593 (3.5446) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1760 (1.1760) acc1: 73.0469 (73.0469) acc5: 93.8802 (93.8802) time: 7.3042 data: 7.0668 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2398 (1.2794) acc1: 72.5260 (70.1440) acc5: 93.0990 (90.9440) time: 1.6074 data: 1.4134 max mem: 55573 Test: Total time: 0:00:08 (1.6686 s / it) * Acc@1 71.050 Acc@5 90.774 loss 1.293 Accuracy of the model on the 50000 test images: 71.1% Max accuracy: 71.88% Test: [0/5] eta: 0:00:32 loss: 7.3554 (7.3554) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.4246 data: 6.1886 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4980 (7.5598) acc1: 0.0000 (0.2880) acc5: 0.0000 (0.6720) time: 1.4807 data: 1.2884 max mem: 55573 Test: Total time: 0:00:07 (1.5005 s / it) * Acc@1 0.218 Acc@5 0.600 loss 7.524 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.22% Epoch: [97] [ 0/156] eta: 0:35:02 lr: 0.006598 min_lr: 0.006598 loss: 3.2738 (3.2738) weight_decay: 0.0500 (0.0500) time: 13.4783 data: 12.8864 max mem: 55573 Epoch: [97] [ 10/156] eta: 0:04:19 lr: 0.006596 min_lr: 0.006596 loss: 3.7798 (3.5207) weight_decay: 0.0500 (0.0500) time: 1.7801 data: 1.1719 max mem: 55573 Epoch: [97] [ 20/156] eta: 0:02:45 lr: 0.006594 min_lr: 0.006594 loss: 3.7635 (3.6852) weight_decay: 0.0500 (0.0500) time: 0.6000 data: 0.0005 max mem: 55573 Epoch: [97] [ 30/156] eta: 0:02:07 lr: 0.006591 min_lr: 0.006591 loss: 3.7165 (3.6186) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [97] [ 40/156] eta: 0:01:45 lr: 0.006589 min_lr: 0.006589 loss: 3.6586 (3.6095) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [97] [ 50/156] eta: 0:01:29 lr: 0.006587 min_lr: 0.006587 loss: 3.6602 (3.6270) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [97] [ 60/156] eta: 0:01:17 lr: 0.006585 min_lr: 0.006585 loss: 3.6215 (3.5861) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [97] [ 70/156] eta: 0:01:06 lr: 0.006583 min_lr: 0.006583 loss: 3.6208 (3.5692) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0006 max mem: 55573 Epoch: [97] [ 80/156] eta: 0:00:57 lr: 0.006580 min_lr: 0.006580 loss: 3.6907 (3.5775) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0006 max mem: 55573 Epoch: [97] [ 90/156] eta: 0:00:48 lr: 0.006578 min_lr: 0.006578 loss: 3.6717 (3.5749) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [97] [100/156] eta: 0:00:40 lr: 0.006576 min_lr: 0.006576 loss: 3.5779 (3.5739) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0006 max mem: 55573 Epoch: [97] [110/156] eta: 0:00:32 lr: 0.006574 min_lr: 0.006574 loss: 3.4441 (3.5588) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [97] [120/156] eta: 0:00:25 lr: 0.006572 min_lr: 0.006572 loss: 3.7326 (3.5689) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0006 max mem: 55573 Epoch: [97] [130/156] eta: 0:00:17 lr: 0.006569 min_lr: 0.006569 loss: 3.7985 (3.5686) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0011 max mem: 55573 Epoch: [97] [140/156] eta: 0:00:10 lr: 0.006567 min_lr: 0.006567 loss: 3.6297 (3.5702) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0009 max mem: 55573 Epoch: [97] [150/156] eta: 0:00:04 lr: 0.006565 min_lr: 0.006565 loss: 3.7583 (3.5872) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0002 max mem: 55573 Epoch: [97] [155/156] eta: 0:00:00 lr: 0.006564 min_lr: 0.006564 loss: 3.7296 (3.5786) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [97] Total time: 0:01:46 (0.6798 s / it) Averaged stats: lr: 0.006564 min_lr: 0.006564 loss: 3.7296 (3.5348) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1493 (1.1493) acc1: 75.5208 (75.5208) acc5: 93.6198 (93.6198) time: 6.9973 data: 6.7595 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2146 (1.2402) acc1: 73.5677 (71.8720) acc5: 93.6198 (91.2960) time: 1.5460 data: 1.3520 max mem: 55573 Test: Total time: 0:00:07 (1.5872 s / it) * Acc@1 72.068 Acc@5 91.224 loss 1.257 Accuracy of the model on the 50000 test images: 72.1% Max accuracy: 72.07% Test: [0/5] eta: 0:00:34 loss: 7.3578 (7.3578) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.9066 data: 6.6700 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4876 (7.5534) acc1: 0.0000 (0.3200) acc5: 0.0000 (0.7040) time: 1.5269 data: 1.3341 max mem: 55573 Test: Total time: 0:00:07 (1.5451 s / it) * Acc@1 0.234 Acc@5 0.626 loss 7.518 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.23% Epoch: [98] [ 0/156] eta: 0:31:29 lr: 0.006564 min_lr: 0.006564 loss: 4.0497 (4.0497) weight_decay: 0.0500 (0.0500) time: 12.1142 data: 8.7939 max mem: 55573 Epoch: [98] [ 10/156] eta: 0:04:18 lr: 0.006561 min_lr: 0.006561 loss: 3.8331 (3.7579) weight_decay: 0.0500 (0.0500) time: 1.7685 data: 0.8004 max mem: 55573 Epoch: [98] [ 20/156] eta: 0:02:44 lr: 0.006559 min_lr: 0.006559 loss: 3.5809 (3.5107) weight_decay: 0.0500 (0.0500) time: 0.6625 data: 0.0008 max mem: 55573 Epoch: [98] [ 30/156] eta: 0:02:07 lr: 0.006557 min_lr: 0.006557 loss: 3.5809 (3.5188) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [98] [ 40/156] eta: 0:01:45 lr: 0.006555 min_lr: 0.006555 loss: 3.6361 (3.4596) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0006 max mem: 55573 Epoch: [98] [ 50/156] eta: 0:01:29 lr: 0.006553 min_lr: 0.006553 loss: 3.4044 (3.4518) weight_decay: 0.0500 (0.0500) time: 0.5883 data: 0.0006 max mem: 55573 Epoch: [98] [ 60/156] eta: 0:01:17 lr: 0.006550 min_lr: 0.006550 loss: 3.4044 (3.4455) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [98] [ 70/156] eta: 0:01:06 lr: 0.006548 min_lr: 0.006548 loss: 3.7241 (3.5032) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0006 max mem: 55573 Epoch: [98] [ 80/156] eta: 0:00:57 lr: 0.006546 min_lr: 0.006546 loss: 3.7747 (3.5076) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0006 max mem: 55573 Epoch: [98] [ 90/156] eta: 0:00:48 lr: 0.006544 min_lr: 0.006544 loss: 3.6203 (3.5263) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0006 max mem: 55573 Epoch: [98] [100/156] eta: 0:00:40 lr: 0.006542 min_lr: 0.006542 loss: 3.7048 (3.5262) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0005 max mem: 55573 Epoch: [98] [110/156] eta: 0:00:32 lr: 0.006539 min_lr: 0.006539 loss: 3.4292 (3.4971) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [98] [120/156] eta: 0:00:25 lr: 0.006537 min_lr: 0.006537 loss: 3.1856 (3.4958) weight_decay: 0.0500 (0.0500) time: 0.5945 data: 0.0005 max mem: 55573 Epoch: [98] [130/156] eta: 0:00:17 lr: 0.006535 min_lr: 0.006535 loss: 3.5460 (3.5098) weight_decay: 0.0500 (0.0500) time: 0.5956 data: 0.0015 max mem: 55573 Epoch: [98] [140/156] eta: 0:00:10 lr: 0.006533 min_lr: 0.006533 loss: 3.5460 (3.4954) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0013 max mem: 55573 Epoch: [98] [150/156] eta: 0:00:04 lr: 0.006530 min_lr: 0.006530 loss: 3.3750 (3.4956) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0002 max mem: 55573 Epoch: [98] [155/156] eta: 0:00:00 lr: 0.006529 min_lr: 0.006529 loss: 3.5070 (3.4996) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0002 max mem: 55573 Epoch: [98] Total time: 0:01:46 (0.6806 s / it) Averaged stats: lr: 0.006529 min_lr: 0.006529 loss: 3.5070 (3.5177) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3889 (1.3889) acc1: 75.9115 (75.9115) acc5: 93.3594 (93.3594) time: 7.1850 data: 6.9476 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4239 (1.4651) acc1: 73.3073 (70.6880) acc5: 93.3594 (91.1040) time: 1.5834 data: 1.3896 max mem: 55573 Test: Total time: 0:00:08 (1.6184 s / it) * Acc@1 71.180 Acc@5 90.434 loss 1.479 Accuracy of the model on the 50000 test images: 71.2% Max accuracy: 72.07% Test: [0/5] eta: 0:00:36 loss: 7.3554 (7.3554) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.3481 data: 7.1115 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4745 (7.5431) acc1: 0.0000 (0.3200) acc5: 0.0000 (0.7360) time: 1.6151 data: 1.4224 max mem: 55573 Test: Total time: 0:00:08 (1.6377 s / it) * Acc@1 0.238 Acc@5 0.682 loss 7.510 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.24% Epoch: [99] [ 0/156] eta: 0:34:50 lr: 0.006529 min_lr: 0.006529 loss: 3.4162 (3.4162) weight_decay: 0.0500 (0.0500) time: 13.3979 data: 12.8119 max mem: 55573 Epoch: [99] [ 10/156] eta: 0:04:18 lr: 0.006527 min_lr: 0.006527 loss: 3.7542 (3.4532) weight_decay: 0.0500 (0.0500) time: 1.7678 data: 1.1656 max mem: 55573 Epoch: [99] [ 20/156] eta: 0:02:44 lr: 0.006525 min_lr: 0.006525 loss: 3.7542 (3.4165) weight_decay: 0.0500 (0.0500) time: 0.5968 data: 0.0008 max mem: 55573 Epoch: [99] [ 30/156] eta: 0:02:07 lr: 0.006522 min_lr: 0.006522 loss: 3.0335 (3.3452) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [99] [ 40/156] eta: 0:01:45 lr: 0.006520 min_lr: 0.006520 loss: 3.3180 (3.4107) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [99] [ 50/156] eta: 0:01:29 lr: 0.006518 min_lr: 0.006518 loss: 3.8494 (3.4950) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [99] [ 60/156] eta: 0:01:17 lr: 0.006516 min_lr: 0.006516 loss: 3.7652 (3.5033) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [99] [ 70/156] eta: 0:01:06 lr: 0.006513 min_lr: 0.006513 loss: 3.6751 (3.5127) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [99] [ 80/156] eta: 0:00:57 lr: 0.006511 min_lr: 0.006511 loss: 3.7312 (3.5328) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [99] [ 90/156] eta: 0:00:48 lr: 0.006509 min_lr: 0.006509 loss: 3.6664 (3.5312) weight_decay: 0.0500 (0.0500) time: 0.5945 data: 0.0005 max mem: 55573 Epoch: [99] [100/156] eta: 0:00:40 lr: 0.006507 min_lr: 0.006507 loss: 3.6669 (3.5337) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0006 max mem: 55573 Epoch: [99] [110/156] eta: 0:00:32 lr: 0.006504 min_lr: 0.006504 loss: 3.6758 (3.5433) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0006 max mem: 55573 Epoch: [99] [120/156] eta: 0:00:25 lr: 0.006502 min_lr: 0.006502 loss: 3.6374 (3.5402) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [99] [130/156] eta: 0:00:17 lr: 0.006500 min_lr: 0.006500 loss: 3.7376 (3.5581) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0011 max mem: 55573 Epoch: [99] [140/156] eta: 0:00:10 lr: 0.006498 min_lr: 0.006498 loss: 3.8850 (3.5479) weight_decay: 0.0500 (0.0500) time: 0.5877 data: 0.0010 max mem: 55573 Epoch: [99] [150/156] eta: 0:00:04 lr: 0.006495 min_lr: 0.006495 loss: 3.1770 (3.5289) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0002 max mem: 55573 Epoch: [99] [155/156] eta: 0:00:00 lr: 0.006494 min_lr: 0.006494 loss: 3.4122 (3.5362) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0002 max mem: 55573 Epoch: [99] Total time: 0:01:46 (0.6799 s / it) Averaged stats: lr: 0.006494 min_lr: 0.006494 loss: 3.4122 (3.5224) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1976 (1.1976) acc1: 75.0000 (75.0000) acc5: 92.8385 (92.8385) time: 7.1792 data: 6.9421 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2474 (1.2969) acc1: 71.6146 (70.0800) acc5: 92.8385 (90.1440) time: 1.5823 data: 1.3885 max mem: 55573 Test: Total time: 0:00:08 (1.6210 s / it) * Acc@1 70.694 Acc@5 90.342 loss 1.303 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 72.07% Test: [0/5] eta: 0:00:36 loss: 7.3476 (7.3476) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.2327 data: 6.9962 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4597 (7.5317) acc1: 0.0000 (0.3200) acc5: 0.1302 (0.8640) time: 1.5921 data: 1.3993 max mem: 55573 Test: Total time: 0:00:08 (1.6126 s / it) * Acc@1 0.240 Acc@5 0.714 loss 7.502 Accuracy of the model EMA on 50000 test images: 0.2% Max EMA accuracy: 0.24% Epoch: [100] [ 0/156] eta: 0:32:27 lr: 0.006494 min_lr: 0.006494 loss: 3.3762 (3.3762) weight_decay: 0.0500 (0.0500) time: 12.4850 data: 7.8882 max mem: 55573 Epoch: [100] [ 10/156] eta: 0:04:06 lr: 0.006492 min_lr: 0.006492 loss: 3.6403 (3.4975) weight_decay: 0.0500 (0.0500) time: 1.6898 data: 0.7178 max mem: 55573 Epoch: [100] [ 20/156] eta: 0:02:38 lr: 0.006490 min_lr: 0.006490 loss: 3.6925 (3.5727) weight_decay: 0.0500 (0.0500) time: 0.6001 data: 0.0007 max mem: 55573 Epoch: [100] [ 30/156] eta: 0:02:03 lr: 0.006487 min_lr: 0.006487 loss: 3.6977 (3.6218) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0006 max mem: 55573 Epoch: [100] [ 40/156] eta: 0:01:42 lr: 0.006485 min_lr: 0.006485 loss: 3.4689 (3.5829) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [100] [ 50/156] eta: 0:01:27 lr: 0.006483 min_lr: 0.006483 loss: 3.2825 (3.4997) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [100] [ 60/156] eta: 0:01:15 lr: 0.006481 min_lr: 0.006481 loss: 3.4548 (3.5374) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [100] [ 70/156] eta: 0:01:05 lr: 0.006478 min_lr: 0.006478 loss: 3.7065 (3.5362) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0006 max mem: 55573 Epoch: [100] [ 80/156] eta: 0:00:56 lr: 0.006476 min_lr: 0.006476 loss: 3.6290 (3.5359) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [100] [ 90/156] eta: 0:00:47 lr: 0.006474 min_lr: 0.006474 loss: 3.3552 (3.5093) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [100] [100/156] eta: 0:00:39 lr: 0.006472 min_lr: 0.006472 loss: 3.6226 (3.5151) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [100] [110/156] eta: 0:00:32 lr: 0.006469 min_lr: 0.006469 loss: 3.6848 (3.5271) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [100] [120/156] eta: 0:00:24 lr: 0.006467 min_lr: 0.006467 loss: 3.7371 (3.5391) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [100] [130/156] eta: 0:00:17 lr: 0.006465 min_lr: 0.006465 loss: 3.5852 (3.5242) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0012 max mem: 55573 Epoch: [100] [140/156] eta: 0:00:10 lr: 0.006463 min_lr: 0.006463 loss: 3.5852 (3.5242) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0010 max mem: 55573 Epoch: [100] [150/156] eta: 0:00:04 lr: 0.006460 min_lr: 0.006460 loss: 3.6374 (3.5336) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [100] [155/156] eta: 0:00:00 lr: 0.006459 min_lr: 0.006459 loss: 3.5079 (3.5334) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [100] Total time: 0:01:45 (0.6736 s / it) Averaged stats: lr: 0.006459 min_lr: 0.006459 loss: 3.5079 (3.5223) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2705 (1.2705) acc1: 73.4375 (73.4375) acc5: 92.3177 (92.3177) time: 6.9885 data: 6.7516 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3398 (1.3622) acc1: 73.1771 (69.5360) acc5: 91.7969 (90.1120) time: 1.5441 data: 1.3504 max mem: 55573 Test: Total time: 0:00:07 (1.5900 s / it) * Acc@1 70.328 Acc@5 90.278 loss 1.363 Accuracy of the model on the 50000 test images: 70.3% Max accuracy: 72.07% Test: [0/5] eta: 0:00:35 loss: 7.3376 (7.3376) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.0920 data: 6.8560 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4415 (7.5169) acc1: 0.0000 (0.3200) acc5: 0.2604 (0.9280) time: 1.5638 data: 1.3713 max mem: 55573 Test: Total time: 0:00:07 (1.5864 s / it) * Acc@1 0.256 Acc@5 0.766 loss 7.491 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.26% Epoch: [101] [ 0/156] eta: 0:29:35 lr: 0.006459 min_lr: 0.006459 loss: 3.3351 (3.3351) weight_decay: 0.0500 (0.0500) time: 11.3821 data: 8.1261 max mem: 55573 Epoch: [101] [ 10/156] eta: 0:03:55 lr: 0.006457 min_lr: 0.006457 loss: 3.5261 (3.3052) weight_decay: 0.0500 (0.0500) time: 1.6125 data: 0.7727 max mem: 55573 Epoch: [101] [ 20/156] eta: 0:02:33 lr: 0.006454 min_lr: 0.006454 loss: 3.7065 (3.4988) weight_decay: 0.0500 (0.0500) time: 0.6128 data: 0.0190 max mem: 55573 Epoch: [101] [ 30/156] eta: 0:02:00 lr: 0.006452 min_lr: 0.006452 loss: 3.7711 (3.4542) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [101] [ 40/156] eta: 0:01:40 lr: 0.006450 min_lr: 0.006450 loss: 3.7711 (3.4830) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [101] [ 50/156] eta: 0:01:26 lr: 0.006448 min_lr: 0.006448 loss: 3.6455 (3.4787) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [101] [ 60/156] eta: 0:01:14 lr: 0.006445 min_lr: 0.006445 loss: 3.5447 (3.4934) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [101] [ 70/156] eta: 0:01:04 lr: 0.006443 min_lr: 0.006443 loss: 3.5005 (3.5085) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0006 max mem: 55573 Epoch: [101] [ 80/156] eta: 0:00:55 lr: 0.006441 min_lr: 0.006441 loss: 3.5610 (3.5342) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0006 max mem: 55573 Epoch: [101] [ 90/156] eta: 0:00:47 lr: 0.006438 min_lr: 0.006438 loss: 3.7379 (3.5507) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0006 max mem: 55573 Epoch: [101] [100/156] eta: 0:00:39 lr: 0.006436 min_lr: 0.006436 loss: 3.7007 (3.5354) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0007 max mem: 55573 Epoch: [101] [110/156] eta: 0:00:31 lr: 0.006434 min_lr: 0.006434 loss: 3.6743 (3.5473) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0006 max mem: 55573 Epoch: [101] [120/156] eta: 0:00:24 lr: 0.006432 min_lr: 0.006432 loss: 3.7842 (3.5626) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0008 max mem: 55573 Epoch: [101] [130/156] eta: 0:00:17 lr: 0.006429 min_lr: 0.006429 loss: 3.7842 (3.5670) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0014 max mem: 55573 Epoch: [101] [140/156] eta: 0:00:10 lr: 0.006427 min_lr: 0.006427 loss: 3.7472 (3.5703) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0010 max mem: 55573 Epoch: [101] [150/156] eta: 0:00:03 lr: 0.006425 min_lr: 0.006425 loss: 3.7472 (3.5846) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [101] [155/156] eta: 0:00:00 lr: 0.006424 min_lr: 0.006424 loss: 3.5546 (3.5642) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0002 max mem: 55573 Epoch: [101] Total time: 0:01:44 (0.6687 s / it) Averaged stats: lr: 0.006424 min_lr: 0.006424 loss: 3.5546 (3.5207) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2763 (1.2763) acc1: 74.6094 (74.6094) acc5: 92.8385 (92.8385) time: 6.9669 data: 6.7294 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3521 (1.3851) acc1: 70.1823 (69.2800) acc5: 90.5660 (90.6240) time: 1.5400 data: 1.3460 max mem: 55573 Test: Total time: 0:00:07 (1.5750 s / it) * Acc@1 70.650 Acc@5 90.552 loss 1.351 Accuracy of the model on the 50000 test images: 70.7% Max accuracy: 72.07% Test: [0/5] eta: 0:00:37 loss: 7.3227 (7.3227) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 7.4686 data: 7.2327 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.4190 (7.4965) acc1: 0.0000 (0.3520) acc5: 0.1302 (0.9280) time: 1.6390 data: 1.4467 max mem: 55573 Test: Total time: 0:00:08 (1.6618 s / it) * Acc@1 0.258 Acc@5 0.820 loss 7.476 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.26% Epoch: [102] [ 0/156] eta: 0:34:24 lr: 0.006423 min_lr: 0.006423 loss: 3.8816 (3.8816) weight_decay: 0.0500 (0.0500) time: 13.2336 data: 7.9062 max mem: 55573 Epoch: [102] [ 10/156] eta: 0:04:20 lr: 0.006421 min_lr: 0.006421 loss: 3.5478 (3.4532) weight_decay: 0.0500 (0.0500) time: 1.7866 data: 0.7253 max mem: 55573 Epoch: [102] [ 20/156] eta: 0:02:45 lr: 0.006419 min_lr: 0.006419 loss: 3.0804 (3.2489) weight_decay: 0.0500 (0.0500) time: 0.6155 data: 0.0039 max mem: 55573 Epoch: [102] [ 30/156] eta: 0:02:07 lr: 0.006416 min_lr: 0.006416 loss: 3.3809 (3.3497) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0007 max mem: 55573 Epoch: [102] [ 40/156] eta: 0:01:45 lr: 0.006414 min_lr: 0.006414 loss: 3.5204 (3.3437) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0008 max mem: 55573 Epoch: [102] [ 50/156] eta: 0:01:29 lr: 0.006412 min_lr: 0.006412 loss: 3.4934 (3.3699) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0008 max mem: 55573 Epoch: [102] [ 60/156] eta: 0:01:17 lr: 0.006410 min_lr: 0.006410 loss: 3.7495 (3.3856) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0007 max mem: 55573 Epoch: [102] [ 70/156] eta: 0:01:06 lr: 0.006407 min_lr: 0.006407 loss: 3.6624 (3.3811) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [102] [ 80/156] eta: 0:00:57 lr: 0.006405 min_lr: 0.006405 loss: 3.2780 (3.3749) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0006 max mem: 55573 Epoch: [102] [ 90/156] eta: 0:00:48 lr: 0.006403 min_lr: 0.006403 loss: 3.4601 (3.4085) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [102] [100/156] eta: 0:00:40 lr: 0.006400 min_lr: 0.006400 loss: 3.6891 (3.4046) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [102] [110/156] eta: 0:00:32 lr: 0.006398 min_lr: 0.006398 loss: 3.5596 (3.4198) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0006 max mem: 55573 Epoch: [102] [120/156] eta: 0:00:25 lr: 0.006396 min_lr: 0.006396 loss: 3.5862 (3.4181) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0007 max mem: 55573 Epoch: [102] [130/156] eta: 0:00:17 lr: 0.006393 min_lr: 0.006393 loss: 3.5894 (3.4277) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0013 max mem: 55573 Epoch: [102] [140/156] eta: 0:00:10 lr: 0.006391 min_lr: 0.006391 loss: 3.6785 (3.4328) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0010 max mem: 55573 Epoch: [102] [150/156] eta: 0:00:04 lr: 0.006389 min_lr: 0.006389 loss: 3.5058 (3.4307) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [102] [155/156] eta: 0:00:00 lr: 0.006388 min_lr: 0.006388 loss: 3.5058 (3.4363) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [102] Total time: 0:01:46 (0.6802 s / it) Averaged stats: lr: 0.006388 min_lr: 0.006388 loss: 3.5058 (3.5319) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3337 (1.3337) acc1: 75.7812 (75.7812) acc5: 93.7500 (93.7500) time: 6.8825 data: 6.6455 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3914 (1.4123) acc1: 71.3542 (70.4960) acc5: 92.4528 (91.2640) time: 1.5231 data: 1.3292 max mem: 55573 Test: Total time: 0:00:07 (1.5704 s / it) * Acc@1 71.728 Acc@5 90.988 loss 1.397 Accuracy of the model on the 50000 test images: 71.7% Max accuracy: 72.07% Test: [0/5] eta: 0:00:32 loss: 7.3012 (7.3012) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.4052 data: 6.1690 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.3857 (7.4675) acc1: 0.0000 (0.3520) acc5: 0.2604 (0.9920) time: 1.4943 data: 1.3018 max mem: 55573 Test: Total time: 0:00:07 (1.5175 s / it) * Acc@1 0.290 Acc@5 0.882 loss 7.454 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.29% Epoch: [103] [ 0/156] eta: 0:35:48 lr: 0.006388 min_lr: 0.006388 loss: 2.5335 (2.5335) weight_decay: 0.0500 (0.0500) time: 13.7732 data: 13.1568 max mem: 55573 Epoch: [103] [ 10/156] eta: 0:04:22 lr: 0.006385 min_lr: 0.006385 loss: 3.8120 (3.6249) weight_decay: 0.0500 (0.0500) time: 1.7957 data: 1.1964 max mem: 55573 Epoch: [103] [ 20/156] eta: 0:02:46 lr: 0.006383 min_lr: 0.006383 loss: 3.7687 (3.5986) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0004 max mem: 55573 Epoch: [103] [ 30/156] eta: 0:02:08 lr: 0.006381 min_lr: 0.006381 loss: 3.6850 (3.5119) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [103] [ 40/156] eta: 0:01:46 lr: 0.006378 min_lr: 0.006378 loss: 3.4322 (3.4836) weight_decay: 0.0500 (0.0500) time: 0.5937 data: 0.0005 max mem: 55573 Epoch: [103] [ 50/156] eta: 0:01:30 lr: 0.006376 min_lr: 0.006376 loss: 3.4491 (3.5038) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [103] [ 60/156] eta: 0:01:17 lr: 0.006374 min_lr: 0.006374 loss: 3.4885 (3.5040) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [103] [ 70/156] eta: 0:01:06 lr: 0.006371 min_lr: 0.006371 loss: 3.7830 (3.5356) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0006 max mem: 55573 Epoch: [103] [ 80/156] eta: 0:00:57 lr: 0.006369 min_lr: 0.006369 loss: 3.8508 (3.5531) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0006 max mem: 55573 Epoch: [103] [ 90/156] eta: 0:00:48 lr: 0.006367 min_lr: 0.006367 loss: 3.6506 (3.5572) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [103] [100/156] eta: 0:00:40 lr: 0.006364 min_lr: 0.006364 loss: 3.6506 (3.5480) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0006 max mem: 55573 Epoch: [103] [110/156] eta: 0:00:32 lr: 0.006362 min_lr: 0.006362 loss: 3.5826 (3.5238) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [103] [120/156] eta: 0:00:25 lr: 0.006360 min_lr: 0.006360 loss: 3.3605 (3.5130) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0006 max mem: 55573 Epoch: [103] [130/156] eta: 0:00:17 lr: 0.006357 min_lr: 0.006357 loss: 3.4195 (3.5149) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0015 max mem: 55573 Epoch: [103] [140/156] eta: 0:00:10 lr: 0.006355 min_lr: 0.006355 loss: 3.7763 (3.5406) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0012 max mem: 55573 Epoch: [103] [150/156] eta: 0:00:04 lr: 0.006353 min_lr: 0.006353 loss: 3.7040 (3.5241) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [103] [155/156] eta: 0:00:00 lr: 0.006352 min_lr: 0.006352 loss: 3.7007 (3.5292) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [103] Total time: 0:01:46 (0.6813 s / it) Averaged stats: lr: 0.006352 min_lr: 0.006352 loss: 3.7007 (3.5160) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2553 (1.2553) acc1: 77.3438 (77.3438) acc5: 94.2708 (94.2708) time: 6.8774 data: 6.6405 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3405 (1.3897) acc1: 74.6094 (71.9360) acc5: 93.6198 (91.3600) time: 1.5219 data: 1.3282 max mem: 55573 Test: Total time: 0:00:07 (1.5574 s / it) * Acc@1 72.216 Acc@5 91.356 loss 1.375 Accuracy of the model on the 50000 test images: 72.2% Max accuracy: 72.22% Test: [0/5] eta: 0:00:33 loss: 7.2698 (7.2698) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7807 data: 6.5444 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.3500 (7.4275) acc1: 0.0000 (0.3520) acc5: 0.2604 (1.1200) time: 1.5015 data: 1.3090 max mem: 55573 Test: Total time: 0:00:07 (1.5189 s / it) * Acc@1 0.310 Acc@5 0.956 loss 7.425 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.31% Epoch: [104] [ 0/156] eta: 0:33:31 lr: 0.006351 min_lr: 0.006351 loss: 3.5156 (3.5156) weight_decay: 0.0500 (0.0500) time: 12.8968 data: 8.5858 max mem: 55573 Epoch: [104] [ 10/156] eta: 0:04:11 lr: 0.006349 min_lr: 0.006349 loss: 3.5926 (3.4568) weight_decay: 0.0500 (0.0500) time: 1.7229 data: 0.7812 max mem: 55573 Epoch: [104] [ 20/156] eta: 0:02:40 lr: 0.006347 min_lr: 0.006347 loss: 3.5462 (3.4544) weight_decay: 0.0500 (0.0500) time: 0.5976 data: 0.0007 max mem: 55573 Epoch: [104] [ 30/156] eta: 0:02:05 lr: 0.006344 min_lr: 0.006344 loss: 3.3159 (3.4129) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0006 max mem: 55573 Epoch: [104] [ 40/156] eta: 0:01:43 lr: 0.006342 min_lr: 0.006342 loss: 3.4221 (3.4733) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0006 max mem: 55573 Epoch: [104] [ 50/156] eta: 0:01:28 lr: 0.006340 min_lr: 0.006340 loss: 3.6057 (3.4326) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0007 max mem: 55573 Epoch: [104] [ 60/156] eta: 0:01:16 lr: 0.006337 min_lr: 0.006337 loss: 3.6057 (3.4571) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [104] [ 70/156] eta: 0:01:05 lr: 0.006335 min_lr: 0.006335 loss: 3.4079 (3.4557) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0006 max mem: 55573 Epoch: [104] [ 80/156] eta: 0:00:56 lr: 0.006333 min_lr: 0.006333 loss: 3.3932 (3.4422) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0006 max mem: 55573 Epoch: [104] [ 90/156] eta: 0:00:48 lr: 0.006330 min_lr: 0.006330 loss: 3.6053 (3.4657) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [104] [100/156] eta: 0:00:39 lr: 0.006328 min_lr: 0.006328 loss: 3.6538 (3.4743) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [104] [110/156] eta: 0:00:32 lr: 0.006326 min_lr: 0.006326 loss: 3.5994 (3.4770) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [104] [120/156] eta: 0:00:24 lr: 0.006323 min_lr: 0.006323 loss: 3.6913 (3.4681) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [104] [130/156] eta: 0:00:17 lr: 0.006321 min_lr: 0.006321 loss: 3.2643 (3.4512) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0014 max mem: 55573 Epoch: [104] [140/156] eta: 0:00:10 lr: 0.006319 min_lr: 0.006319 loss: 3.5667 (3.4554) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0013 max mem: 55573 Epoch: [104] [150/156] eta: 0:00:04 lr: 0.006316 min_lr: 0.006316 loss: 3.6679 (3.4570) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [104] [155/156] eta: 0:00:00 lr: 0.006315 min_lr: 0.006315 loss: 3.6631 (3.4646) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [104] Total time: 0:01:45 (0.6757 s / it) Averaged stats: lr: 0.006315 min_lr: 0.006315 loss: 3.6631 (3.5108) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1745 (1.1745) acc1: 76.9531 (76.9531) acc5: 94.7917 (94.7917) time: 6.8139 data: 6.5770 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3722 (1.3575) acc1: 72.6562 (71.2000) acc5: 93.0990 (91.1680) time: 1.5102 data: 1.3158 max mem: 55573 Test: Total time: 0:00:07 (1.5505 s / it) * Acc@1 72.166 Acc@5 91.242 loss 1.354 Accuracy of the model on the 50000 test images: 72.2% Max accuracy: 72.22% Test: [0/5] eta: 0:00:33 loss: 7.2293 (7.2293) acc1: 0.0000 (0.0000) acc5: 0.0000 (0.0000) time: 6.7004 data: 6.4643 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.3143 (7.3783) acc1: 0.0000 (0.3840) acc5: 0.2604 (1.2160) time: 1.4854 data: 1.2930 max mem: 55573 Test: Total time: 0:00:07 (1.5076 s / it) * Acc@1 0.330 Acc@5 1.038 loss 7.390 Accuracy of the model EMA on 50000 test images: 0.3% Max EMA accuracy: 0.33% Epoch: [105] [ 0/156] eta: 0:31:05 lr: 0.006315 min_lr: 0.006315 loss: 2.4087 (2.4087) weight_decay: 0.0500 (0.0500) time: 11.9611 data: 10.3034 max mem: 55573 Epoch: [105] [ 10/156] eta: 0:04:10 lr: 0.006313 min_lr: 0.006313 loss: 3.5131 (3.3917) weight_decay: 0.0500 (0.0500) time: 1.7162 data: 0.9372 max mem: 55573 Epoch: [105] [ 20/156] eta: 0:02:40 lr: 0.006310 min_lr: 0.006310 loss: 3.5131 (3.3838) weight_decay: 0.0500 (0.0500) time: 0.6409 data: 0.0006 max mem: 55573 Epoch: [105] [ 30/156] eta: 0:02:04 lr: 0.006308 min_lr: 0.006308 loss: 3.5275 (3.4647) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [105] [ 40/156] eta: 0:01:43 lr: 0.006306 min_lr: 0.006306 loss: 3.4917 (3.4377) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [105] [ 50/156] eta: 0:01:28 lr: 0.006303 min_lr: 0.006303 loss: 3.4061 (3.4620) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [105] [ 60/156] eta: 0:01:16 lr: 0.006301 min_lr: 0.006301 loss: 3.6771 (3.5071) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [105] [ 70/156] eta: 0:01:05 lr: 0.006298 min_lr: 0.006298 loss: 3.6771 (3.5068) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0006 max mem: 55573 Epoch: [105] [ 80/156] eta: 0:00:56 lr: 0.006296 min_lr: 0.006296 loss: 3.5218 (3.4846) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [105] [ 90/156] eta: 0:00:47 lr: 0.006294 min_lr: 0.006294 loss: 3.5135 (3.4837) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [105] [100/156] eta: 0:00:39 lr: 0.006291 min_lr: 0.006291 loss: 3.5239 (3.4830) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [105] [110/156] eta: 0:00:32 lr: 0.006289 min_lr: 0.006289 loss: 3.5875 (3.4790) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [105] [120/156] eta: 0:00:24 lr: 0.006287 min_lr: 0.006287 loss: 3.7452 (3.5027) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [105] [130/156] eta: 0:00:17 lr: 0.006284 min_lr: 0.006284 loss: 3.7584 (3.5139) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0015 max mem: 55573 Epoch: [105] [140/156] eta: 0:00:10 lr: 0.006282 min_lr: 0.006282 loss: 3.7499 (3.5156) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0013 max mem: 55573 Epoch: [105] [150/156] eta: 0:00:04 lr: 0.006280 min_lr: 0.006280 loss: 3.6516 (3.5100) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [105] [155/156] eta: 0:00:00 lr: 0.006278 min_lr: 0.006278 loss: 3.1556 (3.4988) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [105] Total time: 0:01:45 (0.6755 s / it) Averaged stats: lr: 0.006278 min_lr: 0.006278 loss: 3.1556 (3.4976) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:37 loss: 1.0753 (1.0753) acc1: 76.9531 (76.9531) acc5: 94.0104 (94.0104) time: 7.4464 data: 7.2090 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2284 (1.2641) acc1: 74.6094 (71.8400) acc5: 92.8385 (90.7200) time: 1.6357 data: 1.4419 max mem: 55573 Test: Total time: 0:00:08 (1.6749 s / it) * Acc@1 72.370 Acc@5 91.456 loss 1.254 Accuracy of the model on the 50000 test images: 72.4% Max accuracy: 72.37% Test: [0/5] eta: 0:00:34 loss: 7.1751 (7.1751) acc1: 0.0000 (0.0000) acc5: 0.1302 (0.1302) time: 6.9113 data: 6.6752 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.2703 (7.3199) acc1: 0.0000 (0.4160) acc5: 1.8868 (1.5040) time: 1.5320 data: 1.3396 max mem: 55573 Test: Total time: 0:00:07 (1.5533 s / it) * Acc@1 0.374 Acc@5 1.156 loss 7.347 Accuracy of the model EMA on 50000 test images: 0.4% Max EMA accuracy: 0.37% Epoch: [106] [ 0/156] eta: 0:30:55 lr: 0.006278 min_lr: 0.006278 loss: 3.6701 (3.6701) weight_decay: 0.0500 (0.0500) time: 11.8971 data: 7.6838 max mem: 55573 Epoch: [106] [ 10/156] eta: 0:03:56 lr: 0.006276 min_lr: 0.006276 loss: 3.6207 (3.4223) weight_decay: 0.0500 (0.0500) time: 1.6227 data: 0.6991 max mem: 55573 Epoch: [106] [ 20/156] eta: 0:02:33 lr: 0.006273 min_lr: 0.006273 loss: 3.3305 (3.2914) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0005 max mem: 55573 Epoch: [106] [ 30/156] eta: 0:02:00 lr: 0.006271 min_lr: 0.006271 loss: 3.3941 (3.3725) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [106] [ 40/156] eta: 0:01:40 lr: 0.006269 min_lr: 0.006269 loss: 3.6420 (3.4124) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [106] [ 50/156] eta: 0:01:26 lr: 0.006266 min_lr: 0.006266 loss: 3.8589 (3.4873) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [106] [ 60/156] eta: 0:01:14 lr: 0.006264 min_lr: 0.006264 loss: 3.7894 (3.4574) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0006 max mem: 55573 Epoch: [106] [ 70/156] eta: 0:01:04 lr: 0.006262 min_lr: 0.006262 loss: 3.3807 (3.4650) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [106] [ 80/156] eta: 0:00:55 lr: 0.006259 min_lr: 0.006259 loss: 3.5251 (3.4659) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0006 max mem: 55573 Epoch: [106] [ 90/156] eta: 0:00:47 lr: 0.006257 min_lr: 0.006257 loss: 3.6513 (3.4702) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0008 max mem: 55573 Epoch: [106] [100/156] eta: 0:00:39 lr: 0.006254 min_lr: 0.006254 loss: 3.6179 (3.4735) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0007 max mem: 55573 Epoch: [106] [110/156] eta: 0:00:31 lr: 0.006252 min_lr: 0.006252 loss: 3.7364 (3.4829) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0006 max mem: 55573 Epoch: [106] [120/156] eta: 0:00:24 lr: 0.006250 min_lr: 0.006250 loss: 3.7838 (3.4741) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0006 max mem: 55573 Epoch: [106] [130/156] eta: 0:00:17 lr: 0.006247 min_lr: 0.006247 loss: 3.6624 (3.4723) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0010 max mem: 55573 Epoch: [106] [140/156] eta: 0:00:10 lr: 0.006245 min_lr: 0.006245 loss: 3.4453 (3.4673) weight_decay: 0.0500 (0.0500) time: 0.5870 data: 0.0009 max mem: 55573 Epoch: [106] [150/156] eta: 0:00:03 lr: 0.006243 min_lr: 0.006243 loss: 3.3972 (3.4726) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0002 max mem: 55573 Epoch: [106] [155/156] eta: 0:00:00 lr: 0.006241 min_lr: 0.006241 loss: 3.3972 (3.4746) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0002 max mem: 55573 Epoch: [106] Total time: 0:01:44 (0.6694 s / it) Averaged stats: lr: 0.006241 min_lr: 0.006241 loss: 3.3972 (3.5046) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1263 (1.1263) acc1: 77.3438 (77.3438) acc5: 94.9219 (94.9219) time: 6.8838 data: 6.6454 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2711 (1.2394) acc1: 72.7865 (72.0320) acc5: 92.8385 (90.8160) time: 1.5234 data: 1.3292 max mem: 55573 Test: Total time: 0:00:07 (1.5619 s / it) * Acc@1 72.238 Acc@5 91.160 loss 1.253 Accuracy of the model on the 50000 test images: 72.2% Max accuracy: 72.37% Test: [0/5] eta: 0:00:36 loss: 7.1005 (7.1005) acc1: 0.1302 (0.1302) acc5: 0.3906 (0.3906) time: 7.3814 data: 7.1455 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.2109 (7.2461) acc1: 0.1302 (0.5120) acc5: 1.9531 (1.7280) time: 1.6216 data: 1.4292 max mem: 55573 Test: Total time: 0:00:08 (1.6406 s / it) * Acc@1 0.430 Acc@5 1.352 loss 7.293 Accuracy of the model EMA on 50000 test images: 0.4% Max EMA accuracy: 0.43% Epoch: [107] [ 0/156] eta: 0:35:21 lr: 0.006241 min_lr: 0.006241 loss: 4.2462 (4.2462) weight_decay: 0.0500 (0.0500) time: 13.5977 data: 12.9786 max mem: 55573 Epoch: [107] [ 10/156] eta: 0:04:18 lr: 0.006239 min_lr: 0.006239 loss: 3.8584 (3.7923) weight_decay: 0.0500 (0.0500) time: 1.7736 data: 1.1804 max mem: 55573 Epoch: [107] [ 20/156] eta: 0:02:44 lr: 0.006236 min_lr: 0.006236 loss: 3.7035 (3.7247) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [107] [ 30/156] eta: 0:02:07 lr: 0.006234 min_lr: 0.006234 loss: 3.5193 (3.5622) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [107] [ 40/156] eta: 0:01:45 lr: 0.006232 min_lr: 0.006232 loss: 3.3467 (3.5187) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [107] [ 50/156] eta: 0:01:29 lr: 0.006229 min_lr: 0.006229 loss: 3.5820 (3.5197) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [107] [ 60/156] eta: 0:01:17 lr: 0.006227 min_lr: 0.006227 loss: 3.6006 (3.5484) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0007 max mem: 55573 Epoch: [107] [ 70/156] eta: 0:01:06 lr: 0.006224 min_lr: 0.006224 loss: 3.7053 (3.5647) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0007 max mem: 55573 Epoch: [107] [ 80/156] eta: 0:00:57 lr: 0.006222 min_lr: 0.006222 loss: 3.7793 (3.6043) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [107] [ 90/156] eta: 0:00:48 lr: 0.006220 min_lr: 0.006220 loss: 3.7074 (3.5915) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [107] [100/156] eta: 0:00:40 lr: 0.006217 min_lr: 0.006217 loss: 3.1775 (3.5450) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [107] [110/156] eta: 0:00:32 lr: 0.006215 min_lr: 0.006215 loss: 3.1775 (3.5355) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [107] [120/156] eta: 0:00:25 lr: 0.006212 min_lr: 0.006212 loss: 3.6572 (3.5183) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [107] [130/156] eta: 0:00:17 lr: 0.006210 min_lr: 0.006210 loss: 3.4951 (3.5164) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0013 max mem: 55573 Epoch: [107] [140/156] eta: 0:00:10 lr: 0.006208 min_lr: 0.006208 loss: 3.7711 (3.5284) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0011 max mem: 55573 Epoch: [107] [150/156] eta: 0:00:04 lr: 0.006205 min_lr: 0.006205 loss: 3.7711 (3.5362) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [107] [155/156] eta: 0:00:00 lr: 0.006204 min_lr: 0.006204 loss: 3.6791 (3.5406) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [107] Total time: 0:01:45 (0.6794 s / it) Averaged stats: lr: 0.006204 min_lr: 0.006204 loss: 3.6791 (3.4990) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1480 (1.1480) acc1: 74.7396 (74.7396) acc5: 93.8802 (93.8802) time: 7.2441 data: 7.0068 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3458 (1.3813) acc1: 71.3542 (70.4320) acc5: 91.7969 (90.8480) time: 1.5952 data: 1.4015 max mem: 55573 Test: Total time: 0:00:08 (1.6377 s / it) * Acc@1 71.804 Acc@5 91.106 loss 1.358 Accuracy of the model on the 50000 test images: 71.8% Max accuracy: 72.37% Test: [0/5] eta: 0:00:35 loss: 7.0062 (7.0062) acc1: 0.1302 (0.1302) acc5: 0.6510 (0.6510) time: 7.0318 data: 6.7957 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.1391 (7.1587) acc1: 0.5208 (0.6400) acc5: 2.3438 (2.0160) time: 1.5517 data: 1.3592 max mem: 55573 Test: Total time: 0:00:07 (1.5686 s / it) * Acc@1 0.520 Acc@5 1.580 loss 7.228 Accuracy of the model EMA on 50000 test images: 0.5% Max EMA accuracy: 0.52% Epoch: [108] [ 0/156] eta: 0:35:26 lr: 0.006204 min_lr: 0.006204 loss: 4.0922 (4.0922) weight_decay: 0.0500 (0.0500) time: 13.6342 data: 10.0179 max mem: 55573 Epoch: [108] [ 10/156] eta: 0:04:19 lr: 0.006201 min_lr: 0.006201 loss: 3.4284 (3.3607) weight_decay: 0.0500 (0.0500) time: 1.7765 data: 0.9113 max mem: 55573 Epoch: [108] [ 20/156] eta: 0:02:44 lr: 0.006199 min_lr: 0.006199 loss: 3.6271 (3.5210) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [108] [ 30/156] eta: 0:02:07 lr: 0.006197 min_lr: 0.006197 loss: 3.6490 (3.5155) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [108] [ 40/156] eta: 0:01:45 lr: 0.006194 min_lr: 0.006194 loss: 3.5654 (3.4950) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [108] [ 50/156] eta: 0:01:29 lr: 0.006192 min_lr: 0.006192 loss: 3.5654 (3.4919) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [108] [ 60/156] eta: 0:01:17 lr: 0.006189 min_lr: 0.006189 loss: 3.3624 (3.4400) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [108] [ 70/156] eta: 0:01:06 lr: 0.006187 min_lr: 0.006187 loss: 3.6888 (3.4774) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [108] [ 80/156] eta: 0:00:57 lr: 0.006185 min_lr: 0.006185 loss: 3.6888 (3.4607) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [108] [ 90/156] eta: 0:00:48 lr: 0.006182 min_lr: 0.006182 loss: 3.3513 (3.4393) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [108] [100/156] eta: 0:00:40 lr: 0.006180 min_lr: 0.006180 loss: 3.4852 (3.4656) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0006 max mem: 55573 Epoch: [108] [110/156] eta: 0:00:32 lr: 0.006177 min_lr: 0.006177 loss: 3.8160 (3.4780) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0007 max mem: 55573 Epoch: [108] [120/156] eta: 0:00:25 lr: 0.006175 min_lr: 0.006175 loss: 3.5404 (3.4650) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0006 max mem: 55573 Epoch: [108] [130/156] eta: 0:00:17 lr: 0.006173 min_lr: 0.006173 loss: 3.5972 (3.4770) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0011 max mem: 55573 Epoch: [108] [140/156] eta: 0:00:10 lr: 0.006170 min_lr: 0.006170 loss: 3.5960 (3.4646) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [108] [150/156] eta: 0:00:04 lr: 0.006168 min_lr: 0.006168 loss: 3.4850 (3.4737) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [108] [155/156] eta: 0:00:00 lr: 0.006166 min_lr: 0.006166 loss: 3.6677 (3.4794) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [108] Total time: 0:01:46 (0.6799 s / it) Averaged stats: lr: 0.006166 min_lr: 0.006166 loss: 3.6677 (3.5066) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1228 (1.1228) acc1: 74.3490 (74.3490) acc5: 94.2708 (94.2708) time: 6.8862 data: 6.6483 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1772 (1.2026) acc1: 72.3958 (70.4320) acc5: 93.0990 (90.8800) time: 1.5238 data: 1.3297 max mem: 55573 Test: Total time: 0:00:07 (1.5576 s / it) * Acc@1 71.740 Acc@5 91.084 loss 1.193 Accuracy of the model on the 50000 test images: 71.7% Max accuracy: 72.37% Test: [0/5] eta: 0:00:34 loss: 6.8944 (6.8944) acc1: 0.5208 (0.5208) acc5: 1.3021 (1.3021) time: 6.9439 data: 6.7077 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 7.0528 (7.0558) acc1: 0.7812 (0.8640) acc5: 2.9948 (2.7840) time: 1.5341 data: 1.3417 max mem: 55573 Test: Total time: 0:00:07 (1.5579 s / it) * Acc@1 0.654 Acc@5 1.942 loss 7.150 Accuracy of the model EMA on 50000 test images: 0.7% Max EMA accuracy: 0.65% Epoch: [109] [ 0/156] eta: 0:33:07 lr: 0.006166 min_lr: 0.006166 loss: 3.6316 (3.6316) weight_decay: 0.0500 (0.0500) time: 12.7421 data: 11.8819 max mem: 55573 Epoch: [109] [ 10/156] eta: 0:04:07 lr: 0.006164 min_lr: 0.006164 loss: 3.5888 (3.3142) weight_decay: 0.0500 (0.0500) time: 1.6983 data: 1.0805 max mem: 55573 Epoch: [109] [ 20/156] eta: 0:02:39 lr: 0.006161 min_lr: 0.006161 loss: 3.2037 (3.3024) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [109] [ 30/156] eta: 0:02:04 lr: 0.006159 min_lr: 0.006159 loss: 3.2037 (3.2925) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [109] [ 40/156] eta: 0:01:43 lr: 0.006157 min_lr: 0.006157 loss: 3.4766 (3.3518) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0004 max mem: 55573 Epoch: [109] [ 50/156] eta: 0:01:28 lr: 0.006154 min_lr: 0.006154 loss: 3.5784 (3.3439) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [109] [ 60/156] eta: 0:01:16 lr: 0.006152 min_lr: 0.006152 loss: 3.3691 (3.3677) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [109] [ 70/156] eta: 0:01:05 lr: 0.006149 min_lr: 0.006149 loss: 3.6415 (3.3905) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [109] [ 80/156] eta: 0:00:56 lr: 0.006147 min_lr: 0.006147 loss: 3.6714 (3.4153) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [109] [ 90/156] eta: 0:00:47 lr: 0.006144 min_lr: 0.006144 loss: 3.6412 (3.4229) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [109] [100/156] eta: 0:00:39 lr: 0.006142 min_lr: 0.006142 loss: 3.6624 (3.4422) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0004 max mem: 55573 Epoch: [109] [110/156] eta: 0:00:32 lr: 0.006140 min_lr: 0.006140 loss: 3.6624 (3.4495) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [109] [120/156] eta: 0:00:24 lr: 0.006137 min_lr: 0.006137 loss: 3.6478 (3.4688) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [109] [130/156] eta: 0:00:17 lr: 0.006135 min_lr: 0.006135 loss: 3.7436 (3.4734) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0012 max mem: 55573 Epoch: [109] [140/156] eta: 0:00:10 lr: 0.006132 min_lr: 0.006132 loss: 3.5500 (3.4621) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0010 max mem: 55573 Epoch: [109] [150/156] eta: 0:00:04 lr: 0.006130 min_lr: 0.006130 loss: 3.5620 (3.4744) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [109] [155/156] eta: 0:00:00 lr: 0.006129 min_lr: 0.006129 loss: 3.8341 (3.4858) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [109] Total time: 0:01:45 (0.6746 s / it) Averaged stats: lr: 0.006129 min_lr: 0.006129 loss: 3.8341 (3.4953) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1619 (1.1619) acc1: 77.0833 (77.0833) acc5: 93.0990 (93.0990) time: 7.3279 data: 7.0905 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2805 (1.2872) acc1: 74.7396 (71.4880) acc5: 93.0990 (90.7520) time: 1.6121 data: 1.4182 max mem: 55573 Test: Total time: 0:00:08 (1.6533 s / it) * Acc@1 72.108 Acc@5 91.126 loss 1.281 Accuracy of the model on the 50000 test images: 72.1% Max accuracy: 72.37% Test: [0/5] eta: 0:00:37 loss: 6.7696 (6.7696) acc1: 0.9115 (0.9115) acc5: 1.8229 (1.8229) time: 7.4638 data: 7.2279 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.9542 (6.9430) acc1: 0.9115 (1.0240) acc5: 4.1667 (3.6160) time: 1.6381 data: 1.4457 max mem: 55573 Test: Total time: 0:00:08 (1.6549 s / it) * Acc@1 0.846 Acc@5 2.420 loss 7.063 Accuracy of the model EMA on 50000 test images: 0.8% Max EMA accuracy: 0.85% Epoch: [110] [ 0/156] eta: 0:29:03 lr: 0.006128 min_lr: 0.006128 loss: 4.1141 (4.1141) weight_decay: 0.0500 (0.0500) time: 11.1752 data: 10.2931 max mem: 55573 Epoch: [110] [ 10/156] eta: 0:04:14 lr: 0.006126 min_lr: 0.006126 loss: 3.5671 (3.4967) weight_decay: 0.0500 (0.0500) time: 1.7449 data: 0.9823 max mem: 55573 Epoch: [110] [ 20/156] eta: 0:02:42 lr: 0.006123 min_lr: 0.006123 loss: 3.4732 (3.4265) weight_decay: 0.0500 (0.0500) time: 0.6966 data: 0.0258 max mem: 55573 Epoch: [110] [ 30/156] eta: 0:02:06 lr: 0.006121 min_lr: 0.006121 loss: 3.1733 (3.3213) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [110] [ 40/156] eta: 0:01:44 lr: 0.006119 min_lr: 0.006119 loss: 3.4837 (3.3875) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [110] [ 50/156] eta: 0:01:29 lr: 0.006116 min_lr: 0.006116 loss: 3.5875 (3.4181) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [110] [ 60/156] eta: 0:01:16 lr: 0.006114 min_lr: 0.006114 loss: 3.5934 (3.4182) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0006 max mem: 55573 Epoch: [110] [ 70/156] eta: 0:01:06 lr: 0.006111 min_lr: 0.006111 loss: 3.3759 (3.4021) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0006 max mem: 55573 Epoch: [110] [ 80/156] eta: 0:00:56 lr: 0.006109 min_lr: 0.006109 loss: 3.3759 (3.4135) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0006 max mem: 55573 Epoch: [110] [ 90/156] eta: 0:00:48 lr: 0.006106 min_lr: 0.006106 loss: 3.6962 (3.4406) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0006 max mem: 55573 Epoch: [110] [100/156] eta: 0:00:40 lr: 0.006104 min_lr: 0.006104 loss: 3.6686 (3.4581) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0006 max mem: 55573 Epoch: [110] [110/156] eta: 0:00:32 lr: 0.006102 min_lr: 0.006102 loss: 3.6090 (3.4512) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [110] [120/156] eta: 0:00:25 lr: 0.006099 min_lr: 0.006099 loss: 3.4941 (3.4425) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [110] [130/156] eta: 0:00:17 lr: 0.006097 min_lr: 0.006097 loss: 3.7215 (3.4629) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0012 max mem: 55573 Epoch: [110] [140/156] eta: 0:00:10 lr: 0.006094 min_lr: 0.006094 loss: 3.6660 (3.4651) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0010 max mem: 55573 Epoch: [110] [150/156] eta: 0:00:04 lr: 0.006092 min_lr: 0.006092 loss: 3.6103 (3.4701) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [110] [155/156] eta: 0:00:00 lr: 0.006090 min_lr: 0.006090 loss: 3.6050 (3.4708) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [110] Total time: 0:01:45 (0.6779 s / it) Averaged stats: lr: 0.006090 min_lr: 0.006090 loss: 3.6050 (3.4875) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0735 (1.0735) acc1: 78.6458 (78.6458) acc5: 95.5729 (95.5729) time: 6.8698 data: 6.6310 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2824 (1.2781) acc1: 72.6562 (71.2640) acc5: 92.9688 (91.3600) time: 1.5209 data: 1.3264 max mem: 55573 Test: Total time: 0:00:07 (1.5683 s / it) * Acc@1 72.306 Acc@5 91.048 loss 1.278 Accuracy of the model on the 50000 test images: 72.3% Max accuracy: 72.37% Test: [0/5] eta: 0:00:36 loss: 6.6196 (6.6196) acc1: 1.5625 (1.5625) acc5: 4.0365 (4.0365) time: 7.3748 data: 7.1388 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.8090 (6.8096) acc1: 1.5625 (1.4400) acc5: 5.2083 (4.8960) time: 1.6203 data: 1.4279 max mem: 55573 Test: Total time: 0:00:08 (1.6368 s / it) * Acc@1 1.060 Acc@5 3.076 loss 6.955 Accuracy of the model EMA on 50000 test images: 1.1% Max EMA accuracy: 1.06% Epoch: [111] [ 0/156] eta: 0:34:35 lr: 0.006090 min_lr: 0.006090 loss: 3.9365 (3.9365) weight_decay: 0.0500 (0.0500) time: 13.3036 data: 12.5295 max mem: 55573 Epoch: [111] [ 10/156] eta: 0:04:19 lr: 0.006088 min_lr: 0.006088 loss: 3.6873 (3.6133) weight_decay: 0.0500 (0.0500) time: 1.7804 data: 1.1395 max mem: 55573 Epoch: [111] [ 20/156] eta: 0:02:45 lr: 0.006085 min_lr: 0.006085 loss: 3.6103 (3.5407) weight_decay: 0.0500 (0.0500) time: 0.6097 data: 0.0005 max mem: 55573 Epoch: [111] [ 30/156] eta: 0:02:07 lr: 0.006083 min_lr: 0.006083 loss: 3.5147 (3.4549) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [111] [ 40/156] eta: 0:01:45 lr: 0.006080 min_lr: 0.006080 loss: 3.5167 (3.4845) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0006 max mem: 55573 Epoch: [111] [ 50/156] eta: 0:01:29 lr: 0.006078 min_lr: 0.006078 loss: 3.5095 (3.4311) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0007 max mem: 55573 Epoch: [111] [ 60/156] eta: 0:01:17 lr: 0.006075 min_lr: 0.006075 loss: 3.5209 (3.4467) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0006 max mem: 55573 Epoch: [111] [ 70/156] eta: 0:01:06 lr: 0.006073 min_lr: 0.006073 loss: 3.5209 (3.4152) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [111] [ 80/156] eta: 0:00:57 lr: 0.006071 min_lr: 0.006071 loss: 3.2755 (3.4176) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [111] [ 90/156] eta: 0:00:48 lr: 0.006068 min_lr: 0.006068 loss: 3.5635 (3.4445) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0006 max mem: 55573 Epoch: [111] [100/156] eta: 0:00:40 lr: 0.006066 min_lr: 0.006066 loss: 3.6631 (3.4598) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [111] [110/156] eta: 0:00:32 lr: 0.006063 min_lr: 0.006063 loss: 3.6171 (3.4735) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [111] [120/156] eta: 0:00:25 lr: 0.006061 min_lr: 0.006061 loss: 3.5703 (3.4717) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [111] [130/156] eta: 0:00:17 lr: 0.006058 min_lr: 0.006058 loss: 3.4727 (3.4641) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0012 max mem: 55573 Epoch: [111] [140/156] eta: 0:00:10 lr: 0.006056 min_lr: 0.006056 loss: 3.3490 (3.4405) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0010 max mem: 55573 Epoch: [111] [150/156] eta: 0:00:04 lr: 0.006053 min_lr: 0.006053 loss: 3.4018 (3.4507) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0002 max mem: 55573 Epoch: [111] [155/156] eta: 0:00:00 lr: 0.006052 min_lr: 0.006052 loss: 3.5128 (3.4470) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0002 max mem: 55573 Epoch: [111] Total time: 0:01:46 (0.6803 s / it) Averaged stats: lr: 0.006052 min_lr: 0.006052 loss: 3.5128 (3.4566) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1416 (1.1416) acc1: 77.0833 (77.0833) acc5: 94.6615 (94.6615) time: 7.3886 data: 7.1511 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2656 (1.2053) acc1: 70.3125 (71.1680) acc5: 92.9688 (90.8160) time: 1.6242 data: 1.4303 max mem: 55573 Test: Total time: 0:00:08 (1.6634 s / it) * Acc@1 72.588 Acc@5 91.396 loss 1.204 Accuracy of the model on the 50000 test images: 72.6% Max accuracy: 72.59% Test: [0/5] eta: 0:00:35 loss: 6.4590 (6.4590) acc1: 2.4740 (2.4740) acc5: 5.4688 (5.4688) time: 7.0831 data: 6.8471 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.6535 (6.6670) acc1: 2.4740 (2.1760) acc5: 5.9896 (5.8880) time: 1.5619 data: 1.3695 max mem: 55573 Test: Total time: 0:00:07 (1.5797 s / it) * Acc@1 1.412 Acc@5 3.992 loss 6.835 Accuracy of the model EMA on 50000 test images: 1.4% Max EMA accuracy: 1.41% Epoch: [112] [ 0/156] eta: 0:34:18 lr: 0.006052 min_lr: 0.006052 loss: 3.3098 (3.3098) weight_decay: 0.0500 (0.0500) time: 13.1929 data: 9.2715 max mem: 55573 Epoch: [112] [ 10/156] eta: 0:04:18 lr: 0.006049 min_lr: 0.006049 loss: 3.6961 (3.4964) weight_decay: 0.0500 (0.0500) time: 1.7734 data: 0.8443 max mem: 55573 Epoch: [112] [ 20/156] eta: 0:02:44 lr: 0.006047 min_lr: 0.006047 loss: 3.7265 (3.5204) weight_decay: 0.0500 (0.0500) time: 0.6114 data: 0.0011 max mem: 55573 Epoch: [112] [ 30/156] eta: 0:02:07 lr: 0.006044 min_lr: 0.006044 loss: 3.5109 (3.4044) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0007 max mem: 55573 Epoch: [112] [ 40/156] eta: 0:01:45 lr: 0.006042 min_lr: 0.006042 loss: 3.4426 (3.4437) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0007 max mem: 55573 Epoch: [112] [ 50/156] eta: 0:01:29 lr: 0.006039 min_lr: 0.006039 loss: 3.7088 (3.4977) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [112] [ 60/156] eta: 0:01:17 lr: 0.006037 min_lr: 0.006037 loss: 3.7347 (3.4544) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [112] [ 70/156] eta: 0:01:06 lr: 0.006035 min_lr: 0.006035 loss: 3.7347 (3.4846) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0006 max mem: 55573 Epoch: [112] [ 80/156] eta: 0:00:57 lr: 0.006032 min_lr: 0.006032 loss: 3.7532 (3.4582) weight_decay: 0.0500 (0.0500) time: 0.5948 data: 0.0006 max mem: 55573 Epoch: [112] [ 90/156] eta: 0:00:48 lr: 0.006030 min_lr: 0.006030 loss: 3.2924 (3.4403) weight_decay: 0.0500 (0.0500) time: 0.5949 data: 0.0006 max mem: 55573 Epoch: [112] [100/156] eta: 0:00:40 lr: 0.006027 min_lr: 0.006027 loss: 3.6791 (3.4679) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0008 max mem: 55573 Epoch: [112] [110/156] eta: 0:00:32 lr: 0.006025 min_lr: 0.006025 loss: 3.7430 (3.4695) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0008 max mem: 55573 Epoch: [112] [120/156] eta: 0:00:25 lr: 0.006022 min_lr: 0.006022 loss: 3.7236 (3.4811) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0008 max mem: 55573 Epoch: [112] [130/156] eta: 0:00:17 lr: 0.006020 min_lr: 0.006020 loss: 3.6255 (3.4850) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0015 max mem: 55573 Epoch: [112] [140/156] eta: 0:00:10 lr: 0.006017 min_lr: 0.006017 loss: 3.5440 (3.4912) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0012 max mem: 55573 Epoch: [112] [150/156] eta: 0:00:04 lr: 0.006015 min_lr: 0.006015 loss: 3.7245 (3.4886) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0002 max mem: 55573 Epoch: [112] [155/156] eta: 0:00:00 lr: 0.006013 min_lr: 0.006013 loss: 3.7245 (3.4874) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [112] Total time: 0:01:46 (0.6805 s / it) Averaged stats: lr: 0.006013 min_lr: 0.006013 loss: 3.7245 (3.4581) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1072 (1.1072) acc1: 77.6042 (77.6042) acc5: 93.8802 (93.8802) time: 6.8426 data: 6.6055 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2111 (1.2341) acc1: 74.0885 (72.2560) acc5: 93.4896 (91.2960) time: 1.5150 data: 1.3212 max mem: 55573 Test: Total time: 0:00:07 (1.5486 s / it) * Acc@1 72.444 Acc@5 91.470 loss 1.213 Accuracy of the model on the 50000 test images: 72.4% Max accuracy: 72.59% Test: [0/5] eta: 0:00:36 loss: 6.2673 (6.2673) acc1: 2.8646 (2.8646) acc5: 7.5521 (7.5521) time: 7.2804 data: 7.0439 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.4818 (6.4994) acc1: 2.8646 (2.6880) acc5: 7.5521 (7.6160) time: 1.6015 data: 1.4089 max mem: 55573 Test: Total time: 0:00:08 (1.6199 s / it) * Acc@1 1.952 Acc@5 5.272 loss 6.691 Accuracy of the model EMA on 50000 test images: 2.0% Max EMA accuracy: 1.95% Epoch: [113] [ 0/156] eta: 0:31:26 lr: 0.006013 min_lr: 0.006013 loss: 2.2979 (2.2979) weight_decay: 0.0500 (0.0500) time: 12.0912 data: 11.4866 max mem: 55573 Epoch: [113] [ 10/156] eta: 0:04:01 lr: 0.006011 min_lr: 0.006011 loss: 3.6502 (3.5055) weight_decay: 0.0500 (0.0500) time: 1.6519 data: 1.0447 max mem: 55573 Epoch: [113] [ 20/156] eta: 0:02:36 lr: 0.006008 min_lr: 0.006008 loss: 3.5825 (3.4278) weight_decay: 0.0500 (0.0500) time: 0.6006 data: 0.0005 max mem: 55573 Epoch: [113] [ 30/156] eta: 0:02:01 lr: 0.006006 min_lr: 0.006006 loss: 3.5825 (3.4788) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [113] [ 40/156] eta: 0:01:41 lr: 0.006003 min_lr: 0.006003 loss: 3.6148 (3.4538) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [113] [ 50/156] eta: 0:01:26 lr: 0.006001 min_lr: 0.006001 loss: 3.6717 (3.4733) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0006 max mem: 55573 Epoch: [113] [ 60/156] eta: 0:01:15 lr: 0.005998 min_lr: 0.005998 loss: 3.3437 (3.4025) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [113] [ 70/156] eta: 0:01:04 lr: 0.005996 min_lr: 0.005996 loss: 2.9886 (3.3775) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0006 max mem: 55573 Epoch: [113] [ 80/156] eta: 0:00:55 lr: 0.005993 min_lr: 0.005993 loss: 3.5825 (3.4123) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0007 max mem: 55573 Epoch: [113] [ 90/156] eta: 0:00:47 lr: 0.005991 min_lr: 0.005991 loss: 3.7266 (3.4218) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0006 max mem: 55573 Epoch: [113] [100/156] eta: 0:00:39 lr: 0.005988 min_lr: 0.005988 loss: 3.3774 (3.4036) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [113] [110/156] eta: 0:00:32 lr: 0.005986 min_lr: 0.005986 loss: 3.4022 (3.4219) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [113] [120/156] eta: 0:00:24 lr: 0.005983 min_lr: 0.005983 loss: 3.6713 (3.4291) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [113] [130/156] eta: 0:00:17 lr: 0.005981 min_lr: 0.005981 loss: 3.7001 (3.4408) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0011 max mem: 55573 Epoch: [113] [140/156] eta: 0:00:10 lr: 0.005978 min_lr: 0.005978 loss: 3.7206 (3.4420) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [113] [150/156] eta: 0:00:04 lr: 0.005976 min_lr: 0.005976 loss: 3.6037 (3.4411) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [113] [155/156] eta: 0:00:00 lr: 0.005975 min_lr: 0.005975 loss: 3.5262 (3.4378) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [113] Total time: 0:01:44 (0.6710 s / it) Averaged stats: lr: 0.005975 min_lr: 0.005975 loss: 3.5262 (3.4836) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.2023 (1.2023) acc1: 75.1302 (75.1302) acc5: 93.3594 (93.3594) time: 7.2427 data: 7.0052 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2295 (1.2555) acc1: 74.2188 (71.4240) acc5: 93.3594 (91.2320) time: 1.5949 data: 1.4011 max mem: 55573 Test: Total time: 0:00:08 (1.6364 s / it) * Acc@1 72.876 Acc@5 91.482 loss 1.251 Accuracy of the model on the 50000 test images: 72.9% Max accuracy: 72.88% Test: [0/5] eta: 0:00:37 loss: 6.0626 (6.0626) acc1: 4.5573 (4.5573) acc5: 10.5469 (10.5469) time: 7.4240 data: 7.1874 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.2947 (6.3173) acc1: 4.5573 (3.7440) acc5: 9.6354 (9.5680) time: 1.6304 data: 1.4376 max mem: 55573 Test: Total time: 0:00:08 (1.6513 s / it) * Acc@1 2.638 Acc@5 6.918 loss 6.529 Accuracy of the model EMA on 50000 test images: 2.6% Max EMA accuracy: 2.64% Epoch: [114] [ 0/156] eta: 0:28:35 lr: 0.005974 min_lr: 0.005974 loss: 2.5949 (2.5949) weight_decay: 0.0500 (0.0500) time: 10.9942 data: 9.9900 max mem: 55573 Epoch: [114] [ 10/156] eta: 0:04:08 lr: 0.005972 min_lr: 0.005972 loss: 3.6422 (3.3772) weight_decay: 0.0500 (0.0500) time: 1.7050 data: 1.0028 max mem: 55573 Epoch: [114] [ 20/156] eta: 0:02:39 lr: 0.005969 min_lr: 0.005969 loss: 3.6408 (3.4143) weight_decay: 0.0500 (0.0500) time: 0.6828 data: 0.0524 max mem: 55573 Epoch: [114] [ 30/156] eta: 0:02:04 lr: 0.005967 min_lr: 0.005967 loss: 3.6408 (3.4465) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0006 max mem: 55573 Epoch: [114] [ 40/156] eta: 0:01:43 lr: 0.005964 min_lr: 0.005964 loss: 3.4828 (3.3951) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [114] [ 50/156] eta: 0:01:28 lr: 0.005962 min_lr: 0.005962 loss: 3.2947 (3.4008) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0006 max mem: 55573 Epoch: [114] [ 60/156] eta: 0:01:15 lr: 0.005959 min_lr: 0.005959 loss: 3.5746 (3.4268) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0006 max mem: 55573 Epoch: [114] [ 70/156] eta: 0:01:05 lr: 0.005957 min_lr: 0.005957 loss: 3.4879 (3.4088) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [114] [ 80/156] eta: 0:00:56 lr: 0.005954 min_lr: 0.005954 loss: 3.4533 (3.4154) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [114] [ 90/156] eta: 0:00:47 lr: 0.005952 min_lr: 0.005952 loss: 3.6091 (3.4331) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [114] [100/156] eta: 0:00:39 lr: 0.005949 min_lr: 0.005949 loss: 3.5165 (3.4166) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0008 max mem: 55573 Epoch: [114] [110/156] eta: 0:00:32 lr: 0.005947 min_lr: 0.005947 loss: 3.5072 (3.4266) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0008 max mem: 55573 Epoch: [114] [120/156] eta: 0:00:24 lr: 0.005944 min_lr: 0.005944 loss: 3.7442 (3.4343) weight_decay: 0.0500 (0.0500) time: 0.5978 data: 0.0005 max mem: 55573 Epoch: [114] [130/156] eta: 0:00:17 lr: 0.005942 min_lr: 0.005942 loss: 3.6882 (3.4489) weight_decay: 0.0500 (0.0500) time: 0.5990 data: 0.0007 max mem: 55573 Epoch: [114] [140/156] eta: 0:00:10 lr: 0.005939 min_lr: 0.005939 loss: 3.6882 (3.4560) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0005 max mem: 55573 Epoch: [114] [150/156] eta: 0:00:04 lr: 0.005937 min_lr: 0.005937 loss: 3.3799 (3.4439) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [114] [155/156] eta: 0:00:00 lr: 0.005935 min_lr: 0.005935 loss: 3.4993 (3.4473) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [114] Total time: 0:01:45 (0.6758 s / it) Averaged stats: lr: 0.005935 min_lr: 0.005935 loss: 3.4993 (3.4889) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.1396 (1.1396) acc1: 76.8229 (76.8229) acc5: 93.6198 (93.6198) time: 6.7472 data: 6.5101 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2559 (1.2358) acc1: 71.7448 (71.0400) acc5: 92.4479 (90.7840) time: 1.4958 data: 1.3021 max mem: 55573 Test: Total time: 0:00:07 (1.5296 s / it) * Acc@1 71.808 Acc@5 91.054 loss 1.228 Accuracy of the model on the 50000 test images: 71.8% Max accuracy: 72.88% Test: [0/5] eta: 0:00:33 loss: 5.8458 (5.8458) acc1: 5.7292 (5.7292) acc5: 13.4115 (13.4115) time: 6.7487 data: 6.5126 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 6.1053 (6.1232) acc1: 5.4688 (4.5760) acc5: 10.9375 (11.6800) time: 1.4951 data: 1.3026 max mem: 55573 Test: Total time: 0:00:07 (1.5368 s / it) * Acc@1 3.462 Acc@5 8.872 loss 6.352 Accuracy of the model EMA on 50000 test images: 3.5% Max EMA accuracy: 3.46% Epoch: [115] [ 0/156] eta: 0:34:33 lr: 0.005935 min_lr: 0.005935 loss: 3.4633 (3.4633) weight_decay: 0.0500 (0.0500) time: 13.2915 data: 9.7036 max mem: 55573 Epoch: [115] [ 10/156] eta: 0:04:20 lr: 0.005933 min_lr: 0.005933 loss: 3.3890 (3.3916) weight_decay: 0.0500 (0.0500) time: 1.7847 data: 0.8832 max mem: 55573 Epoch: [115] [ 20/156] eta: 0:02:45 lr: 0.005930 min_lr: 0.005930 loss: 3.3750 (3.3970) weight_decay: 0.0500 (0.0500) time: 0.6113 data: 0.0009 max mem: 55573 Epoch: [115] [ 30/156] eta: 0:02:07 lr: 0.005928 min_lr: 0.005928 loss: 3.6170 (3.4642) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0006 max mem: 55573 Epoch: [115] [ 40/156] eta: 0:01:45 lr: 0.005925 min_lr: 0.005925 loss: 3.6322 (3.4074) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [115] [ 50/156] eta: 0:01:29 lr: 0.005923 min_lr: 0.005923 loss: 3.4098 (3.3790) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [115] [ 60/156] eta: 0:01:17 lr: 0.005920 min_lr: 0.005920 loss: 3.4502 (3.4062) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0006 max mem: 55573 Epoch: [115] [ 70/156] eta: 0:01:06 lr: 0.005917 min_lr: 0.005917 loss: 3.6190 (3.4230) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0006 max mem: 55573 Epoch: [115] [ 80/156] eta: 0:00:57 lr: 0.005915 min_lr: 0.005915 loss: 3.7152 (3.4450) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0006 max mem: 55573 Epoch: [115] [ 90/156] eta: 0:00:48 lr: 0.005912 min_lr: 0.005912 loss: 3.5177 (3.4312) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0006 max mem: 55573 Epoch: [115] [100/156] eta: 0:00:40 lr: 0.005910 min_lr: 0.005910 loss: 3.4324 (3.4448) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [115] [110/156] eta: 0:00:32 lr: 0.005907 min_lr: 0.005907 loss: 3.5830 (3.4361) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [115] [120/156] eta: 0:00:25 lr: 0.005905 min_lr: 0.005905 loss: 3.5830 (3.4336) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [115] [130/156] eta: 0:00:17 lr: 0.005902 min_lr: 0.005902 loss: 3.6624 (3.4335) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0011 max mem: 55573 Epoch: [115] [140/156] eta: 0:00:10 lr: 0.005900 min_lr: 0.005900 loss: 3.7332 (3.4505) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0010 max mem: 55573 Epoch: [115] [150/156] eta: 0:00:04 lr: 0.005897 min_lr: 0.005897 loss: 3.7391 (3.4551) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [115] [155/156] eta: 0:00:00 lr: 0.005896 min_lr: 0.005896 loss: 3.8033 (3.4695) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [115] Total time: 0:01:46 (0.6799 s / it) Averaged stats: lr: 0.005896 min_lr: 0.005896 loss: 3.8033 (3.4751) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.4149 (1.4149) acc1: 76.4323 (76.4323) acc5: 93.2292 (93.2292) time: 7.1561 data: 6.9185 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5264 (1.5459) acc1: 72.3958 (71.2640) acc5: 92.9688 (90.7840) time: 1.5779 data: 1.3838 max mem: 55573 Test: Total time: 0:00:08 (1.6333 s / it) * Acc@1 71.914 Acc@5 90.842 loss 1.538 Accuracy of the model on the 50000 test images: 71.9% Max accuracy: 72.88% Test: [0/5] eta: 0:00:36 loss: 5.6213 (5.6213) acc1: 7.2917 (7.2917) acc5: 17.3177 (17.3177) time: 7.2774 data: 7.0408 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 5.9243 (5.9152) acc1: 6.2500 (5.7600) acc5: 13.2812 (14.8800) time: 1.6009 data: 1.4083 max mem: 55573 Test: Total time: 0:00:08 (1.6209 s / it) * Acc@1 4.424 Acc@5 11.102 loss 6.159 Accuracy of the model EMA on 50000 test images: 4.4% Max EMA accuracy: 4.42% Epoch: [116] [ 0/156] eta: 0:31:47 lr: 0.005896 min_lr: 0.005896 loss: 3.8366 (3.8366) weight_decay: 0.0500 (0.0500) time: 12.2257 data: 10.4274 max mem: 55573 Epoch: [116] [ 10/156] eta: 0:04:06 lr: 0.005893 min_lr: 0.005893 loss: 3.6851 (3.5631) weight_decay: 0.0500 (0.0500) time: 1.6897 data: 0.9485 max mem: 55573 Epoch: [116] [ 20/156] eta: 0:02:38 lr: 0.005891 min_lr: 0.005891 loss: 3.6422 (3.5101) weight_decay: 0.0500 (0.0500) time: 0.6133 data: 0.0006 max mem: 55573 Epoch: [116] [ 30/156] eta: 0:02:03 lr: 0.005888 min_lr: 0.005888 loss: 3.5315 (3.5040) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [116] [ 40/156] eta: 0:01:42 lr: 0.005886 min_lr: 0.005886 loss: 3.4726 (3.4787) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0006 max mem: 55573 Epoch: [116] [ 50/156] eta: 0:01:27 lr: 0.005883 min_lr: 0.005883 loss: 3.5270 (3.4816) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0007 max mem: 55573 Epoch: [116] [ 60/156] eta: 0:01:15 lr: 0.005881 min_lr: 0.005881 loss: 3.5478 (3.4847) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0007 max mem: 55573 Epoch: [116] [ 70/156] eta: 0:01:05 lr: 0.005878 min_lr: 0.005878 loss: 3.5791 (3.4717) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [116] [ 80/156] eta: 0:00:56 lr: 0.005875 min_lr: 0.005875 loss: 3.5916 (3.4937) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [116] [ 90/156] eta: 0:00:47 lr: 0.005873 min_lr: 0.005873 loss: 3.5916 (3.4880) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [116] [100/156] eta: 0:00:39 lr: 0.005870 min_lr: 0.005870 loss: 3.6523 (3.4982) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [116] [110/156] eta: 0:00:32 lr: 0.005868 min_lr: 0.005868 loss: 3.6712 (3.5011) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [116] [120/156] eta: 0:00:24 lr: 0.005865 min_lr: 0.005865 loss: 3.5666 (3.5183) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [116] [130/156] eta: 0:00:17 lr: 0.005863 min_lr: 0.005863 loss: 3.6771 (3.5209) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0012 max mem: 55573 Epoch: [116] [140/156] eta: 0:00:10 lr: 0.005860 min_lr: 0.005860 loss: 3.7148 (3.5240) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0010 max mem: 55573 Epoch: [116] [150/156] eta: 0:00:04 lr: 0.005858 min_lr: 0.005858 loss: 3.7556 (3.5275) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [116] [155/156] eta: 0:00:00 lr: 0.005856 min_lr: 0.005856 loss: 3.7556 (3.5266) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [116] Total time: 0:01:45 (0.6738 s / it) Averaged stats: lr: 0.005856 min_lr: 0.005856 loss: 3.7556 (3.4670) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.3068 (1.3068) acc1: 76.3021 (76.3021) acc5: 93.7500 (93.7500) time: 6.9671 data: 6.7296 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3068 (1.3952) acc1: 74.8698 (71.5840) acc5: 93.7500 (90.8160) time: 1.5459 data: 1.3532 max mem: 55573 Test: Total time: 0:00:07 (1.5869 s / it) * Acc@1 71.756 Acc@5 90.912 loss 1.394 Accuracy of the model on the 50000 test images: 71.8% Max accuracy: 72.88% Test: [0/5] eta: 0:00:35 loss: 5.3873 (5.3873) acc1: 8.5938 (8.5938) acc5: 20.5729 (20.5729) time: 7.1362 data: 6.9001 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 5.7017 (5.6963) acc1: 7.2917 (7.0400) acc5: 18.8679 (17.9200) time: 1.5726 data: 1.3801 max mem: 55573 Test: Total time: 0:00:07 (1.5918 s / it) * Acc@1 5.548 Acc@5 13.826 loss 5.951 Accuracy of the model EMA on 50000 test images: 5.5% Max EMA accuracy: 5.55% Epoch: [117] [ 0/156] eta: 0:31:12 lr: 0.005856 min_lr: 0.005856 loss: 4.0609 (4.0609) weight_decay: 0.0500 (0.0500) time: 12.0010 data: 9.3068 max mem: 55573 Epoch: [117] [ 10/156] eta: 0:04:07 lr: 0.005854 min_lr: 0.005854 loss: 3.9304 (3.8105) weight_decay: 0.0500 (0.0500) time: 1.6921 data: 0.8667 max mem: 55573 Epoch: [117] [ 20/156] eta: 0:02:38 lr: 0.005851 min_lr: 0.005851 loss: 3.7401 (3.7495) weight_decay: 0.0500 (0.0500) time: 0.6264 data: 0.0116 max mem: 55573 Epoch: [117] [ 30/156] eta: 0:02:03 lr: 0.005848 min_lr: 0.005848 loss: 3.6627 (3.6798) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [117] [ 40/156] eta: 0:01:42 lr: 0.005846 min_lr: 0.005846 loss: 3.5729 (3.6017) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0006 max mem: 55573 Epoch: [117] [ 50/156] eta: 0:01:27 lr: 0.005843 min_lr: 0.005843 loss: 3.5366 (3.5981) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0008 max mem: 55573 Epoch: [117] [ 60/156] eta: 0:01:15 lr: 0.005841 min_lr: 0.005841 loss: 3.4655 (3.5835) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0007 max mem: 55573 Epoch: [117] [ 70/156] eta: 0:01:05 lr: 0.005838 min_lr: 0.005838 loss: 3.5656 (3.5803) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0006 max mem: 55573 Epoch: [117] [ 80/156] eta: 0:00:56 lr: 0.005836 min_lr: 0.005836 loss: 3.6944 (3.5607) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [117] [ 90/156] eta: 0:00:47 lr: 0.005833 min_lr: 0.005833 loss: 3.4821 (3.5462) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0007 max mem: 55573 Epoch: [117] [100/156] eta: 0:00:39 lr: 0.005831 min_lr: 0.005831 loss: 3.3279 (3.5266) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0008 max mem: 55573 Epoch: [117] [110/156] eta: 0:00:32 lr: 0.005828 min_lr: 0.005828 loss: 3.3661 (3.5075) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [117] [120/156] eta: 0:00:24 lr: 0.005825 min_lr: 0.005825 loss: 3.4725 (3.5033) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0006 max mem: 55573 Epoch: [117] [130/156] eta: 0:00:17 lr: 0.005823 min_lr: 0.005823 loss: 3.6039 (3.5038) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0011 max mem: 55573 Epoch: [117] [140/156] eta: 0:00:10 lr: 0.005820 min_lr: 0.005820 loss: 3.7482 (3.5227) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0009 max mem: 55573 Epoch: [117] [150/156] eta: 0:00:04 lr: 0.005818 min_lr: 0.005818 loss: 3.7765 (3.5178) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0002 max mem: 55573 Epoch: [117] [155/156] eta: 0:00:00 lr: 0.005816 min_lr: 0.005816 loss: 3.5927 (3.5163) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0002 max mem: 55573 Epoch: [117] Total time: 0:01:45 (0.6741 s / it) Averaged stats: lr: 0.005816 min_lr: 0.005816 loss: 3.5927 (3.4763) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1227 (1.1227) acc1: 77.3438 (77.3438) acc5: 94.6615 (94.6615) time: 7.1453 data: 6.9079 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1842 (1.2090) acc1: 75.5208 (72.0640) acc5: 93.8802 (91.9040) time: 1.5756 data: 1.3817 max mem: 55573 Test: Total time: 0:00:08 (1.6225 s / it) * Acc@1 72.886 Acc@5 91.718 loss 1.211 Accuracy of the model on the 50000 test images: 72.9% Max accuracy: 72.89% Test: [0/5] eta: 0:00:36 loss: 5.1627 (5.1627) acc1: 9.7656 (9.7656) acc5: 24.3490 (24.3490) time: 7.2261 data: 6.9894 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 5.4713 (5.4811) acc1: 8.4635 (8.5760) acc5: 21.2240 (20.8000) time: 1.5908 data: 1.3980 max mem: 55573 Test: Total time: 0:00:08 (1.6106 s / it) * Acc@1 6.882 Acc@5 16.620 loss 5.742 Accuracy of the model EMA on 50000 test images: 6.9% Max EMA accuracy: 6.88% Epoch: [118] [ 0/156] eta: 0:35:16 lr: 0.005816 min_lr: 0.005816 loss: 3.3487 (3.3487) weight_decay: 0.0500 (0.0500) time: 13.5666 data: 8.7588 max mem: 55573 Epoch: [118] [ 10/156] eta: 0:04:21 lr: 0.005814 min_lr: 0.005814 loss: 3.8120 (3.5675) weight_decay: 0.0500 (0.0500) time: 1.7879 data: 0.7969 max mem: 55573 Epoch: [118] [ 20/156] eta: 0:02:45 lr: 0.005811 min_lr: 0.005811 loss: 3.4638 (3.4330) weight_decay: 0.0500 (0.0500) time: 0.6011 data: 0.0006 max mem: 55573 Epoch: [118] [ 30/156] eta: 0:02:08 lr: 0.005809 min_lr: 0.005809 loss: 3.4683 (3.4348) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0006 max mem: 55573 Epoch: [118] [ 40/156] eta: 0:01:45 lr: 0.005806 min_lr: 0.005806 loss: 3.5423 (3.4200) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0006 max mem: 55573 Epoch: [118] [ 50/156] eta: 0:01:30 lr: 0.005803 min_lr: 0.005803 loss: 3.1876 (3.3929) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0006 max mem: 55573 Epoch: [118] [ 60/156] eta: 0:01:17 lr: 0.005801 min_lr: 0.005801 loss: 3.2180 (3.3744) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0007 max mem: 55573 Epoch: [118] [ 70/156] eta: 0:01:06 lr: 0.005798 min_lr: 0.005798 loss: 3.5711 (3.4348) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0006 max mem: 55573 Epoch: [118] [ 80/156] eta: 0:00:57 lr: 0.005796 min_lr: 0.005796 loss: 3.7200 (3.4329) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0006 max mem: 55573 Epoch: [118] [ 90/156] eta: 0:00:48 lr: 0.005793 min_lr: 0.005793 loss: 3.4381 (3.4498) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0007 max mem: 55573 Epoch: [118] [100/156] eta: 0:00:40 lr: 0.005791 min_lr: 0.005791 loss: 3.6345 (3.4559) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0008 max mem: 55573 Epoch: [118] [110/156] eta: 0:00:32 lr: 0.005788 min_lr: 0.005788 loss: 3.6403 (3.4773) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0009 max mem: 55573 Epoch: [118] [120/156] eta: 0:00:25 lr: 0.005785 min_lr: 0.005785 loss: 3.6222 (3.4797) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0008 max mem: 55573 Epoch: [118] [130/156] eta: 0:00:17 lr: 0.005783 min_lr: 0.005783 loss: 3.2361 (3.4619) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0012 max mem: 55573 Epoch: [118] [140/156] eta: 0:00:10 lr: 0.005780 min_lr: 0.005780 loss: 3.0885 (3.4470) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0009 max mem: 55573 Epoch: [118] [150/156] eta: 0:00:04 lr: 0.005778 min_lr: 0.005778 loss: 3.5492 (3.4633) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0002 max mem: 55573 Epoch: [118] [155/156] eta: 0:00:00 lr: 0.005776 min_lr: 0.005776 loss: 3.5492 (3.4555) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0002 max mem: 55573 Epoch: [118] Total time: 0:01:46 (0.6813 s / it) Averaged stats: lr: 0.005776 min_lr: 0.005776 loss: 3.5492 (3.4702) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1607 (1.1607) acc1: 77.7344 (77.7344) acc5: 94.4010 (94.4010) time: 7.1258 data: 6.8884 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2664 (1.2999) acc1: 74.3490 (71.5520) acc5: 93.2292 (91.5520) time: 1.5715 data: 1.3777 max mem: 55573 Test: Total time: 0:00:08 (1.6270 s / it) * Acc@1 72.422 Acc@5 91.570 loss 1.280 Accuracy of the model on the 50000 test images: 72.4% Max accuracy: 72.89% Test: [0/5] eta: 0:00:35 loss: 4.9356 (4.9356) acc1: 12.2396 (12.2396) acc5: 27.7344 (27.7344) time: 7.0864 data: 6.8504 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 5.2310 (5.2598) acc1: 10.6771 (10.7200) acc5: 22.7865 (23.9040) time: 1.5626 data: 1.3702 max mem: 55573 Test: Total time: 0:00:07 (1.5796 s / it) * Acc@1 8.414 Acc@5 19.788 loss 5.526 Accuracy of the model EMA on 50000 test images: 8.4% Max EMA accuracy: 8.41% Epoch: [119] [ 0/156] eta: 0:26:24 lr: 0.005776 min_lr: 0.005776 loss: 2.5564 (2.5564) weight_decay: 0.0500 (0.0500) time: 10.1575 data: 8.4803 max mem: 55573 Epoch: [119] [ 10/156] eta: 0:04:17 lr: 0.005774 min_lr: 0.005774 loss: 3.3588 (3.3302) weight_decay: 0.0500 (0.0500) time: 1.7608 data: 0.8922 max mem: 55573 Epoch: [119] [ 20/156] eta: 0:02:43 lr: 0.005771 min_lr: 0.005771 loss: 3.3761 (3.3811) weight_decay: 0.0500 (0.0500) time: 0.7556 data: 0.0670 max mem: 55573 Epoch: [119] [ 30/156] eta: 0:02:06 lr: 0.005768 min_lr: 0.005768 loss: 3.3761 (3.3739) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0007 max mem: 55573 Epoch: [119] [ 40/156] eta: 0:01:44 lr: 0.005766 min_lr: 0.005766 loss: 3.5837 (3.3916) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0006 max mem: 55573 Epoch: [119] [ 50/156] eta: 0:01:29 lr: 0.005763 min_lr: 0.005763 loss: 3.6158 (3.4063) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [119] [ 60/156] eta: 0:01:16 lr: 0.005761 min_lr: 0.005761 loss: 3.4906 (3.3722) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [119] [ 70/156] eta: 0:01:06 lr: 0.005758 min_lr: 0.005758 loss: 3.1343 (3.3513) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [119] [ 80/156] eta: 0:00:56 lr: 0.005755 min_lr: 0.005755 loss: 3.5055 (3.3919) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [119] [ 90/156] eta: 0:00:48 lr: 0.005753 min_lr: 0.005753 loss: 3.6279 (3.3948) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0006 max mem: 55573 Epoch: [119] [100/156] eta: 0:00:40 lr: 0.005750 min_lr: 0.005750 loss: 3.4787 (3.3999) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0006 max mem: 55573 Epoch: [119] [110/156] eta: 0:00:32 lr: 0.005748 min_lr: 0.005748 loss: 3.4381 (3.3957) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0007 max mem: 55573 Epoch: [119] [120/156] eta: 0:00:25 lr: 0.005745 min_lr: 0.005745 loss: 3.5099 (3.4019) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0007 max mem: 55573 Epoch: [119] [130/156] eta: 0:00:17 lr: 0.005743 min_lr: 0.005743 loss: 3.7229 (3.4277) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0013 max mem: 55573 Epoch: [119] [140/156] eta: 0:00:10 lr: 0.005740 min_lr: 0.005740 loss: 3.6348 (3.4220) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0010 max mem: 55573 Epoch: [119] [150/156] eta: 0:00:04 lr: 0.005737 min_lr: 0.005737 loss: 3.2959 (3.4121) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0002 max mem: 55573 Epoch: [119] [155/156] eta: 0:00:00 lr: 0.005736 min_lr: 0.005736 loss: 3.2343 (3.4090) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [119] Total time: 0:01:45 (0.6793 s / it) Averaged stats: lr: 0.005736 min_lr: 0.005736 loss: 3.2343 (3.4517) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.2107 (1.2107) acc1: 75.3906 (75.3906) acc5: 93.7500 (93.7500) time: 6.7812 data: 6.5442 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3694 (1.3917) acc1: 73.0469 (70.7840) acc5: 92.8385 (90.4000) time: 1.5028 data: 1.3089 max mem: 55573 Test: Total time: 0:00:07 (1.5453 s / it) * Acc@1 71.784 Acc@5 90.854 loss 1.368 Accuracy of the model on the 50000 test images: 71.8% Max accuracy: 72.89% Test: [0/5] eta: 0:00:33 loss: 4.7147 (4.7147) acc1: 14.4531 (14.4531) acc5: 30.3385 (30.3385) time: 6.7306 data: 6.4946 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 4.9976 (5.0382) acc1: 12.7604 (12.7680) acc5: 26.8229 (27.2320) time: 1.5699 data: 1.3775 max mem: 55573 Test: Total time: 0:00:07 (1.5933 s / it) * Acc@1 10.164 Acc@5 23.016 loss 5.306 Accuracy of the model EMA on 50000 test images: 10.2% Max EMA accuracy: 10.16% Epoch: [120] [ 0/156] eta: 0:35:29 lr: 0.005736 min_lr: 0.005736 loss: 3.4372 (3.4372) weight_decay: 0.0500 (0.0500) time: 13.6490 data: 9.9843 max mem: 55573 Epoch: [120] [ 10/156] eta: 0:04:22 lr: 0.005733 min_lr: 0.005733 loss: 3.4372 (3.4656) weight_decay: 0.0500 (0.0500) time: 1.8003 data: 0.9082 max mem: 55573 Epoch: [120] [ 20/156] eta: 0:02:46 lr: 0.005731 min_lr: 0.005731 loss: 3.3260 (3.3381) weight_decay: 0.0500 (0.0500) time: 0.6019 data: 0.0005 max mem: 55573 Epoch: [120] [ 30/156] eta: 0:02:08 lr: 0.005728 min_lr: 0.005728 loss: 3.4157 (3.3578) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [120] [ 40/156] eta: 0:01:46 lr: 0.005725 min_lr: 0.005725 loss: 3.3341 (3.3406) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0006 max mem: 55573 Epoch: [120] [ 50/156] eta: 0:01:30 lr: 0.005723 min_lr: 0.005723 loss: 3.4564 (3.3787) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [120] [ 60/156] eta: 0:01:17 lr: 0.005720 min_lr: 0.005720 loss: 3.6444 (3.4055) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [120] [ 70/156] eta: 0:01:06 lr: 0.005718 min_lr: 0.005718 loss: 3.4733 (3.4212) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0006 max mem: 55573 Epoch: [120] [ 80/156] eta: 0:00:57 lr: 0.005715 min_lr: 0.005715 loss: 3.5909 (3.4504) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [120] [ 90/156] eta: 0:00:48 lr: 0.005712 min_lr: 0.005712 loss: 3.6158 (3.4582) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0007 max mem: 55573 Epoch: [120] [100/156] eta: 0:00:40 lr: 0.005710 min_lr: 0.005710 loss: 3.3263 (3.4450) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0007 max mem: 55573 Epoch: [120] [110/156] eta: 0:00:32 lr: 0.005707 min_lr: 0.005707 loss: 3.3492 (3.4378) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0006 max mem: 55573 Epoch: [120] [120/156] eta: 0:00:25 lr: 0.005705 min_lr: 0.005705 loss: 3.5891 (3.4317) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0007 max mem: 55573 Epoch: [120] [130/156] eta: 0:00:18 lr: 0.005702 min_lr: 0.005702 loss: 3.4125 (3.4102) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0015 max mem: 55573 Epoch: [120] [140/156] eta: 0:00:10 lr: 0.005699 min_lr: 0.005699 loss: 3.4125 (3.4129) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0012 max mem: 55573 Epoch: [120] [150/156] eta: 0:00:04 lr: 0.005697 min_lr: 0.005697 loss: 3.6841 (3.4255) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0002 max mem: 55573 Epoch: [120] [155/156] eta: 0:00:00 lr: 0.005696 min_lr: 0.005696 loss: 3.6841 (3.4240) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [120] Total time: 0:01:46 (0.6822 s / it) Averaged stats: lr: 0.005696 min_lr: 0.005696 loss: 3.6841 (3.4670) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1259 (1.1259) acc1: 77.8646 (77.8646) acc5: 93.2292 (93.2292) time: 6.9987 data: 6.7613 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1753 (1.2338) acc1: 75.9115 (73.1520) acc5: 93.2292 (92.0960) time: 1.5468 data: 1.3526 max mem: 55573 Test: Total time: 0:00:07 (1.5952 s / it) * Acc@1 73.672 Acc@5 92.256 loss 1.209 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.67% Test: [0/5] eta: 0:00:34 loss: 4.4953 (4.4953) acc1: 16.0156 (16.0156) acc5: 33.7240 (33.7240) time: 6.9333 data: 6.6972 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 4.7522 (4.8120) acc1: 14.9740 (14.6880) acc5: 32.0755 (31.4560) time: 1.5320 data: 1.3396 max mem: 55573 Test: Total time: 0:00:07 (1.5494 s / it) * Acc@1 12.038 Acc@5 26.514 loss 5.078 Accuracy of the model EMA on 50000 test images: 12.0% Max EMA accuracy: 12.04% Epoch: [121] [ 0/156] eta: 0:33:30 lr: 0.005695 min_lr: 0.005695 loss: 3.8528 (3.8528) weight_decay: 0.0500 (0.0500) time: 12.8895 data: 12.2308 max mem: 55573 Epoch: [121] [ 10/156] eta: 0:04:09 lr: 0.005693 min_lr: 0.005693 loss: 3.6669 (3.5650) weight_decay: 0.0500 (0.0500) time: 1.7113 data: 1.1124 max mem: 55573 Epoch: [121] [ 20/156] eta: 0:02:40 lr: 0.005690 min_lr: 0.005690 loss: 3.6669 (3.6794) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0006 max mem: 55573 Epoch: [121] [ 30/156] eta: 0:02:04 lr: 0.005687 min_lr: 0.005687 loss: 3.5926 (3.5750) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0006 max mem: 55573 Epoch: [121] [ 40/156] eta: 0:01:43 lr: 0.005685 min_lr: 0.005685 loss: 3.3887 (3.5149) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0006 max mem: 55573 Epoch: [121] [ 50/156] eta: 0:01:28 lr: 0.005682 min_lr: 0.005682 loss: 3.5314 (3.5265) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0006 max mem: 55573 Epoch: [121] [ 60/156] eta: 0:01:16 lr: 0.005680 min_lr: 0.005680 loss: 3.6012 (3.4905) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0006 max mem: 55573 Epoch: [121] [ 70/156] eta: 0:01:05 lr: 0.005677 min_lr: 0.005677 loss: 3.3106 (3.4578) weight_decay: 0.0500 (0.0500) time: 0.5987 data: 0.0006 max mem: 55573 Epoch: [121] [ 80/156] eta: 0:00:56 lr: 0.005674 min_lr: 0.005674 loss: 3.2830 (3.4258) weight_decay: 0.0500 (0.0500) time: 0.5971 data: 0.0006 max mem: 55573 Epoch: [121] [ 90/156] eta: 0:00:48 lr: 0.005672 min_lr: 0.005672 loss: 3.3520 (3.4263) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0006 max mem: 55573 Epoch: [121] [100/156] eta: 0:00:39 lr: 0.005669 min_lr: 0.005669 loss: 3.5988 (3.4539) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0006 max mem: 55573 Epoch: [121] [110/156] eta: 0:00:32 lr: 0.005667 min_lr: 0.005667 loss: 3.6708 (3.4450) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0007 max mem: 55573 Epoch: [121] [120/156] eta: 0:00:24 lr: 0.005664 min_lr: 0.005664 loss: 3.6024 (3.4408) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0006 max mem: 55573 Epoch: [121] [130/156] eta: 0:00:17 lr: 0.005661 min_lr: 0.005661 loss: 3.6530 (3.4399) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0012 max mem: 55573 Epoch: [121] [140/156] eta: 0:00:10 lr: 0.005659 min_lr: 0.005659 loss: 3.6342 (3.4439) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0010 max mem: 55573 Epoch: [121] [150/156] eta: 0:00:04 lr: 0.005656 min_lr: 0.005656 loss: 3.5548 (3.4411) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [121] [155/156] eta: 0:00:00 lr: 0.005655 min_lr: 0.005655 loss: 3.5613 (3.4345) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [121] Total time: 0:01:45 (0.6757 s / it) Averaged stats: lr: 0.005655 min_lr: 0.005655 loss: 3.5613 (3.4491) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0238 (1.0238) acc1: 77.3438 (77.3438) acc5: 95.7031 (95.7031) time: 6.8798 data: 6.6427 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1447 (1.2249) acc1: 76.4323 (72.2240) acc5: 94.7917 (92.3200) time: 1.5224 data: 1.3286 max mem: 55573 Test: Total time: 0:00:07 (1.5589 s / it) * Acc@1 73.090 Acc@5 91.888 loss 1.224 Accuracy of the model on the 50000 test images: 73.1% Max accuracy: 73.67% Test: [0/5] eta: 0:00:35 loss: 4.2887 (4.2887) acc1: 17.7083 (17.7083) acc5: 37.3698 (37.3698) time: 7.0664 data: 6.8305 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 4.5158 (4.5971) acc1: 16.9811 (16.3840) acc5: 33.9623 (34.8800) time: 1.5586 data: 1.3662 max mem: 55573 Test: Total time: 0:00:07 (1.5761 s / it) * Acc@1 13.952 Acc@5 30.212 loss 4.859 Accuracy of the model EMA on 50000 test images: 14.0% Max EMA accuracy: 13.95% Epoch: [122] [ 0/156] eta: 0:37:45 lr: 0.005655 min_lr: 0.005655 loss: 3.8062 (3.8062) weight_decay: 0.0500 (0.0500) time: 14.5200 data: 13.9028 max mem: 55573 Epoch: [122] [ 10/156] eta: 0:04:30 lr: 0.005652 min_lr: 0.005652 loss: 3.1310 (3.1823) weight_decay: 0.0500 (0.0500) time: 1.8547 data: 1.2643 max mem: 55573 Epoch: [122] [ 20/156] eta: 0:02:50 lr: 0.005649 min_lr: 0.005649 loss: 3.2188 (3.2834) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [122] [ 30/156] eta: 0:02:10 lr: 0.005647 min_lr: 0.005647 loss: 3.4532 (3.3648) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [122] [ 40/156] eta: 0:01:47 lr: 0.005644 min_lr: 0.005644 loss: 3.6090 (3.4258) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [122] [ 50/156] eta: 0:01:31 lr: 0.005641 min_lr: 0.005641 loss: 3.7525 (3.4571) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0005 max mem: 55573 Epoch: [122] [ 60/156] eta: 0:01:18 lr: 0.005639 min_lr: 0.005639 loss: 3.5317 (3.4251) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0006 max mem: 55573 Epoch: [122] [ 70/156] eta: 0:01:07 lr: 0.005636 min_lr: 0.005636 loss: 3.5350 (3.4702) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0008 max mem: 55573 Epoch: [122] [ 80/156] eta: 0:00:57 lr: 0.005634 min_lr: 0.005634 loss: 3.7132 (3.4937) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0007 max mem: 55573 Epoch: [122] [ 90/156] eta: 0:00:49 lr: 0.005631 min_lr: 0.005631 loss: 3.5867 (3.4727) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0006 max mem: 55573 Epoch: [122] [100/156] eta: 0:00:40 lr: 0.005628 min_lr: 0.005628 loss: 3.2673 (3.4371) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0006 max mem: 55573 Epoch: [122] [110/156] eta: 0:00:32 lr: 0.005626 min_lr: 0.005626 loss: 3.2670 (3.4251) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [122] [120/156] eta: 0:00:25 lr: 0.005623 min_lr: 0.005623 loss: 3.5429 (3.4286) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [122] [130/156] eta: 0:00:18 lr: 0.005620 min_lr: 0.005620 loss: 3.5529 (3.4232) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0012 max mem: 55573 Epoch: [122] [140/156] eta: 0:00:11 lr: 0.005618 min_lr: 0.005618 loss: 3.7482 (3.4405) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0010 max mem: 55573 Epoch: [122] [150/156] eta: 0:00:04 lr: 0.005615 min_lr: 0.005615 loss: 3.7547 (3.4477) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0002 max mem: 55573 Epoch: [122] [155/156] eta: 0:00:00 lr: 0.005614 min_lr: 0.005614 loss: 3.7547 (3.4631) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0002 max mem: 55573 Epoch: [122] Total time: 0:01:46 (0.6856 s / it) Averaged stats: lr: 0.005614 min_lr: 0.005614 loss: 3.7547 (3.4566) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.2550 (1.2550) acc1: 76.0417 (76.0417) acc5: 94.0104 (94.0104) time: 7.2103 data: 6.9728 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3540 (1.3826) acc1: 73.0469 (71.5200) acc5: 93.6198 (91.5840) time: 1.5887 data: 1.3947 max mem: 55573 Test: Total time: 0:00:08 (1.6273 s / it) * Acc@1 72.856 Acc@5 91.630 loss 1.363 Accuracy of the model on the 50000 test images: 72.9% Max accuracy: 73.67% Test: [0/5] eta: 0:00:34 loss: 4.0998 (4.0998) acc1: 20.4427 (20.4427) acc5: 40.4948 (40.4948) time: 6.9342 data: 6.6976 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 4.3014 (4.4001) acc1: 20.3125 (18.9440) acc5: 36.4583 (37.8240) time: 1.5421 data: 1.3493 max mem: 55573 Test: Total time: 0:00:07 (1.5626 s / it) * Acc@1 16.016 Acc@5 33.574 loss 4.654 Accuracy of the model EMA on 50000 test images: 16.0% Max EMA accuracy: 16.02% Epoch: [123] [ 0/156] eta: 0:32:02 lr: 0.005614 min_lr: 0.005614 loss: 3.6126 (3.6126) weight_decay: 0.0500 (0.0500) time: 12.3226 data: 10.7436 max mem: 55573 Epoch: [123] [ 10/156] eta: 0:04:05 lr: 0.005611 min_lr: 0.005611 loss: 3.3895 (3.3322) weight_decay: 0.0500 (0.0500) time: 1.6790 data: 0.9772 max mem: 55573 Epoch: [123] [ 20/156] eta: 0:02:37 lr: 0.005608 min_lr: 0.005608 loss: 3.4017 (3.3708) weight_decay: 0.0500 (0.0500) time: 0.6030 data: 0.0005 max mem: 55573 Epoch: [123] [ 30/156] eta: 0:02:03 lr: 0.005606 min_lr: 0.005606 loss: 3.6412 (3.4904) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [123] [ 40/156] eta: 0:01:42 lr: 0.005603 min_lr: 0.005603 loss: 3.6409 (3.5224) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [123] [ 50/156] eta: 0:01:27 lr: 0.005600 min_lr: 0.005600 loss: 3.6339 (3.5208) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [123] [ 60/156] eta: 0:01:15 lr: 0.005598 min_lr: 0.005598 loss: 3.5975 (3.5110) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [123] [ 70/156] eta: 0:01:05 lr: 0.005595 min_lr: 0.005595 loss: 3.1776 (3.4364) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [123] [ 80/156] eta: 0:00:56 lr: 0.005592 min_lr: 0.005592 loss: 2.8652 (3.4351) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [123] [ 90/156] eta: 0:00:47 lr: 0.005590 min_lr: 0.005590 loss: 3.6345 (3.4394) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0006 max mem: 55573 Epoch: [123] [100/156] eta: 0:00:39 lr: 0.005587 min_lr: 0.005587 loss: 3.3702 (3.4274) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0006 max mem: 55573 Epoch: [123] [110/156] eta: 0:00:32 lr: 0.005585 min_lr: 0.005585 loss: 3.3538 (3.4268) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0007 max mem: 55573 Epoch: [123] [120/156] eta: 0:00:24 lr: 0.005582 min_lr: 0.005582 loss: 3.4953 (3.4310) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0008 max mem: 55573 Epoch: [123] [130/156] eta: 0:00:17 lr: 0.005579 min_lr: 0.005579 loss: 3.4953 (3.4272) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0019 max mem: 55573 Epoch: [123] [140/156] eta: 0:00:10 lr: 0.005577 min_lr: 0.005577 loss: 3.3656 (3.4229) weight_decay: 0.0500 (0.0500) time: 0.5870 data: 0.0016 max mem: 55573 Epoch: [123] [150/156] eta: 0:00:04 lr: 0.005574 min_lr: 0.005574 loss: 3.4826 (3.4203) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0002 max mem: 55573 Epoch: [123] [155/156] eta: 0:00:00 lr: 0.005573 min_lr: 0.005573 loss: 3.5358 (3.4294) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0002 max mem: 55573 Epoch: [123] Total time: 0:01:44 (0.6727 s / it) Averaged stats: lr: 0.005573 min_lr: 0.005573 loss: 3.5358 (3.4621) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1748 (1.1748) acc1: 78.1250 (78.1250) acc5: 94.1406 (94.1406) time: 7.1475 data: 6.9094 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2558 (1.3181) acc1: 76.9531 (73.8240) acc5: 94.1406 (91.5520) time: 1.5760 data: 1.3820 max mem: 55573 Test: Total time: 0:00:08 (1.6108 s / it) * Acc@1 73.598 Acc@5 92.100 loss 1.307 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 73.67% Test: [0/5] eta: 0:00:36 loss: 3.9155 (3.9155) acc1: 22.1354 (22.1354) acc5: 43.4896 (43.4896) time: 7.2053 data: 6.9692 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 4.0827 (4.2033) acc1: 22.1354 (21.0880) acc5: 40.1042 (41.3120) time: 1.5865 data: 1.3940 max mem: 55573 Test: Total time: 0:00:08 (1.6042 s / it) * Acc@1 18.070 Acc@5 37.000 loss 4.451 Accuracy of the model EMA on 50000 test images: 18.1% Max EMA accuracy: 18.07% Epoch: [124] [ 0/156] eta: 0:31:56 lr: 0.005572 min_lr: 0.005572 loss: 3.8701 (3.8701) weight_decay: 0.0500 (0.0500) time: 12.2833 data: 9.7771 max mem: 55573 Epoch: [124] [ 10/156] eta: 0:04:24 lr: 0.005570 min_lr: 0.005570 loss: 3.6566 (3.4835) weight_decay: 0.0500 (0.0500) time: 1.8083 data: 0.9236 max mem: 55573 Epoch: [124] [ 20/156] eta: 0:02:47 lr: 0.005567 min_lr: 0.005567 loss: 3.5969 (3.4967) weight_decay: 0.0500 (0.0500) time: 0.6761 data: 0.0193 max mem: 55573 Epoch: [124] [ 30/156] eta: 0:02:08 lr: 0.005564 min_lr: 0.005564 loss: 3.3457 (3.3931) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [124] [ 40/156] eta: 0:01:46 lr: 0.005562 min_lr: 0.005562 loss: 3.3027 (3.3637) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [124] [ 50/156] eta: 0:01:30 lr: 0.005559 min_lr: 0.005559 loss: 3.2385 (3.3078) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [124] [ 60/156] eta: 0:01:17 lr: 0.005557 min_lr: 0.005557 loss: 3.3802 (3.3517) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [124] [ 70/156] eta: 0:01:07 lr: 0.005554 min_lr: 0.005554 loss: 3.5918 (3.3854) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0005 max mem: 55573 Epoch: [124] [ 80/156] eta: 0:00:57 lr: 0.005551 min_lr: 0.005551 loss: 3.7706 (3.4404) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [124] [ 90/156] eta: 0:00:48 lr: 0.005549 min_lr: 0.005549 loss: 3.7328 (3.4331) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [124] [100/156] eta: 0:00:40 lr: 0.005546 min_lr: 0.005546 loss: 3.5275 (3.4521) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [124] [110/156] eta: 0:00:32 lr: 0.005543 min_lr: 0.005543 loss: 3.6994 (3.4734) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [124] [120/156] eta: 0:00:25 lr: 0.005541 min_lr: 0.005541 loss: 3.6994 (3.4745) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [124] [130/156] eta: 0:00:18 lr: 0.005538 min_lr: 0.005538 loss: 3.6629 (3.4756) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0011 max mem: 55573 Epoch: [124] [140/156] eta: 0:00:10 lr: 0.005535 min_lr: 0.005535 loss: 3.7410 (3.4906) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0010 max mem: 55573 Epoch: [124] [150/156] eta: 0:00:04 lr: 0.005533 min_lr: 0.005533 loss: 3.7201 (3.4901) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0002 max mem: 55573 Epoch: [124] [155/156] eta: 0:00:00 lr: 0.005531 min_lr: 0.005531 loss: 3.6804 (3.4876) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [124] Total time: 0:01:46 (0.6818 s / it) Averaged stats: lr: 0.005531 min_lr: 0.005531 loss: 3.6804 (3.4558) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.2924 (1.2924) acc1: 78.7760 (78.7760) acc5: 94.0104 (94.0104) time: 7.1271 data: 6.8896 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3603 (1.4054) acc1: 75.2604 (72.7360) acc5: 93.2292 (91.2960) time: 1.5720 data: 1.3780 max mem: 55573 Test: Total time: 0:00:08 (1.6176 s / it) * Acc@1 72.460 Acc@5 91.636 loss 1.381 Accuracy of the model on the 50000 test images: 72.5% Max accuracy: 73.67% Test: [0/5] eta: 0:00:36 loss: 3.7405 (3.7405) acc1: 24.4792 (24.4792) acc5: 46.4844 (46.4844) time: 7.3077 data: 7.0715 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.8843 (4.0147) acc1: 22.6415 (23.0400) acc5: 43.3962 (44.3200) time: 1.6069 data: 1.4144 max mem: 55573 Test: Total time: 0:00:08 (1.6270 s / it) * Acc@1 20.078 Acc@5 40.322 loss 4.255 Accuracy of the model EMA on 50000 test images: 20.1% Max EMA accuracy: 20.08% Epoch: [125] [ 0/156] eta: 0:32:59 lr: 0.005531 min_lr: 0.005531 loss: 3.1081 (3.1081) weight_decay: 0.0500 (0.0500) time: 12.6869 data: 10.0959 max mem: 55573 Epoch: [125] [ 10/156] eta: 0:04:07 lr: 0.005528 min_lr: 0.005528 loss: 3.1451 (3.2051) weight_decay: 0.0500 (0.0500) time: 1.6954 data: 0.9182 max mem: 55573 Epoch: [125] [ 20/156] eta: 0:02:39 lr: 0.005526 min_lr: 0.005526 loss: 3.5250 (3.3328) weight_decay: 0.0500 (0.0500) time: 0.5946 data: 0.0005 max mem: 55573 Epoch: [125] [ 30/156] eta: 0:02:04 lr: 0.005523 min_lr: 0.005523 loss: 3.5216 (3.3493) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.0005 max mem: 55573 Epoch: [125] [ 40/156] eta: 0:01:43 lr: 0.005520 min_lr: 0.005520 loss: 3.6035 (3.4071) weight_decay: 0.0500 (0.0500) time: 0.5949 data: 0.0005 max mem: 55573 Epoch: [125] [ 50/156] eta: 0:01:28 lr: 0.005518 min_lr: 0.005518 loss: 3.6680 (3.4102) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [125] [ 60/156] eta: 0:01:16 lr: 0.005515 min_lr: 0.005515 loss: 3.5545 (3.4116) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [125] [ 70/156] eta: 0:01:05 lr: 0.005512 min_lr: 0.005512 loss: 3.6114 (3.4655) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [125] [ 80/156] eta: 0:00:56 lr: 0.005510 min_lr: 0.005510 loss: 3.6973 (3.4769) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0005 max mem: 55573 Epoch: [125] [ 90/156] eta: 0:00:47 lr: 0.005507 min_lr: 0.005507 loss: 3.5256 (3.4585) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [125] [100/156] eta: 0:00:39 lr: 0.005504 min_lr: 0.005504 loss: 3.3531 (3.4478) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [125] [110/156] eta: 0:00:32 lr: 0.005502 min_lr: 0.005502 loss: 3.5445 (3.4610) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [125] [120/156] eta: 0:00:24 lr: 0.005499 min_lr: 0.005499 loss: 3.5988 (3.4638) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [125] [130/156] eta: 0:00:17 lr: 0.005496 min_lr: 0.005496 loss: 3.6568 (3.4730) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0011 max mem: 55573 Epoch: [125] [140/156] eta: 0:00:10 lr: 0.005494 min_lr: 0.005494 loss: 3.6795 (3.4676) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0009 max mem: 55573 Epoch: [125] [150/156] eta: 0:00:04 lr: 0.005491 min_lr: 0.005491 loss: 3.3142 (3.4489) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [125] [155/156] eta: 0:00:00 lr: 0.005490 min_lr: 0.005490 loss: 3.3038 (3.4396) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [125] Total time: 0:01:45 (0.6751 s / it) Averaged stats: lr: 0.005490 min_lr: 0.005490 loss: 3.3038 (3.4466) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1947 (1.1947) acc1: 76.6927 (76.6927) acc5: 93.4896 (93.4896) time: 7.0065 data: 6.7695 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2923 (1.2914) acc1: 74.3490 (72.2240) acc5: 93.0990 (91.3600) time: 1.5478 data: 1.3540 max mem: 55573 Test: Total time: 0:00:08 (1.6114 s / it) * Acc@1 73.162 Acc@5 91.714 loss 1.305 Accuracy of the model on the 50000 test images: 73.2% Max accuracy: 73.67% Test: [0/5] eta: 0:00:34 loss: 3.5753 (3.5753) acc1: 25.5208 (25.5208) acc5: 49.7396 (49.7396) time: 6.8670 data: 6.6308 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.6983 (3.8349) acc1: 24.5283 (24.7360) acc5: 47.1698 (46.8800) time: 1.5274 data: 1.3349 max mem: 55573 Test: Total time: 0:00:07 (1.5483 s / it) * Acc@1 22.026 Acc@5 43.390 loss 4.069 Accuracy of the model EMA on 50000 test images: 22.0% Max EMA accuracy: 22.03% Epoch: [126] [ 0/156] eta: 0:33:05 lr: 0.005489 min_lr: 0.005489 loss: 2.8353 (2.8353) weight_decay: 0.0500 (0.0500) time: 12.7303 data: 7.6435 max mem: 55573 Epoch: [126] [ 10/156] eta: 0:04:07 lr: 0.005487 min_lr: 0.005487 loss: 3.5135 (3.3157) weight_decay: 0.0500 (0.0500) time: 1.6964 data: 0.6952 max mem: 55573 Epoch: [126] [ 20/156] eta: 0:02:39 lr: 0.005484 min_lr: 0.005484 loss: 3.5135 (3.3340) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [126] [ 30/156] eta: 0:02:03 lr: 0.005481 min_lr: 0.005481 loss: 3.4694 (3.3526) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [126] [ 40/156] eta: 0:01:42 lr: 0.005479 min_lr: 0.005479 loss: 3.4891 (3.3815) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [126] [ 50/156] eta: 0:01:27 lr: 0.005476 min_lr: 0.005476 loss: 3.5947 (3.4282) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [126] [ 60/156] eta: 0:01:15 lr: 0.005473 min_lr: 0.005473 loss: 3.4067 (3.3784) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [126] [ 70/156] eta: 0:01:05 lr: 0.005471 min_lr: 0.005471 loss: 3.4067 (3.4176) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [126] [ 80/156] eta: 0:00:56 lr: 0.005468 min_lr: 0.005468 loss: 3.6085 (3.4226) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [126] [ 90/156] eta: 0:00:47 lr: 0.005465 min_lr: 0.005465 loss: 3.5424 (3.4277) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [126] [100/156] eta: 0:00:39 lr: 0.005463 min_lr: 0.005463 loss: 3.5424 (3.4197) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [126] [110/156] eta: 0:00:32 lr: 0.005460 min_lr: 0.005460 loss: 3.7095 (3.4468) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [126] [120/156] eta: 0:00:24 lr: 0.005457 min_lr: 0.005457 loss: 3.6630 (3.4286) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [126] [130/156] eta: 0:00:17 lr: 0.005455 min_lr: 0.005455 loss: 3.3177 (3.4256) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0011 max mem: 55573 Epoch: [126] [140/156] eta: 0:00:10 lr: 0.005452 min_lr: 0.005452 loss: 3.3177 (3.4084) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [126] [150/156] eta: 0:00:04 lr: 0.005449 min_lr: 0.005449 loss: 3.4682 (3.4079) weight_decay: 0.0500 (0.0500) time: 0.5832 data: 0.0001 max mem: 55573 Epoch: [126] [155/156] eta: 0:00:00 lr: 0.005448 min_lr: 0.005448 loss: 3.4420 (3.4049) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [126] Total time: 0:01:45 (0.6741 s / it) Averaged stats: lr: 0.005448 min_lr: 0.005448 loss: 3.4420 (3.4339) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1381 (1.1381) acc1: 77.2135 (77.2135) acc5: 93.6198 (93.6198) time: 6.9261 data: 6.6892 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2437 (1.2269) acc1: 73.4375 (72.4480) acc5: 93.2292 (91.1680) time: 1.5306 data: 1.3379 max mem: 55573 Test: Total time: 0:00:07 (1.5474 s / it) * Acc@1 73.590 Acc@5 91.870 loss 1.226 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 73.67% Test: [0/5] eta: 0:00:35 loss: 3.4222 (3.4222) acc1: 27.2135 (27.2135) acc5: 52.4740 (52.4740) time: 7.1265 data: 6.8905 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.5273 (3.6660) acc1: 26.4151 (26.4320) acc5: 49.0566 (49.7280) time: 1.5706 data: 1.3782 max mem: 55573 Test: Total time: 0:00:07 (1.5899 s / it) * Acc@1 24.020 Acc@5 46.490 loss 3.892 Accuracy of the model EMA on 50000 test images: 24.0% Max EMA accuracy: 24.02% Epoch: [127] [ 0/156] eta: 0:34:28 lr: 0.005448 min_lr: 0.005448 loss: 3.2270 (3.2270) weight_decay: 0.0500 (0.0500) time: 13.2623 data: 11.4714 max mem: 55573 Epoch: [127] [ 10/156] eta: 0:04:20 lr: 0.005445 min_lr: 0.005445 loss: 3.5844 (3.3883) weight_decay: 0.0500 (0.0500) time: 1.7870 data: 1.0432 max mem: 55573 Epoch: [127] [ 20/156] eta: 0:02:45 lr: 0.005442 min_lr: 0.005442 loss: 3.4925 (3.3111) weight_decay: 0.0500 (0.0500) time: 0.6147 data: 0.0004 max mem: 55573 Epoch: [127] [ 30/156] eta: 0:02:07 lr: 0.005440 min_lr: 0.005440 loss: 3.4748 (3.3490) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [127] [ 40/156] eta: 0:01:45 lr: 0.005437 min_lr: 0.005437 loss: 3.3540 (3.3028) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [127] [ 50/156] eta: 0:01:30 lr: 0.005434 min_lr: 0.005434 loss: 3.5032 (3.3658) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [127] [ 60/156] eta: 0:01:17 lr: 0.005432 min_lr: 0.005432 loss: 3.5543 (3.3686) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [127] [ 70/156] eta: 0:01:06 lr: 0.005429 min_lr: 0.005429 loss: 3.7218 (3.3958) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [127] [ 80/156] eta: 0:00:57 lr: 0.005426 min_lr: 0.005426 loss: 3.7228 (3.4049) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [127] [ 90/156] eta: 0:00:48 lr: 0.005424 min_lr: 0.005424 loss: 3.6538 (3.4228) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [127] [100/156] eta: 0:00:40 lr: 0.005421 min_lr: 0.005421 loss: 3.3990 (3.4071) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [127] [110/156] eta: 0:00:32 lr: 0.005418 min_lr: 0.005418 loss: 3.4356 (3.4261) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [127] [120/156] eta: 0:00:25 lr: 0.005416 min_lr: 0.005416 loss: 3.6743 (3.4337) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [127] [130/156] eta: 0:00:17 lr: 0.005413 min_lr: 0.005413 loss: 3.4705 (3.4395) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0011 max mem: 55573 Epoch: [127] [140/156] eta: 0:00:10 lr: 0.005410 min_lr: 0.005410 loss: 3.4213 (3.4352) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0009 max mem: 55573 Epoch: [127] [150/156] eta: 0:00:04 lr: 0.005407 min_lr: 0.005407 loss: 3.5466 (3.4231) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [127] [155/156] eta: 0:00:00 lr: 0.005406 min_lr: 0.005406 loss: 3.6005 (3.4343) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [127] Total time: 0:01:46 (0.6803 s / it) Averaged stats: lr: 0.005406 min_lr: 0.005406 loss: 3.6005 (3.4259) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1132 (1.1132) acc1: 77.9948 (77.9948) acc5: 93.6198 (93.6198) time: 6.8553 data: 6.6177 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1516 (1.2088) acc1: 74.2188 (72.5120) acc5: 93.6198 (91.2960) time: 1.5175 data: 1.3236 max mem: 55573 Test: Total time: 0:00:07 (1.5627 s / it) * Acc@1 73.518 Acc@5 92.044 loss 1.177 Accuracy of the model on the 50000 test images: 73.5% Max accuracy: 73.67% Test: [0/5] eta: 0:00:35 loss: 3.2751 (3.2751) acc1: 29.2969 (29.2969) acc5: 56.2500 (56.2500) time: 7.1857 data: 6.9497 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.3645 (3.5025) acc1: 29.2969 (28.8960) acc5: 54.7170 (52.5120) time: 1.5831 data: 1.3902 max mem: 55573 Test: Total time: 0:00:08 (1.6031 s / it) * Acc@1 26.210 Acc@5 49.390 loss 3.721 Accuracy of the model EMA on 50000 test images: 26.2% Max EMA accuracy: 26.21% Epoch: [128] [ 0/156] eta: 0:37:18 lr: 0.005406 min_lr: 0.005406 loss: 3.4356 (3.4356) weight_decay: 0.0500 (0.0500) time: 14.3469 data: 10.2503 max mem: 55573 Epoch: [128] [ 10/156] eta: 0:04:33 lr: 0.005403 min_lr: 0.005403 loss: 3.3105 (3.3497) weight_decay: 0.0500 (0.0500) time: 1.8765 data: 0.9707 max mem: 55573 Epoch: [128] [ 20/156] eta: 0:02:51 lr: 0.005400 min_lr: 0.005400 loss: 3.3105 (3.3046) weight_decay: 0.0500 (0.0500) time: 0.6097 data: 0.0216 max mem: 55573 Epoch: [128] [ 30/156] eta: 0:02:11 lr: 0.005398 min_lr: 0.005398 loss: 3.3565 (3.3161) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [128] [ 40/156] eta: 0:01:48 lr: 0.005395 min_lr: 0.005395 loss: 3.3565 (3.3363) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [128] [ 50/156] eta: 0:01:31 lr: 0.005392 min_lr: 0.005392 loss: 3.5754 (3.3835) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [128] [ 60/156] eta: 0:01:18 lr: 0.005390 min_lr: 0.005390 loss: 3.6426 (3.3804) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [128] [ 70/156] eta: 0:01:07 lr: 0.005387 min_lr: 0.005387 loss: 3.6650 (3.4248) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [128] [ 80/156] eta: 0:00:58 lr: 0.005384 min_lr: 0.005384 loss: 3.7378 (3.4474) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [128] [ 90/156] eta: 0:00:49 lr: 0.005382 min_lr: 0.005382 loss: 3.5677 (3.4384) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [128] [100/156] eta: 0:00:40 lr: 0.005379 min_lr: 0.005379 loss: 3.5681 (3.4525) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [128] [110/156] eta: 0:00:33 lr: 0.005376 min_lr: 0.005376 loss: 3.7013 (3.4751) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [128] [120/156] eta: 0:00:25 lr: 0.005373 min_lr: 0.005373 loss: 3.4854 (3.4634) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [128] [130/156] eta: 0:00:18 lr: 0.005371 min_lr: 0.005371 loss: 3.5374 (3.4850) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0011 max mem: 55573 Epoch: [128] [140/156] eta: 0:00:11 lr: 0.005368 min_lr: 0.005368 loss: 3.6962 (3.4791) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0009 max mem: 55573 Epoch: [128] [150/156] eta: 0:00:04 lr: 0.005365 min_lr: 0.005365 loss: 3.2180 (3.4541) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [128] [155/156] eta: 0:00:00 lr: 0.005364 min_lr: 0.005364 loss: 3.1090 (3.4458) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [128] Total time: 0:01:47 (0.6870 s / it) Averaged stats: lr: 0.005364 min_lr: 0.005364 loss: 3.1090 (3.4380) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1135 (1.1135) acc1: 76.9531 (76.9531) acc5: 94.9219 (94.9219) time: 7.2825 data: 7.0450 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1691 (1.2008) acc1: 76.4323 (73.4400) acc5: 93.6198 (92.0960) time: 1.6030 data: 1.4091 max mem: 55573 Test: Total time: 0:00:08 (1.6522 s / it) * Acc@1 73.894 Acc@5 92.274 loss 1.205 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 73.89% Test: [0/5] eta: 0:00:35 loss: 3.1412 (3.1412) acc1: 30.9896 (30.9896) acc5: 59.1146 (59.1146) time: 7.0384 data: 6.8023 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.2147 (3.3515) acc1: 30.9896 (30.3360) acc5: 58.4906 (54.8160) time: 1.5530 data: 1.3606 max mem: 55573 Test: Total time: 0:00:07 (1.5712 s / it) * Acc@1 28.082 Acc@5 51.964 loss 3.563 Accuracy of the model EMA on 50000 test images: 28.1% Max EMA accuracy: 28.08% Epoch: [129] [ 0/156] eta: 0:35:55 lr: 0.005364 min_lr: 0.005364 loss: 4.2157 (4.2157) weight_decay: 0.0500 (0.0500) time: 13.8188 data: 9.0143 max mem: 55573 Epoch: [129] [ 10/156] eta: 0:04:23 lr: 0.005361 min_lr: 0.005361 loss: 3.5320 (3.5059) weight_decay: 0.0500 (0.0500) time: 1.8048 data: 0.8199 max mem: 55573 Epoch: [129] [ 20/156] eta: 0:02:46 lr: 0.005358 min_lr: 0.005358 loss: 3.3651 (3.4173) weight_decay: 0.0500 (0.0500) time: 0.5969 data: 0.0005 max mem: 55573 Epoch: [129] [ 30/156] eta: 0:02:08 lr: 0.005356 min_lr: 0.005356 loss: 3.4301 (3.4317) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [129] [ 40/156] eta: 0:01:46 lr: 0.005353 min_lr: 0.005353 loss: 3.4561 (3.4144) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [129] [ 50/156] eta: 0:01:30 lr: 0.005350 min_lr: 0.005350 loss: 3.5540 (3.4405) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [129] [ 60/156] eta: 0:01:17 lr: 0.005347 min_lr: 0.005347 loss: 3.2729 (3.3576) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [129] [ 70/156] eta: 0:01:06 lr: 0.005345 min_lr: 0.005345 loss: 3.1980 (3.3704) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [129] [ 80/156] eta: 0:00:57 lr: 0.005342 min_lr: 0.005342 loss: 3.4207 (3.3718) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [129] [ 90/156] eta: 0:00:48 lr: 0.005339 min_lr: 0.005339 loss: 3.4207 (3.3703) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [129] [100/156] eta: 0:00:40 lr: 0.005337 min_lr: 0.005337 loss: 3.4339 (3.3745) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [129] [110/156] eta: 0:00:32 lr: 0.005334 min_lr: 0.005334 loss: 3.5953 (3.3798) weight_decay: 0.0500 (0.0500) time: 0.5953 data: 0.0005 max mem: 55573 Epoch: [129] [120/156] eta: 0:00:25 lr: 0.005331 min_lr: 0.005331 loss: 3.6774 (3.3993) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0005 max mem: 55573 Epoch: [129] [130/156] eta: 0:00:18 lr: 0.005329 min_lr: 0.005329 loss: 3.5978 (3.3782) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0010 max mem: 55573 Epoch: [129] [140/156] eta: 0:00:10 lr: 0.005326 min_lr: 0.005326 loss: 3.1545 (3.3811) weight_decay: 0.0500 (0.0500) time: 0.5854 data: 0.0009 max mem: 55573 Epoch: [129] [150/156] eta: 0:00:04 lr: 0.005323 min_lr: 0.005323 loss: 3.5188 (3.3822) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [129] [155/156] eta: 0:00:00 lr: 0.005322 min_lr: 0.005322 loss: 3.6009 (3.3866) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [129] Total time: 0:01:46 (0.6823 s / it) Averaged stats: lr: 0.005322 min_lr: 0.005322 loss: 3.6009 (3.4169) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.1174 (1.1174) acc1: 78.2552 (78.2552) acc5: 94.0104 (94.0104) time: 6.7744 data: 6.5365 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1966 (1.2679) acc1: 75.6510 (72.5440) acc5: 94.0104 (91.7440) time: 1.5002 data: 1.3074 max mem: 55573 Test: Total time: 0:00:07 (1.5178 s / it) * Acc@1 72.810 Acc@5 91.508 loss 1.270 Accuracy of the model on the 50000 test images: 72.8% Max accuracy: 73.89% Test: [0/5] eta: 0:00:35 loss: 3.0198 (3.0198) acc1: 32.2917 (32.2917) acc5: 61.4583 (61.4583) time: 7.1238 data: 6.8879 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 3.0808 (3.2135) acc1: 32.2917 (32.1280) acc5: 61.0677 (57.2480) time: 1.5701 data: 1.3777 max mem: 55573 Test: Total time: 0:00:08 (1.6006 s / it) * Acc@1 29.920 Acc@5 54.408 loss 3.418 Accuracy of the model EMA on 50000 test images: 29.9% Max EMA accuracy: 29.92% Epoch: [130] [ 0/156] eta: 0:31:46 lr: 0.005321 min_lr: 0.005321 loss: 3.9553 (3.9553) weight_decay: 0.0500 (0.0500) time: 12.2217 data: 8.2492 max mem: 55573 Epoch: [130] [ 10/156] eta: 0:04:12 lr: 0.005319 min_lr: 0.005319 loss: 3.7141 (3.5385) weight_decay: 0.0500 (0.0500) time: 1.7284 data: 0.7504 max mem: 55573 Epoch: [130] [ 20/156] eta: 0:02:41 lr: 0.005316 min_lr: 0.005316 loss: 3.6431 (3.5003) weight_decay: 0.0500 (0.0500) time: 0.6347 data: 0.0005 max mem: 55573 Epoch: [130] [ 30/156] eta: 0:02:05 lr: 0.005313 min_lr: 0.005313 loss: 3.5533 (3.4494) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [130] [ 40/156] eta: 0:01:43 lr: 0.005311 min_lr: 0.005311 loss: 3.4815 (3.4354) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [130] [ 50/156] eta: 0:01:28 lr: 0.005308 min_lr: 0.005308 loss: 3.4376 (3.4289) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [130] [ 60/156] eta: 0:01:16 lr: 0.005305 min_lr: 0.005305 loss: 3.4466 (3.4671) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [130] [ 70/156] eta: 0:01:05 lr: 0.005302 min_lr: 0.005302 loss: 3.2459 (3.4083) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [130] [ 80/156] eta: 0:00:56 lr: 0.005300 min_lr: 0.005300 loss: 3.0474 (3.3746) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [130] [ 90/156] eta: 0:00:48 lr: 0.005297 min_lr: 0.005297 loss: 3.2653 (3.4045) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [130] [100/156] eta: 0:00:40 lr: 0.005294 min_lr: 0.005294 loss: 3.5904 (3.4106) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [130] [110/156] eta: 0:00:32 lr: 0.005292 min_lr: 0.005292 loss: 3.5904 (3.4246) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [130] [120/156] eta: 0:00:24 lr: 0.005289 min_lr: 0.005289 loss: 3.4462 (3.4211) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [130] [130/156] eta: 0:00:17 lr: 0.005286 min_lr: 0.005286 loss: 3.4014 (3.4082) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0007 max mem: 55573 Epoch: [130] [140/156] eta: 0:00:10 lr: 0.005283 min_lr: 0.005283 loss: 3.5326 (3.4193) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [130] [150/156] eta: 0:00:04 lr: 0.005281 min_lr: 0.005281 loss: 3.6963 (3.4313) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0002 max mem: 55573 Epoch: [130] [155/156] eta: 0:00:00 lr: 0.005279 min_lr: 0.005279 loss: 3.6567 (3.4314) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0002 max mem: 55573 Epoch: [130] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.005279 min_lr: 0.005279 loss: 3.6567 (3.4331) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1280 (1.1280) acc1: 77.0833 (77.0833) acc5: 94.5312 (94.5312) time: 7.3047 data: 7.0671 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2802 (1.2445) acc1: 73.3073 (72.0000) acc5: 92.8385 (91.5200) time: 1.6083 data: 1.4137 max mem: 55573 Test: Total time: 0:00:08 (1.6610 s / it) * Acc@1 73.380 Acc@5 91.938 loss 1.210 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 73.89% Test: [0/5] eta: 0:00:35 loss: 2.9075 (2.9075) acc1: 34.5052 (34.5052) acc5: 63.4115 (63.4115) time: 7.1632 data: 6.9272 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.9586 (3.0854) acc1: 34.5052 (34.0160) acc5: 62.3698 (59.3280) time: 1.5780 data: 1.3855 max mem: 55573 Test: Total time: 0:00:07 (1.5935 s / it) * Acc@1 31.782 Acc@5 56.704 loss 3.283 Accuracy of the model EMA on 50000 test images: 31.8% Max EMA accuracy: 31.78% Epoch: [131] [ 0/156] eta: 0:38:01 lr: 0.005279 min_lr: 0.005279 loss: 3.3231 (3.3231) weight_decay: 0.0500 (0.0500) time: 14.6245 data: 10.1662 max mem: 55573 Epoch: [131] [ 10/156] eta: 0:04:32 lr: 0.005276 min_lr: 0.005276 loss: 3.3231 (3.2244) weight_decay: 0.0500 (0.0500) time: 1.8630 data: 0.9247 max mem: 55573 Epoch: [131] [ 20/156] eta: 0:02:51 lr: 0.005274 min_lr: 0.005274 loss: 3.3810 (3.3555) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [131] [ 30/156] eta: 0:02:11 lr: 0.005271 min_lr: 0.005271 loss: 3.4479 (3.3762) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [131] [ 40/156] eta: 0:01:48 lr: 0.005268 min_lr: 0.005268 loss: 3.5097 (3.4092) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [131] [ 50/156] eta: 0:01:31 lr: 0.005265 min_lr: 0.005265 loss: 3.5586 (3.4338) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [131] [ 60/156] eta: 0:01:18 lr: 0.005263 min_lr: 0.005263 loss: 3.4774 (3.4153) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [131] [ 70/156] eta: 0:01:07 lr: 0.005260 min_lr: 0.005260 loss: 3.4774 (3.4171) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0005 max mem: 55573 Epoch: [131] [ 80/156] eta: 0:00:58 lr: 0.005257 min_lr: 0.005257 loss: 3.6349 (3.4280) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [131] [ 90/156] eta: 0:00:49 lr: 0.005254 min_lr: 0.005254 loss: 3.6349 (3.4292) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [131] [100/156] eta: 0:00:40 lr: 0.005252 min_lr: 0.005252 loss: 3.6360 (3.4319) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [131] [110/156] eta: 0:00:32 lr: 0.005249 min_lr: 0.005249 loss: 3.5109 (3.4437) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [131] [120/156] eta: 0:00:25 lr: 0.005246 min_lr: 0.005246 loss: 3.3486 (3.4224) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [131] [130/156] eta: 0:00:18 lr: 0.005244 min_lr: 0.005244 loss: 3.2951 (3.4345) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0010 max mem: 55573 Epoch: [131] [140/156] eta: 0:00:11 lr: 0.005241 min_lr: 0.005241 loss: 3.6640 (3.4442) weight_decay: 0.0500 (0.0500) time: 0.5841 data: 0.0009 max mem: 55573 Epoch: [131] [150/156] eta: 0:00:04 lr: 0.005238 min_lr: 0.005238 loss: 3.6600 (3.4602) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [131] [155/156] eta: 0:00:00 lr: 0.005237 min_lr: 0.005237 loss: 3.6014 (3.4559) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [131] Total time: 0:01:47 (0.6861 s / it) Averaged stats: lr: 0.005237 min_lr: 0.005237 loss: 3.6014 (3.4356) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.2474 (1.2474) acc1: 77.7344 (77.7344) acc5: 94.1406 (94.1406) time: 7.2972 data: 7.0597 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3605 (1.3556) acc1: 73.8281 (72.3840) acc5: 94.1406 (91.9360) time: 1.6066 data: 1.4122 max mem: 55573 Test: Total time: 0:00:08 (1.6490 s / it) * Acc@1 73.714 Acc@5 92.022 loss 1.346 Accuracy of the model on the 50000 test images: 73.7% Max accuracy: 73.89% Test: [0/5] eta: 0:00:33 loss: 2.7979 (2.7979) acc1: 36.4583 (36.4583) acc5: 65.7552 (65.7552) time: 6.7201 data: 6.4839 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.8423 (2.9628) acc1: 36.4583 (35.7760) acc5: 63.8021 (61.3760) time: 1.5048 data: 1.3125 max mem: 55573 Test: Total time: 0:00:07 (1.5272 s / it) * Acc@1 33.622 Acc@5 58.814 loss 3.153 Accuracy of the model EMA on 50000 test images: 33.6% Max EMA accuracy: 33.62% Epoch: [132] [ 0/156] eta: 0:33:58 lr: 0.005236 min_lr: 0.005236 loss: 3.6914 (3.6914) weight_decay: 0.0500 (0.0500) time: 13.0658 data: 9.5003 max mem: 55573 Epoch: [132] [ 10/156] eta: 0:04:24 lr: 0.005234 min_lr: 0.005234 loss: 3.6914 (3.5896) weight_decay: 0.0500 (0.0500) time: 1.8120 data: 0.8641 max mem: 55573 Epoch: [132] [ 20/156] eta: 0:02:47 lr: 0.005231 min_lr: 0.005231 loss: 3.6276 (3.5921) weight_decay: 0.0500 (0.0500) time: 0.6391 data: 0.0005 max mem: 55573 Epoch: [132] [ 30/156] eta: 0:02:09 lr: 0.005228 min_lr: 0.005228 loss: 3.6276 (3.5674) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [132] [ 40/156] eta: 0:01:46 lr: 0.005225 min_lr: 0.005225 loss: 3.4762 (3.4883) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [132] [ 50/156] eta: 0:01:30 lr: 0.005223 min_lr: 0.005223 loss: 3.4139 (3.4509) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [132] [ 60/156] eta: 0:01:17 lr: 0.005220 min_lr: 0.005220 loss: 3.5697 (3.4740) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [132] [ 70/156] eta: 0:01:07 lr: 0.005217 min_lr: 0.005217 loss: 3.6572 (3.4774) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [132] [ 80/156] eta: 0:00:57 lr: 0.005215 min_lr: 0.005215 loss: 3.3808 (3.4578) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [132] [ 90/156] eta: 0:00:48 lr: 0.005212 min_lr: 0.005212 loss: 3.5228 (3.4684) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [132] [100/156] eta: 0:00:40 lr: 0.005209 min_lr: 0.005209 loss: 3.5214 (3.4591) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [132] [110/156] eta: 0:00:32 lr: 0.005206 min_lr: 0.005206 loss: 3.5174 (3.4665) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [132] [120/156] eta: 0:00:25 lr: 0.005204 min_lr: 0.005204 loss: 3.5142 (3.4609) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [132] [130/156] eta: 0:00:18 lr: 0.005201 min_lr: 0.005201 loss: 3.5037 (3.4535) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [132] [140/156] eta: 0:00:10 lr: 0.005198 min_lr: 0.005198 loss: 3.5066 (3.4530) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0008 max mem: 55573 Epoch: [132] [150/156] eta: 0:00:04 lr: 0.005195 min_lr: 0.005195 loss: 3.4020 (3.4430) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [132] [155/156] eta: 0:00:00 lr: 0.005194 min_lr: 0.005194 loss: 3.2651 (3.4483) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [132] Total time: 0:01:46 (0.6822 s / it) Averaged stats: lr: 0.005194 min_lr: 0.005194 loss: 3.2651 (3.4226) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1740 (1.1740) acc1: 78.6458 (78.6458) acc5: 94.5312 (94.5312) time: 7.0993 data: 6.8619 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3060 (1.3327) acc1: 75.1302 (72.5440) acc5: 93.7500 (91.8400) time: 1.5664 data: 1.3725 max mem: 55573 Test: Total time: 0:00:08 (1.6066 s / it) * Acc@1 73.526 Acc@5 92.060 loss 1.304 Accuracy of the model on the 50000 test images: 73.5% Max accuracy: 73.89% Test: [0/5] eta: 0:00:35 loss: 2.6944 (2.6944) acc1: 38.5417 (38.5417) acc5: 67.3177 (67.3177) time: 7.0955 data: 6.8596 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.7321 (2.8451) acc1: 38.5417 (37.5360) acc5: 65.7552 (63.1680) time: 1.5644 data: 1.3720 max mem: 55573 Test: Total time: 0:00:07 (1.5877 s / it) * Acc@1 35.352 Acc@5 60.788 loss 3.028 Accuracy of the model EMA on 50000 test images: 35.4% Max EMA accuracy: 35.35% Epoch: [133] [ 0/156] eta: 0:31:17 lr: 0.005194 min_lr: 0.005194 loss: 3.6948 (3.6948) weight_decay: 0.0500 (0.0500) time: 12.0321 data: 9.4314 max mem: 55573 Epoch: [133] [ 10/156] eta: 0:04:06 lr: 0.005191 min_lr: 0.005191 loss: 3.5615 (3.3795) weight_decay: 0.0500 (0.0500) time: 1.6868 data: 0.8578 max mem: 55573 Epoch: [133] [ 20/156] eta: 0:02:38 lr: 0.005188 min_lr: 0.005188 loss: 3.5918 (3.4613) weight_decay: 0.0500 (0.0500) time: 0.6223 data: 0.0004 max mem: 55573 Epoch: [133] [ 30/156] eta: 0:02:03 lr: 0.005185 min_lr: 0.005185 loss: 3.6189 (3.4302) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [133] [ 40/156] eta: 0:01:42 lr: 0.005183 min_lr: 0.005183 loss: 3.6440 (3.5012) weight_decay: 0.0500 (0.0500) time: 0.5953 data: 0.0004 max mem: 55573 Epoch: [133] [ 50/156] eta: 0:01:27 lr: 0.005180 min_lr: 0.005180 loss: 3.6440 (3.4325) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.0004 max mem: 55573 Epoch: [133] [ 60/156] eta: 0:01:15 lr: 0.005177 min_lr: 0.005177 loss: 3.2443 (3.4066) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [133] [ 70/156] eta: 0:01:05 lr: 0.005174 min_lr: 0.005174 loss: 3.6011 (3.4259) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [133] [ 80/156] eta: 0:00:56 lr: 0.005172 min_lr: 0.005172 loss: 3.6011 (3.4198) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [133] [ 90/156] eta: 0:00:47 lr: 0.005169 min_lr: 0.005169 loss: 3.6175 (3.4422) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [133] [100/156] eta: 0:00:39 lr: 0.005166 min_lr: 0.005166 loss: 3.5788 (3.4428) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [133] [110/156] eta: 0:00:32 lr: 0.005163 min_lr: 0.005163 loss: 3.5272 (3.4257) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [133] [120/156] eta: 0:00:24 lr: 0.005161 min_lr: 0.005161 loss: 3.2500 (3.4007) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [133] [130/156] eta: 0:00:17 lr: 0.005158 min_lr: 0.005158 loss: 3.5360 (3.4206) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0010 max mem: 55573 Epoch: [133] [140/156] eta: 0:00:10 lr: 0.005155 min_lr: 0.005155 loss: 3.4542 (3.4041) weight_decay: 0.0500 (0.0500) time: 0.5881 data: 0.0008 max mem: 55573 Epoch: [133] [150/156] eta: 0:00:04 lr: 0.005152 min_lr: 0.005152 loss: 3.4176 (3.4080) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [133] [155/156] eta: 0:00:00 lr: 0.005151 min_lr: 0.005151 loss: 3.4589 (3.4108) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [133] Total time: 0:01:45 (0.6743 s / it) Averaged stats: lr: 0.005151 min_lr: 0.005151 loss: 3.4589 (3.4212) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0729 (1.0729) acc1: 77.3438 (77.3438) acc5: 94.7917 (94.7917) time: 6.8596 data: 6.6222 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2047 (1.2235) acc1: 75.6510 (72.8000) acc5: 94.6615 (92.0000) time: 1.5184 data: 1.3245 max mem: 55573 Test: Total time: 0:00:07 (1.5633 s / it) * Acc@1 74.146 Acc@5 92.282 loss 1.209 Accuracy of the model on the 50000 test images: 74.1% Max accuracy: 74.15% Test: [0/5] eta: 0:00:33 loss: 2.5982 (2.5982) acc1: 39.9740 (39.9740) acc5: 68.4896 (68.4896) time: 6.7673 data: 6.5310 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.6300 (2.7349) acc1: 39.9740 (38.9760) acc5: 68.0990 (65.0880) time: 1.4988 data: 1.3063 max mem: 55573 Test: Total time: 0:00:07 (1.5137 s / it) * Acc@1 36.910 Acc@5 62.774 loss 2.911 Accuracy of the model EMA on 50000 test images: 36.9% Max EMA accuracy: 36.91% Epoch: [134] [ 0/156] eta: 0:36:41 lr: 0.005151 min_lr: 0.005151 loss: 2.9701 (2.9701) weight_decay: 0.0500 (0.0500) time: 14.1116 data: 11.3810 max mem: 55573 Epoch: [134] [ 10/156] eta: 0:04:25 lr: 0.005148 min_lr: 0.005148 loss: 3.1675 (3.2966) weight_decay: 0.0500 (0.0500) time: 1.8181 data: 1.0351 max mem: 55573 Epoch: [134] [ 20/156] eta: 0:02:47 lr: 0.005145 min_lr: 0.005145 loss: 3.5826 (3.4008) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [134] [ 30/156] eta: 0:02:09 lr: 0.005142 min_lr: 0.005142 loss: 3.6236 (3.3472) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [134] [ 40/156] eta: 0:01:46 lr: 0.005140 min_lr: 0.005140 loss: 3.0853 (3.2870) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [134] [ 50/156] eta: 0:01:30 lr: 0.005137 min_lr: 0.005137 loss: 3.4757 (3.3443) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0006 max mem: 55573 Epoch: [134] [ 60/156] eta: 0:01:17 lr: 0.005134 min_lr: 0.005134 loss: 3.5799 (3.3433) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [134] [ 70/156] eta: 0:01:07 lr: 0.005131 min_lr: 0.005131 loss: 3.5457 (3.3421) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [134] [ 80/156] eta: 0:00:57 lr: 0.005129 min_lr: 0.005129 loss: 3.5343 (3.3397) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [134] [ 90/156] eta: 0:00:48 lr: 0.005126 min_lr: 0.005126 loss: 3.4586 (3.3507) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0006 max mem: 55573 Epoch: [134] [100/156] eta: 0:00:40 lr: 0.005123 min_lr: 0.005123 loss: 3.5675 (3.3705) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0006 max mem: 55573 Epoch: [134] [110/156] eta: 0:00:32 lr: 0.005120 min_lr: 0.005120 loss: 3.5165 (3.3624) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [134] [120/156] eta: 0:00:25 lr: 0.005118 min_lr: 0.005118 loss: 3.3668 (3.3447) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [134] [130/156] eta: 0:00:18 lr: 0.005115 min_lr: 0.005115 loss: 3.3668 (3.3438) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0010 max mem: 55573 Epoch: [134] [140/156] eta: 0:00:10 lr: 0.005112 min_lr: 0.005112 loss: 3.4799 (3.3564) weight_decay: 0.0500 (0.0500) time: 0.5847 data: 0.0009 max mem: 55573 Epoch: [134] [150/156] eta: 0:00:04 lr: 0.005109 min_lr: 0.005109 loss: 3.4568 (3.3564) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [134] [155/156] eta: 0:00:00 lr: 0.005108 min_lr: 0.005108 loss: 3.4839 (3.3572) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [134] Total time: 0:01:46 (0.6821 s / it) Averaged stats: lr: 0.005108 min_lr: 0.005108 loss: 3.4839 (3.3995) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1719 (1.1719) acc1: 76.6927 (76.6927) acc5: 93.4896 (93.4896) time: 7.1589 data: 6.9214 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2603 (1.2462) acc1: 74.2188 (72.3520) acc5: 93.3594 (91.1680) time: 1.5782 data: 1.3844 max mem: 55573 Test: Total time: 0:00:08 (1.6179 s / it) * Acc@1 73.274 Acc@5 91.724 loss 1.233 Accuracy of the model on the 50000 test images: 73.3% Max accuracy: 74.15% Test: [0/5] eta: 0:00:33 loss: 2.5074 (2.5074) acc1: 41.9271 (41.9271) acc5: 69.9219 (69.9219) time: 6.7953 data: 6.5593 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.5348 (2.6312) acc1: 41.9271 (40.3840) acc5: 69.9219 (66.7520) time: 1.5044 data: 1.3120 max mem: 55573 Test: Total time: 0:00:07 (1.5233 s / it) * Acc@1 38.446 Acc@5 64.536 loss 2.802 Accuracy of the model EMA on 50000 test images: 38.4% Max EMA accuracy: 38.45% Epoch: [135] [ 0/156] eta: 0:31:23 lr: 0.005108 min_lr: 0.005108 loss: 3.6188 (3.6188) weight_decay: 0.0500 (0.0500) time: 12.0758 data: 8.8046 max mem: 55573 Epoch: [135] [ 10/156] eta: 0:04:19 lr: 0.005105 min_lr: 0.005105 loss: 3.3493 (3.3597) weight_decay: 0.0500 (0.0500) time: 1.7797 data: 0.9405 max mem: 55573 Epoch: [135] [ 20/156] eta: 0:02:44 lr: 0.005102 min_lr: 0.005102 loss: 3.3333 (3.2669) weight_decay: 0.0500 (0.0500) time: 0.6701 data: 0.0773 max mem: 55573 Epoch: [135] [ 30/156] eta: 0:02:07 lr: 0.005099 min_lr: 0.005099 loss: 3.4652 (3.3252) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [135] [ 40/156] eta: 0:01:45 lr: 0.005097 min_lr: 0.005097 loss: 3.5351 (3.3226) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [135] [ 50/156] eta: 0:01:29 lr: 0.005094 min_lr: 0.005094 loss: 3.3529 (3.3004) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [135] [ 60/156] eta: 0:01:17 lr: 0.005091 min_lr: 0.005091 loss: 3.2870 (3.3039) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [135] [ 70/156] eta: 0:01:06 lr: 0.005088 min_lr: 0.005088 loss: 3.3326 (3.3063) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [135] [ 80/156] eta: 0:00:57 lr: 0.005086 min_lr: 0.005086 loss: 3.3282 (3.2988) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [135] [ 90/156] eta: 0:00:48 lr: 0.005083 min_lr: 0.005083 loss: 3.6224 (3.3410) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0004 max mem: 55573 Epoch: [135] [100/156] eta: 0:00:40 lr: 0.005080 min_lr: 0.005080 loss: 3.6448 (3.3516) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [135] [110/156] eta: 0:00:32 lr: 0.005077 min_lr: 0.005077 loss: 3.4838 (3.3427) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [135] [120/156] eta: 0:00:25 lr: 0.005074 min_lr: 0.005074 loss: 3.4052 (3.3379) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [135] [130/156] eta: 0:00:17 lr: 0.005072 min_lr: 0.005072 loss: 3.5516 (3.3574) weight_decay: 0.0500 (0.0500) time: 0.5945 data: 0.0011 max mem: 55573 Epoch: [135] [140/156] eta: 0:00:10 lr: 0.005069 min_lr: 0.005069 loss: 3.7234 (3.3674) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [135] [150/156] eta: 0:00:04 lr: 0.005066 min_lr: 0.005066 loss: 3.7739 (3.3859) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [135] [155/156] eta: 0:00:00 lr: 0.005065 min_lr: 0.005065 loss: 3.6944 (3.3870) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [135] Total time: 0:01:46 (0.6812 s / it) Averaged stats: lr: 0.005065 min_lr: 0.005065 loss: 3.6944 (3.4095) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.2982 (1.2982) acc1: 76.1719 (76.1719) acc5: 94.4010 (94.4010) time: 6.9635 data: 6.7265 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3240 (1.3698) acc1: 75.0000 (72.6400) acc5: 94.4010 (91.7120) time: 1.5391 data: 1.3454 max mem: 55573 Test: Total time: 0:00:07 (1.5790 s / it) * Acc@1 73.194 Acc@5 91.836 loss 1.374 Accuracy of the model on the 50000 test images: 73.2% Max accuracy: 74.15% Test: [0/5] eta: 0:00:35 loss: 2.4267 (2.4267) acc1: 43.3594 (43.3594) acc5: 71.2240 (71.2240) time: 7.1928 data: 6.9568 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.4502 (2.5400) acc1: 43.3594 (41.6960) acc5: 71.2240 (68.2880) time: 1.5839 data: 1.3915 max mem: 55573 Test: Total time: 0:00:08 (1.6082 s / it) * Acc@1 39.938 Acc@5 66.202 loss 2.704 Accuracy of the model EMA on 50000 test images: 39.9% Max EMA accuracy: 39.94% Epoch: [136] [ 0/156] eta: 0:31:39 lr: 0.005065 min_lr: 0.005065 loss: 3.4973 (3.4973) weight_decay: 0.0500 (0.0500) time: 12.1765 data: 11.5748 max mem: 55573 Epoch: [136] [ 10/156] eta: 0:04:04 lr: 0.005062 min_lr: 0.005062 loss: 3.5683 (3.4011) weight_decay: 0.0500 (0.0500) time: 1.6737 data: 1.0527 max mem: 55573 Epoch: [136] [ 20/156] eta: 0:02:37 lr: 0.005059 min_lr: 0.005059 loss: 3.5849 (3.3942) weight_decay: 0.0500 (0.0500) time: 0.6071 data: 0.0005 max mem: 55573 Epoch: [136] [ 30/156] eta: 0:02:02 lr: 0.005056 min_lr: 0.005056 loss: 3.5849 (3.4379) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [136] [ 40/156] eta: 0:01:42 lr: 0.005053 min_lr: 0.005053 loss: 3.4783 (3.4211) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [136] [ 50/156] eta: 0:01:27 lr: 0.005051 min_lr: 0.005051 loss: 3.4796 (3.4128) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [136] [ 60/156] eta: 0:01:15 lr: 0.005048 min_lr: 0.005048 loss: 3.4796 (3.4210) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [136] [ 70/156] eta: 0:01:05 lr: 0.005045 min_lr: 0.005045 loss: 3.6315 (3.4411) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [136] [ 80/156] eta: 0:00:56 lr: 0.005042 min_lr: 0.005042 loss: 3.4469 (3.4101) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [136] [ 90/156] eta: 0:00:47 lr: 0.005040 min_lr: 0.005040 loss: 3.2628 (3.3814) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [136] [100/156] eta: 0:00:39 lr: 0.005037 min_lr: 0.005037 loss: 2.9583 (3.3330) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [136] [110/156] eta: 0:00:32 lr: 0.005034 min_lr: 0.005034 loss: 3.1271 (3.3510) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [136] [120/156] eta: 0:00:24 lr: 0.005031 min_lr: 0.005031 loss: 3.6190 (3.3618) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [136] [130/156] eta: 0:00:17 lr: 0.005028 min_lr: 0.005028 loss: 3.5370 (3.3727) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0010 max mem: 55573 Epoch: [136] [140/156] eta: 0:00:10 lr: 0.005026 min_lr: 0.005026 loss: 3.5842 (3.3856) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0009 max mem: 55573 Epoch: [136] [150/156] eta: 0:00:04 lr: 0.005023 min_lr: 0.005023 loss: 3.6588 (3.3993) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [136] [155/156] eta: 0:00:00 lr: 0.005021 min_lr: 0.005021 loss: 3.6696 (3.4008) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [136] Total time: 0:01:44 (0.6726 s / it) Averaged stats: lr: 0.005021 min_lr: 0.005021 loss: 3.6696 (3.4095) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1246 (1.1246) acc1: 77.4740 (77.4740) acc5: 94.4010 (94.4010) time: 7.1868 data: 6.9494 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2261 (1.2617) acc1: 75.2604 (72.5120) acc5: 93.6198 (91.4560) time: 1.5836 data: 1.3899 max mem: 55573 Test: Total time: 0:00:08 (1.6267 s / it) * Acc@1 73.442 Acc@5 91.914 loss 1.228 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 74.15% Test: [0/5] eta: 0:00:35 loss: 2.3502 (2.3502) acc1: 45.0521 (45.0521) acc5: 72.2656 (72.2656) time: 7.1513 data: 6.9152 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.3716 (2.4545) acc1: 45.0521 (43.1680) acc5: 72.2656 (69.5360) time: 1.5756 data: 1.3832 max mem: 55573 Test: Total time: 0:00:07 (1.5953 s / it) * Acc@1 41.284 Acc@5 67.584 loss 2.613 Accuracy of the model EMA on 50000 test images: 41.3% Max EMA accuracy: 41.28% Epoch: [137] [ 0/156] eta: 0:36:55 lr: 0.005021 min_lr: 0.005021 loss: 4.0033 (4.0033) weight_decay: 0.0500 (0.0500) time: 14.1987 data: 11.7106 max mem: 55573 Epoch: [137] [ 10/156] eta: 0:04:26 lr: 0.005018 min_lr: 0.005018 loss: 3.0694 (3.2148) weight_decay: 0.0500 (0.0500) time: 1.8279 data: 1.0650 max mem: 55573 Epoch: [137] [ 20/156] eta: 0:02:48 lr: 0.005016 min_lr: 0.005016 loss: 3.2831 (3.3319) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [137] [ 30/156] eta: 0:02:09 lr: 0.005013 min_lr: 0.005013 loss: 3.5954 (3.3968) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [137] [ 40/156] eta: 0:01:47 lr: 0.005010 min_lr: 0.005010 loss: 3.1988 (3.3138) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [137] [ 50/156] eta: 0:01:30 lr: 0.005007 min_lr: 0.005007 loss: 3.2558 (3.3355) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [137] [ 60/156] eta: 0:01:18 lr: 0.005004 min_lr: 0.005004 loss: 3.4780 (3.3194) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [137] [ 70/156] eta: 0:01:07 lr: 0.005002 min_lr: 0.005002 loss: 3.2661 (3.3050) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [137] [ 80/156] eta: 0:00:57 lr: 0.004999 min_lr: 0.004999 loss: 3.3919 (3.3380) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [137] [ 90/156] eta: 0:00:48 lr: 0.004996 min_lr: 0.004996 loss: 3.3919 (3.3056) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [137] [100/156] eta: 0:00:40 lr: 0.004993 min_lr: 0.004993 loss: 3.5112 (3.3189) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [137] [110/156] eta: 0:00:32 lr: 0.004991 min_lr: 0.004991 loss: 3.6124 (3.3298) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [137] [120/156] eta: 0:00:25 lr: 0.004988 min_lr: 0.004988 loss: 3.4258 (3.3287) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [137] [130/156] eta: 0:00:18 lr: 0.004985 min_lr: 0.004985 loss: 3.2593 (3.3148) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0015 max mem: 55573 Epoch: [137] [140/156] eta: 0:00:10 lr: 0.004982 min_lr: 0.004982 loss: 3.2593 (3.3137) weight_decay: 0.0500 (0.0500) time: 0.5876 data: 0.0013 max mem: 55573 Epoch: [137] [150/156] eta: 0:00:04 lr: 0.004979 min_lr: 0.004979 loss: 3.5470 (3.3207) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [137] [155/156] eta: 0:00:00 lr: 0.004978 min_lr: 0.004978 loss: 3.5470 (3.3268) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [137] Total time: 0:01:46 (0.6837 s / it) Averaged stats: lr: 0.004978 min_lr: 0.004978 loss: 3.5470 (3.4002) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1940 (1.1940) acc1: 77.2135 (77.2135) acc5: 93.8802 (93.8802) time: 6.8638 data: 6.6266 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3060 (1.2974) acc1: 76.8229 (73.2480) acc5: 92.9688 (91.6800) time: 1.5181 data: 1.3254 max mem: 55573 Test: Total time: 0:00:07 (1.5354 s / it) * Acc@1 73.470 Acc@5 91.918 loss 1.294 Accuracy of the model on the 50000 test images: 73.5% Max accuracy: 74.15% Test: [0/5] eta: 0:00:35 loss: 2.2785 (2.2785) acc1: 46.4844 (46.4844) acc5: 73.1771 (73.1771) time: 7.1299 data: 6.8939 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.2986 (2.3749) acc1: 46.4844 (44.2880) acc5: 73.1771 (70.3040) time: 1.5713 data: 1.3789 max mem: 55573 Test: Total time: 0:00:07 (1.5896 s / it) * Acc@1 42.702 Acc@5 68.922 loss 2.527 Accuracy of the model EMA on 50000 test images: 42.7% Max EMA accuracy: 42.70% Epoch: [138] [ 0/156] eta: 0:31:26 lr: 0.004978 min_lr: 0.004978 loss: 2.6817 (2.6817) weight_decay: 0.0500 (0.0500) time: 12.0950 data: 10.3102 max mem: 55573 Epoch: [138] [ 10/156] eta: 0:04:22 lr: 0.004975 min_lr: 0.004975 loss: 2.7884 (3.0561) weight_decay: 0.0500 (0.0500) time: 1.7962 data: 0.9376 max mem: 55573 Epoch: [138] [ 20/156] eta: 0:02:46 lr: 0.004972 min_lr: 0.004972 loss: 2.8315 (3.1297) weight_decay: 0.0500 (0.0500) time: 0.6780 data: 0.0004 max mem: 55573 Epoch: [138] [ 30/156] eta: 0:02:08 lr: 0.004969 min_lr: 0.004969 loss: 3.1125 (3.1227) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [138] [ 40/156] eta: 0:01:46 lr: 0.004967 min_lr: 0.004967 loss: 3.4907 (3.2365) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [138] [ 50/156] eta: 0:01:30 lr: 0.004964 min_lr: 0.004964 loss: 3.5929 (3.2792) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [138] [ 60/156] eta: 0:01:17 lr: 0.004961 min_lr: 0.004961 loss: 3.5955 (3.3017) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [138] [ 70/156] eta: 0:01:06 lr: 0.004958 min_lr: 0.004958 loss: 3.3375 (3.2543) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [138] [ 80/156] eta: 0:00:57 lr: 0.004955 min_lr: 0.004955 loss: 2.8134 (3.2500) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [138] [ 90/156] eta: 0:00:48 lr: 0.004953 min_lr: 0.004953 loss: 3.0597 (3.2409) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.0004 max mem: 55573 Epoch: [138] [100/156] eta: 0:00:40 lr: 0.004950 min_lr: 0.004950 loss: 3.4624 (3.2683) weight_decay: 0.0500 (0.0500) time: 0.5964 data: 0.0004 max mem: 55573 Epoch: [138] [110/156] eta: 0:00:32 lr: 0.004947 min_lr: 0.004947 loss: 3.6209 (3.2904) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [138] [120/156] eta: 0:00:25 lr: 0.004944 min_lr: 0.004944 loss: 3.6772 (3.3095) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [138] [130/156] eta: 0:00:18 lr: 0.004941 min_lr: 0.004941 loss: 3.3837 (3.3058) weight_decay: 0.0500 (0.0500) time: 0.5883 data: 0.0010 max mem: 55573 Epoch: [138] [140/156] eta: 0:00:10 lr: 0.004939 min_lr: 0.004939 loss: 3.2634 (3.3085) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0009 max mem: 55573 Epoch: [138] [150/156] eta: 0:00:04 lr: 0.004936 min_lr: 0.004936 loss: 3.3603 (3.3214) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [138] [155/156] eta: 0:00:00 lr: 0.004934 min_lr: 0.004934 loss: 3.3680 (3.3220) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [138] Total time: 0:01:46 (0.6816 s / it) Averaged stats: lr: 0.004934 min_lr: 0.004934 loss: 3.3680 (3.4030) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.2375 (1.2375) acc1: 78.3854 (78.3854) acc5: 94.7917 (94.7917) time: 7.2189 data: 6.9816 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3480 (1.3673) acc1: 75.6510 (73.3120) acc5: 93.6198 (91.3600) time: 1.5902 data: 1.3964 max mem: 55573 Test: Total time: 0:00:08 (1.6352 s / it) * Acc@1 74.010 Acc@5 92.184 loss 1.335 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 74.15% Test: [0/5] eta: 0:00:35 loss: 2.2097 (2.2097) acc1: 47.9167 (47.9167) acc5: 74.8698 (74.8698) time: 7.1341 data: 6.8980 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.2296 (2.3001) acc1: 47.9167 (45.5040) acc5: 74.8698 (71.4880) time: 1.5722 data: 1.3797 max mem: 55573 Test: Total time: 0:00:07 (1.5915 s / it) * Acc@1 44.010 Acc@5 70.194 loss 2.447 Accuracy of the model EMA on 50000 test images: 44.0% Max EMA accuracy: 44.01% Epoch: [139] [ 0/156] eta: 0:34:49 lr: 0.004934 min_lr: 0.004934 loss: 2.7249 (2.7249) weight_decay: 0.0500 (0.0500) time: 13.3946 data: 10.2349 max mem: 55573 Epoch: [139] [ 10/156] eta: 0:04:17 lr: 0.004931 min_lr: 0.004931 loss: 3.6492 (3.5529) weight_decay: 0.0500 (0.0500) time: 1.7644 data: 0.9309 max mem: 55573 Epoch: [139] [ 20/156] eta: 0:02:43 lr: 0.004929 min_lr: 0.004929 loss: 3.5365 (3.4367) weight_decay: 0.0500 (0.0500) time: 0.5958 data: 0.0005 max mem: 55573 Epoch: [139] [ 30/156] eta: 0:02:06 lr: 0.004926 min_lr: 0.004926 loss: 3.5250 (3.4740) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [139] [ 40/156] eta: 0:01:45 lr: 0.004923 min_lr: 0.004923 loss: 3.5825 (3.4782) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [139] [ 50/156] eta: 0:01:29 lr: 0.004920 min_lr: 0.004920 loss: 3.5760 (3.4630) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [139] [ 60/156] eta: 0:01:17 lr: 0.004917 min_lr: 0.004917 loss: 3.4170 (3.4257) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [139] [ 70/156] eta: 0:01:06 lr: 0.004915 min_lr: 0.004915 loss: 3.4170 (3.4146) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [139] [ 80/156] eta: 0:00:57 lr: 0.004912 min_lr: 0.004912 loss: 3.6928 (3.4664) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [139] [ 90/156] eta: 0:00:48 lr: 0.004909 min_lr: 0.004909 loss: 3.7743 (3.4506) weight_decay: 0.0500 (0.0500) time: 0.5941 data: 0.0004 max mem: 55573 Epoch: [139] [100/156] eta: 0:00:40 lr: 0.004906 min_lr: 0.004906 loss: 3.2561 (3.4343) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0004 max mem: 55573 Epoch: [139] [110/156] eta: 0:00:32 lr: 0.004903 min_lr: 0.004903 loss: 3.2561 (3.4160) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [139] [120/156] eta: 0:00:25 lr: 0.004901 min_lr: 0.004901 loss: 3.5903 (3.4206) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [139] [130/156] eta: 0:00:17 lr: 0.004898 min_lr: 0.004898 loss: 3.5903 (3.4329) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0011 max mem: 55573 Epoch: [139] [140/156] eta: 0:00:10 lr: 0.004895 min_lr: 0.004895 loss: 3.6313 (3.4397) weight_decay: 0.0500 (0.0500) time: 0.5846 data: 0.0009 max mem: 55573 Epoch: [139] [150/156] eta: 0:00:04 lr: 0.004892 min_lr: 0.004892 loss: 3.6470 (3.4494) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [139] [155/156] eta: 0:00:00 lr: 0.004891 min_lr: 0.004891 loss: 3.4035 (3.4377) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [139] Total time: 0:01:45 (0.6791 s / it) Averaged stats: lr: 0.004891 min_lr: 0.004891 loss: 3.4035 (3.4025) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.0642 (1.0642) acc1: 78.5156 (78.5156) acc5: 94.6615 (94.6615) time: 6.6488 data: 6.4117 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1766 (1.1905) acc1: 75.1302 (73.1840) acc5: 94.3396 (92.6400) time: 1.5523 data: 1.3596 max mem: 55573 Test: Total time: 0:00:07 (1.5842 s / it) * Acc@1 74.212 Acc@5 92.294 loss 1.179 Accuracy of the model on the 50000 test images: 74.2% Max accuracy: 74.21% Test: [0/5] eta: 0:00:32 loss: 2.1467 (2.1467) acc1: 49.7396 (49.7396) acc5: 75.5208 (75.5208) time: 6.5638 data: 6.3279 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.1666 (2.2322) acc1: 49.7396 (46.7520) acc5: 75.5208 (72.5120) time: 1.4581 data: 1.2657 max mem: 55573 Test: Total time: 0:00:07 (1.4796 s / it) * Acc@1 45.230 Acc@5 71.368 loss 2.374 Accuracy of the model EMA on 50000 test images: 45.2% Max EMA accuracy: 45.23% Epoch: [140] [ 0/156] eta: 0:37:25 lr: 0.004890 min_lr: 0.004890 loss: 3.8380 (3.8380) weight_decay: 0.0500 (0.0500) time: 14.3950 data: 11.9485 max mem: 55573 Epoch: [140] [ 10/156] eta: 0:04:30 lr: 0.004888 min_lr: 0.004888 loss: 3.6261 (3.3332) weight_decay: 0.0500 (0.0500) time: 1.8552 data: 1.0866 max mem: 55573 Epoch: [140] [ 20/156] eta: 0:02:50 lr: 0.004885 min_lr: 0.004885 loss: 3.0788 (3.2810) weight_decay: 0.0500 (0.0500) time: 0.5961 data: 0.0004 max mem: 55573 Epoch: [140] [ 30/156] eta: 0:02:10 lr: 0.004882 min_lr: 0.004882 loss: 3.4116 (3.3873) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [140] [ 40/156] eta: 0:01:47 lr: 0.004879 min_lr: 0.004879 loss: 3.5902 (3.4061) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [140] [ 50/156] eta: 0:01:31 lr: 0.004876 min_lr: 0.004876 loss: 3.5576 (3.3859) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [140] [ 60/156] eta: 0:01:18 lr: 0.004874 min_lr: 0.004874 loss: 3.5576 (3.4227) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [140] [ 70/156] eta: 0:01:07 lr: 0.004871 min_lr: 0.004871 loss: 3.4371 (3.3785) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [140] [ 80/156] eta: 0:00:57 lr: 0.004868 min_lr: 0.004868 loss: 3.4137 (3.4069) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [140] [ 90/156] eta: 0:00:49 lr: 0.004865 min_lr: 0.004865 loss: 3.6583 (3.4184) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [140] [100/156] eta: 0:00:40 lr: 0.004862 min_lr: 0.004862 loss: 3.4837 (3.4150) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [140] [110/156] eta: 0:00:32 lr: 0.004860 min_lr: 0.004860 loss: 3.4551 (3.4041) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [140] [120/156] eta: 0:00:25 lr: 0.004857 min_lr: 0.004857 loss: 3.4174 (3.4003) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [140] [130/156] eta: 0:00:18 lr: 0.004854 min_lr: 0.004854 loss: 3.2138 (3.3881) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0010 max mem: 55573 Epoch: [140] [140/156] eta: 0:00:11 lr: 0.004851 min_lr: 0.004851 loss: 3.3292 (3.4022) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0008 max mem: 55573 Epoch: [140] [150/156] eta: 0:00:04 lr: 0.004848 min_lr: 0.004848 loss: 3.6011 (3.4090) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [140] [155/156] eta: 0:00:00 lr: 0.004847 min_lr: 0.004847 loss: 3.5496 (3.4116) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [140] Total time: 0:01:46 (0.6858 s / it) Averaged stats: lr: 0.004847 min_lr: 0.004847 loss: 3.5496 (3.4027) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.2517 (1.2517) acc1: 78.1250 (78.1250) acc5: 93.8802 (93.8802) time: 7.3237 data: 7.0863 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3050 (1.3590) acc1: 75.0000 (72.2880) acc5: 93.0990 (91.4240) time: 1.6109 data: 1.4173 max mem: 55573 Test: Total time: 0:00:08 (1.6586 s / it) * Acc@1 73.372 Acc@5 91.932 loss 1.344 Accuracy of the model on the 50000 test images: 73.4% Max accuracy: 74.21% Test: [0/5] eta: 0:00:36 loss: 2.0845 (2.0845) acc1: 50.3906 (50.3906) acc5: 76.5625 (76.5625) time: 7.2698 data: 7.0337 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.1068 (2.1673) acc1: 50.3906 (47.7120) acc5: 76.5625 (73.6640) time: 1.6017 data: 1.4093 max mem: 55573 Test: Total time: 0:00:08 (1.6193 s / it) * Acc@1 46.350 Acc@5 72.528 loss 2.303 Accuracy of the model EMA on 50000 test images: 46.4% Max EMA accuracy: 46.35% Epoch: [141] [ 0/156] eta: 0:35:35 lr: 0.004847 min_lr: 0.004847 loss: 3.6784 (3.6784) weight_decay: 0.0500 (0.0500) time: 13.6883 data: 9.3604 max mem: 55573 Epoch: [141] [ 10/156] eta: 0:04:21 lr: 0.004844 min_lr: 0.004844 loss: 3.6784 (3.6256) weight_decay: 0.0500 (0.0500) time: 1.7887 data: 0.8514 max mem: 55573 Epoch: [141] [ 20/156] eta: 0:02:45 lr: 0.004841 min_lr: 0.004841 loss: 3.6348 (3.5139) weight_decay: 0.0500 (0.0500) time: 0.5946 data: 0.0004 max mem: 55573 Epoch: [141] [ 30/156] eta: 0:02:07 lr: 0.004838 min_lr: 0.004838 loss: 3.2135 (3.3878) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [141] [ 40/156] eta: 0:01:45 lr: 0.004835 min_lr: 0.004835 loss: 3.1596 (3.3128) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [141] [ 50/156] eta: 0:01:29 lr: 0.004833 min_lr: 0.004833 loss: 3.4294 (3.3385) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [141] [ 60/156] eta: 0:01:17 lr: 0.004830 min_lr: 0.004830 loss: 3.5694 (3.3555) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [141] [ 70/156] eta: 0:01:06 lr: 0.004827 min_lr: 0.004827 loss: 3.5583 (3.3806) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [141] [ 80/156] eta: 0:00:57 lr: 0.004824 min_lr: 0.004824 loss: 3.5584 (3.3715) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [141] [ 90/156] eta: 0:00:48 lr: 0.004821 min_lr: 0.004821 loss: 3.5584 (3.3693) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [141] [100/156] eta: 0:00:40 lr: 0.004819 min_lr: 0.004819 loss: 3.2033 (3.3629) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [141] [110/156] eta: 0:00:32 lr: 0.004816 min_lr: 0.004816 loss: 3.3351 (3.3753) weight_decay: 0.0500 (0.0500) time: 0.5954 data: 0.0005 max mem: 55573 Epoch: [141] [120/156] eta: 0:00:25 lr: 0.004813 min_lr: 0.004813 loss: 3.3351 (3.3488) weight_decay: 0.0500 (0.0500) time: 0.5959 data: 0.0005 max mem: 55573 Epoch: [141] [130/156] eta: 0:00:17 lr: 0.004810 min_lr: 0.004810 loss: 3.1434 (3.3490) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0013 max mem: 55573 Epoch: [141] [140/156] eta: 0:00:10 lr: 0.004807 min_lr: 0.004807 loss: 3.3979 (3.3522) weight_decay: 0.0500 (0.0500) time: 0.5844 data: 0.0012 max mem: 55573 Epoch: [141] [150/156] eta: 0:00:04 lr: 0.004804 min_lr: 0.004804 loss: 3.3842 (3.3442) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [141] [155/156] eta: 0:00:00 lr: 0.004803 min_lr: 0.004803 loss: 3.4783 (3.3567) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [141] Total time: 0:01:46 (0.6813 s / it) Averaged stats: lr: 0.004803 min_lr: 0.004803 loss: 3.4783 (3.3904) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3531 (1.3531) acc1: 75.7812 (75.7812) acc5: 93.3594 (93.3594) time: 7.0670 data: 6.8296 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4292 (1.4805) acc1: 74.7396 (71.9040) acc5: 93.3594 (91.1040) time: 1.5598 data: 1.3660 max mem: 55573 Test: Total time: 0:00:08 (1.6097 s / it) * Acc@1 73.500 Acc@5 91.736 loss 1.472 Accuracy of the model on the 50000 test images: 73.5% Max accuracy: 74.21% Test: [0/5] eta: 0:00:33 loss: 2.0274 (2.0274) acc1: 51.8229 (51.8229) acc5: 77.8646 (77.8646) time: 6.6867 data: 6.4505 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 2.0526 (2.1088) acc1: 51.8229 (48.7040) acc5: 77.7344 (74.7520) time: 1.5394 data: 1.3470 max mem: 55573 Test: Total time: 0:00:07 (1.5555 s / it) * Acc@1 47.380 Acc@5 73.544 loss 2.240 Accuracy of the model EMA on 50000 test images: 47.4% Max EMA accuracy: 47.38% Epoch: [142] [ 0/156] eta: 0:33:41 lr: 0.004803 min_lr: 0.004803 loss: 2.6037 (2.6037) weight_decay: 0.0500 (0.0500) time: 12.9561 data: 8.9688 max mem: 55573 Epoch: [142] [ 10/156] eta: 0:04:11 lr: 0.004800 min_lr: 0.004800 loss: 3.1093 (3.1875) weight_decay: 0.0500 (0.0500) time: 1.7235 data: 0.8158 max mem: 55573 Epoch: [142] [ 20/156] eta: 0:02:40 lr: 0.004797 min_lr: 0.004797 loss: 3.5464 (3.3204) weight_decay: 0.0500 (0.0500) time: 0.5951 data: 0.0005 max mem: 55573 Epoch: [142] [ 30/156] eta: 0:02:05 lr: 0.004794 min_lr: 0.004794 loss: 3.7239 (3.3860) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [142] [ 40/156] eta: 0:01:43 lr: 0.004791 min_lr: 0.004791 loss: 3.6908 (3.3572) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [142] [ 50/156] eta: 0:01:28 lr: 0.004789 min_lr: 0.004789 loss: 3.5567 (3.3608) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [142] [ 60/156] eta: 0:01:16 lr: 0.004786 min_lr: 0.004786 loss: 3.5567 (3.3503) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [142] [ 70/156] eta: 0:01:05 lr: 0.004783 min_lr: 0.004783 loss: 3.5339 (3.3685) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [142] [ 80/156] eta: 0:00:56 lr: 0.004780 min_lr: 0.004780 loss: 3.5168 (3.3640) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [142] [ 90/156] eta: 0:00:48 lr: 0.004777 min_lr: 0.004777 loss: 3.4120 (3.3612) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [142] [100/156] eta: 0:00:39 lr: 0.004775 min_lr: 0.004775 loss: 3.5480 (3.3827) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [142] [110/156] eta: 0:00:32 lr: 0.004772 min_lr: 0.004772 loss: 3.6857 (3.4110) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [142] [120/156] eta: 0:00:24 lr: 0.004769 min_lr: 0.004769 loss: 3.7082 (3.4343) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [142] [130/156] eta: 0:00:17 lr: 0.004766 min_lr: 0.004766 loss: 3.6407 (3.4509) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0011 max mem: 55573 Epoch: [142] [140/156] eta: 0:00:10 lr: 0.004763 min_lr: 0.004763 loss: 3.4905 (3.4302) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0009 max mem: 55573 Epoch: [142] [150/156] eta: 0:00:04 lr: 0.004760 min_lr: 0.004760 loss: 3.1779 (3.4160) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [142] [155/156] eta: 0:00:00 lr: 0.004759 min_lr: 0.004759 loss: 3.3745 (3.4144) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [142] Total time: 0:01:45 (0.6762 s / it) Averaged stats: lr: 0.004759 min_lr: 0.004759 loss: 3.3745 (3.3976) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1411 (1.1411) acc1: 75.7812 (75.7812) acc5: 94.2708 (94.2708) time: 7.0892 data: 6.8519 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2229 (1.2798) acc1: 75.7812 (72.8960) acc5: 92.4528 (92.3200) time: 1.5642 data: 1.3704 max mem: 55573 Test: Total time: 0:00:08 (1.6046 s / it) * Acc@1 74.026 Acc@5 92.164 loss 1.266 Accuracy of the model on the 50000 test images: 74.0% Max accuracy: 74.21% Test: [0/5] eta: 0:00:34 loss: 1.9720 (1.9720) acc1: 52.4740 (52.4740) acc5: 78.9062 (78.9062) time: 6.8591 data: 6.6230 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9994 (2.0517) acc1: 52.4740 (49.6000) acc5: 78.7760 (75.7760) time: 1.5171 data: 1.3247 max mem: 55573 Test: Total time: 0:00:07 (1.5478 s / it) * Acc@1 48.452 Acc@5 74.536 loss 2.177 Accuracy of the model EMA on 50000 test images: 48.5% Max EMA accuracy: 48.45% Epoch: [143] [ 0/156] eta: 0:36:34 lr: 0.004759 min_lr: 0.004759 loss: 3.2024 (3.2024) weight_decay: 0.0500 (0.0500) time: 14.0676 data: 8.1886 max mem: 55573 Epoch: [143] [ 10/156] eta: 0:04:24 lr: 0.004756 min_lr: 0.004756 loss: 3.6043 (3.5381) weight_decay: 0.0500 (0.0500) time: 1.8133 data: 0.7448 max mem: 55573 Epoch: [143] [ 20/156] eta: 0:02:47 lr: 0.004753 min_lr: 0.004753 loss: 3.7328 (3.5291) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [143] [ 30/156] eta: 0:02:09 lr: 0.004750 min_lr: 0.004750 loss: 3.7354 (3.5783) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [143] [ 40/156] eta: 0:01:46 lr: 0.004747 min_lr: 0.004747 loss: 3.6861 (3.5492) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [143] [ 50/156] eta: 0:01:30 lr: 0.004745 min_lr: 0.004745 loss: 3.4360 (3.5183) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [143] [ 60/156] eta: 0:01:17 lr: 0.004742 min_lr: 0.004742 loss: 3.4671 (3.5010) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [143] [ 70/156] eta: 0:01:07 lr: 0.004739 min_lr: 0.004739 loss: 3.5806 (3.4913) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [143] [ 80/156] eta: 0:00:57 lr: 0.004736 min_lr: 0.004736 loss: 3.5612 (3.4980) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [143] [ 90/156] eta: 0:00:48 lr: 0.004733 min_lr: 0.004733 loss: 3.5374 (3.4973) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [143] [100/156] eta: 0:00:40 lr: 0.004730 min_lr: 0.004730 loss: 3.4958 (3.4768) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [143] [110/156] eta: 0:00:32 lr: 0.004728 min_lr: 0.004728 loss: 3.6421 (3.4892) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [143] [120/156] eta: 0:00:25 lr: 0.004725 min_lr: 0.004725 loss: 3.5379 (3.4694) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [143] [130/156] eta: 0:00:18 lr: 0.004722 min_lr: 0.004722 loss: 3.5379 (3.4769) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0010 max mem: 55573 Epoch: [143] [140/156] eta: 0:00:10 lr: 0.004719 min_lr: 0.004719 loss: 3.6453 (3.4826) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0008 max mem: 55573 Epoch: [143] [150/156] eta: 0:00:04 lr: 0.004716 min_lr: 0.004716 loss: 3.6405 (3.4795) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [143] [155/156] eta: 0:00:00 lr: 0.004715 min_lr: 0.004715 loss: 3.6244 (3.4907) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [143] Total time: 0:01:46 (0.6824 s / it) Averaged stats: lr: 0.004715 min_lr: 0.004715 loss: 3.6244 (3.3947) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.3399 (1.3399) acc1: 78.5156 (78.5156) acc5: 94.1406 (94.1406) time: 7.1510 data: 6.9135 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3595 (1.4063) acc1: 75.5208 (73.8240) acc5: 94.1406 (91.9040) time: 1.5765 data: 1.3828 max mem: 55573 Test: Total time: 0:00:08 (1.6278 s / it) * Acc@1 74.204 Acc@5 92.306 loss 1.394 Accuracy of the model on the 50000 test images: 74.2% Max accuracy: 74.21% Test: [0/5] eta: 0:00:35 loss: 1.9209 (1.9209) acc1: 52.9948 (52.9948) acc5: 79.6875 (79.6875) time: 7.1736 data: 6.9376 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9506 (1.9996) acc1: 52.9948 (50.3360) acc5: 79.4271 (76.6720) time: 1.5800 data: 1.3876 max mem: 55573 Test: Total time: 0:00:07 (1.5965 s / it) * Acc@1 49.468 Acc@5 75.476 loss 2.120 Accuracy of the model EMA on 50000 test images: 49.5% Max EMA accuracy: 49.47% Epoch: [144] [ 0/156] eta: 0:37:24 lr: 0.004715 min_lr: 0.004715 loss: 2.7820 (2.7820) weight_decay: 0.0500 (0.0500) time: 14.3891 data: 13.4251 max mem: 55573 Epoch: [144] [ 10/156] eta: 0:04:28 lr: 0.004712 min_lr: 0.004712 loss: 3.4056 (3.2741) weight_decay: 0.0500 (0.0500) time: 1.8421 data: 1.2210 max mem: 55573 Epoch: [144] [ 20/156] eta: 0:02:49 lr: 0.004709 min_lr: 0.004709 loss: 3.6079 (3.3675) weight_decay: 0.0500 (0.0500) time: 0.5880 data: 0.0005 max mem: 55573 Epoch: [144] [ 30/156] eta: 0:02:10 lr: 0.004706 min_lr: 0.004706 loss: 3.5363 (3.3383) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [144] [ 40/156] eta: 0:01:47 lr: 0.004703 min_lr: 0.004703 loss: 3.3991 (3.3654) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [144] [ 50/156] eta: 0:01:31 lr: 0.004700 min_lr: 0.004700 loss: 3.6326 (3.4370) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [144] [ 60/156] eta: 0:01:18 lr: 0.004698 min_lr: 0.004698 loss: 3.5794 (3.4339) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [144] [ 70/156] eta: 0:01:07 lr: 0.004695 min_lr: 0.004695 loss: 3.3822 (3.4190) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [144] [ 80/156] eta: 0:00:57 lr: 0.004692 min_lr: 0.004692 loss: 3.2864 (3.3918) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [144] [ 90/156] eta: 0:00:48 lr: 0.004689 min_lr: 0.004689 loss: 3.4274 (3.3920) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [144] [100/156] eta: 0:00:40 lr: 0.004686 min_lr: 0.004686 loss: 3.4248 (3.3778) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [144] [110/156] eta: 0:00:32 lr: 0.004683 min_lr: 0.004683 loss: 3.4248 (3.3932) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [144] [120/156] eta: 0:00:25 lr: 0.004681 min_lr: 0.004681 loss: 3.6008 (3.3881) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [144] [130/156] eta: 0:00:18 lr: 0.004678 min_lr: 0.004678 loss: 2.9199 (3.3575) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0011 max mem: 55573 Epoch: [144] [140/156] eta: 0:00:10 lr: 0.004675 min_lr: 0.004675 loss: 3.2919 (3.3727) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0010 max mem: 55573 Epoch: [144] [150/156] eta: 0:00:04 lr: 0.004672 min_lr: 0.004672 loss: 3.5902 (3.3762) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0002 max mem: 55573 Epoch: [144] [155/156] eta: 0:00:00 lr: 0.004671 min_lr: 0.004671 loss: 3.4961 (3.3722) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [144] Total time: 0:01:46 (0.6840 s / it) Averaged stats: lr: 0.004671 min_lr: 0.004671 loss: 3.4961 (3.3768) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.0768 (1.0768) acc1: 76.9531 (76.9531) acc5: 95.3125 (95.3125) time: 7.2163 data: 6.9790 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2772 (1.2713) acc1: 75.3906 (72.5760) acc5: 93.2292 (91.8720) time: 1.5898 data: 1.3959 max mem: 55573 Test: Total time: 0:00:08 (1.6343 s / it) * Acc@1 73.816 Acc@5 92.278 loss 1.243 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 74.21% Test: [0/5] eta: 0:00:33 loss: 1.8751 (1.8751) acc1: 54.8177 (54.8177) acc5: 79.9479 (79.9479) time: 6.7376 data: 6.5016 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.9063 (1.9516) acc1: 54.8177 (51.5840) acc5: 79.9479 (77.4080) time: 1.4929 data: 1.3005 max mem: 55573 Test: Total time: 0:00:07 (1.5129 s / it) * Acc@1 50.434 Acc@5 76.284 loss 2.067 Accuracy of the model EMA on 50000 test images: 50.4% Max EMA accuracy: 50.43% Epoch: [145] [ 0/156] eta: 0:39:42 lr: 0.004670 min_lr: 0.004670 loss: 3.6732 (3.6732) weight_decay: 0.0500 (0.0500) time: 15.2752 data: 7.2197 max mem: 55573 Epoch: [145] [ 10/156] eta: 0:04:41 lr: 0.004668 min_lr: 0.004668 loss: 3.6389 (3.5724) weight_decay: 0.0500 (0.0500) time: 1.9248 data: 0.6567 max mem: 55573 Epoch: [145] [ 20/156] eta: 0:02:55 lr: 0.004665 min_lr: 0.004665 loss: 3.5872 (3.5874) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [145] [ 30/156] eta: 0:02:14 lr: 0.004662 min_lr: 0.004662 loss: 3.4787 (3.4898) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0004 max mem: 55573 Epoch: [145] [ 40/156] eta: 0:01:50 lr: 0.004659 min_lr: 0.004659 loss: 3.6392 (3.5049) weight_decay: 0.0500 (0.0500) time: 0.6005 data: 0.0004 max mem: 55573 Epoch: [145] [ 50/156] eta: 0:01:33 lr: 0.004656 min_lr: 0.004656 loss: 3.6212 (3.4503) weight_decay: 0.0500 (0.0500) time: 0.5982 data: 0.0005 max mem: 55573 Epoch: [145] [ 60/156] eta: 0:01:20 lr: 0.004653 min_lr: 0.004653 loss: 3.2523 (3.4177) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [145] [ 70/156] eta: 0:01:08 lr: 0.004651 min_lr: 0.004651 loss: 2.9220 (3.3302) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [145] [ 80/156] eta: 0:00:58 lr: 0.004648 min_lr: 0.004648 loss: 3.0568 (3.3362) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [145] [ 90/156] eta: 0:00:49 lr: 0.004645 min_lr: 0.004645 loss: 3.3617 (3.3288) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [145] [100/156] eta: 0:00:41 lr: 0.004642 min_lr: 0.004642 loss: 3.5230 (3.3384) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [145] [110/156] eta: 0:00:33 lr: 0.004639 min_lr: 0.004639 loss: 3.5319 (3.3385) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [145] [120/156] eta: 0:00:25 lr: 0.004636 min_lr: 0.004636 loss: 3.4425 (3.3376) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [145] [130/156] eta: 0:00:18 lr: 0.004634 min_lr: 0.004634 loss: 3.5622 (3.3471) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0010 max mem: 55573 Epoch: [145] [140/156] eta: 0:00:11 lr: 0.004631 min_lr: 0.004631 loss: 3.5622 (3.3555) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0009 max mem: 55573 Epoch: [145] [150/156] eta: 0:00:04 lr: 0.004628 min_lr: 0.004628 loss: 3.4447 (3.3547) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [145] [155/156] eta: 0:00:00 lr: 0.004626 min_lr: 0.004626 loss: 3.3853 (3.3459) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [145] Total time: 0:01:47 (0.6920 s / it) Averaged stats: lr: 0.004626 min_lr: 0.004626 loss: 3.3853 (3.3668) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1238 (1.1238) acc1: 78.5156 (78.5156) acc5: 94.9219 (94.9219) time: 6.8535 data: 6.6163 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2730 (1.2905) acc1: 75.9115 (73.2480) acc5: 94.2708 (91.8720) time: 1.5172 data: 1.3233 max mem: 55573 Test: Total time: 0:00:07 (1.5635 s / it) * Acc@1 73.570 Acc@5 92.000 loss 1.295 Accuracy of the model on the 50000 test images: 73.6% Max accuracy: 74.21% Test: [0/5] eta: 0:00:35 loss: 1.8319 (1.8319) acc1: 56.9010 (56.9010) acc5: 80.5990 (80.5990) time: 7.1767 data: 6.9408 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8650 (1.9071) acc1: 55.9896 (52.6080) acc5: 80.5990 (78.0480) time: 1.5806 data: 1.3882 max mem: 55573 Test: Total time: 0:00:07 (1.5993 s / it) * Acc@1 51.378 Acc@5 77.024 loss 2.017 Accuracy of the model EMA on 50000 test images: 51.4% Max EMA accuracy: 51.38% Epoch: [146] [ 0/156] eta: 0:34:18 lr: 0.004626 min_lr: 0.004626 loss: 4.0272 (4.0272) weight_decay: 0.0500 (0.0500) time: 13.1982 data: 9.6638 max mem: 55573 Epoch: [146] [ 10/156] eta: 0:04:15 lr: 0.004623 min_lr: 0.004623 loss: 3.5909 (3.6565) weight_decay: 0.0500 (0.0500) time: 1.7509 data: 0.8789 max mem: 55573 Epoch: [146] [ 20/156] eta: 0:02:42 lr: 0.004620 min_lr: 0.004620 loss: 3.5554 (3.4657) weight_decay: 0.0500 (0.0500) time: 0.5979 data: 0.0004 max mem: 55573 Epoch: [146] [ 30/156] eta: 0:02:06 lr: 0.004618 min_lr: 0.004618 loss: 3.6599 (3.5400) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [146] [ 40/156] eta: 0:01:44 lr: 0.004615 min_lr: 0.004615 loss: 3.6777 (3.5165) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [146] [ 50/156] eta: 0:01:29 lr: 0.004612 min_lr: 0.004612 loss: 3.3735 (3.4298) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [146] [ 60/156] eta: 0:01:16 lr: 0.004609 min_lr: 0.004609 loss: 3.3103 (3.4274) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [146] [ 70/156] eta: 0:01:06 lr: 0.004606 min_lr: 0.004606 loss: 3.4147 (3.4190) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [146] [ 80/156] eta: 0:00:56 lr: 0.004603 min_lr: 0.004603 loss: 3.5595 (3.4196) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [146] [ 90/156] eta: 0:00:48 lr: 0.004601 min_lr: 0.004601 loss: 3.5595 (3.3920) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [146] [100/156] eta: 0:00:40 lr: 0.004598 min_lr: 0.004598 loss: 3.0780 (3.3640) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [146] [110/156] eta: 0:00:32 lr: 0.004595 min_lr: 0.004595 loss: 3.2583 (3.3529) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [146] [120/156] eta: 0:00:25 lr: 0.004592 min_lr: 0.004592 loss: 3.4323 (3.3600) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [146] [130/156] eta: 0:00:17 lr: 0.004589 min_lr: 0.004589 loss: 3.5991 (3.3584) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0011 max mem: 55573 Epoch: [146] [140/156] eta: 0:00:10 lr: 0.004586 min_lr: 0.004586 loss: 3.5586 (3.3744) weight_decay: 0.0500 (0.0500) time: 0.5870 data: 0.0010 max mem: 55573 Epoch: [146] [150/156] eta: 0:00:04 lr: 0.004584 min_lr: 0.004584 loss: 3.3622 (3.3648) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [146] [155/156] eta: 0:00:00 lr: 0.004582 min_lr: 0.004582 loss: 3.4010 (3.3668) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [146] Total time: 0:01:45 (0.6784 s / it) Averaged stats: lr: 0.004582 min_lr: 0.004582 loss: 3.4010 (3.3672) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0120 (1.0120) acc1: 79.4271 (79.4271) acc5: 96.0938 (96.0938) time: 7.0080 data: 6.7700 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1756 (1.1480) acc1: 75.9115 (74.2080) acc5: 93.8802 (92.5440) time: 1.5480 data: 1.3541 max mem: 55573 Test: Total time: 0:00:07 (1.5816 s / it) * Acc@1 74.516 Acc@5 92.438 loss 1.142 Accuracy of the model on the 50000 test images: 74.5% Max accuracy: 74.52% Test: [0/5] eta: 0:00:34 loss: 1.7924 (1.7924) acc1: 57.5521 (57.5521) acc5: 81.1198 (81.1198) time: 6.8190 data: 6.5829 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.8271 (1.8669) acc1: 56.1198 (53.2160) acc5: 81.1198 (78.5280) time: 1.5092 data: 1.3167 max mem: 55573 Test: Total time: 0:00:07 (1.5261 s / it) * Acc@1 52.200 Acc@5 77.656 loss 1.972 Accuracy of the model EMA on 50000 test images: 52.2% Max EMA accuracy: 52.20% Epoch: [147] [ 0/156] eta: 0:32:06 lr: 0.004582 min_lr: 0.004582 loss: 3.6801 (3.6801) weight_decay: 0.0500 (0.0500) time: 12.3501 data: 9.5425 max mem: 55573 Epoch: [147] [ 10/156] eta: 0:04:24 lr: 0.004579 min_lr: 0.004579 loss: 3.5410 (3.3388) weight_decay: 0.0500 (0.0500) time: 1.8092 data: 0.9112 max mem: 55573 Epoch: [147] [ 20/156] eta: 0:02:47 lr: 0.004576 min_lr: 0.004576 loss: 3.4657 (3.3535) weight_decay: 0.0500 (0.0500) time: 0.6718 data: 0.0243 max mem: 55573 Epoch: [147] [ 30/156] eta: 0:02:08 lr: 0.004573 min_lr: 0.004573 loss: 3.5843 (3.3995) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [147] [ 40/156] eta: 0:01:46 lr: 0.004570 min_lr: 0.004570 loss: 3.3994 (3.3294) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [147] [ 50/156] eta: 0:01:30 lr: 0.004568 min_lr: 0.004568 loss: 3.3994 (3.3649) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [147] [ 60/156] eta: 0:01:17 lr: 0.004565 min_lr: 0.004565 loss: 3.4987 (3.3357) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [147] [ 70/156] eta: 0:01:06 lr: 0.004562 min_lr: 0.004562 loss: 3.4899 (3.3399) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [147] [ 80/156] eta: 0:00:57 lr: 0.004559 min_lr: 0.004559 loss: 3.6371 (3.3818) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [147] [ 90/156] eta: 0:00:48 lr: 0.004556 min_lr: 0.004556 loss: 3.4984 (3.3740) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [147] [100/156] eta: 0:00:40 lr: 0.004553 min_lr: 0.004553 loss: 3.3483 (3.3604) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [147] [110/156] eta: 0:00:32 lr: 0.004550 min_lr: 0.004550 loss: 3.4396 (3.3542) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [147] [120/156] eta: 0:00:25 lr: 0.004548 min_lr: 0.004548 loss: 3.3584 (3.3390) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [147] [130/156] eta: 0:00:18 lr: 0.004545 min_lr: 0.004545 loss: 2.9680 (3.3189) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0010 max mem: 55573 Epoch: [147] [140/156] eta: 0:00:10 lr: 0.004542 min_lr: 0.004542 loss: 3.1954 (3.3198) weight_decay: 0.0500 (0.0500) time: 0.5884 data: 0.0009 max mem: 55573 Epoch: [147] [150/156] eta: 0:00:04 lr: 0.004539 min_lr: 0.004539 loss: 3.6082 (3.3305) weight_decay: 0.0500 (0.0500) time: 0.5832 data: 0.0001 max mem: 55573 Epoch: [147] [155/156] eta: 0:00:00 lr: 0.004538 min_lr: 0.004538 loss: 3.6082 (3.3372) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [147] Total time: 0:01:46 (0.6824 s / it) Averaged stats: lr: 0.004538 min_lr: 0.004538 loss: 3.6082 (3.3890) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1327 (1.1327) acc1: 78.2552 (78.2552) acc5: 94.9219 (94.9219) time: 6.8996 data: 6.6625 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2649 (1.2981) acc1: 75.6510 (73.0560) acc5: 94.6615 (91.9040) time: 1.5264 data: 1.3326 max mem: 55573 Test: Total time: 0:00:07 (1.5729 s / it) * Acc@1 73.762 Acc@5 92.214 loss 1.291 Accuracy of the model on the 50000 test images: 73.8% Max accuracy: 74.52% Test: [0/5] eta: 0:00:35 loss: 1.7561 (1.7561) acc1: 58.3333 (58.3333) acc5: 81.9010 (81.9010) time: 7.0317 data: 6.7955 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7922 (1.8291) acc1: 56.3802 (53.7280) acc5: 81.9010 (79.1040) time: 1.5517 data: 1.3592 max mem: 55573 Test: Total time: 0:00:07 (1.5710 s / it) * Acc@1 52.976 Acc@5 78.234 loss 1.929 Accuracy of the model EMA on 50000 test images: 53.0% Max EMA accuracy: 52.98% Epoch: [148] [ 0/156] eta: 0:33:18 lr: 0.004537 min_lr: 0.004537 loss: 3.1926 (3.1926) weight_decay: 0.0500 (0.0500) time: 12.8106 data: 12.1979 max mem: 55573 Epoch: [148] [ 10/156] eta: 0:04:09 lr: 0.004535 min_lr: 0.004535 loss: 3.4614 (3.3038) weight_decay: 0.0500 (0.0500) time: 1.7112 data: 1.1092 max mem: 55573 Epoch: [148] [ 20/156] eta: 0:02:40 lr: 0.004532 min_lr: 0.004532 loss: 3.6304 (3.4203) weight_decay: 0.0500 (0.0500) time: 0.5955 data: 0.0004 max mem: 55573 Epoch: [148] [ 30/156] eta: 0:02:04 lr: 0.004529 min_lr: 0.004529 loss: 3.5890 (3.3076) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [148] [ 40/156] eta: 0:01:43 lr: 0.004526 min_lr: 0.004526 loss: 3.2188 (3.3225) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [148] [ 50/156] eta: 0:01:28 lr: 0.004523 min_lr: 0.004523 loss: 3.2282 (3.3000) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [148] [ 60/156] eta: 0:01:16 lr: 0.004520 min_lr: 0.004520 loss: 3.4373 (3.3191) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [148] [ 70/156] eta: 0:01:05 lr: 0.004517 min_lr: 0.004517 loss: 3.5487 (3.3380) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [148] [ 80/156] eta: 0:00:56 lr: 0.004515 min_lr: 0.004515 loss: 3.3703 (3.3196) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [148] [ 90/156] eta: 0:00:47 lr: 0.004512 min_lr: 0.004512 loss: 3.4136 (3.3271) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [148] [100/156] eta: 0:00:39 lr: 0.004509 min_lr: 0.004509 loss: 3.4240 (3.3133) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [148] [110/156] eta: 0:00:32 lr: 0.004506 min_lr: 0.004506 loss: 3.5164 (3.3320) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [148] [120/156] eta: 0:00:24 lr: 0.004503 min_lr: 0.004503 loss: 3.6274 (3.3253) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [148] [130/156] eta: 0:00:17 lr: 0.004500 min_lr: 0.004500 loss: 3.3523 (3.3286) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0010 max mem: 55573 Epoch: [148] [140/156] eta: 0:00:10 lr: 0.004497 min_lr: 0.004497 loss: 3.4990 (3.3417) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0009 max mem: 55573 Epoch: [148] [150/156] eta: 0:00:04 lr: 0.004495 min_lr: 0.004495 loss: 3.4186 (3.3520) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [148] [155/156] eta: 0:00:00 lr: 0.004493 min_lr: 0.004493 loss: 3.4081 (3.3483) weight_decay: 0.0500 (0.0500) time: 0.5835 data: 0.0001 max mem: 55573 Epoch: [148] Total time: 0:01:45 (0.6752 s / it) Averaged stats: lr: 0.004493 min_lr: 0.004493 loss: 3.4081 (3.3810) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0904 (1.0904) acc1: 77.8646 (77.8646) acc5: 94.9219 (94.9219) time: 7.0277 data: 6.7903 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1515 (1.1864) acc1: 75.5208 (73.6960) acc5: 94.2708 (92.5760) time: 1.5521 data: 1.3581 max mem: 55573 Test: Total time: 0:00:07 (1.5950 s / it) * Acc@1 74.474 Acc@5 92.516 loss 1.195 Accuracy of the model on the 50000 test images: 74.5% Max accuracy: 74.52% Test: [0/5] eta: 0:00:35 loss: 1.7221 (1.7221) acc1: 59.1146 (59.1146) acc5: 82.2917 (82.2917) time: 7.1153 data: 6.8789 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7585 (1.7933) acc1: 56.6406 (54.4000) acc5: 82.2917 (79.5520) time: 1.5683 data: 1.3759 max mem: 55573 Test: Total time: 0:00:07 (1.5974 s / it) * Acc@1 53.690 Acc@5 78.828 loss 1.889 Accuracy of the model EMA on 50000 test images: 53.7% Max EMA accuracy: 53.69% Epoch: [149] [ 0/156] eta: 0:37:53 lr: 0.004493 min_lr: 0.004493 loss: 2.9407 (2.9407) weight_decay: 0.0500 (0.0500) time: 14.5767 data: 13.0068 max mem: 55573 Epoch: [149] [ 10/156] eta: 0:04:38 lr: 0.004490 min_lr: 0.004490 loss: 3.3730 (3.3282) weight_decay: 0.0500 (0.0500) time: 1.9082 data: 1.1827 max mem: 55573 Epoch: [149] [ 20/156] eta: 0:02:54 lr: 0.004487 min_lr: 0.004487 loss: 3.4671 (3.4403) weight_decay: 0.0500 (0.0500) time: 0.6167 data: 0.0003 max mem: 55573 Epoch: [149] [ 30/156] eta: 0:02:13 lr: 0.004484 min_lr: 0.004484 loss: 3.4966 (3.4373) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [149] [ 40/156] eta: 0:01:49 lr: 0.004481 min_lr: 0.004481 loss: 3.4087 (3.3824) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [149] [ 50/156] eta: 0:01:32 lr: 0.004479 min_lr: 0.004479 loss: 3.4128 (3.3505) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [149] [ 60/156] eta: 0:01:19 lr: 0.004476 min_lr: 0.004476 loss: 3.4888 (3.3408) weight_decay: 0.0500 (0.0500) time: 0.5952 data: 0.0005 max mem: 55573 Epoch: [149] [ 70/156] eta: 0:01:08 lr: 0.004473 min_lr: 0.004473 loss: 3.6141 (3.3863) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0005 max mem: 55573 Epoch: [149] [ 80/156] eta: 0:00:58 lr: 0.004470 min_lr: 0.004470 loss: 3.6540 (3.3814) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [149] [ 90/156] eta: 0:00:49 lr: 0.004467 min_lr: 0.004467 loss: 3.5051 (3.3658) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [149] [100/156] eta: 0:00:41 lr: 0.004464 min_lr: 0.004464 loss: 3.5051 (3.3593) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [149] [110/156] eta: 0:00:33 lr: 0.004461 min_lr: 0.004461 loss: 3.5527 (3.3712) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [149] [120/156] eta: 0:00:25 lr: 0.004459 min_lr: 0.004459 loss: 3.6935 (3.3794) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [149] [130/156] eta: 0:00:18 lr: 0.004456 min_lr: 0.004456 loss: 3.4936 (3.3597) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0010 max mem: 55573 Epoch: [149] [140/156] eta: 0:00:11 lr: 0.004453 min_lr: 0.004453 loss: 2.9530 (3.3475) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0008 max mem: 55573 Epoch: [149] [150/156] eta: 0:00:04 lr: 0.004450 min_lr: 0.004450 loss: 3.4855 (3.3534) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [149] [155/156] eta: 0:00:00 lr: 0.004449 min_lr: 0.004449 loss: 2.9530 (3.3418) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [149] Total time: 0:01:47 (0.6903 s / it) Averaged stats: lr: 0.004449 min_lr: 0.004449 loss: 2.9530 (3.3566) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0111 (1.0111) acc1: 77.8646 (77.8646) acc5: 94.7917 (94.7917) time: 7.1382 data: 6.8973 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0976 (1.1140) acc1: 76.3021 (74.4000) acc5: 94.5312 (92.6400) time: 1.5740 data: 1.3796 max mem: 55573 Test: Total time: 0:00:08 (1.6190 s / it) * Acc@1 74.328 Acc@5 92.372 loss 1.113 Accuracy of the model on the 50000 test images: 74.3% Max accuracy: 74.52% Test: [0/5] eta: 0:00:33 loss: 1.6902 (1.6902) acc1: 60.0260 (60.0260) acc5: 82.5521 (82.5521) time: 6.7381 data: 6.5019 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.7270 (1.7597) acc1: 57.1615 (55.2000) acc5: 82.5521 (79.9360) time: 1.4929 data: 1.3005 max mem: 55573 Test: Total time: 0:00:07 (1.5145 s / it) * Acc@1 54.468 Acc@5 79.414 loss 1.851 Accuracy of the model EMA on 50000 test images: 54.5% Max EMA accuracy: 54.47% Epoch: [150] [ 0/156] eta: 0:35:06 lr: 0.004448 min_lr: 0.004448 loss: 3.7174 (3.7174) weight_decay: 0.0500 (0.0500) time: 13.5019 data: 11.0447 max mem: 55573 Epoch: [150] [ 10/156] eta: 0:04:21 lr: 0.004445 min_lr: 0.004445 loss: 3.4160 (3.2174) weight_decay: 0.0500 (0.0500) time: 1.7880 data: 1.0044 max mem: 55573 Epoch: [150] [ 20/156] eta: 0:02:45 lr: 0.004443 min_lr: 0.004443 loss: 3.1288 (3.1835) weight_decay: 0.0500 (0.0500) time: 0.6042 data: 0.0004 max mem: 55573 Epoch: [150] [ 30/156] eta: 0:02:08 lr: 0.004440 min_lr: 0.004440 loss: 3.3392 (3.1777) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [150] [ 40/156] eta: 0:01:45 lr: 0.004437 min_lr: 0.004437 loss: 3.3392 (3.2025) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [150] [ 50/156] eta: 0:01:30 lr: 0.004434 min_lr: 0.004434 loss: 3.3655 (3.2467) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [150] [ 60/156] eta: 0:01:17 lr: 0.004431 min_lr: 0.004431 loss: 3.5706 (3.3070) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [150] [ 70/156] eta: 0:01:06 lr: 0.004428 min_lr: 0.004428 loss: 3.6207 (3.3202) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [150] [ 80/156] eta: 0:00:57 lr: 0.004425 min_lr: 0.004425 loss: 3.4409 (3.3379) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [150] [ 90/156] eta: 0:00:48 lr: 0.004423 min_lr: 0.004423 loss: 3.5107 (3.3327) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [150] [100/156] eta: 0:00:40 lr: 0.004420 min_lr: 0.004420 loss: 3.2644 (3.3058) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [150] [110/156] eta: 0:00:32 lr: 0.004417 min_lr: 0.004417 loss: 3.0118 (3.3077) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [150] [120/156] eta: 0:00:25 lr: 0.004414 min_lr: 0.004414 loss: 3.4591 (3.3159) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [150] [130/156] eta: 0:00:17 lr: 0.004411 min_lr: 0.004411 loss: 3.4889 (3.3241) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0011 max mem: 55573 Epoch: [150] [140/156] eta: 0:00:10 lr: 0.004408 min_lr: 0.004408 loss: 3.5390 (3.3227) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0009 max mem: 55573 Epoch: [150] [150/156] eta: 0:00:04 lr: 0.004405 min_lr: 0.004405 loss: 3.3683 (3.3289) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [150] [155/156] eta: 0:00:00 lr: 0.004404 min_lr: 0.004404 loss: 3.2929 (3.3307) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [150] Total time: 0:01:46 (0.6806 s / it) Averaged stats: lr: 0.004404 min_lr: 0.004404 loss: 3.2929 (3.3586) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0492 (1.0492) acc1: 78.6458 (78.6458) acc5: 94.4010 (94.4010) time: 7.1734 data: 6.9360 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1290 (1.1516) acc1: 77.2135 (74.3360) acc5: 94.4010 (92.9920) time: 1.5809 data: 1.3873 max mem: 55573 Test: Total time: 0:00:08 (1.6254 s / it) * Acc@1 74.832 Acc@5 92.678 loss 1.162 Accuracy of the model on the 50000 test images: 74.8% Max accuracy: 74.83% Test: [0/5] eta: 0:00:34 loss: 1.6611 (1.6611) acc1: 60.1562 (60.1562) acc5: 82.8125 (82.8125) time: 6.8066 data: 6.5706 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6978 (1.7287) acc1: 57.5521 (55.7760) acc5: 82.8125 (80.2240) time: 1.5067 data: 1.3142 max mem: 55573 Test: Total time: 0:00:07 (1.5233 s / it) * Acc@1 55.122 Acc@5 79.996 loss 1.816 Accuracy of the model EMA on 50000 test images: 55.1% Max EMA accuracy: 55.12% Epoch: [151] [ 0/156] eta: 0:34:19 lr: 0.004404 min_lr: 0.004404 loss: 3.2982 (3.2982) weight_decay: 0.0500 (0.0500) time: 13.2038 data: 8.6946 max mem: 55573 Epoch: [151] [ 10/156] eta: 0:04:15 lr: 0.004401 min_lr: 0.004401 loss: 3.3602 (3.3960) weight_decay: 0.0500 (0.0500) time: 1.7506 data: 0.8022 max mem: 55573 Epoch: [151] [ 20/156] eta: 0:02:42 lr: 0.004398 min_lr: 0.004398 loss: 3.4837 (3.4542) weight_decay: 0.0500 (0.0500) time: 0.5979 data: 0.0067 max mem: 55573 Epoch: [151] [ 30/156] eta: 0:02:06 lr: 0.004395 min_lr: 0.004395 loss: 3.4577 (3.4320) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [151] [ 40/156] eta: 0:01:44 lr: 0.004392 min_lr: 0.004392 loss: 3.4801 (3.4542) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [151] [ 50/156] eta: 0:01:29 lr: 0.004389 min_lr: 0.004389 loss: 3.5331 (3.4614) weight_decay: 0.0500 (0.0500) time: 0.5937 data: 0.0004 max mem: 55573 Epoch: [151] [ 60/156] eta: 0:01:16 lr: 0.004387 min_lr: 0.004387 loss: 3.6562 (3.4766) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0005 max mem: 55573 Epoch: [151] [ 70/156] eta: 0:01:06 lr: 0.004384 min_lr: 0.004384 loss: 3.5596 (3.4791) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [151] [ 80/156] eta: 0:00:56 lr: 0.004381 min_lr: 0.004381 loss: 3.5359 (3.4599) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [151] [ 90/156] eta: 0:00:48 lr: 0.004378 min_lr: 0.004378 loss: 3.3860 (3.4465) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [151] [100/156] eta: 0:00:40 lr: 0.004375 min_lr: 0.004375 loss: 3.2480 (3.4112) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [151] [110/156] eta: 0:00:32 lr: 0.004372 min_lr: 0.004372 loss: 3.4709 (3.4236) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [151] [120/156] eta: 0:00:25 lr: 0.004369 min_lr: 0.004369 loss: 3.6554 (3.4297) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [151] [130/156] eta: 0:00:17 lr: 0.004366 min_lr: 0.004366 loss: 3.5353 (3.4222) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0011 max mem: 55573 Epoch: [151] [140/156] eta: 0:00:10 lr: 0.004364 min_lr: 0.004364 loss: 3.4002 (3.4180) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0009 max mem: 55573 Epoch: [151] [150/156] eta: 0:00:04 lr: 0.004361 min_lr: 0.004361 loss: 3.4010 (3.4050) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0002 max mem: 55573 Epoch: [151] [155/156] eta: 0:00:00 lr: 0.004359 min_lr: 0.004359 loss: 3.4591 (3.4089) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0002 max mem: 55573 Epoch: [151] Total time: 0:01:45 (0.6783 s / it) Averaged stats: lr: 0.004359 min_lr: 0.004359 loss: 3.4591 (3.3589) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0927 (1.0927) acc1: 79.4271 (79.4271) acc5: 94.5312 (94.5312) time: 7.1466 data: 6.9092 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2713 (1.2334) acc1: 75.9115 (74.9760) acc5: 92.9688 (92.0960) time: 1.5758 data: 1.3819 max mem: 55573 Test: Total time: 0:00:08 (1.6286 s / it) * Acc@1 74.900 Acc@5 92.516 loss 1.229 Accuracy of the model on the 50000 test images: 74.9% Max accuracy: 74.90% Test: [0/5] eta: 0:00:34 loss: 1.6322 (1.6322) acc1: 60.8073 (60.8073) acc5: 83.2031 (83.2031) time: 6.8854 data: 6.6492 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6696 (1.6983) acc1: 58.5938 (56.2880) acc5: 83.2031 (80.7360) time: 1.5224 data: 1.3300 max mem: 55573 Test: Total time: 0:00:07 (1.5406 s / it) * Acc@1 55.756 Acc@5 80.516 loss 1.782 Accuracy of the model EMA on 50000 test images: 55.8% Max EMA accuracy: 55.76% Epoch: [152] [ 0/156] eta: 0:34:03 lr: 0.004359 min_lr: 0.004359 loss: 3.4798 (3.4798) weight_decay: 0.0500 (0.0500) time: 13.0990 data: 9.1865 max mem: 55573 Epoch: [152] [ 10/156] eta: 0:04:13 lr: 0.004356 min_lr: 0.004356 loss: 3.5854 (3.4543) weight_decay: 0.0500 (0.0500) time: 1.7339 data: 0.8355 max mem: 55573 Epoch: [152] [ 20/156] eta: 0:02:41 lr: 0.004353 min_lr: 0.004353 loss: 3.3970 (3.3238) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0004 max mem: 55573 Epoch: [152] [ 30/156] eta: 0:02:05 lr: 0.004350 min_lr: 0.004350 loss: 3.2547 (3.3204) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [152] [ 40/156] eta: 0:01:44 lr: 0.004348 min_lr: 0.004348 loss: 3.2942 (3.3175) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [152] [ 50/156] eta: 0:01:28 lr: 0.004345 min_lr: 0.004345 loss: 3.3868 (3.3149) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [152] [ 60/156] eta: 0:01:16 lr: 0.004342 min_lr: 0.004342 loss: 3.4666 (3.3280) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [152] [ 70/156] eta: 0:01:06 lr: 0.004339 min_lr: 0.004339 loss: 3.1916 (3.3127) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [152] [ 80/156] eta: 0:00:56 lr: 0.004336 min_lr: 0.004336 loss: 3.0701 (3.2938) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [152] [ 90/156] eta: 0:00:48 lr: 0.004333 min_lr: 0.004333 loss: 3.3032 (3.2862) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [152] [100/156] eta: 0:00:40 lr: 0.004330 min_lr: 0.004330 loss: 3.3323 (3.2928) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [152] [110/156] eta: 0:00:32 lr: 0.004327 min_lr: 0.004327 loss: 3.3289 (3.2893) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [152] [120/156] eta: 0:00:25 lr: 0.004325 min_lr: 0.004325 loss: 3.4401 (3.3060) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [152] [130/156] eta: 0:00:17 lr: 0.004322 min_lr: 0.004322 loss: 3.5862 (3.3049) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0016 max mem: 55573 Epoch: [152] [140/156] eta: 0:00:10 lr: 0.004319 min_lr: 0.004319 loss: 3.2267 (3.2906) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0015 max mem: 55573 Epoch: [152] [150/156] eta: 0:00:04 lr: 0.004316 min_lr: 0.004316 loss: 3.3440 (3.3092) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [152] [155/156] eta: 0:00:00 lr: 0.004315 min_lr: 0.004315 loss: 3.6815 (3.3244) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [152] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.004315 min_lr: 0.004315 loss: 3.6815 (3.3645) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1433 (1.1433) acc1: 77.3438 (77.3438) acc5: 94.5312 (94.5312) time: 7.1758 data: 6.9383 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2152 (1.2443) acc1: 74.4792 (72.7680) acc5: 93.7500 (91.7120) time: 1.5817 data: 1.3877 max mem: 55573 Test: Total time: 0:00:08 (1.6207 s / it) * Acc@1 73.866 Acc@5 91.932 loss 1.233 Accuracy of the model on the 50000 test images: 73.9% Max accuracy: 74.90% Test: [0/5] eta: 0:00:33 loss: 1.6058 (1.6058) acc1: 61.4583 (61.4583) acc5: 83.3333 (83.3333) time: 6.6767 data: 6.4405 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6436 (1.6699) acc1: 58.4635 (56.6400) acc5: 83.3333 (81.1840) time: 1.4919 data: 1.2995 max mem: 55573 Test: Total time: 0:00:07 (1.5162 s / it) * Acc@1 56.368 Acc@5 81.014 loss 1.750 Accuracy of the model EMA on 50000 test images: 56.4% Max EMA accuracy: 56.37% Epoch: [153] [ 0/156] eta: 0:37:26 lr: 0.004314 min_lr: 0.004314 loss: 3.3260 (3.3260) weight_decay: 0.0500 (0.0500) time: 14.4000 data: 9.0028 max mem: 55573 Epoch: [153] [ 10/156] eta: 0:04:30 lr: 0.004311 min_lr: 0.004311 loss: 3.3628 (3.1649) weight_decay: 0.0500 (0.0500) time: 1.8548 data: 0.8188 max mem: 55573 Epoch: [153] [ 20/156] eta: 0:02:50 lr: 0.004309 min_lr: 0.004309 loss: 3.4595 (3.3114) weight_decay: 0.0500 (0.0500) time: 0.5965 data: 0.0004 max mem: 55573 Epoch: [153] [ 30/156] eta: 0:02:11 lr: 0.004306 min_lr: 0.004306 loss: 3.6723 (3.3447) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [153] [ 40/156] eta: 0:01:47 lr: 0.004303 min_lr: 0.004303 loss: 3.4846 (3.3440) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [153] [ 50/156] eta: 0:01:31 lr: 0.004300 min_lr: 0.004300 loss: 3.4846 (3.3489) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [153] [ 60/156] eta: 0:01:18 lr: 0.004297 min_lr: 0.004297 loss: 3.4551 (3.3444) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [153] [ 70/156] eta: 0:01:07 lr: 0.004294 min_lr: 0.004294 loss: 3.3874 (3.3692) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [153] [ 80/156] eta: 0:00:58 lr: 0.004291 min_lr: 0.004291 loss: 3.3206 (3.3555) weight_decay: 0.0500 (0.0500) time: 0.5991 data: 0.0005 max mem: 55573 Epoch: [153] [ 90/156] eta: 0:00:49 lr: 0.004288 min_lr: 0.004288 loss: 3.4429 (3.3648) weight_decay: 0.0500 (0.0500) time: 0.5992 data: 0.0005 max mem: 55573 Epoch: [153] [100/156] eta: 0:00:40 lr: 0.004286 min_lr: 0.004286 loss: 3.5221 (3.3861) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [153] [110/156] eta: 0:00:33 lr: 0.004283 min_lr: 0.004283 loss: 3.6213 (3.3751) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [153] [120/156] eta: 0:00:25 lr: 0.004280 min_lr: 0.004280 loss: 3.4019 (3.3759) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [153] [130/156] eta: 0:00:18 lr: 0.004277 min_lr: 0.004277 loss: 3.3937 (3.3692) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0010 max mem: 55573 Epoch: [153] [140/156] eta: 0:00:11 lr: 0.004274 min_lr: 0.004274 loss: 3.6439 (3.3899) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0009 max mem: 55573 Epoch: [153] [150/156] eta: 0:00:04 lr: 0.004271 min_lr: 0.004271 loss: 3.6321 (3.3888) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [153] [155/156] eta: 0:00:00 lr: 0.004270 min_lr: 0.004270 loss: 3.5585 (3.3873) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [153] Total time: 0:01:47 (0.6871 s / it) Averaged stats: lr: 0.004270 min_lr: 0.004270 loss: 3.5585 (3.3445) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1000 (1.1000) acc1: 78.5156 (78.5156) acc5: 95.0521 (95.0521) time: 7.0241 data: 6.7867 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2504 (1.2992) acc1: 75.5208 (73.7280) acc5: 94.5312 (92.3200) time: 1.5512 data: 1.3574 max mem: 55573 Test: Total time: 0:00:08 (1.6082 s / it) * Acc@1 74.652 Acc@5 92.548 loss 1.272 Accuracy of the model on the 50000 test images: 74.7% Max accuracy: 74.90% Test: [0/5] eta: 0:00:36 loss: 1.5799 (1.5799) acc1: 61.7188 (61.7188) acc5: 83.8542 (83.8542) time: 7.3271 data: 7.0911 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.6186 (1.6426) acc1: 59.5052 (57.1840) acc5: 83.8542 (81.4720) time: 1.6108 data: 1.4183 max mem: 55573 Test: Total time: 0:00:08 (1.6307 s / it) * Acc@1 56.886 Acc@5 81.446 loss 1.720 Accuracy of the model EMA on 50000 test images: 56.9% Max EMA accuracy: 56.89% Epoch: [154] [ 0/156] eta: 0:35:28 lr: 0.004270 min_lr: 0.004270 loss: 3.8963 (3.8963) weight_decay: 0.0500 (0.0500) time: 13.6435 data: 13.0491 max mem: 55573 Epoch: [154] [ 10/156] eta: 0:04:21 lr: 0.004267 min_lr: 0.004267 loss: 2.9742 (3.0502) weight_decay: 0.0500 (0.0500) time: 1.7928 data: 1.1866 max mem: 55573 Epoch: [154] [ 20/156] eta: 0:02:45 lr: 0.004264 min_lr: 0.004264 loss: 3.0872 (3.2134) weight_decay: 0.0500 (0.0500) time: 0.5991 data: 0.0004 max mem: 55573 Epoch: [154] [ 30/156] eta: 0:02:08 lr: 0.004261 min_lr: 0.004261 loss: 3.4872 (3.2959) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [154] [ 40/156] eta: 0:01:45 lr: 0.004258 min_lr: 0.004258 loss: 3.3440 (3.2690) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [154] [ 50/156] eta: 0:01:30 lr: 0.004255 min_lr: 0.004255 loss: 3.3440 (3.2737) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [154] [ 60/156] eta: 0:01:17 lr: 0.004252 min_lr: 0.004252 loss: 3.3488 (3.2719) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [154] [ 70/156] eta: 0:01:06 lr: 0.004249 min_lr: 0.004249 loss: 3.5503 (3.3279) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [154] [ 80/156] eta: 0:00:57 lr: 0.004247 min_lr: 0.004247 loss: 3.5503 (3.3307) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [154] [ 90/156] eta: 0:00:48 lr: 0.004244 min_lr: 0.004244 loss: 3.5203 (3.3347) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [154] [100/156] eta: 0:00:40 lr: 0.004241 min_lr: 0.004241 loss: 3.5203 (3.3164) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [154] [110/156] eta: 0:00:32 lr: 0.004238 min_lr: 0.004238 loss: 3.4005 (3.3251) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [154] [120/156] eta: 0:00:25 lr: 0.004235 min_lr: 0.004235 loss: 3.4674 (3.3437) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [154] [130/156] eta: 0:00:17 lr: 0.004232 min_lr: 0.004232 loss: 3.5937 (3.3462) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0022 max mem: 55573 Epoch: [154] [140/156] eta: 0:00:10 lr: 0.004229 min_lr: 0.004229 loss: 3.6308 (3.3706) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0020 max mem: 55573 Epoch: [154] [150/156] eta: 0:00:04 lr: 0.004226 min_lr: 0.004226 loss: 3.5870 (3.3539) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [154] [155/156] eta: 0:00:00 lr: 0.004225 min_lr: 0.004225 loss: 3.4475 (3.3602) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [154] Total time: 0:01:46 (0.6815 s / it) Averaged stats: lr: 0.004225 min_lr: 0.004225 loss: 3.4475 (3.3562) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.1239 (1.1239) acc1: 76.8229 (76.8229) acc5: 94.7917 (94.7917) time: 6.7301 data: 6.4928 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2286 (1.2626) acc1: 76.8229 (73.6640) acc5: 94.2708 (92.3520) time: 1.4915 data: 1.2987 max mem: 55573 Test: Total time: 0:00:07 (1.5379 s / it) * Acc@1 74.962 Acc@5 92.694 loss 1.252 Accuracy of the model on the 50000 test images: 75.0% Max accuracy: 74.96% Test: [0/5] eta: 0:00:35 loss: 1.5547 (1.5547) acc1: 62.1094 (62.1094) acc5: 84.6354 (84.6354) time: 7.1584 data: 6.9219 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5941 (1.6161) acc1: 60.1562 (57.8240) acc5: 84.6354 (81.9840) time: 1.5772 data: 1.3845 max mem: 55573 Test: Total time: 0:00:07 (1.5945 s / it) * Acc@1 57.466 Acc@5 81.930 loss 1.690 Accuracy of the model EMA on 50000 test images: 57.5% Max EMA accuracy: 57.47% Epoch: [155] [ 0/156] eta: 0:36:00 lr: 0.004225 min_lr: 0.004225 loss: 3.6642 (3.6642) weight_decay: 0.0500 (0.0500) time: 13.8508 data: 8.8443 max mem: 55573 Epoch: [155] [ 10/156] eta: 0:04:22 lr: 0.004222 min_lr: 0.004222 loss: 3.4187 (3.3385) weight_decay: 0.0500 (0.0500) time: 1.7947 data: 0.8045 max mem: 55573 Epoch: [155] [ 20/156] eta: 0:02:46 lr: 0.004219 min_lr: 0.004219 loss: 3.5122 (3.4436) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [155] [ 30/156] eta: 0:02:08 lr: 0.004216 min_lr: 0.004216 loss: 3.5619 (3.3991) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [155] [ 40/156] eta: 0:01:45 lr: 0.004213 min_lr: 0.004213 loss: 3.3498 (3.3521) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [155] [ 50/156] eta: 0:01:30 lr: 0.004210 min_lr: 0.004210 loss: 3.3498 (3.3400) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [155] [ 60/156] eta: 0:01:17 lr: 0.004208 min_lr: 0.004208 loss: 3.4078 (3.3108) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0004 max mem: 55573 Epoch: [155] [ 70/156] eta: 0:01:06 lr: 0.004205 min_lr: 0.004205 loss: 3.4078 (3.3190) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [155] [ 80/156] eta: 0:00:57 lr: 0.004202 min_lr: 0.004202 loss: 3.4204 (3.3211) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [155] [ 90/156] eta: 0:00:48 lr: 0.004199 min_lr: 0.004199 loss: 3.4738 (3.3498) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [155] [100/156] eta: 0:00:40 lr: 0.004196 min_lr: 0.004196 loss: 3.5588 (3.3614) weight_decay: 0.0500 (0.0500) time: 0.5977 data: 0.0005 max mem: 55573 Epoch: [155] [110/156] eta: 0:00:32 lr: 0.004193 min_lr: 0.004193 loss: 3.4541 (3.3457) weight_decay: 0.0500 (0.0500) time: 0.5975 data: 0.0005 max mem: 55573 Epoch: [155] [120/156] eta: 0:00:25 lr: 0.004190 min_lr: 0.004190 loss: 3.2817 (3.3487) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [155] [130/156] eta: 0:00:18 lr: 0.004187 min_lr: 0.004187 loss: 3.5254 (3.3678) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0010 max mem: 55573 Epoch: [155] [140/156] eta: 0:00:10 lr: 0.004185 min_lr: 0.004185 loss: 3.5754 (3.3710) weight_decay: 0.0500 (0.0500) time: 0.5850 data: 0.0008 max mem: 55573 Epoch: [155] [150/156] eta: 0:00:04 lr: 0.004182 min_lr: 0.004182 loss: 3.4050 (3.3694) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [155] [155/156] eta: 0:00:00 lr: 0.004180 min_lr: 0.004180 loss: 3.3944 (3.3653) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [155] Total time: 0:01:46 (0.6822 s / it) Averaged stats: lr: 0.004180 min_lr: 0.004180 loss: 3.3944 (3.3585) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0264 (1.0264) acc1: 77.6042 (77.6042) acc5: 95.5729 (95.5729) time: 6.9553 data: 6.7179 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1634 (1.2008) acc1: 76.1719 (73.2800) acc5: 95.0521 (92.7360) time: 1.5373 data: 1.3436 max mem: 55573 Test: Total time: 0:00:07 (1.5853 s / it) * Acc@1 74.960 Acc@5 92.698 loss 1.197 Accuracy of the model on the 50000 test images: 75.0% Max accuracy: 74.96% Test: [0/5] eta: 0:00:35 loss: 1.5318 (1.5318) acc1: 62.6302 (62.6302) acc5: 85.0260 (85.0260) time: 7.0691 data: 6.8332 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5718 (1.5924) acc1: 60.5469 (58.2720) acc5: 85.0260 (82.2080) time: 1.5592 data: 1.3667 max mem: 55573 Test: Total time: 0:00:07 (1.5788 s / it) * Acc@1 58.004 Acc@5 82.322 loss 1.663 Accuracy of the model EMA on 50000 test images: 58.0% Max EMA accuracy: 58.00% Epoch: [156] [ 0/156] eta: 0:35:25 lr: 0.004180 min_lr: 0.004180 loss: 2.1633 (2.1633) weight_decay: 0.0500 (0.0500) time: 13.6218 data: 10.4366 max mem: 55573 Epoch: [156] [ 10/156] eta: 0:04:25 lr: 0.004177 min_lr: 0.004177 loss: 3.5722 (3.3424) weight_decay: 0.0500 (0.0500) time: 1.8191 data: 0.9953 max mem: 55573 Epoch: [156] [ 20/156] eta: 0:02:47 lr: 0.004174 min_lr: 0.004174 loss: 3.4604 (3.2767) weight_decay: 0.0500 (0.0500) time: 0.6143 data: 0.0258 max mem: 55573 Epoch: [156] [ 30/156] eta: 0:02:09 lr: 0.004171 min_lr: 0.004171 loss: 3.2561 (3.2865) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [156] [ 40/156] eta: 0:01:46 lr: 0.004168 min_lr: 0.004168 loss: 3.2562 (3.3036) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [156] [ 50/156] eta: 0:01:30 lr: 0.004166 min_lr: 0.004166 loss: 2.9923 (3.2317) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [156] [ 60/156] eta: 0:01:17 lr: 0.004163 min_lr: 0.004163 loss: 3.2453 (3.2802) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [156] [ 70/156] eta: 0:01:07 lr: 0.004160 min_lr: 0.004160 loss: 3.4633 (3.2898) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [156] [ 80/156] eta: 0:00:57 lr: 0.004157 min_lr: 0.004157 loss: 3.4633 (3.3045) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [156] [ 90/156] eta: 0:00:48 lr: 0.004154 min_lr: 0.004154 loss: 3.3266 (3.2894) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [156] [100/156] eta: 0:00:40 lr: 0.004151 min_lr: 0.004151 loss: 3.3020 (3.2814) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [156] [110/156] eta: 0:00:32 lr: 0.004148 min_lr: 0.004148 loss: 3.2526 (3.2700) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [156] [120/156] eta: 0:00:25 lr: 0.004145 min_lr: 0.004145 loss: 2.9713 (3.2556) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [156] [130/156] eta: 0:00:18 lr: 0.004143 min_lr: 0.004143 loss: 3.1934 (3.2635) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0010 max mem: 55573 Epoch: [156] [140/156] eta: 0:00:10 lr: 0.004140 min_lr: 0.004140 loss: 3.4586 (3.2869) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0009 max mem: 55573 Epoch: [156] [150/156] eta: 0:00:04 lr: 0.004137 min_lr: 0.004137 loss: 3.4586 (3.2869) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [156] [155/156] eta: 0:00:00 lr: 0.004135 min_lr: 0.004135 loss: 3.2869 (3.2908) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [156] Total time: 0:01:46 (0.6826 s / it) Averaged stats: lr: 0.004135 min_lr: 0.004135 loss: 3.2869 (3.3355) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.1741 (1.1741) acc1: 76.0417 (76.0417) acc5: 94.9219 (94.9219) time: 6.6372 data: 6.4002 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2458 (1.2645) acc1: 76.0417 (73.5680) acc5: 94.4010 (92.7680) time: 1.4727 data: 1.2801 max mem: 55573 Test: Total time: 0:00:07 (1.4893 s / it) * Acc@1 74.880 Acc@5 92.726 loss 1.253 Accuracy of the model on the 50000 test images: 74.9% Max accuracy: 74.96% Test: [0/5] eta: 0:00:35 loss: 1.5105 (1.5105) acc1: 62.7604 (62.7604) acc5: 85.2865 (85.2865) time: 7.1970 data: 6.9610 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5514 (1.5701) acc1: 61.1979 (58.6880) acc5: 85.2865 (82.4320) time: 1.5847 data: 1.3923 max mem: 55573 Test: Total time: 0:00:08 (1.6035 s / it) * Acc@1 58.492 Acc@5 82.676 loss 1.638 Accuracy of the model EMA on 50000 test images: 58.5% Max EMA accuracy: 58.49% Epoch: [157] [ 0/156] eta: 0:37:13 lr: 0.004135 min_lr: 0.004135 loss: 3.3797 (3.3797) weight_decay: 0.0500 (0.0500) time: 14.3144 data: 8.7082 max mem: 55573 Epoch: [157] [ 10/156] eta: 0:04:29 lr: 0.004132 min_lr: 0.004132 loss: 3.4636 (3.2446) weight_decay: 0.0500 (0.0500) time: 1.8432 data: 0.7920 max mem: 55573 Epoch: [157] [ 20/156] eta: 0:02:49 lr: 0.004129 min_lr: 0.004129 loss: 3.4636 (3.2810) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [157] [ 30/156] eta: 0:02:10 lr: 0.004126 min_lr: 0.004126 loss: 3.3513 (3.3032) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [157] [ 40/156] eta: 0:01:47 lr: 0.004124 min_lr: 0.004124 loss: 3.4893 (3.3299) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [157] [ 50/156] eta: 0:01:31 lr: 0.004121 min_lr: 0.004121 loss: 3.6100 (3.3785) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [157] [ 60/156] eta: 0:01:18 lr: 0.004118 min_lr: 0.004118 loss: 3.6584 (3.3988) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [157] [ 70/156] eta: 0:01:07 lr: 0.004115 min_lr: 0.004115 loss: 3.4704 (3.3637) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [157] [ 80/156] eta: 0:00:57 lr: 0.004112 min_lr: 0.004112 loss: 3.5251 (3.3744) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [157] [ 90/156] eta: 0:00:48 lr: 0.004109 min_lr: 0.004109 loss: 3.5337 (3.3716) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [157] [100/156] eta: 0:00:40 lr: 0.004106 min_lr: 0.004106 loss: 3.5346 (3.3679) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [157] [110/156] eta: 0:00:32 lr: 0.004103 min_lr: 0.004103 loss: 3.3755 (3.3622) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [157] [120/156] eta: 0:00:25 lr: 0.004101 min_lr: 0.004101 loss: 3.2884 (3.3446) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [157] [130/156] eta: 0:00:18 lr: 0.004098 min_lr: 0.004098 loss: 3.3794 (3.3527) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [157] [140/156] eta: 0:00:11 lr: 0.004095 min_lr: 0.004095 loss: 3.5380 (3.3593) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0008 max mem: 55573 Epoch: [157] [150/156] eta: 0:00:04 lr: 0.004092 min_lr: 0.004092 loss: 3.5041 (3.3596) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [157] [155/156] eta: 0:00:00 lr: 0.004091 min_lr: 0.004091 loss: 3.4701 (3.3668) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [157] Total time: 0:01:46 (0.6844 s / it) Averaged stats: lr: 0.004091 min_lr: 0.004091 loss: 3.4701 (3.3321) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0789 (1.0789) acc1: 78.7760 (78.7760) acc5: 95.4427 (95.4427) time: 7.0855 data: 6.8481 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1188 (1.2187) acc1: 76.4323 (73.6320) acc5: 95.3125 (93.1840) time: 1.5636 data: 1.3697 max mem: 55573 Test: Total time: 0:00:08 (1.6126 s / it) * Acc@1 75.110 Acc@5 92.860 loss 1.190 Accuracy of the model on the 50000 test images: 75.1% Max accuracy: 75.11% Test: [0/5] eta: 0:00:35 loss: 1.4898 (1.4898) acc1: 63.2812 (63.2812) acc5: 85.4167 (85.4167) time: 7.1926 data: 6.9561 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5314 (1.5484) acc1: 61.8490 (59.2000) acc5: 85.4167 (82.7200) time: 1.5840 data: 1.3913 max mem: 55573 Test: Total time: 0:00:08 (1.6009 s / it) * Acc@1 58.926 Acc@5 83.030 loss 1.614 Accuracy of the model EMA on 50000 test images: 58.9% Max EMA accuracy: 58.93% Epoch: [158] [ 0/156] eta: 0:33:07 lr: 0.004090 min_lr: 0.004090 loss: 2.8991 (2.8991) weight_decay: 0.0500 (0.0500) time: 12.7378 data: 10.5927 max mem: 55573 Epoch: [158] [ 10/156] eta: 0:04:37 lr: 0.004087 min_lr: 0.004087 loss: 3.6656 (3.4584) weight_decay: 0.0500 (0.0500) time: 1.8992 data: 1.1721 max mem: 55573 Epoch: [158] [ 20/156] eta: 0:02:53 lr: 0.004084 min_lr: 0.004084 loss: 3.6656 (3.4398) weight_decay: 0.0500 (0.0500) time: 0.7024 data: 0.1153 max mem: 55573 Epoch: [158] [ 30/156] eta: 0:02:12 lr: 0.004082 min_lr: 0.004082 loss: 3.3965 (3.3972) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [158] [ 40/156] eta: 0:01:49 lr: 0.004079 min_lr: 0.004079 loss: 3.3625 (3.3865) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [158] [ 50/156] eta: 0:01:32 lr: 0.004076 min_lr: 0.004076 loss: 3.4963 (3.3973) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [158] [ 60/156] eta: 0:01:19 lr: 0.004073 min_lr: 0.004073 loss: 3.4963 (3.3833) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [158] [ 70/156] eta: 0:01:08 lr: 0.004070 min_lr: 0.004070 loss: 3.5568 (3.3879) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [158] [ 80/156] eta: 0:00:58 lr: 0.004067 min_lr: 0.004067 loss: 3.5541 (3.3896) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [158] [ 90/156] eta: 0:00:49 lr: 0.004064 min_lr: 0.004064 loss: 3.6715 (3.4304) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [158] [100/156] eta: 0:00:41 lr: 0.004061 min_lr: 0.004061 loss: 3.7210 (3.4534) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [158] [110/156] eta: 0:00:33 lr: 0.004059 min_lr: 0.004059 loss: 3.5836 (3.4494) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [158] [120/156] eta: 0:00:25 lr: 0.004056 min_lr: 0.004056 loss: 3.4081 (3.4441) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [158] [130/156] eta: 0:00:18 lr: 0.004053 min_lr: 0.004053 loss: 3.4506 (3.4496) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0011 max mem: 55573 Epoch: [158] [140/156] eta: 0:00:11 lr: 0.004050 min_lr: 0.004050 loss: 2.9979 (3.3993) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0009 max mem: 55573 Epoch: [158] [150/156] eta: 0:00:04 lr: 0.004047 min_lr: 0.004047 loss: 2.9791 (3.3897) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [158] [155/156] eta: 0:00:00 lr: 0.004046 min_lr: 0.004046 loss: 3.2156 (3.3851) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [158] Total time: 0:01:47 (0.6881 s / it) Averaged stats: lr: 0.004046 min_lr: 0.004046 loss: 3.2156 (3.3329) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 1.0838 (1.0838) acc1: 78.1250 (78.1250) acc5: 95.0521 (95.0521) time: 6.5672 data: 6.3302 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1148 (1.1533) acc1: 76.5625 (74.5600) acc5: 95.0521 (93.0240) time: 1.4588 data: 1.2661 max mem: 55573 Test: Total time: 0:00:07 (1.4765 s / it) * Acc@1 75.120 Acc@5 92.730 loss 1.166 Accuracy of the model on the 50000 test images: 75.1% Max accuracy: 75.12% Test: [0/5] eta: 0:00:33 loss: 1.4697 (1.4697) acc1: 63.9323 (63.9323) acc5: 85.8073 (85.8073) time: 6.7186 data: 6.4827 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.5119 (1.5275) acc1: 62.1094 (59.5520) acc5: 85.8073 (83.0080) time: 1.4890 data: 1.2966 max mem: 55573 Test: Total time: 0:00:07 (1.5044 s / it) * Acc@1 59.408 Acc@5 83.322 loss 1.591 Accuracy of the model EMA on 50000 test images: 59.4% Max EMA accuracy: 59.41% Epoch: [159] [ 0/156] eta: 0:37:04 lr: 0.004045 min_lr: 0.004045 loss: 3.1934 (3.1934) weight_decay: 0.0500 (0.0500) time: 14.2586 data: 11.8814 max mem: 55573 Epoch: [159] [ 10/156] eta: 0:04:28 lr: 0.004042 min_lr: 0.004042 loss: 3.4524 (3.3234) weight_decay: 0.0500 (0.0500) time: 1.8356 data: 1.0805 max mem: 55573 Epoch: [159] [ 20/156] eta: 0:02:49 lr: 0.004040 min_lr: 0.004040 loss: 3.4661 (3.3620) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [159] [ 30/156] eta: 0:02:10 lr: 0.004037 min_lr: 0.004037 loss: 3.2157 (3.2385) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [159] [ 40/156] eta: 0:01:47 lr: 0.004034 min_lr: 0.004034 loss: 3.1221 (3.2570) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [159] [ 50/156] eta: 0:01:31 lr: 0.004031 min_lr: 0.004031 loss: 3.4374 (3.2896) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [159] [ 60/156] eta: 0:01:18 lr: 0.004028 min_lr: 0.004028 loss: 3.5180 (3.3408) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [159] [ 70/156] eta: 0:01:07 lr: 0.004025 min_lr: 0.004025 loss: 3.5776 (3.3348) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [159] [ 80/156] eta: 0:00:57 lr: 0.004022 min_lr: 0.004022 loss: 3.0741 (3.3329) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [159] [ 90/156] eta: 0:00:48 lr: 0.004019 min_lr: 0.004019 loss: 3.3023 (3.3274) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [159] [100/156] eta: 0:00:40 lr: 0.004017 min_lr: 0.004017 loss: 3.3023 (3.3124) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [159] [110/156] eta: 0:00:32 lr: 0.004014 min_lr: 0.004014 loss: 3.3472 (3.3089) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [159] [120/156] eta: 0:00:25 lr: 0.004011 min_lr: 0.004011 loss: 3.3665 (3.3193) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [159] [130/156] eta: 0:00:18 lr: 0.004008 min_lr: 0.004008 loss: 3.4906 (3.3094) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0010 max mem: 55573 Epoch: [159] [140/156] eta: 0:00:11 lr: 0.004005 min_lr: 0.004005 loss: 3.5810 (3.3284) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0008 max mem: 55573 Epoch: [159] [150/156] eta: 0:00:04 lr: 0.004002 min_lr: 0.004002 loss: 3.5802 (3.3341) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [159] [155/156] eta: 0:00:00 lr: 0.004001 min_lr: 0.004001 loss: 3.5792 (3.3458) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [159] Total time: 0:01:46 (0.6843 s / it) Averaged stats: lr: 0.004001 min_lr: 0.004001 loss: 3.5792 (3.3317) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1676 (1.1676) acc1: 78.5156 (78.5156) acc5: 94.9219 (94.9219) time: 7.0468 data: 6.8094 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2375 (1.2825) acc1: 76.1719 (73.9520) acc5: 94.7917 (92.3520) time: 1.5557 data: 1.3620 max mem: 55573 Test: Total time: 0:00:07 (1.6000 s / it) * Acc@1 74.546 Acc@5 92.480 loss 1.288 Accuracy of the model on the 50000 test images: 74.5% Max accuracy: 75.12% Test: [0/5] eta: 0:00:33 loss: 1.4508 (1.4508) acc1: 64.3229 (64.3229) acc5: 86.0677 (86.0677) time: 6.6910 data: 6.4548 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4935 (1.5079) acc1: 62.3698 (60.0000) acc5: 86.0677 (83.2000) time: 1.4933 data: 1.3008 max mem: 55573 Test: Total time: 0:00:07 (1.5122 s / it) * Acc@1 59.794 Acc@5 83.582 loss 1.569 Accuracy of the model EMA on 50000 test images: 59.8% Max EMA accuracy: 59.79% Epoch: [160] [ 0/156] eta: 0:34:45 lr: 0.004001 min_lr: 0.004001 loss: 3.5150 (3.5150) weight_decay: 0.0500 (0.0500) time: 13.3707 data: 9.1671 max mem: 55573 Epoch: [160] [ 10/156] eta: 0:04:17 lr: 0.003998 min_lr: 0.003998 loss: 3.5150 (3.3202) weight_decay: 0.0500 (0.0500) time: 1.7609 data: 0.8337 max mem: 55573 Epoch: [160] [ 20/156] eta: 0:02:44 lr: 0.003995 min_lr: 0.003995 loss: 3.5244 (3.4634) weight_decay: 0.0500 (0.0500) time: 0.5978 data: 0.0004 max mem: 55573 Epoch: [160] [ 30/156] eta: 0:02:06 lr: 0.003992 min_lr: 0.003992 loss: 3.3883 (3.3349) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0004 max mem: 55573 Epoch: [160] [ 40/156] eta: 0:01:45 lr: 0.003989 min_lr: 0.003989 loss: 3.4317 (3.3718) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [160] [ 50/156] eta: 0:01:29 lr: 0.003986 min_lr: 0.003986 loss: 3.4903 (3.3712) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [160] [ 60/156] eta: 0:01:17 lr: 0.003983 min_lr: 0.003983 loss: 3.4972 (3.3430) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [160] [ 70/156] eta: 0:01:06 lr: 0.003980 min_lr: 0.003980 loss: 3.5297 (3.3495) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [160] [ 80/156] eta: 0:00:56 lr: 0.003977 min_lr: 0.003977 loss: 3.5297 (3.3577) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0005 max mem: 55573 Epoch: [160] [ 90/156] eta: 0:00:48 lr: 0.003975 min_lr: 0.003975 loss: 3.4328 (3.3500) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [160] [100/156] eta: 0:00:40 lr: 0.003972 min_lr: 0.003972 loss: 3.1997 (3.3174) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [160] [110/156] eta: 0:00:32 lr: 0.003969 min_lr: 0.003969 loss: 3.0763 (3.3102) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [160] [120/156] eta: 0:00:25 lr: 0.003966 min_lr: 0.003966 loss: 3.3773 (3.3154) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [160] [130/156] eta: 0:00:17 lr: 0.003963 min_lr: 0.003963 loss: 3.5036 (3.3257) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0010 max mem: 55573 Epoch: [160] [140/156] eta: 0:00:10 lr: 0.003960 min_lr: 0.003960 loss: 3.5036 (3.3313) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0009 max mem: 55573 Epoch: [160] [150/156] eta: 0:00:04 lr: 0.003957 min_lr: 0.003957 loss: 3.4223 (3.3193) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [160] [155/156] eta: 0:00:00 lr: 0.003956 min_lr: 0.003956 loss: 3.0646 (3.3090) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [160] Total time: 0:01:45 (0.6792 s / it) Averaged stats: lr: 0.003956 min_lr: 0.003956 loss: 3.0646 (3.3241) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9448 (0.9448) acc1: 78.2552 (78.2552) acc5: 95.5729 (95.5729) time: 6.9733 data: 6.7360 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0401 (1.0908) acc1: 78.2552 (74.6560) acc5: 94.2708 (92.7360) time: 1.5410 data: 1.3473 max mem: 55573 Test: Total time: 0:00:07 (1.5785 s / it) * Acc@1 75.876 Acc@5 93.330 loss 1.080 Accuracy of the model on the 50000 test images: 75.9% Max accuracy: 75.88% Test: [0/5] eta: 0:00:34 loss: 1.4334 (1.4334) acc1: 64.9740 (64.9740) acc5: 86.0677 (86.0677) time: 6.9208 data: 6.6848 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4766 (1.4900) acc1: 62.7604 (60.5760) acc5: 86.0677 (83.3280) time: 1.5300 data: 1.3371 max mem: 55573 Test: Total time: 0:00:07 (1.5497 s / it) * Acc@1 60.200 Acc@5 83.834 loss 1.549 Accuracy of the model EMA on 50000 test images: 60.2% Max EMA accuracy: 60.20% Epoch: [161] [ 0/156] eta: 0:32:07 lr: 0.003956 min_lr: 0.003956 loss: 2.6711 (2.6711) weight_decay: 0.0500 (0.0500) time: 12.3526 data: 8.5741 max mem: 55573 Epoch: [161] [ 10/156] eta: 0:04:06 lr: 0.003953 min_lr: 0.003953 loss: 3.1873 (3.0974) weight_decay: 0.0500 (0.0500) time: 1.6909 data: 0.7799 max mem: 55573 Epoch: [161] [ 20/156] eta: 0:02:38 lr: 0.003950 min_lr: 0.003950 loss: 3.2669 (3.1645) weight_decay: 0.0500 (0.0500) time: 0.6079 data: 0.0004 max mem: 55573 Epoch: [161] [ 30/156] eta: 0:02:03 lr: 0.003947 min_lr: 0.003947 loss: 3.2669 (3.1963) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [161] [ 40/156] eta: 0:01:42 lr: 0.003944 min_lr: 0.003944 loss: 3.2352 (3.2159) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [161] [ 50/156] eta: 0:01:27 lr: 0.003941 min_lr: 0.003941 loss: 3.4446 (3.2667) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [161] [ 60/156] eta: 0:01:15 lr: 0.003938 min_lr: 0.003938 loss: 3.5001 (3.2763) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [161] [ 70/156] eta: 0:01:05 lr: 0.003935 min_lr: 0.003935 loss: 3.5107 (3.2474) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [161] [ 80/156] eta: 0:00:56 lr: 0.003933 min_lr: 0.003933 loss: 3.4039 (3.2641) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [161] [ 90/156] eta: 0:00:47 lr: 0.003930 min_lr: 0.003930 loss: 3.4054 (3.2963) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [161] [100/156] eta: 0:00:39 lr: 0.003927 min_lr: 0.003927 loss: 3.5276 (3.3260) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [161] [110/156] eta: 0:00:32 lr: 0.003924 min_lr: 0.003924 loss: 3.5391 (3.3458) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [161] [120/156] eta: 0:00:24 lr: 0.003921 min_lr: 0.003921 loss: 3.5100 (3.3564) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [161] [130/156] eta: 0:00:17 lr: 0.003918 min_lr: 0.003918 loss: 3.4268 (3.3451) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0011 max mem: 55573 Epoch: [161] [140/156] eta: 0:00:10 lr: 0.003915 min_lr: 0.003915 loss: 3.4268 (3.3530) weight_decay: 0.0500 (0.0500) time: 0.5872 data: 0.0010 max mem: 55573 Epoch: [161] [150/156] eta: 0:00:04 lr: 0.003912 min_lr: 0.003912 loss: 3.3726 (3.3384) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [161] [155/156] eta: 0:00:00 lr: 0.003911 min_lr: 0.003911 loss: 3.3384 (3.3350) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [161] Total time: 0:01:45 (0.6740 s / it) Averaged stats: lr: 0.003911 min_lr: 0.003911 loss: 3.3384 (3.3634) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9383 (0.9383) acc1: 79.5573 (79.5573) acc5: 95.3125 (95.3125) time: 6.9889 data: 6.7513 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0335 (1.0735) acc1: 76.8229 (74.6560) acc5: 95.1823 (92.9600) time: 1.5442 data: 1.3504 max mem: 55573 Test: Total time: 0:00:07 (1.5887 s / it) * Acc@1 75.428 Acc@5 92.782 loss 1.067 Accuracy of the model on the 50000 test images: 75.4% Max accuracy: 75.88% Test: [0/5] eta: 0:00:35 loss: 1.4166 (1.4166) acc1: 65.3646 (65.3646) acc5: 86.3281 (86.3281) time: 7.1590 data: 6.9229 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4602 (1.4726) acc1: 63.1510 (61.0560) acc5: 86.3281 (83.7120) time: 1.5771 data: 1.3847 max mem: 55573 Test: Total time: 0:00:07 (1.5942 s / it) * Acc@1 60.620 Acc@5 84.126 loss 1.530 Accuracy of the model EMA on 50000 test images: 60.6% Max EMA accuracy: 60.62% Epoch: [162] [ 0/156] eta: 0:34:56 lr: 0.003911 min_lr: 0.003911 loss: 3.8140 (3.8140) weight_decay: 0.0500 (0.0500) time: 13.4371 data: 12.8500 max mem: 55573 Epoch: [162] [ 10/156] eta: 0:04:16 lr: 0.003908 min_lr: 0.003908 loss: 3.5238 (3.4496) weight_decay: 0.0500 (0.0500) time: 1.7582 data: 1.1685 max mem: 55573 Epoch: [162] [ 20/156] eta: 0:02:43 lr: 0.003905 min_lr: 0.003905 loss: 3.4695 (3.3913) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [162] [ 30/156] eta: 0:02:06 lr: 0.003902 min_lr: 0.003902 loss: 3.4732 (3.3938) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [162] [ 40/156] eta: 0:01:44 lr: 0.003899 min_lr: 0.003899 loss: 3.3810 (3.3690) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [162] [ 50/156] eta: 0:01:29 lr: 0.003896 min_lr: 0.003896 loss: 3.2104 (3.3414) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [162] [ 60/156] eta: 0:01:17 lr: 0.003894 min_lr: 0.003894 loss: 3.3313 (3.3400) weight_decay: 0.0500 (0.0500) time: 0.5937 data: 0.0005 max mem: 55573 Epoch: [162] [ 70/156] eta: 0:01:06 lr: 0.003891 min_lr: 0.003891 loss: 3.4458 (3.3395) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0004 max mem: 55573 Epoch: [162] [ 80/156] eta: 0:00:56 lr: 0.003888 min_lr: 0.003888 loss: 3.5648 (3.3566) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [162] [ 90/156] eta: 0:00:48 lr: 0.003885 min_lr: 0.003885 loss: 3.5648 (3.3646) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [162] [100/156] eta: 0:00:40 lr: 0.003882 min_lr: 0.003882 loss: 3.5432 (3.3484) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [162] [110/156] eta: 0:00:32 lr: 0.003879 min_lr: 0.003879 loss: 3.2857 (3.3305) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [162] [120/156] eta: 0:00:25 lr: 0.003876 min_lr: 0.003876 loss: 3.2036 (3.3259) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [162] [130/156] eta: 0:00:17 lr: 0.003873 min_lr: 0.003873 loss: 3.4265 (3.3316) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0024 max mem: 55573 Epoch: [162] [140/156] eta: 0:00:10 lr: 0.003871 min_lr: 0.003871 loss: 3.3203 (3.3197) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0023 max mem: 55573 Epoch: [162] [150/156] eta: 0:00:04 lr: 0.003868 min_lr: 0.003868 loss: 3.1625 (3.3047) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [162] [155/156] eta: 0:00:00 lr: 0.003866 min_lr: 0.003866 loss: 3.1625 (3.3090) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [162] Total time: 0:01:45 (0.6787 s / it) Averaged stats: lr: 0.003866 min_lr: 0.003866 loss: 3.1625 (3.3072) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0478 (1.0478) acc1: 78.6458 (78.6458) acc5: 94.7917 (94.7917) time: 6.9062 data: 6.6692 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1178 (1.1167) acc1: 76.6927 (74.3680) acc5: 94.1406 (92.3520) time: 1.5276 data: 1.3339 max mem: 55573 Test: Total time: 0:00:07 (1.5666 s / it) * Acc@1 75.206 Acc@5 92.818 loss 1.115 Accuracy of the model on the 50000 test images: 75.2% Max accuracy: 75.88% Test: [0/5] eta: 0:00:34 loss: 1.4016 (1.4016) acc1: 66.0156 (66.0156) acc5: 86.4583 (86.4583) time: 6.8483 data: 6.6123 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4454 (1.4567) acc1: 64.1927 (61.7600) acc5: 86.4583 (83.9680) time: 1.5150 data: 1.3226 max mem: 55573 Test: Total time: 0:00:07 (1.5329 s / it) * Acc@1 61.050 Acc@5 84.402 loss 1.512 Accuracy of the model EMA on 50000 test images: 61.1% Max EMA accuracy: 61.05% Epoch: [163] [ 0/156] eta: 0:32:00 lr: 0.003866 min_lr: 0.003866 loss: 2.1192 (2.1192) weight_decay: 0.0500 (0.0500) time: 12.3118 data: 8.8474 max mem: 55573 Epoch: [163] [ 10/156] eta: 0:04:12 lr: 0.003863 min_lr: 0.003863 loss: 3.7415 (3.6000) weight_decay: 0.0500 (0.0500) time: 1.7308 data: 0.8047 max mem: 55573 Epoch: [163] [ 20/156] eta: 0:02:41 lr: 0.003860 min_lr: 0.003860 loss: 3.6856 (3.5885) weight_decay: 0.0500 (0.0500) time: 0.6329 data: 0.0004 max mem: 55573 Epoch: [163] [ 30/156] eta: 0:02:05 lr: 0.003857 min_lr: 0.003857 loss: 3.5686 (3.4284) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [163] [ 40/156] eta: 0:01:44 lr: 0.003854 min_lr: 0.003854 loss: 2.9775 (3.3186) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [163] [ 50/156] eta: 0:01:28 lr: 0.003852 min_lr: 0.003852 loss: 3.3290 (3.3289) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [163] [ 60/156] eta: 0:01:16 lr: 0.003849 min_lr: 0.003849 loss: 3.5623 (3.3323) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [163] [ 70/156] eta: 0:01:05 lr: 0.003846 min_lr: 0.003846 loss: 3.4911 (3.2969) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [163] [ 80/156] eta: 0:00:56 lr: 0.003843 min_lr: 0.003843 loss: 3.0311 (3.2777) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [163] [ 90/156] eta: 0:00:48 lr: 0.003840 min_lr: 0.003840 loss: 3.2617 (3.2507) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [163] [100/156] eta: 0:00:40 lr: 0.003837 min_lr: 0.003837 loss: 3.1476 (3.2228) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [163] [110/156] eta: 0:00:32 lr: 0.003834 min_lr: 0.003834 loss: 2.8841 (3.2021) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [163] [120/156] eta: 0:00:24 lr: 0.003831 min_lr: 0.003831 loss: 3.1616 (3.2177) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [163] [130/156] eta: 0:00:17 lr: 0.003829 min_lr: 0.003829 loss: 3.4301 (3.2129) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0011 max mem: 55573 Epoch: [163] [140/156] eta: 0:00:10 lr: 0.003826 min_lr: 0.003826 loss: 3.4973 (3.2310) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0010 max mem: 55573 Epoch: [163] [150/156] eta: 0:00:04 lr: 0.003823 min_lr: 0.003823 loss: 3.4973 (3.2357) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [163] [155/156] eta: 0:00:00 lr: 0.003821 min_lr: 0.003821 loss: 3.4314 (3.2414) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [163] Total time: 0:01:45 (0.6760 s / it) Averaged stats: lr: 0.003821 min_lr: 0.003821 loss: 3.4314 (3.3012) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1205 (1.1205) acc1: 78.3854 (78.3854) acc5: 94.9219 (94.9219) time: 6.8136 data: 6.5763 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1838 (1.1810) acc1: 77.6042 (74.5920) acc5: 94.1406 (93.2160) time: 1.5089 data: 1.3153 max mem: 55573 Test: Total time: 0:00:07 (1.5511 s / it) * Acc@1 75.906 Acc@5 93.150 loss 1.158 Accuracy of the model on the 50000 test images: 75.9% Max accuracy: 75.91% Test: [0/5] eta: 0:00:34 loss: 1.3859 (1.3859) acc1: 66.2760 (66.2760) acc5: 86.5885 (86.5885) time: 6.8137 data: 6.5777 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4304 (1.4407) acc1: 64.4531 (61.9840) acc5: 86.5885 (84.1280) time: 1.5082 data: 1.3157 max mem: 55573 Test: Total time: 0:00:07 (1.5250 s / it) * Acc@1 61.422 Acc@5 84.628 loss 1.494 Accuracy of the model EMA on 50000 test images: 61.4% Max EMA accuracy: 61.42% Epoch: [164] [ 0/156] eta: 0:34:26 lr: 0.003821 min_lr: 0.003821 loss: 2.8559 (2.8559) weight_decay: 0.0500 (0.0500) time: 13.2499 data: 11.8291 max mem: 55573 Epoch: [164] [ 10/156] eta: 0:04:19 lr: 0.003818 min_lr: 0.003818 loss: 3.5464 (3.4248) weight_decay: 0.0500 (0.0500) time: 1.7771 data: 1.0757 max mem: 55573 Epoch: [164] [ 20/156] eta: 0:02:44 lr: 0.003815 min_lr: 0.003815 loss: 3.3421 (3.2783) weight_decay: 0.0500 (0.0500) time: 0.6107 data: 0.0005 max mem: 55573 Epoch: [164] [ 30/156] eta: 0:02:07 lr: 0.003812 min_lr: 0.003812 loss: 3.4328 (3.3957) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [164] [ 40/156] eta: 0:01:45 lr: 0.003810 min_lr: 0.003810 loss: 3.5066 (3.3717) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [164] [ 50/156] eta: 0:01:29 lr: 0.003807 min_lr: 0.003807 loss: 3.4769 (3.3625) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [164] [ 60/156] eta: 0:01:17 lr: 0.003804 min_lr: 0.003804 loss: 3.4287 (3.3277) weight_decay: 0.0500 (0.0500) time: 0.5958 data: 0.0004 max mem: 55573 Epoch: [164] [ 70/156] eta: 0:01:06 lr: 0.003801 min_lr: 0.003801 loss: 3.4287 (3.3302) weight_decay: 0.0500 (0.0500) time: 0.5953 data: 0.0004 max mem: 55573 Epoch: [164] [ 80/156] eta: 0:00:57 lr: 0.003798 min_lr: 0.003798 loss: 3.4912 (3.3233) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [164] [ 90/156] eta: 0:00:48 lr: 0.003795 min_lr: 0.003795 loss: 3.4125 (3.3076) weight_decay: 0.0500 (0.0500) time: 0.5965 data: 0.0004 max mem: 55573 Epoch: [164] [100/156] eta: 0:00:40 lr: 0.003792 min_lr: 0.003792 loss: 3.0624 (3.2868) weight_decay: 0.0500 (0.0500) time: 0.5966 data: 0.0004 max mem: 55573 Epoch: [164] [110/156] eta: 0:00:32 lr: 0.003789 min_lr: 0.003789 loss: 3.0624 (3.2717) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [164] [120/156] eta: 0:00:25 lr: 0.003787 min_lr: 0.003787 loss: 3.2079 (3.2782) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [164] [130/156] eta: 0:00:18 lr: 0.003784 min_lr: 0.003784 loss: 3.4750 (3.2884) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0010 max mem: 55573 Epoch: [164] [140/156] eta: 0:00:10 lr: 0.003781 min_lr: 0.003781 loss: 3.4750 (3.2996) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0009 max mem: 55573 Epoch: [164] [150/156] eta: 0:00:04 lr: 0.003778 min_lr: 0.003778 loss: 3.7124 (3.3080) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [164] [155/156] eta: 0:00:00 lr: 0.003777 min_lr: 0.003777 loss: 3.4169 (3.3085) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [164] Total time: 0:01:46 (0.6818 s / it) Averaged stats: lr: 0.003777 min_lr: 0.003777 loss: 3.4169 (3.3138) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.1630 (1.1630) acc1: 77.2135 (77.2135) acc5: 93.6198 (93.6198) time: 6.7671 data: 6.5297 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1630 (1.2110) acc1: 77.2135 (74.4000) acc5: 93.6198 (92.5440) time: 1.4999 data: 1.3060 max mem: 55573 Test: Total time: 0:00:07 (1.5311 s / it) * Acc@1 75.404 Acc@5 92.938 loss 1.181 Accuracy of the model on the 50000 test images: 75.4% Max accuracy: 75.91% Test: [0/5] eta: 0:00:33 loss: 1.3707 (1.3707) acc1: 66.4062 (66.4062) acc5: 86.7188 (86.7188) time: 6.7900 data: 6.5540 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4154 (1.4247) acc1: 64.7135 (62.0800) acc5: 86.7188 (84.3840) time: 1.5033 data: 1.3109 max mem: 55573 Test: Total time: 0:00:07 (1.5197 s / it) * Acc@1 61.770 Acc@5 84.874 loss 1.477 Accuracy of the model EMA on 50000 test images: 61.8% Max EMA accuracy: 61.77% Epoch: [165] [ 0/156] eta: 0:35:57 lr: 0.003776 min_lr: 0.003776 loss: 3.4557 (3.4557) weight_decay: 0.0500 (0.0500) time: 13.8305 data: 10.6897 max mem: 55573 Epoch: [165] [ 10/156] eta: 0:04:22 lr: 0.003773 min_lr: 0.003773 loss: 3.2027 (3.1556) weight_decay: 0.0500 (0.0500) time: 1.8003 data: 0.9721 max mem: 55573 Epoch: [165] [ 20/156] eta: 0:02:46 lr: 0.003771 min_lr: 0.003771 loss: 3.5290 (3.2980) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0004 max mem: 55573 Epoch: [165] [ 30/156] eta: 0:02:08 lr: 0.003768 min_lr: 0.003768 loss: 3.4120 (3.1918) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [165] [ 40/156] eta: 0:01:46 lr: 0.003765 min_lr: 0.003765 loss: 3.0809 (3.1801) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [165] [ 50/156] eta: 0:01:30 lr: 0.003762 min_lr: 0.003762 loss: 3.1490 (3.2004) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [165] [ 60/156] eta: 0:01:17 lr: 0.003759 min_lr: 0.003759 loss: 3.2569 (3.2258) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [165] [ 70/156] eta: 0:01:06 lr: 0.003756 min_lr: 0.003756 loss: 3.4256 (3.2574) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [165] [ 80/156] eta: 0:00:57 lr: 0.003753 min_lr: 0.003753 loss: 3.3895 (3.2572) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [165] [ 90/156] eta: 0:00:48 lr: 0.003750 min_lr: 0.003750 loss: 3.2368 (3.2575) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [165] [100/156] eta: 0:00:40 lr: 0.003748 min_lr: 0.003748 loss: 3.2211 (3.2623) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [165] [110/156] eta: 0:00:32 lr: 0.003745 min_lr: 0.003745 loss: 3.4596 (3.2880) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [165] [120/156] eta: 0:00:25 lr: 0.003742 min_lr: 0.003742 loss: 3.5647 (3.2960) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [165] [130/156] eta: 0:00:17 lr: 0.003739 min_lr: 0.003739 loss: 3.5647 (3.3111) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0010 max mem: 55573 Epoch: [165] [140/156] eta: 0:00:10 lr: 0.003736 min_lr: 0.003736 loss: 3.4354 (3.2917) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.0009 max mem: 55573 Epoch: [165] [150/156] eta: 0:00:04 lr: 0.003733 min_lr: 0.003733 loss: 3.0079 (3.2695) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [165] [155/156] eta: 0:00:00 lr: 0.003732 min_lr: 0.003732 loss: 3.0079 (3.2754) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [165] Total time: 0:01:46 (0.6813 s / it) Averaged stats: lr: 0.003732 min_lr: 0.003732 loss: 3.0079 (3.2913) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.0191 (1.0191) acc1: 78.3854 (78.3854) acc5: 94.9219 (94.9219) time: 6.7478 data: 6.5105 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0437 (1.1355) acc1: 78.3854 (75.2960) acc5: 94.9219 (93.1200) time: 1.4960 data: 1.3022 max mem: 55573 Test: Total time: 0:00:07 (1.5325 s / it) * Acc@1 75.798 Acc@5 93.234 loss 1.134 Accuracy of the model on the 50000 test images: 75.8% Max accuracy: 75.91% Test: [0/5] eta: 0:00:34 loss: 1.3555 (1.3555) acc1: 66.9271 (66.9271) acc5: 86.7188 (86.7188) time: 6.8724 data: 6.6364 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.4010 (1.4096) acc1: 65.1042 (62.4000) acc5: 86.7188 (84.4800) time: 1.5199 data: 1.3274 max mem: 55573 Test: Total time: 0:00:07 (1.5385 s / it) * Acc@1 62.090 Acc@5 85.106 loss 1.460 Accuracy of the model EMA on 50000 test images: 62.1% Max EMA accuracy: 62.09% Epoch: [166] [ 0/156] eta: 0:34:16 lr: 0.003731 min_lr: 0.003731 loss: 3.6388 (3.6388) weight_decay: 0.0500 (0.0500) time: 13.1825 data: 8.2911 max mem: 55573 Epoch: [166] [ 10/156] eta: 0:04:15 lr: 0.003729 min_lr: 0.003729 loss: 3.0961 (3.0583) weight_decay: 0.0500 (0.0500) time: 1.7481 data: 0.7541 max mem: 55573 Epoch: [166] [ 20/156] eta: 0:02:42 lr: 0.003726 min_lr: 0.003726 loss: 3.2052 (3.1767) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.0004 max mem: 55573 Epoch: [166] [ 30/156] eta: 0:02:06 lr: 0.003723 min_lr: 0.003723 loss: 3.3370 (3.2145) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [166] [ 40/156] eta: 0:01:44 lr: 0.003720 min_lr: 0.003720 loss: 3.3864 (3.2094) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [166] [ 50/156] eta: 0:01:29 lr: 0.003717 min_lr: 0.003717 loss: 3.4410 (3.2306) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [166] [ 60/156] eta: 0:01:16 lr: 0.003714 min_lr: 0.003714 loss: 3.0939 (3.2019) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [166] [ 70/156] eta: 0:01:06 lr: 0.003711 min_lr: 0.003711 loss: 3.1417 (3.2283) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [166] [ 80/156] eta: 0:00:56 lr: 0.003709 min_lr: 0.003709 loss: 3.4487 (3.2183) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [166] [ 90/156] eta: 0:00:48 lr: 0.003706 min_lr: 0.003706 loss: 3.2349 (3.2040) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0005 max mem: 55573 Epoch: [166] [100/156] eta: 0:00:40 lr: 0.003703 min_lr: 0.003703 loss: 3.1943 (3.1994) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [166] [110/156] eta: 0:00:32 lr: 0.003700 min_lr: 0.003700 loss: 3.3146 (3.2059) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [166] [120/156] eta: 0:00:25 lr: 0.003697 min_lr: 0.003697 loss: 3.3656 (3.2055) weight_decay: 0.0500 (0.0500) time: 0.5883 data: 0.0005 max mem: 55573 Epoch: [166] [130/156] eta: 0:00:17 lr: 0.003694 min_lr: 0.003694 loss: 3.3656 (3.2203) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0015 max mem: 55573 Epoch: [166] [140/156] eta: 0:00:10 lr: 0.003691 min_lr: 0.003691 loss: 3.4753 (3.2266) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0013 max mem: 55573 Epoch: [166] [150/156] eta: 0:00:04 lr: 0.003688 min_lr: 0.003688 loss: 3.3232 (3.2219) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [166] [155/156] eta: 0:00:00 lr: 0.003687 min_lr: 0.003687 loss: 3.2483 (3.2186) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [166] Total time: 0:01:45 (0.6776 s / it) Averaged stats: lr: 0.003687 min_lr: 0.003687 loss: 3.2483 (3.3036) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0476 (1.0476) acc1: 79.2969 (79.2969) acc5: 95.0521 (95.0521) time: 7.1269 data: 6.8895 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1646 (1.1433) acc1: 75.1302 (74.7200) acc5: 94.2708 (92.6720) time: 1.5718 data: 1.3780 max mem: 55573 Test: Total time: 0:00:08 (1.6170 s / it) * Acc@1 75.436 Acc@5 93.096 loss 1.142 Accuracy of the model on the 50000 test images: 75.4% Max accuracy: 75.91% Test: [0/5] eta: 0:00:33 loss: 1.3417 (1.3417) acc1: 67.4479 (67.4479) acc5: 86.8490 (86.8490) time: 6.7971 data: 6.5604 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3878 (1.3958) acc1: 65.3646 (62.6880) acc5: 86.8490 (84.6720) time: 1.5049 data: 1.3124 max mem: 55573 Test: Total time: 0:00:07 (1.5259 s / it) * Acc@1 62.384 Acc@5 85.340 loss 1.445 Accuracy of the model EMA on 50000 test images: 62.4% Max EMA accuracy: 62.38% Epoch: [167] [ 0/156] eta: 0:34:12 lr: 0.003687 min_lr: 0.003687 loss: 3.5207 (3.5207) weight_decay: 0.0500 (0.0500) time: 13.1544 data: 8.6535 max mem: 55573 Epoch: [167] [ 10/156] eta: 0:04:14 lr: 0.003684 min_lr: 0.003684 loss: 3.5207 (3.2422) weight_decay: 0.0500 (0.0500) time: 1.7410 data: 0.7872 max mem: 55573 Epoch: [167] [ 20/156] eta: 0:02:42 lr: 0.003681 min_lr: 0.003681 loss: 3.3930 (3.1998) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.0005 max mem: 55573 Epoch: [167] [ 30/156] eta: 0:02:05 lr: 0.003678 min_lr: 0.003678 loss: 3.3930 (3.2360) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [167] [ 40/156] eta: 0:01:44 lr: 0.003675 min_lr: 0.003675 loss: 3.3745 (3.2420) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [167] [ 50/156] eta: 0:01:28 lr: 0.003672 min_lr: 0.003672 loss: 3.3080 (3.2379) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [167] [ 60/156] eta: 0:01:16 lr: 0.003669 min_lr: 0.003669 loss: 3.4001 (3.2687) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [167] [ 70/156] eta: 0:01:06 lr: 0.003667 min_lr: 0.003667 loss: 3.3229 (3.2327) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [167] [ 80/156] eta: 0:00:56 lr: 0.003664 min_lr: 0.003664 loss: 3.0303 (3.2424) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [167] [ 90/156] eta: 0:00:48 lr: 0.003661 min_lr: 0.003661 loss: 3.1970 (3.2441) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [167] [100/156] eta: 0:00:40 lr: 0.003658 min_lr: 0.003658 loss: 3.2448 (3.2492) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [167] [110/156] eta: 0:00:32 lr: 0.003655 min_lr: 0.003655 loss: 3.2448 (3.2275) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [167] [120/156] eta: 0:00:25 lr: 0.003652 min_lr: 0.003652 loss: 3.5295 (3.2504) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [167] [130/156] eta: 0:00:17 lr: 0.003649 min_lr: 0.003649 loss: 3.5810 (3.2580) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0010 max mem: 55573 Epoch: [167] [140/156] eta: 0:00:10 lr: 0.003647 min_lr: 0.003647 loss: 3.4335 (3.2632) weight_decay: 0.0500 (0.0500) time: 0.5850 data: 0.0009 max mem: 55573 Epoch: [167] [150/156] eta: 0:00:04 lr: 0.003644 min_lr: 0.003644 loss: 3.4335 (3.2621) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [167] [155/156] eta: 0:00:00 lr: 0.003642 min_lr: 0.003642 loss: 3.2748 (3.2584) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [167] Total time: 0:01:45 (0.6775 s / it) Averaged stats: lr: 0.003642 min_lr: 0.003642 loss: 3.2748 (3.3016) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0242 (1.0242) acc1: 79.4271 (79.4271) acc5: 95.9635 (95.9635) time: 7.0074 data: 6.7699 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1097 (1.1288) acc1: 75.3906 (74.0800) acc5: 95.0521 (92.8960) time: 1.5473 data: 1.3541 max mem: 55573 Test: Total time: 0:00:07 (1.5888 s / it) * Acc@1 75.062 Acc@5 92.674 loss 1.126 Accuracy of the model on the 50000 test images: 75.1% Max accuracy: 75.91% Test: [0/5] eta: 0:00:35 loss: 1.3279 (1.3279) acc1: 67.9688 (67.9688) acc5: 86.9792 (86.9792) time: 7.1457 data: 6.9096 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3747 (1.3822) acc1: 65.4948 (62.9760) acc5: 86.9792 (84.8320) time: 1.5745 data: 1.3820 max mem: 55573 Test: Total time: 0:00:07 (1.5966 s / it) * Acc@1 62.688 Acc@5 85.504 loss 1.430 Accuracy of the model EMA on 50000 test images: 62.7% Max EMA accuracy: 62.69% Epoch: [168] [ 0/156] eta: 0:30:08 lr: 0.003642 min_lr: 0.003642 loss: 2.9821 (2.9821) weight_decay: 0.0500 (0.0500) time: 11.5900 data: 9.0171 max mem: 55573 Epoch: [168] [ 10/156] eta: 0:04:20 lr: 0.003639 min_lr: 0.003639 loss: 3.4381 (3.3178) weight_decay: 0.0500 (0.0500) time: 1.7866 data: 0.9894 max mem: 55573 Epoch: [168] [ 20/156] eta: 0:02:45 lr: 0.003636 min_lr: 0.003636 loss: 3.3657 (3.2722) weight_decay: 0.0500 (0.0500) time: 0.6981 data: 0.0935 max mem: 55573 Epoch: [168] [ 30/156] eta: 0:02:07 lr: 0.003633 min_lr: 0.003633 loss: 3.3657 (3.2609) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [168] [ 40/156] eta: 0:01:45 lr: 0.003631 min_lr: 0.003631 loss: 3.4609 (3.2884) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [168] [ 50/156] eta: 0:01:29 lr: 0.003628 min_lr: 0.003628 loss: 3.5119 (3.3326) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [168] [ 60/156] eta: 0:01:17 lr: 0.003625 min_lr: 0.003625 loss: 3.5478 (3.3464) weight_decay: 0.0500 (0.0500) time: 0.5945 data: 0.0005 max mem: 55573 Epoch: [168] [ 70/156] eta: 0:01:06 lr: 0.003622 min_lr: 0.003622 loss: 3.4985 (3.3554) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0005 max mem: 55573 Epoch: [168] [ 80/156] eta: 0:00:57 lr: 0.003619 min_lr: 0.003619 loss: 3.5526 (3.3823) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [168] [ 90/156] eta: 0:00:48 lr: 0.003616 min_lr: 0.003616 loss: 3.4161 (3.3565) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [168] [100/156] eta: 0:00:40 lr: 0.003613 min_lr: 0.003613 loss: 3.3823 (3.3582) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [168] [110/156] eta: 0:00:32 lr: 0.003610 min_lr: 0.003610 loss: 3.2659 (3.3419) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [168] [120/156] eta: 0:00:25 lr: 0.003608 min_lr: 0.003608 loss: 3.3288 (3.3488) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [168] [130/156] eta: 0:00:17 lr: 0.003605 min_lr: 0.003605 loss: 3.4269 (3.3478) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0011 max mem: 55573 Epoch: [168] [140/156] eta: 0:00:10 lr: 0.003602 min_lr: 0.003602 loss: 3.3153 (3.3542) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0009 max mem: 55573 Epoch: [168] [150/156] eta: 0:00:04 lr: 0.003599 min_lr: 0.003599 loss: 3.3153 (3.3404) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [168] [155/156] eta: 0:00:00 lr: 0.003598 min_lr: 0.003598 loss: 3.2990 (3.3431) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [168] Total time: 0:01:46 (0.6804 s / it) Averaged stats: lr: 0.003598 min_lr: 0.003598 loss: 3.2990 (3.2846) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 1.1285 (1.1285) acc1: 77.2135 (77.2135) acc5: 96.0938 (96.0938) time: 7.3436 data: 7.1061 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2146 (1.2592) acc1: 76.5625 (73.9840) acc5: 94.7917 (93.2160) time: 1.6151 data: 1.4213 max mem: 55573 Test: Total time: 0:00:08 (1.6605 s / it) * Acc@1 75.566 Acc@5 93.022 loss 1.262 Accuracy of the model on the 50000 test images: 75.6% Max accuracy: 75.91% Test: [0/5] eta: 0:00:36 loss: 1.3150 (1.3150) acc1: 68.0990 (68.0990) acc5: 87.1094 (87.1094) time: 7.3314 data: 7.0953 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3623 (1.3692) acc1: 65.7552 (63.2000) acc5: 87.1094 (84.9280) time: 1.6116 data: 1.4192 max mem: 55573 Test: Total time: 0:00:08 (1.6273 s / it) * Acc@1 62.948 Acc@5 85.722 loss 1.416 Accuracy of the model EMA on 50000 test images: 62.9% Max EMA accuracy: 62.95% Epoch: [169] [ 0/156] eta: 0:35:52 lr: 0.003597 min_lr: 0.003597 loss: 3.8414 (3.8414) weight_decay: 0.0500 (0.0500) time: 13.8007 data: 10.9082 max mem: 55573 Epoch: [169] [ 10/156] eta: 0:04:23 lr: 0.003594 min_lr: 0.003594 loss: 3.5515 (3.4287) weight_decay: 0.0500 (0.0500) time: 1.8033 data: 0.9920 max mem: 55573 Epoch: [169] [ 20/156] eta: 0:02:46 lr: 0.003592 min_lr: 0.003592 loss: 3.4649 (3.3885) weight_decay: 0.0500 (0.0500) time: 0.5990 data: 0.0004 max mem: 55573 Epoch: [169] [ 30/156] eta: 0:02:08 lr: 0.003589 min_lr: 0.003589 loss: 3.2838 (3.3803) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [169] [ 40/156] eta: 0:01:46 lr: 0.003586 min_lr: 0.003586 loss: 3.2838 (3.3229) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [169] [ 50/156] eta: 0:01:30 lr: 0.003583 min_lr: 0.003583 loss: 3.2997 (3.3169) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [169] [ 60/156] eta: 0:01:17 lr: 0.003580 min_lr: 0.003580 loss: 3.4389 (3.3469) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [169] [ 70/156] eta: 0:01:07 lr: 0.003577 min_lr: 0.003577 loss: 3.4389 (3.3230) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0005 max mem: 55573 Epoch: [169] [ 80/156] eta: 0:00:57 lr: 0.003574 min_lr: 0.003574 loss: 3.3858 (3.3339) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0005 max mem: 55573 Epoch: [169] [ 90/156] eta: 0:00:48 lr: 0.003572 min_lr: 0.003572 loss: 3.4142 (3.3304) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [169] [100/156] eta: 0:00:40 lr: 0.003569 min_lr: 0.003569 loss: 3.4166 (3.3332) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [169] [110/156] eta: 0:00:32 lr: 0.003566 min_lr: 0.003566 loss: 3.3614 (3.3240) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [169] [120/156] eta: 0:00:25 lr: 0.003563 min_lr: 0.003563 loss: 3.2351 (3.3112) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [169] [130/156] eta: 0:00:18 lr: 0.003560 min_lr: 0.003560 loss: 3.1327 (3.2993) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0010 max mem: 55573 Epoch: [169] [140/156] eta: 0:00:10 lr: 0.003557 min_lr: 0.003557 loss: 3.1327 (3.2958) weight_decay: 0.0500 (0.0500) time: 0.5846 data: 0.0009 max mem: 55573 Epoch: [169] [150/156] eta: 0:00:04 lr: 0.003554 min_lr: 0.003554 loss: 3.5203 (3.3017) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [169] [155/156] eta: 0:00:00 lr: 0.003553 min_lr: 0.003553 loss: 3.5203 (3.2990) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [169] Total time: 0:01:46 (0.6824 s / it) Averaged stats: lr: 0.003553 min_lr: 0.003553 loss: 3.5203 (3.2743) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.1328 (1.1328) acc1: 78.5156 (78.5156) acc5: 94.9219 (94.9219) time: 7.0755 data: 6.8379 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2125 (1.2411) acc1: 77.3438 (74.5600) acc5: 94.9219 (93.1520) time: 1.5617 data: 1.3677 max mem: 55573 Test: Total time: 0:00:08 (1.6032 s / it) * Acc@1 75.498 Acc@5 93.046 loss 1.235 Accuracy of the model on the 50000 test images: 75.5% Max accuracy: 75.91% Test: [0/5] eta: 0:00:34 loss: 1.3034 (1.3034) acc1: 68.2292 (68.2292) acc5: 87.3698 (87.3698) time: 6.9257 data: 6.6896 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3510 (1.3573) acc1: 65.6250 (63.2640) acc5: 87.3698 (85.1520) time: 1.5305 data: 1.3380 max mem: 55573 Test: Total time: 0:00:07 (1.5529 s / it) * Acc@1 63.172 Acc@5 85.920 loss 1.404 Accuracy of the model EMA on 50000 test images: 63.2% Max EMA accuracy: 63.17% Epoch: [170] [ 0/156] eta: 0:36:23 lr: 0.003553 min_lr: 0.003553 loss: 3.1506 (3.1506) weight_decay: 0.0500 (0.0500) time: 13.9980 data: 6.9488 max mem: 55573 Epoch: [170] [ 10/156] eta: 0:04:24 lr: 0.003550 min_lr: 0.003550 loss: 3.2551 (3.3371) weight_decay: 0.0500 (0.0500) time: 1.8086 data: 0.6320 max mem: 55573 Epoch: [170] [ 20/156] eta: 0:02:47 lr: 0.003547 min_lr: 0.003547 loss: 3.4668 (3.4467) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [170] [ 30/156] eta: 0:02:08 lr: 0.003544 min_lr: 0.003544 loss: 3.5501 (3.4317) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0007 max mem: 55573 Epoch: [170] [ 40/156] eta: 0:01:46 lr: 0.003541 min_lr: 0.003541 loss: 3.4264 (3.3936) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0007 max mem: 55573 Epoch: [170] [ 50/156] eta: 0:01:30 lr: 0.003538 min_lr: 0.003538 loss: 3.5620 (3.3952) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [170] [ 60/156] eta: 0:01:17 lr: 0.003536 min_lr: 0.003536 loss: 3.3671 (3.3731) weight_decay: 0.0500 (0.0500) time: 0.5937 data: 0.0005 max mem: 55573 Epoch: [170] [ 70/156] eta: 0:01:07 lr: 0.003533 min_lr: 0.003533 loss: 3.3671 (3.3664) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [170] [ 80/156] eta: 0:00:57 lr: 0.003530 min_lr: 0.003530 loss: 3.4728 (3.3488) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [170] [ 90/156] eta: 0:00:48 lr: 0.003527 min_lr: 0.003527 loss: 3.4111 (3.3274) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [170] [100/156] eta: 0:00:40 lr: 0.003524 min_lr: 0.003524 loss: 3.2208 (3.3175) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [170] [110/156] eta: 0:00:32 lr: 0.003521 min_lr: 0.003521 loss: 3.2208 (3.3007) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [170] [120/156] eta: 0:00:25 lr: 0.003518 min_lr: 0.003518 loss: 3.0416 (3.2752) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [170] [130/156] eta: 0:00:18 lr: 0.003516 min_lr: 0.003516 loss: 3.1865 (3.2797) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0010 max mem: 55573 Epoch: [170] [140/156] eta: 0:00:10 lr: 0.003513 min_lr: 0.003513 loss: 3.5606 (3.3036) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0008 max mem: 55573 Epoch: [170] [150/156] eta: 0:00:04 lr: 0.003510 min_lr: 0.003510 loss: 3.6638 (3.3165) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [170] [155/156] eta: 0:00:00 lr: 0.003508 min_lr: 0.003508 loss: 3.5606 (3.3171) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [170] Total time: 0:01:46 (0.6823 s / it) Averaged stats: lr: 0.003508 min_lr: 0.003508 loss: 3.5606 (3.2755) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0318 (1.0318) acc1: 80.7292 (80.7292) acc5: 95.1823 (95.1823) time: 7.1560 data: 6.9187 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1700 (1.2054) acc1: 75.6510 (74.8800) acc5: 94.1406 (92.8000) time: 1.5777 data: 1.3838 max mem: 55573 Test: Total time: 0:00:08 (1.6320 s / it) * Acc@1 75.520 Acc@5 92.826 loss 1.199 Accuracy of the model on the 50000 test images: 75.5% Max accuracy: 75.91% Test: [0/5] eta: 0:00:37 loss: 1.2919 (1.2919) acc1: 68.3594 (68.3594) acc5: 87.6302 (87.6302) time: 7.4454 data: 7.2095 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3401 (1.3457) acc1: 66.1458 (63.5840) acc5: 87.6302 (85.3120) time: 1.6344 data: 1.4420 max mem: 55573 Test: Total time: 0:00:08 (1.6539 s / it) * Acc@1 63.496 Acc@5 86.048 loss 1.391 Accuracy of the model EMA on 50000 test images: 63.5% Max EMA accuracy: 63.50% Epoch: [171] [ 0/156] eta: 0:33:38 lr: 0.003508 min_lr: 0.003508 loss: 2.9451 (2.9451) weight_decay: 0.0500 (0.0500) time: 12.9418 data: 9.2864 max mem: 55573 Epoch: [171] [ 10/156] eta: 0:04:16 lr: 0.003505 min_lr: 0.003505 loss: 3.3314 (3.2869) weight_decay: 0.0500 (0.0500) time: 1.7598 data: 0.8447 max mem: 55573 Epoch: [171] [ 20/156] eta: 0:02:43 lr: 0.003502 min_lr: 0.003502 loss: 3.3698 (3.3240) weight_decay: 0.0500 (0.0500) time: 0.6171 data: 0.0005 max mem: 55573 Epoch: [171] [ 30/156] eta: 0:02:06 lr: 0.003500 min_lr: 0.003500 loss: 3.5589 (3.3892) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [171] [ 40/156] eta: 0:01:45 lr: 0.003497 min_lr: 0.003497 loss: 3.3238 (3.3464) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [171] [ 50/156] eta: 0:01:29 lr: 0.003494 min_lr: 0.003494 loss: 3.3238 (3.3673) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [171] [ 60/156] eta: 0:01:17 lr: 0.003491 min_lr: 0.003491 loss: 3.4305 (3.3387) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [171] [ 70/156] eta: 0:01:06 lr: 0.003488 min_lr: 0.003488 loss: 3.3949 (3.3390) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [171] [ 80/156] eta: 0:00:56 lr: 0.003485 min_lr: 0.003485 loss: 3.2025 (3.3116) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [171] [ 90/156] eta: 0:00:48 lr: 0.003482 min_lr: 0.003482 loss: 3.2025 (3.2985) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [171] [100/156] eta: 0:00:40 lr: 0.003480 min_lr: 0.003480 loss: 3.2891 (3.2947) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [171] [110/156] eta: 0:00:32 lr: 0.003477 min_lr: 0.003477 loss: 3.4487 (3.3126) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [171] [120/156] eta: 0:00:25 lr: 0.003474 min_lr: 0.003474 loss: 3.5013 (3.3207) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [171] [130/156] eta: 0:00:17 lr: 0.003471 min_lr: 0.003471 loss: 3.5013 (3.3330) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0011 max mem: 55573 Epoch: [171] [140/156] eta: 0:00:10 lr: 0.003468 min_lr: 0.003468 loss: 3.3254 (3.3154) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [171] [150/156] eta: 0:00:04 lr: 0.003465 min_lr: 0.003465 loss: 3.4172 (3.3339) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [171] [155/156] eta: 0:00:00 lr: 0.003464 min_lr: 0.003464 loss: 3.4172 (3.3265) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [171] Total time: 0:01:45 (0.6787 s / it) Averaged stats: lr: 0.003464 min_lr: 0.003464 loss: 3.4172 (3.2968) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1004 (1.1004) acc1: 77.3438 (77.3438) acc5: 94.2708 (94.2708) time: 6.9444 data: 6.7071 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1641 (1.1487) acc1: 75.3906 (74.2080) acc5: 94.2708 (92.7360) time: 1.5354 data: 1.3415 max mem: 55573 Test: Total time: 0:00:07 (1.5781 s / it) * Acc@1 75.808 Acc@5 93.198 loss 1.127 Accuracy of the model on the 50000 test images: 75.8% Max accuracy: 75.91% Test: [0/5] eta: 0:00:35 loss: 1.2805 (1.2805) acc1: 68.7500 (68.7500) acc5: 87.8906 (87.8906) time: 7.1946 data: 6.9585 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3287 (1.3342) acc1: 66.2760 (63.7760) acc5: 87.8906 (85.6320) time: 1.5843 data: 1.3918 max mem: 55573 Test: Total time: 0:00:08 (1.6063 s / it) * Acc@1 63.724 Acc@5 86.234 loss 1.378 Accuracy of the model EMA on 50000 test images: 63.7% Max EMA accuracy: 63.72% Epoch: [172] [ 0/156] eta: 0:34:36 lr: 0.003464 min_lr: 0.003464 loss: 1.9981 (1.9981) weight_decay: 0.0500 (0.0500) time: 13.3121 data: 10.6206 max mem: 55573 Epoch: [172] [ 10/156] eta: 0:04:16 lr: 0.003461 min_lr: 0.003461 loss: 3.0789 (3.0152) weight_decay: 0.0500 (0.0500) time: 1.7582 data: 0.9659 max mem: 55573 Epoch: [172] [ 20/156] eta: 0:02:43 lr: 0.003458 min_lr: 0.003458 loss: 3.3347 (3.2198) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.0004 max mem: 55573 Epoch: [172] [ 30/156] eta: 0:02:06 lr: 0.003455 min_lr: 0.003455 loss: 3.4907 (3.3058) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [172] [ 40/156] eta: 0:01:44 lr: 0.003452 min_lr: 0.003452 loss: 3.5233 (3.3276) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [172] [ 50/156] eta: 0:01:29 lr: 0.003449 min_lr: 0.003449 loss: 3.4476 (3.3417) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [172] [ 60/156] eta: 0:01:16 lr: 0.003447 min_lr: 0.003447 loss: 3.3819 (3.2983) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [172] [ 70/156] eta: 0:01:06 lr: 0.003444 min_lr: 0.003444 loss: 3.3731 (3.3269) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [172] [ 80/156] eta: 0:00:56 lr: 0.003441 min_lr: 0.003441 loss: 3.4036 (3.3169) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [172] [ 90/156] eta: 0:00:48 lr: 0.003438 min_lr: 0.003438 loss: 3.4426 (3.3204) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [172] [100/156] eta: 0:00:40 lr: 0.003435 min_lr: 0.003435 loss: 3.4426 (3.3029) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [172] [110/156] eta: 0:00:32 lr: 0.003432 min_lr: 0.003432 loss: 3.2991 (3.3034) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [172] [120/156] eta: 0:00:25 lr: 0.003429 min_lr: 0.003429 loss: 3.4305 (3.3150) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [172] [130/156] eta: 0:00:17 lr: 0.003427 min_lr: 0.003427 loss: 3.5930 (3.3192) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0011 max mem: 55573 Epoch: [172] [140/156] eta: 0:00:10 lr: 0.003424 min_lr: 0.003424 loss: 3.4827 (3.3188) weight_decay: 0.0500 (0.0500) time: 0.5854 data: 0.0009 max mem: 55573 Epoch: [172] [150/156] eta: 0:00:04 lr: 0.003421 min_lr: 0.003421 loss: 3.2962 (3.3084) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [172] [155/156] eta: 0:00:00 lr: 0.003419 min_lr: 0.003419 loss: 3.2962 (3.3137) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [172] Total time: 0:01:45 (0.6783 s / it) Averaged stats: lr: 0.003419 min_lr: 0.003419 loss: 3.2962 (3.2792) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.0943 (1.0943) acc1: 79.1667 (79.1667) acc5: 95.8333 (95.8333) time: 6.6853 data: 6.4481 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2018 (1.2069) acc1: 77.4740 (75.5200) acc5: 95.4427 (93.5040) time: 1.4836 data: 1.2897 max mem: 55573 Test: Total time: 0:00:07 (1.5302 s / it) * Acc@1 75.894 Acc@5 93.172 loss 1.208 Accuracy of the model on the 50000 test images: 75.9% Max accuracy: 75.91% Test: [0/5] eta: 0:00:35 loss: 1.2695 (1.2695) acc1: 69.2708 (69.2708) acc5: 88.4115 (88.4115) time: 7.0569 data: 6.8209 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3176 (1.3228) acc1: 66.5365 (64.0640) acc5: 88.4115 (85.7920) time: 1.5567 data: 1.3643 max mem: 55573 Test: Total time: 0:00:07 (1.5717 s / it) * Acc@1 64.024 Acc@5 86.426 loss 1.366 Accuracy of the model EMA on 50000 test images: 64.0% Max EMA accuracy: 64.02% Epoch: [173] [ 0/156] eta: 0:33:39 lr: 0.003419 min_lr: 0.003419 loss: 2.9573 (2.9573) weight_decay: 0.0500 (0.0500) time: 12.9480 data: 11.8584 max mem: 55573 Epoch: [173] [ 10/156] eta: 0:04:10 lr: 0.003416 min_lr: 0.003416 loss: 3.3653 (3.3091) weight_decay: 0.0500 (0.0500) time: 1.7191 data: 1.0784 max mem: 55573 Epoch: [173] [ 20/156] eta: 0:02:40 lr: 0.003414 min_lr: 0.003414 loss: 3.1539 (3.2005) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [173] [ 30/156] eta: 0:02:04 lr: 0.003411 min_lr: 0.003411 loss: 3.1092 (3.1935) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [173] [ 40/156] eta: 0:01:43 lr: 0.003408 min_lr: 0.003408 loss: 3.3489 (3.2424) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [173] [ 50/156] eta: 0:01:28 lr: 0.003405 min_lr: 0.003405 loss: 3.4181 (3.2194) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [173] [ 60/156] eta: 0:01:16 lr: 0.003402 min_lr: 0.003402 loss: 3.3974 (3.2653) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [173] [ 70/156] eta: 0:01:05 lr: 0.003399 min_lr: 0.003399 loss: 3.3974 (3.2505) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [173] [ 80/156] eta: 0:00:56 lr: 0.003396 min_lr: 0.003396 loss: 3.2467 (3.2520) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [173] [ 90/156] eta: 0:00:47 lr: 0.003394 min_lr: 0.003394 loss: 3.4566 (3.2544) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [173] [100/156] eta: 0:00:39 lr: 0.003391 min_lr: 0.003391 loss: 3.4908 (3.2806) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [173] [110/156] eta: 0:00:32 lr: 0.003388 min_lr: 0.003388 loss: 3.5601 (3.2988) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [173] [120/156] eta: 0:00:24 lr: 0.003385 min_lr: 0.003385 loss: 3.4639 (3.2993) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [173] [130/156] eta: 0:00:17 lr: 0.003382 min_lr: 0.003382 loss: 3.4639 (3.3122) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0010 max mem: 55573 Epoch: [173] [140/156] eta: 0:00:10 lr: 0.003379 min_lr: 0.003379 loss: 3.5360 (3.3220) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0008 max mem: 55573 Epoch: [173] [150/156] eta: 0:00:04 lr: 0.003377 min_lr: 0.003377 loss: 3.5317 (3.3169) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [173] [155/156] eta: 0:00:00 lr: 0.003375 min_lr: 0.003375 loss: 3.4867 (3.3121) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [173] Total time: 0:01:45 (0.6754 s / it) Averaged stats: lr: 0.003375 min_lr: 0.003375 loss: 3.4867 (3.2824) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0118 (1.0118) acc1: 80.0781 (80.0781) acc5: 94.7917 (94.7917) time: 7.1260 data: 6.8885 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1646 (1.1686) acc1: 76.5625 (75.3280) acc5: 94.3396 (92.5760) time: 1.5717 data: 1.3778 max mem: 55573 Test: Total time: 0:00:08 (1.6105 s / it) * Acc@1 76.220 Acc@5 93.182 loss 1.138 Accuracy of the model on the 50000 test images: 76.2% Max accuracy: 76.22% Test: [0/5] eta: 0:00:35 loss: 1.2592 (1.2592) acc1: 69.1406 (69.1406) acc5: 88.6719 (88.6719) time: 7.1866 data: 6.9505 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.3073 (1.3125) acc1: 66.6667 (64.2880) acc5: 88.6719 (85.9840) time: 1.5826 data: 1.3902 max mem: 55573 Test: Total time: 0:00:07 (1.6000 s / it) * Acc@1 64.290 Acc@5 86.546 loss 1.355 Accuracy of the model EMA on 50000 test images: 64.3% Max EMA accuracy: 64.29% Epoch: [174] [ 0/156] eta: 0:36:34 lr: 0.003375 min_lr: 0.003375 loss: 3.1354 (3.1354) weight_decay: 0.0500 (0.0500) time: 14.0689 data: 13.4762 max mem: 55573 Epoch: [174] [ 10/156] eta: 0:04:26 lr: 0.003372 min_lr: 0.003372 loss: 3.4660 (3.5060) weight_decay: 0.0500 (0.0500) time: 1.8262 data: 1.2254 max mem: 55573 Epoch: [174] [ 20/156] eta: 0:02:48 lr: 0.003369 min_lr: 0.003369 loss: 3.5441 (3.4649) weight_decay: 0.0500 (0.0500) time: 0.5962 data: 0.0003 max mem: 55573 Epoch: [174] [ 30/156] eta: 0:02:09 lr: 0.003366 min_lr: 0.003366 loss: 3.3503 (3.3885) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [174] [ 40/156] eta: 0:01:47 lr: 0.003363 min_lr: 0.003363 loss: 3.1823 (3.3401) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [174] [ 50/156] eta: 0:01:30 lr: 0.003361 min_lr: 0.003361 loss: 3.4232 (3.3539) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [174] [ 60/156] eta: 0:01:18 lr: 0.003358 min_lr: 0.003358 loss: 3.4349 (3.3592) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [174] [ 70/156] eta: 0:01:07 lr: 0.003355 min_lr: 0.003355 loss: 3.1996 (3.3145) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [174] [ 80/156] eta: 0:00:57 lr: 0.003352 min_lr: 0.003352 loss: 3.0489 (3.2838) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [174] [ 90/156] eta: 0:00:48 lr: 0.003349 min_lr: 0.003349 loss: 3.0246 (3.2564) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [174] [100/156] eta: 0:00:40 lr: 0.003346 min_lr: 0.003346 loss: 3.0199 (3.2347) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [174] [110/156] eta: 0:00:32 lr: 0.003344 min_lr: 0.003344 loss: 3.3272 (3.2579) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [174] [120/156] eta: 0:00:25 lr: 0.003341 min_lr: 0.003341 loss: 3.5025 (3.2795) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [174] [130/156] eta: 0:00:18 lr: 0.003338 min_lr: 0.003338 loss: 3.4988 (3.2813) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0012 max mem: 55573 Epoch: [174] [140/156] eta: 0:00:10 lr: 0.003335 min_lr: 0.003335 loss: 3.2764 (3.2741) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0011 max mem: 55573 Epoch: [174] [150/156] eta: 0:00:04 lr: 0.003332 min_lr: 0.003332 loss: 3.4330 (3.2713) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [174] [155/156] eta: 0:00:00 lr: 0.003331 min_lr: 0.003331 loss: 3.4330 (3.2715) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [174] Total time: 0:01:46 (0.6829 s / it) Averaged stats: lr: 0.003331 min_lr: 0.003331 loss: 3.4330 (3.2832) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9766 (0.9766) acc1: 78.6458 (78.6458) acc5: 95.0521 (95.0521) time: 6.8768 data: 6.6381 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0571 (1.1047) acc1: 77.4740 (74.5280) acc5: 94.6615 (92.7040) time: 1.5221 data: 1.3277 max mem: 55573 Test: Total time: 0:00:07 (1.5687 s / it) * Acc@1 75.816 Acc@5 93.344 loss 1.060 Accuracy of the model on the 50000 test images: 75.8% Max accuracy: 76.22% Test: [0/5] eta: 0:00:36 loss: 1.2497 (1.2497) acc1: 69.1406 (69.1406) acc5: 88.9323 (88.9323) time: 7.2079 data: 6.9719 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2977 (1.3027) acc1: 66.9271 (64.4160) acc5: 88.9323 (86.1120) time: 1.5870 data: 1.3945 max mem: 55573 Test: Total time: 0:00:08 (1.6059 s / it) * Acc@1 64.544 Acc@5 86.674 loss 1.345 Accuracy of the model EMA on 50000 test images: 64.5% Max EMA accuracy: 64.54% Epoch: [175] [ 0/156] eta: 0:34:36 lr: 0.003331 min_lr: 0.003331 loss: 3.1027 (3.1027) weight_decay: 0.0500 (0.0500) time: 13.3134 data: 7.5627 max mem: 55573 Epoch: [175] [ 10/156] eta: 0:04:15 lr: 0.003328 min_lr: 0.003328 loss: 3.1883 (3.1366) weight_decay: 0.0500 (0.0500) time: 1.7513 data: 0.6880 max mem: 55573 Epoch: [175] [ 20/156] eta: 0:02:42 lr: 0.003325 min_lr: 0.003325 loss: 3.3915 (3.2378) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [175] [ 30/156] eta: 0:02:06 lr: 0.003322 min_lr: 0.003322 loss: 3.5003 (3.2963) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [175] [ 40/156] eta: 0:01:44 lr: 0.003319 min_lr: 0.003319 loss: 3.3575 (3.2458) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [175] [ 50/156] eta: 0:01:29 lr: 0.003316 min_lr: 0.003316 loss: 3.1779 (3.2560) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [175] [ 60/156] eta: 0:01:16 lr: 0.003314 min_lr: 0.003314 loss: 3.5103 (3.2500) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [175] [ 70/156] eta: 0:01:06 lr: 0.003311 min_lr: 0.003311 loss: 3.1300 (3.2308) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [175] [ 80/156] eta: 0:00:56 lr: 0.003308 min_lr: 0.003308 loss: 3.0766 (3.2538) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [175] [ 90/156] eta: 0:00:48 lr: 0.003305 min_lr: 0.003305 loss: 3.5213 (3.2723) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [175] [100/156] eta: 0:00:40 lr: 0.003302 min_lr: 0.003302 loss: 3.4727 (3.2774) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [175] [110/156] eta: 0:00:32 lr: 0.003299 min_lr: 0.003299 loss: 3.4463 (3.2674) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [175] [120/156] eta: 0:00:25 lr: 0.003297 min_lr: 0.003297 loss: 3.4256 (3.2642) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [175] [130/156] eta: 0:00:17 lr: 0.003294 min_lr: 0.003294 loss: 3.3418 (3.2566) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0010 max mem: 55573 Epoch: [175] [140/156] eta: 0:00:10 lr: 0.003291 min_lr: 0.003291 loss: 3.2226 (3.2663) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0009 max mem: 55573 Epoch: [175] [150/156] eta: 0:00:04 lr: 0.003288 min_lr: 0.003288 loss: 3.3414 (3.2593) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [175] [155/156] eta: 0:00:00 lr: 0.003287 min_lr: 0.003287 loss: 3.4696 (3.2616) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [175] Total time: 0:01:45 (0.6774 s / it) Averaged stats: lr: 0.003287 min_lr: 0.003287 loss: 3.4696 (3.2734) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.0584 (1.0584) acc1: 80.4688 (80.4688) acc5: 94.7917 (94.7917) time: 6.7403 data: 6.5015 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1665 (1.1906) acc1: 76.6927 (75.3280) acc5: 92.4528 (93.3760) time: 1.4946 data: 1.3004 max mem: 55573 Test: Total time: 0:00:07 (1.5343 s / it) * Acc@1 76.074 Acc@5 93.350 loss 1.179 Accuracy of the model on the 50000 test images: 76.1% Max accuracy: 76.22% Test: [0/5] eta: 0:00:33 loss: 1.2408 (1.2408) acc1: 69.7917 (69.7917) acc5: 88.9323 (88.9323) time: 6.7549 data: 6.5188 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2889 (1.2935) acc1: 67.0573 (64.7360) acc5: 88.9323 (86.3040) time: 1.5034 data: 1.3110 max mem: 55573 Test: Total time: 0:00:07 (1.5244 s / it) * Acc@1 64.796 Acc@5 86.796 loss 1.335 Accuracy of the model EMA on 50000 test images: 64.8% Max EMA accuracy: 64.80% Epoch: [176] [ 0/156] eta: 0:30:50 lr: 0.003286 min_lr: 0.003286 loss: 2.2741 (2.2741) weight_decay: 0.0500 (0.0500) time: 11.8596 data: 11.2657 max mem: 55573 Epoch: [176] [ 10/156] eta: 0:04:17 lr: 0.003284 min_lr: 0.003284 loss: 2.8888 (2.9603) weight_decay: 0.0500 (0.0500) time: 1.7665 data: 1.0245 max mem: 55573 Epoch: [176] [ 20/156] eta: 0:02:44 lr: 0.003281 min_lr: 0.003281 loss: 3.2091 (3.0977) weight_decay: 0.0500 (0.0500) time: 0.6735 data: 0.0004 max mem: 55573 Epoch: [176] [ 30/156] eta: 0:02:06 lr: 0.003278 min_lr: 0.003278 loss: 3.3040 (3.1744) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [176] [ 40/156] eta: 0:01:45 lr: 0.003275 min_lr: 0.003275 loss: 3.5474 (3.2061) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [176] [ 50/156] eta: 0:01:29 lr: 0.003272 min_lr: 0.003272 loss: 3.3302 (3.2094) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [176] [ 60/156] eta: 0:01:17 lr: 0.003269 min_lr: 0.003269 loss: 3.2380 (3.1904) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [176] [ 70/156] eta: 0:01:06 lr: 0.003267 min_lr: 0.003267 loss: 3.4476 (3.2058) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [176] [ 80/156] eta: 0:00:57 lr: 0.003264 min_lr: 0.003264 loss: 3.4541 (3.2250) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [176] [ 90/156] eta: 0:00:48 lr: 0.003261 min_lr: 0.003261 loss: 3.5296 (3.2374) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [176] [100/156] eta: 0:00:40 lr: 0.003258 min_lr: 0.003258 loss: 3.5583 (3.2590) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [176] [110/156] eta: 0:00:32 lr: 0.003255 min_lr: 0.003255 loss: 3.4813 (3.2486) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [176] [120/156] eta: 0:00:25 lr: 0.003252 min_lr: 0.003252 loss: 3.1747 (3.2366) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [176] [130/156] eta: 0:00:17 lr: 0.003250 min_lr: 0.003250 loss: 3.2247 (3.2421) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0010 max mem: 55573 Epoch: [176] [140/156] eta: 0:00:10 lr: 0.003247 min_lr: 0.003247 loss: 3.2247 (3.2356) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0008 max mem: 55573 Epoch: [176] [150/156] eta: 0:00:04 lr: 0.003244 min_lr: 0.003244 loss: 3.2452 (3.2419) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [176] [155/156] eta: 0:00:00 lr: 0.003243 min_lr: 0.003243 loss: 3.4042 (3.2463) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [176] Total time: 0:01:45 (0.6788 s / it) Averaged stats: lr: 0.003243 min_lr: 0.003243 loss: 3.4042 (3.2728) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0005 (1.0005) acc1: 79.8177 (79.8177) acc5: 95.4427 (95.4427) time: 6.9840 data: 6.7461 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0924 (1.1486) acc1: 77.0833 (74.8160) acc5: 94.7917 (93.0880) time: 1.5434 data: 1.3493 max mem: 55573 Test: Total time: 0:00:07 (1.5847 s / it) * Acc@1 76.128 Acc@5 93.500 loss 1.134 Accuracy of the model on the 50000 test images: 76.1% Max accuracy: 76.22% Test: [0/5] eta: 0:00:34 loss: 1.2325 (1.2325) acc1: 70.0521 (70.0521) acc5: 88.9323 (88.9323) time: 6.9733 data: 6.7371 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2799 (1.2845) acc1: 67.3177 (64.9600) acc5: 88.9323 (86.4000) time: 1.5399 data: 1.3475 max mem: 55573 Test: Total time: 0:00:07 (1.5565 s / it) * Acc@1 65.020 Acc@5 86.944 loss 1.325 Accuracy of the model EMA on 50000 test images: 65.0% Max EMA accuracy: 65.02% Epoch: [177] [ 0/156] eta: 0:36:45 lr: 0.003242 min_lr: 0.003242 loss: 3.3135 (3.3135) weight_decay: 0.0500 (0.0500) time: 14.1375 data: 13.5464 max mem: 55573 Epoch: [177] [ 10/156] eta: 0:04:27 lr: 0.003239 min_lr: 0.003239 loss: 2.9435 (3.0139) weight_decay: 0.0500 (0.0500) time: 1.8296 data: 1.2318 max mem: 55573 Epoch: [177] [ 20/156] eta: 0:02:48 lr: 0.003237 min_lr: 0.003237 loss: 3.1837 (3.1866) weight_decay: 0.0500 (0.0500) time: 0.5946 data: 0.0004 max mem: 55573 Epoch: [177] [ 30/156] eta: 0:02:09 lr: 0.003234 min_lr: 0.003234 loss: 3.2648 (3.1899) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [177] [ 40/156] eta: 0:01:47 lr: 0.003231 min_lr: 0.003231 loss: 3.2025 (3.1833) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [177] [ 50/156] eta: 0:01:30 lr: 0.003228 min_lr: 0.003228 loss: 3.3416 (3.2419) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [177] [ 60/156] eta: 0:01:18 lr: 0.003225 min_lr: 0.003225 loss: 3.5048 (3.2502) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [177] [ 70/156] eta: 0:01:07 lr: 0.003222 min_lr: 0.003222 loss: 3.3541 (3.2173) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [177] [ 80/156] eta: 0:00:57 lr: 0.003220 min_lr: 0.003220 loss: 3.3767 (3.2290) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [177] [ 90/156] eta: 0:00:48 lr: 0.003217 min_lr: 0.003217 loss: 3.4290 (3.2409) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [177] [100/156] eta: 0:00:40 lr: 0.003214 min_lr: 0.003214 loss: 3.3581 (3.2217) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [177] [110/156] eta: 0:00:32 lr: 0.003211 min_lr: 0.003211 loss: 3.2712 (3.2328) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0006 max mem: 55573 Epoch: [177] [120/156] eta: 0:00:25 lr: 0.003208 min_lr: 0.003208 loss: 3.4100 (3.2336) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [177] [130/156] eta: 0:00:18 lr: 0.003206 min_lr: 0.003206 loss: 3.1775 (3.2193) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0010 max mem: 55573 Epoch: [177] [140/156] eta: 0:00:10 lr: 0.003203 min_lr: 0.003203 loss: 3.1551 (3.2200) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0008 max mem: 55573 Epoch: [177] [150/156] eta: 0:00:04 lr: 0.003200 min_lr: 0.003200 loss: 3.2598 (3.2208) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [177] [155/156] eta: 0:00:00 lr: 0.003199 min_lr: 0.003199 loss: 3.0845 (3.2153) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [177] Total time: 0:01:46 (0.6834 s / it) Averaged stats: lr: 0.003199 min_lr: 0.003199 loss: 3.0845 (3.2573) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9392 (0.9392) acc1: 80.4688 (80.4688) acc5: 96.4844 (96.4844) time: 6.9673 data: 6.7299 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0856 (1.0878) acc1: 76.6927 (76.2560) acc5: 94.3396 (93.8560) time: 1.5398 data: 1.3461 max mem: 55573 Test: Total time: 0:00:07 (1.5866 s / it) * Acc@1 76.670 Acc@5 93.562 loss 1.056 Accuracy of the model on the 50000 test images: 76.7% Max accuracy: 76.67% Test: [0/5] eta: 0:00:35 loss: 1.2245 (1.2245) acc1: 70.1823 (70.1823) acc5: 88.8021 (88.8021) time: 7.1398 data: 6.9038 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2715 (1.2759) acc1: 67.4479 (65.1840) acc5: 88.8021 (86.3680) time: 1.5732 data: 1.3809 max mem: 55573 Test: Total time: 0:00:07 (1.5909 s / it) * Acc@1 65.230 Acc@5 87.042 loss 1.316 Accuracy of the model EMA on 50000 test images: 65.2% Max EMA accuracy: 65.23% Epoch: [178] [ 0/156] eta: 0:30:43 lr: 0.003198 min_lr: 0.003198 loss: 3.8978 (3.8978) weight_decay: 0.0500 (0.0500) time: 11.8202 data: 11.1327 max mem: 55573 Epoch: [178] [ 10/156] eta: 0:04:17 lr: 0.003195 min_lr: 0.003195 loss: 3.4391 (3.3445) weight_decay: 0.0500 (0.0500) time: 1.7658 data: 1.1576 max mem: 55573 Epoch: [178] [ 20/156] eta: 0:02:44 lr: 0.003193 min_lr: 0.003193 loss: 3.3267 (3.2882) weight_decay: 0.0500 (0.0500) time: 0.6758 data: 0.0802 max mem: 55573 Epoch: [178] [ 30/156] eta: 0:02:07 lr: 0.003190 min_lr: 0.003190 loss: 3.2584 (3.2715) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [178] [ 40/156] eta: 0:01:45 lr: 0.003187 min_lr: 0.003187 loss: 3.3984 (3.3107) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [178] [ 50/156] eta: 0:01:29 lr: 0.003184 min_lr: 0.003184 loss: 3.4751 (3.3009) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0005 max mem: 55573 Epoch: [178] [ 60/156] eta: 0:01:17 lr: 0.003181 min_lr: 0.003181 loss: 3.4751 (3.3146) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0005 max mem: 55573 Epoch: [178] [ 70/156] eta: 0:01:06 lr: 0.003179 min_lr: 0.003179 loss: 3.4792 (3.3157) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [178] [ 80/156] eta: 0:00:57 lr: 0.003176 min_lr: 0.003176 loss: 3.4922 (3.3336) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [178] [ 90/156] eta: 0:00:48 lr: 0.003173 min_lr: 0.003173 loss: 3.5301 (3.3507) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [178] [100/156] eta: 0:00:40 lr: 0.003170 min_lr: 0.003170 loss: 3.5301 (3.3532) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [178] [110/156] eta: 0:00:32 lr: 0.003167 min_lr: 0.003167 loss: 3.4994 (3.3586) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [178] [120/156] eta: 0:00:25 lr: 0.003164 min_lr: 0.003164 loss: 3.3173 (3.3428) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0005 max mem: 55573 Epoch: [178] [130/156] eta: 0:00:17 lr: 0.003162 min_lr: 0.003162 loss: 3.2322 (3.3241) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0011 max mem: 55573 Epoch: [178] [140/156] eta: 0:00:10 lr: 0.003159 min_lr: 0.003159 loss: 3.1083 (3.3030) weight_decay: 0.0500 (0.0500) time: 0.5850 data: 0.0009 max mem: 55573 Epoch: [178] [150/156] eta: 0:00:04 lr: 0.003156 min_lr: 0.003156 loss: 3.1298 (3.3003) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [178] [155/156] eta: 0:00:00 lr: 0.003155 min_lr: 0.003155 loss: 3.2504 (3.3022) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [178] Total time: 0:01:45 (0.6790 s / it) Averaged stats: lr: 0.003155 min_lr: 0.003155 loss: 3.2504 (3.2503) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0119 (1.0119) acc1: 79.4271 (79.4271) acc5: 96.2240 (96.2240) time: 6.9978 data: 6.7609 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0559 (1.0775) acc1: 78.5156 (76.0960) acc5: 94.3396 (94.0160) time: 1.5459 data: 1.3522 max mem: 55573 Test: Total time: 0:00:07 (1.5887 s / it) * Acc@1 76.692 Acc@5 93.662 loss 1.067 Accuracy of the model on the 50000 test images: 76.7% Max accuracy: 76.69% Test: [0/5] eta: 0:00:34 loss: 1.2167 (1.2167) acc1: 70.3125 (70.3125) acc5: 88.8021 (88.8021) time: 6.8675 data: 6.6313 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2630 (1.2676) acc1: 67.7083 (65.5040) acc5: 88.8021 (86.4640) time: 1.5189 data: 1.3264 max mem: 55573 Test: Total time: 0:00:07 (1.5341 s / it) * Acc@1 65.470 Acc@5 87.162 loss 1.307 Accuracy of the model EMA on 50000 test images: 65.5% Max EMA accuracy: 65.47% Epoch: [179] [ 0/156] eta: 0:35:14 lr: 0.003154 min_lr: 0.003154 loss: 3.7500 (3.7500) weight_decay: 0.0500 (0.0500) time: 13.5523 data: 9.3909 max mem: 55573 Epoch: [179] [ 10/156] eta: 0:04:19 lr: 0.003152 min_lr: 0.003152 loss: 3.2805 (3.2471) weight_decay: 0.0500 (0.0500) time: 1.7768 data: 0.8541 max mem: 55573 Epoch: [179] [ 20/156] eta: 0:02:44 lr: 0.003149 min_lr: 0.003149 loss: 3.1178 (3.1645) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.0004 max mem: 55573 Epoch: [179] [ 30/156] eta: 0:02:07 lr: 0.003146 min_lr: 0.003146 loss: 3.2448 (3.1877) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [179] [ 40/156] eta: 0:01:45 lr: 0.003143 min_lr: 0.003143 loss: 3.3944 (3.2358) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [179] [ 50/156] eta: 0:01:29 lr: 0.003140 min_lr: 0.003140 loss: 3.4239 (3.2637) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [179] [ 60/156] eta: 0:01:17 lr: 0.003137 min_lr: 0.003137 loss: 3.3902 (3.2418) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [179] [ 70/156] eta: 0:01:06 lr: 0.003135 min_lr: 0.003135 loss: 3.1629 (3.2435) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [179] [ 80/156] eta: 0:00:57 lr: 0.003132 min_lr: 0.003132 loss: 3.2138 (3.2530) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [179] [ 90/156] eta: 0:00:48 lr: 0.003129 min_lr: 0.003129 loss: 3.3655 (3.2405) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [179] [100/156] eta: 0:00:40 lr: 0.003126 min_lr: 0.003126 loss: 3.3510 (3.2380) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [179] [110/156] eta: 0:00:32 lr: 0.003123 min_lr: 0.003123 loss: 3.3510 (3.2357) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [179] [120/156] eta: 0:00:25 lr: 0.003121 min_lr: 0.003121 loss: 3.3592 (3.2391) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [179] [130/156] eta: 0:00:17 lr: 0.003118 min_lr: 0.003118 loss: 3.3588 (3.2358) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [179] [140/156] eta: 0:00:10 lr: 0.003115 min_lr: 0.003115 loss: 3.2952 (3.2326) weight_decay: 0.0500 (0.0500) time: 0.5848 data: 0.0009 max mem: 55573 Epoch: [179] [150/156] eta: 0:00:04 lr: 0.003112 min_lr: 0.003112 loss: 3.3565 (3.2305) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [179] [155/156] eta: 0:00:00 lr: 0.003111 min_lr: 0.003111 loss: 3.3778 (3.2419) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [179] Total time: 0:01:46 (0.6800 s / it) Averaged stats: lr: 0.003111 min_lr: 0.003111 loss: 3.3778 (3.2505) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.1875 (1.1875) acc1: 79.1667 (79.1667) acc5: 94.5312 (94.5312) time: 6.6622 data: 6.4249 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2378 (1.2913) acc1: 76.9531 (74.4960) acc5: 94.5312 (92.9920) time: 1.5158 data: 1.3230 max mem: 55573 Test: Total time: 0:00:07 (1.5331 s / it) * Acc@1 75.910 Acc@5 93.216 loss 1.293 Accuracy of the model on the 50000 test images: 75.9% Max accuracy: 76.69% Test: [0/5] eta: 0:00:34 loss: 1.2094 (1.2094) acc1: 70.5729 (70.5729) acc5: 89.0625 (89.0625) time: 6.9014 data: 6.6654 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2551 (1.2597) acc1: 67.8385 (65.6640) acc5: 89.0625 (86.6880) time: 1.5256 data: 1.3332 max mem: 55573 Test: Total time: 0:00:07 (1.5468 s / it) * Acc@1 65.654 Acc@5 87.280 loss 1.298 Accuracy of the model EMA on 50000 test images: 65.7% Max EMA accuracy: 65.65% Epoch: [180] [ 0/156] eta: 0:34:33 lr: 0.003111 min_lr: 0.003111 loss: 2.7331 (2.7331) weight_decay: 0.0500 (0.0500) time: 13.2908 data: 12.6910 max mem: 55573 Epoch: [180] [ 10/156] eta: 0:04:14 lr: 0.003108 min_lr: 0.003108 loss: 3.5256 (3.5054) weight_decay: 0.0500 (0.0500) time: 1.7459 data: 1.1541 max mem: 55573 Epoch: [180] [ 20/156] eta: 0:02:42 lr: 0.003105 min_lr: 0.003105 loss: 3.4225 (3.2326) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [180] [ 30/156] eta: 0:02:06 lr: 0.003102 min_lr: 0.003102 loss: 3.1784 (3.2419) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [180] [ 40/156] eta: 0:01:44 lr: 0.003099 min_lr: 0.003099 loss: 3.2303 (3.2277) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [180] [ 50/156] eta: 0:01:29 lr: 0.003097 min_lr: 0.003097 loss: 3.2303 (3.2251) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [180] [ 60/156] eta: 0:01:16 lr: 0.003094 min_lr: 0.003094 loss: 3.0405 (3.1715) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [180] [ 70/156] eta: 0:01:06 lr: 0.003091 min_lr: 0.003091 loss: 3.3871 (3.2284) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [180] [ 80/156] eta: 0:00:56 lr: 0.003088 min_lr: 0.003088 loss: 3.5254 (3.2172) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [180] [ 90/156] eta: 0:00:48 lr: 0.003085 min_lr: 0.003085 loss: 3.2835 (3.2224) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [180] [100/156] eta: 0:00:40 lr: 0.003083 min_lr: 0.003083 loss: 3.3057 (3.2360) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [180] [110/156] eta: 0:00:32 lr: 0.003080 min_lr: 0.003080 loss: 3.3057 (3.2285) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [180] [120/156] eta: 0:00:25 lr: 0.003077 min_lr: 0.003077 loss: 3.2532 (3.2218) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [180] [130/156] eta: 0:00:17 lr: 0.003074 min_lr: 0.003074 loss: 3.3593 (3.2270) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0011 max mem: 55573 Epoch: [180] [140/156] eta: 0:00:10 lr: 0.003071 min_lr: 0.003071 loss: 3.1723 (3.2194) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [180] [150/156] eta: 0:00:04 lr: 0.003069 min_lr: 0.003069 loss: 3.3338 (3.2348) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [180] [155/156] eta: 0:00:00 lr: 0.003067 min_lr: 0.003067 loss: 3.3246 (3.2286) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [180] Total time: 0:01:45 (0.6773 s / it) Averaged stats: lr: 0.003067 min_lr: 0.003067 loss: 3.3246 (3.2505) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.1408 (1.1408) acc1: 80.2083 (80.2083) acc5: 95.5729 (95.5729) time: 6.9384 data: 6.7011 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2563 (1.2629) acc1: 78.5156 (75.9360) acc5: 94.5312 (92.8320) time: 1.5346 data: 1.3410 max mem: 55573 Test: Total time: 0:00:07 (1.5759 s / it) * Acc@1 76.466 Acc@5 93.314 loss 1.258 Accuracy of the model on the 50000 test images: 76.5% Max accuracy: 76.69% Test: [0/5] eta: 0:00:36 loss: 1.2014 (1.2014) acc1: 70.7031 (70.7031) acc5: 89.0625 (89.0625) time: 7.2497 data: 7.0137 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2470 (1.2517) acc1: 67.8385 (65.6640) acc5: 89.0625 (86.7520) time: 1.5955 data: 1.4028 max mem: 55573 Test: Total time: 0:00:08 (1.6208 s / it) * Acc@1 65.812 Acc@5 87.378 loss 1.290 Accuracy of the model EMA on 50000 test images: 65.8% Max EMA accuracy: 65.81% Epoch: [181] [ 0/156] eta: 0:30:43 lr: 0.003067 min_lr: 0.003067 loss: 3.8795 (3.8795) weight_decay: 0.0500 (0.0500) time: 11.8165 data: 8.0116 max mem: 55573 Epoch: [181] [ 10/156] eta: 0:04:25 lr: 0.003064 min_lr: 0.003064 loss: 3.1953 (3.0868) weight_decay: 0.0500 (0.0500) time: 1.8181 data: 0.9163 max mem: 55573 Epoch: [181] [ 20/156] eta: 0:02:47 lr: 0.003061 min_lr: 0.003061 loss: 3.2930 (3.2422) weight_decay: 0.0500 (0.0500) time: 0.7034 data: 0.1036 max mem: 55573 Epoch: [181] [ 30/156] eta: 0:02:09 lr: 0.003058 min_lr: 0.003058 loss: 3.4727 (3.3112) weight_decay: 0.0500 (0.0500) time: 0.5985 data: 0.0005 max mem: 55573 Epoch: [181] [ 40/156] eta: 0:01:47 lr: 0.003056 min_lr: 0.003056 loss: 3.3798 (3.3053) weight_decay: 0.0500 (0.0500) time: 0.6012 data: 0.0005 max mem: 55573 Epoch: [181] [ 50/156] eta: 0:01:31 lr: 0.003053 min_lr: 0.003053 loss: 3.1702 (3.2594) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [181] [ 60/156] eta: 0:01:18 lr: 0.003050 min_lr: 0.003050 loss: 3.2154 (3.2493) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [181] [ 70/156] eta: 0:01:07 lr: 0.003047 min_lr: 0.003047 loss: 3.3312 (3.2508) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [181] [ 80/156] eta: 0:00:57 lr: 0.003044 min_lr: 0.003044 loss: 3.3312 (3.2487) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [181] [ 90/156] eta: 0:00:48 lr: 0.003042 min_lr: 0.003042 loss: 3.3662 (3.2483) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0004 max mem: 55573 Epoch: [181] [100/156] eta: 0:00:40 lr: 0.003039 min_lr: 0.003039 loss: 3.3909 (3.2391) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [181] [110/156] eta: 0:00:32 lr: 0.003036 min_lr: 0.003036 loss: 3.2581 (3.2302) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [181] [120/156] eta: 0:00:25 lr: 0.003033 min_lr: 0.003033 loss: 3.3226 (3.2322) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [181] [130/156] eta: 0:00:18 lr: 0.003031 min_lr: 0.003031 loss: 3.3870 (3.2419) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0013 max mem: 55573 Epoch: [181] [140/156] eta: 0:00:10 lr: 0.003028 min_lr: 0.003028 loss: 3.3601 (3.2421) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0012 max mem: 55573 Epoch: [181] [150/156] eta: 0:00:04 lr: 0.003025 min_lr: 0.003025 loss: 3.3109 (3.2349) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [181] [155/156] eta: 0:00:00 lr: 0.003024 min_lr: 0.003024 loss: 3.3109 (3.2290) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [181] Total time: 0:01:46 (0.6842 s / it) Averaged stats: lr: 0.003024 min_lr: 0.003024 loss: 3.3109 (3.2564) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9303 (0.9303) acc1: 81.2500 (81.2500) acc5: 95.7031 (95.7031) time: 6.9971 data: 6.7598 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0760 (1.0633) acc1: 77.4740 (76.5120) acc5: 95.1823 (94.0480) time: 1.5460 data: 1.3520 max mem: 55573 Test: Total time: 0:00:07 (1.5872 s / it) * Acc@1 76.872 Acc@5 93.734 loss 1.070 Accuracy of the model on the 50000 test images: 76.9% Max accuracy: 76.87% Test: [0/5] eta: 0:00:35 loss: 1.1945 (1.1945) acc1: 71.0938 (71.0938) acc5: 89.4531 (89.4531) time: 7.0515 data: 6.8150 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2397 (1.2444) acc1: 68.4896 (66.1120) acc5: 89.4531 (86.9120) time: 1.5556 data: 1.3631 max mem: 55573 Test: Total time: 0:00:07 (1.5731 s / it) * Acc@1 66.014 Acc@5 87.524 loss 1.282 Accuracy of the model EMA on 50000 test images: 66.0% Max EMA accuracy: 66.01% Epoch: [182] [ 0/156] eta: 0:32:18 lr: 0.003023 min_lr: 0.003023 loss: 3.0108 (3.0108) weight_decay: 0.0500 (0.0500) time: 12.4232 data: 8.1473 max mem: 55573 Epoch: [182] [ 10/156] eta: 0:04:11 lr: 0.003020 min_lr: 0.003020 loss: 3.4079 (3.2103) weight_decay: 0.0500 (0.0500) time: 1.7235 data: 0.7411 max mem: 55573 Epoch: [182] [ 20/156] eta: 0:02:41 lr: 0.003018 min_lr: 0.003018 loss: 3.3198 (3.2118) weight_decay: 0.0500 (0.0500) time: 0.6219 data: 0.0004 max mem: 55573 Epoch: [182] [ 30/156] eta: 0:02:05 lr: 0.003015 min_lr: 0.003015 loss: 3.2976 (3.2019) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [182] [ 40/156] eta: 0:01:43 lr: 0.003012 min_lr: 0.003012 loss: 3.2976 (3.2004) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [182] [ 50/156] eta: 0:01:28 lr: 0.003009 min_lr: 0.003009 loss: 3.2143 (3.1786) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [182] [ 60/156] eta: 0:01:16 lr: 0.003007 min_lr: 0.003007 loss: 3.2522 (3.1861) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [182] [ 70/156] eta: 0:01:05 lr: 0.003004 min_lr: 0.003004 loss: 3.3403 (3.2079) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [182] [ 80/156] eta: 0:00:56 lr: 0.003001 min_lr: 0.003001 loss: 3.4297 (3.2138) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [182] [ 90/156] eta: 0:00:48 lr: 0.002998 min_lr: 0.002998 loss: 3.2822 (3.2052) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [182] [100/156] eta: 0:00:40 lr: 0.002995 min_lr: 0.002995 loss: 3.2822 (3.2084) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [182] [110/156] eta: 0:00:32 lr: 0.002993 min_lr: 0.002993 loss: 3.4121 (3.2113) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [182] [120/156] eta: 0:00:24 lr: 0.002990 min_lr: 0.002990 loss: 3.4462 (3.2314) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [182] [130/156] eta: 0:00:17 lr: 0.002987 min_lr: 0.002987 loss: 3.2780 (3.2272) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0011 max mem: 55573 Epoch: [182] [140/156] eta: 0:00:10 lr: 0.002984 min_lr: 0.002984 loss: 3.2710 (3.2267) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0010 max mem: 55573 Epoch: [182] [150/156] eta: 0:00:04 lr: 0.002981 min_lr: 0.002981 loss: 3.3509 (3.2337) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0002 max mem: 55573 Epoch: [182] [155/156] eta: 0:00:00 lr: 0.002980 min_lr: 0.002980 loss: 3.4053 (3.2448) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [182] Total time: 0:01:45 (0.6761 s / it) Averaged stats: lr: 0.002980 min_lr: 0.002980 loss: 3.4053 (3.2321) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 1.0684 (1.0684) acc1: 79.0365 (79.0365) acc5: 95.9635 (95.9635) time: 6.5946 data: 6.3528 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1802 (1.2200) acc1: 78.5156 (75.6480) acc5: 94.3396 (93.6640) time: 1.4642 data: 1.2706 max mem: 55573 Test: Total time: 0:00:07 (1.4808 s / it) * Acc@1 76.650 Acc@5 93.520 loss 1.218 Accuracy of the model on the 50000 test images: 76.7% Max accuracy: 76.87% Test: [0/5] eta: 0:00:36 loss: 1.1881 (1.1881) acc1: 70.9635 (70.9635) acc5: 89.4531 (89.4531) time: 7.3287 data: 7.0926 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2323 (1.2376) acc1: 68.4896 (66.1440) acc5: 89.4531 (87.0720) time: 1.6114 data: 1.4186 max mem: 55573 Test: Total time: 0:00:08 (1.6285 s / it) * Acc@1 66.164 Acc@5 87.628 loss 1.274 Accuracy of the model EMA on 50000 test images: 66.2% Max EMA accuracy: 66.16% Epoch: [183] [ 0/156] eta: 0:35:33 lr: 0.002980 min_lr: 0.002980 loss: 2.2130 (2.2130) weight_decay: 0.0500 (0.0500) time: 13.6775 data: 13.0242 max mem: 55573 Epoch: [183] [ 10/156] eta: 0:04:19 lr: 0.002977 min_lr: 0.002977 loss: 3.3503 (3.2007) weight_decay: 0.0500 (0.0500) time: 1.7790 data: 1.1844 max mem: 55573 Epoch: [183] [ 20/156] eta: 0:02:45 lr: 0.002974 min_lr: 0.002974 loss: 3.4577 (3.2364) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [183] [ 30/156] eta: 0:02:07 lr: 0.002971 min_lr: 0.002971 loss: 3.4483 (3.2228) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [183] [ 40/156] eta: 0:01:45 lr: 0.002969 min_lr: 0.002969 loss: 3.4067 (3.2531) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [183] [ 50/156] eta: 0:01:29 lr: 0.002966 min_lr: 0.002966 loss: 3.3724 (3.2576) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [183] [ 60/156] eta: 0:01:17 lr: 0.002963 min_lr: 0.002963 loss: 3.2861 (3.2539) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [183] [ 70/156] eta: 0:01:06 lr: 0.002960 min_lr: 0.002960 loss: 3.3110 (3.2809) weight_decay: 0.0500 (0.0500) time: 0.5945 data: 0.0004 max mem: 55573 Epoch: [183] [ 80/156] eta: 0:00:57 lr: 0.002958 min_lr: 0.002958 loss: 3.4234 (3.2807) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0005 max mem: 55573 Epoch: [183] [ 90/156] eta: 0:00:48 lr: 0.002955 min_lr: 0.002955 loss: 3.5089 (3.2881) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [183] [100/156] eta: 0:00:40 lr: 0.002952 min_lr: 0.002952 loss: 3.4098 (3.2751) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [183] [110/156] eta: 0:00:32 lr: 0.002949 min_lr: 0.002949 loss: 3.4098 (3.2882) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [183] [120/156] eta: 0:00:25 lr: 0.002946 min_lr: 0.002946 loss: 3.3333 (3.2783) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [183] [130/156] eta: 0:00:17 lr: 0.002944 min_lr: 0.002944 loss: 3.1872 (3.2764) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0010 max mem: 55573 Epoch: [183] [140/156] eta: 0:00:10 lr: 0.002941 min_lr: 0.002941 loss: 3.2095 (3.2748) weight_decay: 0.0500 (0.0500) time: 0.5847 data: 0.0009 max mem: 55573 Epoch: [183] [150/156] eta: 0:00:04 lr: 0.002938 min_lr: 0.002938 loss: 3.3796 (3.2853) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [183] [155/156] eta: 0:00:00 lr: 0.002937 min_lr: 0.002937 loss: 3.5447 (3.2882) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [183] Total time: 0:01:46 (0.6802 s / it) Averaged stats: lr: 0.002937 min_lr: 0.002937 loss: 3.5447 (3.2490) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0098 (1.0098) acc1: 79.5573 (79.5573) acc5: 95.8333 (95.8333) time: 6.9975 data: 6.7603 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1053 (1.1461) acc1: 78.2552 (75.4240) acc5: 94.3396 (93.4400) time: 1.5459 data: 1.3521 max mem: 55573 Test: Total time: 0:00:07 (1.5891 s / it) * Acc@1 76.308 Acc@5 93.374 loss 1.130 Accuracy of the model on the 50000 test images: 76.3% Max accuracy: 76.87% Test: [0/5] eta: 0:00:35 loss: 1.1814 (1.1814) acc1: 70.9635 (70.9635) acc5: 89.4531 (89.4531) time: 7.1935 data: 6.9575 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2248 (1.2305) acc1: 68.6198 (66.2080) acc5: 89.4531 (87.1680) time: 1.5839 data: 1.3916 max mem: 55573 Test: Total time: 0:00:08 (1.6404 s / it) * Acc@1 66.368 Acc@5 87.740 loss 1.267 Accuracy of the model EMA on 50000 test images: 66.4% Max EMA accuracy: 66.37% Epoch: [184] [ 0/156] eta: 0:33:16 lr: 0.002936 min_lr: 0.002936 loss: 3.8038 (3.8038) weight_decay: 0.0500 (0.0500) time: 12.7990 data: 11.5828 max mem: 55573 Epoch: [184] [ 10/156] eta: 0:04:11 lr: 0.002934 min_lr: 0.002934 loss: 3.4077 (3.1375) weight_decay: 0.0500 (0.0500) time: 1.7193 data: 1.0534 max mem: 55573 Epoch: [184] [ 20/156] eta: 0:02:40 lr: 0.002931 min_lr: 0.002931 loss: 3.4077 (3.2460) weight_decay: 0.0500 (0.0500) time: 0.6012 data: 0.0004 max mem: 55573 Epoch: [184] [ 30/156] eta: 0:02:04 lr: 0.002928 min_lr: 0.002928 loss: 3.6354 (3.3181) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [184] [ 40/156] eta: 0:01:43 lr: 0.002925 min_lr: 0.002925 loss: 3.4021 (3.2635) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [184] [ 50/156] eta: 0:01:28 lr: 0.002923 min_lr: 0.002923 loss: 3.4021 (3.2677) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [184] [ 60/156] eta: 0:01:16 lr: 0.002920 min_lr: 0.002920 loss: 3.2744 (3.2238) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [184] [ 70/156] eta: 0:01:05 lr: 0.002917 min_lr: 0.002917 loss: 3.2348 (3.1896) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [184] [ 80/156] eta: 0:00:56 lr: 0.002914 min_lr: 0.002914 loss: 3.2022 (3.1858) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [184] [ 90/156] eta: 0:00:47 lr: 0.002912 min_lr: 0.002912 loss: 3.3696 (3.2028) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [184] [100/156] eta: 0:00:39 lr: 0.002909 min_lr: 0.002909 loss: 3.4283 (3.2156) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [184] [110/156] eta: 0:00:32 lr: 0.002906 min_lr: 0.002906 loss: 3.3826 (3.2118) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [184] [120/156] eta: 0:00:24 lr: 0.002903 min_lr: 0.002903 loss: 3.3474 (3.2079) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [184] [130/156] eta: 0:00:17 lr: 0.002900 min_lr: 0.002900 loss: 3.3527 (3.2122) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0011 max mem: 55573 Epoch: [184] [140/156] eta: 0:00:10 lr: 0.002898 min_lr: 0.002898 loss: 3.3512 (3.2037) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0009 max mem: 55573 Epoch: [184] [150/156] eta: 0:00:04 lr: 0.002895 min_lr: 0.002895 loss: 3.3180 (3.2163) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [184] [155/156] eta: 0:00:00 lr: 0.002894 min_lr: 0.002894 loss: 3.2268 (3.2198) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [184] Total time: 0:01:45 (0.6753 s / it) Averaged stats: lr: 0.002894 min_lr: 0.002894 loss: 3.2268 (3.2357) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9881 (0.9881) acc1: 80.7292 (80.7292) acc5: 95.7031 (95.7031) time: 6.9944 data: 6.7570 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0513 (1.1286) acc1: 78.9062 (76.8000) acc5: 94.3396 (93.7280) time: 1.5451 data: 1.3515 max mem: 55573 Test: Total time: 0:00:07 (1.5890 s / it) * Acc@1 76.480 Acc@5 93.368 loss 1.119 Accuracy of the model on the 50000 test images: 76.5% Max accuracy: 76.87% Test: [0/5] eta: 0:00:33 loss: 1.1752 (1.1752) acc1: 71.0938 (71.0938) acc5: 89.4531 (89.4531) time: 6.7697 data: 6.5337 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2178 (1.2239) acc1: 68.6198 (66.3360) acc5: 89.4531 (87.2640) time: 1.5557 data: 1.3634 max mem: 55573 Test: Total time: 0:00:07 (1.5769 s / it) * Acc@1 66.534 Acc@5 87.846 loss 1.259 Accuracy of the model EMA on 50000 test images: 66.5% Max EMA accuracy: 66.53% Epoch: [185] [ 0/156] eta: 0:31:09 lr: 0.002893 min_lr: 0.002893 loss: 3.5611 (3.5611) weight_decay: 0.0500 (0.0500) time: 11.9842 data: 9.9705 max mem: 55573 Epoch: [185] [ 10/156] eta: 0:03:57 lr: 0.002891 min_lr: 0.002891 loss: 3.5167 (3.4833) weight_decay: 0.0500 (0.0500) time: 1.6289 data: 0.9069 max mem: 55573 Epoch: [185] [ 20/156] eta: 0:02:34 lr: 0.002888 min_lr: 0.002888 loss: 3.4463 (3.3564) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0005 max mem: 55573 Epoch: [185] [ 30/156] eta: 0:02:01 lr: 0.002885 min_lr: 0.002885 loss: 3.1794 (3.2729) weight_decay: 0.0500 (0.0500) time: 0.5941 data: 0.0005 max mem: 55573 Epoch: [185] [ 40/156] eta: 0:01:41 lr: 0.002882 min_lr: 0.002882 loss: 3.1548 (3.2093) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [185] [ 50/156] eta: 0:01:26 lr: 0.002879 min_lr: 0.002879 loss: 3.1190 (3.1750) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [185] [ 60/156] eta: 0:01:14 lr: 0.002877 min_lr: 0.002877 loss: 3.3157 (3.1945) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [185] [ 70/156] eta: 0:01:04 lr: 0.002874 min_lr: 0.002874 loss: 3.3157 (3.1680) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [185] [ 80/156] eta: 0:00:55 lr: 0.002871 min_lr: 0.002871 loss: 2.9700 (3.1594) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [185] [ 90/156] eta: 0:00:47 lr: 0.002868 min_lr: 0.002868 loss: 3.0236 (3.1349) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [185] [100/156] eta: 0:00:39 lr: 0.002866 min_lr: 0.002866 loss: 3.1305 (3.1537) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [185] [110/156] eta: 0:00:31 lr: 0.002863 min_lr: 0.002863 loss: 3.4949 (3.1680) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [185] [120/156] eta: 0:00:24 lr: 0.002860 min_lr: 0.002860 loss: 3.3425 (3.1795) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [185] [130/156] eta: 0:00:17 lr: 0.002857 min_lr: 0.002857 loss: 3.3425 (3.1844) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0008 max mem: 55573 Epoch: [185] [140/156] eta: 0:00:10 lr: 0.002855 min_lr: 0.002855 loss: 3.3929 (3.1810) weight_decay: 0.0500 (0.0500) time: 0.5871 data: 0.0007 max mem: 55573 Epoch: [185] [150/156] eta: 0:00:03 lr: 0.002852 min_lr: 0.002852 loss: 3.3929 (3.1804) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [185] [155/156] eta: 0:00:00 lr: 0.002851 min_lr: 0.002851 loss: 3.3365 (3.1797) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [185] Total time: 0:01:44 (0.6701 s / it) Averaged stats: lr: 0.002851 min_lr: 0.002851 loss: 3.3365 (3.2328) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.9899 (0.9899) acc1: 80.9896 (80.9896) acc5: 95.9635 (95.9635) time: 7.0366 data: 6.7992 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1795 (1.1571) acc1: 77.3438 (76.0640) acc5: 95.0521 (93.6640) time: 1.5535 data: 1.3599 max mem: 55573 Test: Total time: 0:00:08 (1.6050 s / it) * Acc@1 76.920 Acc@5 93.532 loss 1.140 Accuracy of the model on the 50000 test images: 76.9% Max accuracy: 76.92% Test: [0/5] eta: 0:00:34 loss: 1.1688 (1.1688) acc1: 71.0938 (71.0938) acc5: 89.4531 (89.4531) time: 6.8455 data: 6.6096 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2106 (1.2172) acc1: 68.8802 (66.4960) acc5: 89.4531 (87.3280) time: 1.5144 data: 1.3220 max mem: 55573 Test: Total time: 0:00:07 (1.5296 s / it) * Acc@1 66.658 Acc@5 87.962 loss 1.252 Accuracy of the model EMA on 50000 test images: 66.7% Max EMA accuracy: 66.66% Epoch: [186] [ 0/156] eta: 0:35:53 lr: 0.002850 min_lr: 0.002850 loss: 3.5596 (3.5596) weight_decay: 0.0500 (0.0500) time: 13.8052 data: 13.2242 max mem: 55573 Epoch: [186] [ 10/156] eta: 0:04:21 lr: 0.002847 min_lr: 0.002847 loss: 3.4514 (3.2920) weight_decay: 0.0500 (0.0500) time: 1.7893 data: 1.2025 max mem: 55573 Epoch: [186] [ 20/156] eta: 0:02:45 lr: 0.002845 min_lr: 0.002845 loss: 3.3248 (3.2776) weight_decay: 0.0500 (0.0500) time: 0.5882 data: 0.0004 max mem: 55573 Epoch: [186] [ 30/156] eta: 0:02:07 lr: 0.002842 min_lr: 0.002842 loss: 3.3028 (3.2486) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [186] [ 40/156] eta: 0:01:45 lr: 0.002839 min_lr: 0.002839 loss: 3.3076 (3.2460) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [186] [ 50/156] eta: 0:01:29 lr: 0.002836 min_lr: 0.002836 loss: 3.3202 (3.2676) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [186] [ 60/156] eta: 0:01:17 lr: 0.002834 min_lr: 0.002834 loss: 3.3136 (3.2363) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [186] [ 70/156] eta: 0:01:06 lr: 0.002831 min_lr: 0.002831 loss: 3.3415 (3.2634) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [186] [ 80/156] eta: 0:00:57 lr: 0.002828 min_lr: 0.002828 loss: 3.3858 (3.2579) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [186] [ 90/156] eta: 0:00:48 lr: 0.002825 min_lr: 0.002825 loss: 3.2667 (3.2434) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [186] [100/156] eta: 0:00:40 lr: 0.002823 min_lr: 0.002823 loss: 3.3298 (3.2401) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [186] [110/156] eta: 0:00:32 lr: 0.002820 min_lr: 0.002820 loss: 3.2344 (3.2295) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [186] [120/156] eta: 0:00:25 lr: 0.002817 min_lr: 0.002817 loss: 3.2458 (3.2254) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [186] [130/156] eta: 0:00:17 lr: 0.002814 min_lr: 0.002814 loss: 3.3755 (3.2402) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0010 max mem: 55573 Epoch: [186] [140/156] eta: 0:00:10 lr: 0.002812 min_lr: 0.002812 loss: 3.5053 (3.2421) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0009 max mem: 55573 Epoch: [186] [150/156] eta: 0:00:04 lr: 0.002809 min_lr: 0.002809 loss: 3.3024 (3.2417) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [186] [155/156] eta: 0:00:00 lr: 0.002808 min_lr: 0.002808 loss: 3.3024 (3.2468) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [186] Total time: 0:01:46 (0.6803 s / it) Averaged stats: lr: 0.002808 min_lr: 0.002808 loss: 3.3024 (3.2308) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9392 (0.9392) acc1: 81.7708 (81.7708) acc5: 96.3542 (96.3542) time: 6.7609 data: 6.5236 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0668 (1.0585) acc1: 77.3438 (76.6720) acc5: 95.0521 (93.9840) time: 1.4983 data: 1.3048 max mem: 55573 Test: Total time: 0:00:07 (1.5444 s / it) * Acc@1 76.788 Acc@5 93.778 loss 1.054 Accuracy of the model on the 50000 test images: 76.8% Max accuracy: 76.92% Test: [0/5] eta: 0:00:36 loss: 1.1630 (1.1630) acc1: 71.2240 (71.2240) acc5: 89.7135 (89.7135) time: 7.3076 data: 7.0715 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.2039 (1.2110) acc1: 69.1406 (66.6880) acc5: 89.7135 (87.4560) time: 1.6069 data: 1.4144 max mem: 55573 Test: Total time: 0:00:08 (1.6247 s / it) * Acc@1 66.828 Acc@5 88.064 loss 1.245 Accuracy of the model EMA on 50000 test images: 66.8% Max EMA accuracy: 66.83% Epoch: [187] [ 0/156] eta: 0:36:28 lr: 0.002807 min_lr: 0.002807 loss: 2.0667 (2.0667) weight_decay: 0.0500 (0.0500) time: 14.0274 data: 9.1959 max mem: 55573 Epoch: [187] [ 10/156] eta: 0:04:24 lr: 0.002805 min_lr: 0.002805 loss: 3.0681 (3.0275) weight_decay: 0.0500 (0.0500) time: 1.8141 data: 0.8363 max mem: 55573 Epoch: [187] [ 20/156] eta: 0:02:47 lr: 0.002802 min_lr: 0.002802 loss: 3.0681 (3.0709) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0003 max mem: 55573 Epoch: [187] [ 30/156] eta: 0:02:09 lr: 0.002799 min_lr: 0.002799 loss: 3.3357 (3.0963) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [187] [ 40/156] eta: 0:01:46 lr: 0.002796 min_lr: 0.002796 loss: 3.3704 (3.1222) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [187] [ 50/156] eta: 0:01:30 lr: 0.002794 min_lr: 0.002794 loss: 3.3704 (3.1512) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [187] [ 60/156] eta: 0:01:17 lr: 0.002791 min_lr: 0.002791 loss: 3.3523 (3.1463) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0004 max mem: 55573 Epoch: [187] [ 70/156] eta: 0:01:07 lr: 0.002788 min_lr: 0.002788 loss: 3.3704 (3.1536) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [187] [ 80/156] eta: 0:00:57 lr: 0.002785 min_lr: 0.002785 loss: 3.4753 (3.1657) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [187] [ 90/156] eta: 0:00:48 lr: 0.002783 min_lr: 0.002783 loss: 3.3368 (3.1652) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [187] [100/156] eta: 0:00:40 lr: 0.002780 min_lr: 0.002780 loss: 3.2172 (3.1714) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [187] [110/156] eta: 0:00:32 lr: 0.002777 min_lr: 0.002777 loss: 3.3266 (3.2033) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [187] [120/156] eta: 0:00:25 lr: 0.002774 min_lr: 0.002774 loss: 3.5355 (3.2293) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [187] [130/156] eta: 0:00:18 lr: 0.002772 min_lr: 0.002772 loss: 3.4696 (3.2269) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0010 max mem: 55573 Epoch: [187] [140/156] eta: 0:00:10 lr: 0.002769 min_lr: 0.002769 loss: 3.2161 (3.2305) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0009 max mem: 55573 Epoch: [187] [150/156] eta: 0:00:04 lr: 0.002766 min_lr: 0.002766 loss: 3.2161 (3.2280) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [187] [155/156] eta: 0:00:00 lr: 0.002765 min_lr: 0.002765 loss: 3.2098 (3.2340) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [187] Total time: 0:01:46 (0.6824 s / it) Averaged stats: lr: 0.002765 min_lr: 0.002765 loss: 3.2098 (3.2215) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0909 (1.0909) acc1: 78.3854 (78.3854) acc5: 95.5729 (95.5729) time: 6.9602 data: 6.7229 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1016 (1.1206) acc1: 78.3854 (75.7120) acc5: 94.9219 (93.4080) time: 1.5385 data: 1.3446 max mem: 55573 Test: Total time: 0:00:07 (1.5804 s / it) * Acc@1 77.026 Acc@5 93.730 loss 1.085 Accuracy of the model on the 50000 test images: 77.0% Max accuracy: 77.03% Test: [0/5] eta: 0:00:35 loss: 1.1574 (1.1574) acc1: 71.4844 (71.4844) acc5: 89.7135 (89.7135) time: 7.0831 data: 6.8470 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1974 (1.2053) acc1: 69.4010 (66.9440) acc5: 89.7135 (87.4880) time: 1.5620 data: 1.3695 max mem: 55573 Test: Total time: 0:00:07 (1.5772 s / it) * Acc@1 66.986 Acc@5 88.160 loss 1.239 Accuracy of the model EMA on 50000 test images: 67.0% Max EMA accuracy: 66.99% Epoch: [188] [ 0/156] eta: 0:34:18 lr: 0.002765 min_lr: 0.002765 loss: 3.5915 (3.5915) weight_decay: 0.0500 (0.0500) time: 13.1977 data: 8.0941 max mem: 55573 Epoch: [188] [ 10/156] eta: 0:04:14 lr: 0.002762 min_lr: 0.002762 loss: 3.5683 (3.4056) weight_decay: 0.0500 (0.0500) time: 1.7426 data: 0.7427 max mem: 55573 Epoch: [188] [ 20/156] eta: 0:02:42 lr: 0.002759 min_lr: 0.002759 loss: 3.4918 (3.3817) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0040 max mem: 55573 Epoch: [188] [ 30/156] eta: 0:02:05 lr: 0.002756 min_lr: 0.002756 loss: 3.4228 (3.2887) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [188] [ 40/156] eta: 0:01:44 lr: 0.002754 min_lr: 0.002754 loss: 3.1838 (3.2655) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [188] [ 50/156] eta: 0:01:28 lr: 0.002751 min_lr: 0.002751 loss: 3.2439 (3.2638) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [188] [ 60/156] eta: 0:01:16 lr: 0.002748 min_lr: 0.002748 loss: 3.2607 (3.2262) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [188] [ 70/156] eta: 0:01:06 lr: 0.002745 min_lr: 0.002745 loss: 3.3995 (3.2509) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [188] [ 80/156] eta: 0:00:56 lr: 0.002743 min_lr: 0.002743 loss: 3.3107 (3.2302) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [188] [ 90/156] eta: 0:00:48 lr: 0.002740 min_lr: 0.002740 loss: 3.1691 (3.2281) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [188] [100/156] eta: 0:00:40 lr: 0.002737 min_lr: 0.002737 loss: 3.4045 (3.2285) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [188] [110/156] eta: 0:00:32 lr: 0.002735 min_lr: 0.002735 loss: 3.4793 (3.2268) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [188] [120/156] eta: 0:00:25 lr: 0.002732 min_lr: 0.002732 loss: 3.0874 (3.2025) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [188] [130/156] eta: 0:00:17 lr: 0.002729 min_lr: 0.002729 loss: 3.0874 (3.2060) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0010 max mem: 55573 Epoch: [188] [140/156] eta: 0:00:10 lr: 0.002726 min_lr: 0.002726 loss: 3.4273 (3.2095) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0009 max mem: 55573 Epoch: [188] [150/156] eta: 0:00:04 lr: 0.002724 min_lr: 0.002724 loss: 3.4721 (3.2221) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [188] [155/156] eta: 0:00:00 lr: 0.002722 min_lr: 0.002722 loss: 3.4721 (3.2259) weight_decay: 0.0500 (0.0500) time: 0.5833 data: 0.0001 max mem: 55573 Epoch: [188] Total time: 0:01:45 (0.6770 s / it) Averaged stats: lr: 0.002722 min_lr: 0.002722 loss: 3.4721 (3.2076) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9819 (0.9819) acc1: 81.6406 (81.6406) acc5: 95.8333 (95.8333) time: 6.8679 data: 6.6304 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1305 (1.1283) acc1: 78.5156 (77.2160) acc5: 95.4427 (93.8880) time: 1.5201 data: 1.3262 max mem: 55573 Test: Total time: 0:00:07 (1.5664 s / it) * Acc@1 76.980 Acc@5 93.738 loss 1.119 Accuracy of the model on the 50000 test images: 77.0% Max accuracy: 77.03% Test: [0/5] eta: 0:00:36 loss: 1.1518 (1.1518) acc1: 71.7448 (71.7448) acc5: 89.7135 (89.7135) time: 7.2111 data: 6.9749 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1911 (1.1996) acc1: 69.2708 (67.0400) acc5: 89.7135 (87.6160) time: 1.5881 data: 1.3953 max mem: 55573 Test: Total time: 0:00:08 (1.6109 s / it) * Acc@1 67.126 Acc@5 88.226 loss 1.232 Accuracy of the model EMA on 50000 test images: 67.1% Max EMA accuracy: 67.13% Epoch: [189] [ 0/156] eta: 0:31:30 lr: 0.002722 min_lr: 0.002722 loss: 1.9360 (1.9360) weight_decay: 0.0500 (0.0500) time: 12.1157 data: 11.5220 max mem: 55573 Epoch: [189] [ 10/156] eta: 0:04:16 lr: 0.002719 min_lr: 0.002719 loss: 3.5613 (3.2376) weight_decay: 0.0500 (0.0500) time: 1.7538 data: 1.0479 max mem: 55573 Epoch: [189] [ 20/156] eta: 0:02:43 lr: 0.002717 min_lr: 0.002717 loss: 3.4457 (3.2704) weight_decay: 0.0500 (0.0500) time: 0.6544 data: 0.0005 max mem: 55573 Epoch: [189] [ 30/156] eta: 0:02:06 lr: 0.002714 min_lr: 0.002714 loss: 3.3625 (3.3048) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [189] [ 40/156] eta: 0:01:44 lr: 0.002711 min_lr: 0.002711 loss: 3.1551 (3.2103) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [189] [ 50/156] eta: 0:01:29 lr: 0.002708 min_lr: 0.002708 loss: 2.8943 (3.1838) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [189] [ 60/156] eta: 0:01:16 lr: 0.002706 min_lr: 0.002706 loss: 3.2175 (3.2106) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [189] [ 70/156] eta: 0:01:06 lr: 0.002703 min_lr: 0.002703 loss: 3.3108 (3.1875) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [189] [ 80/156] eta: 0:00:56 lr: 0.002700 min_lr: 0.002700 loss: 3.1881 (3.2044) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [189] [ 90/156] eta: 0:00:48 lr: 0.002697 min_lr: 0.002697 loss: 3.3450 (3.2173) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [189] [100/156] eta: 0:00:40 lr: 0.002695 min_lr: 0.002695 loss: 3.3450 (3.2016) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [189] [110/156] eta: 0:00:32 lr: 0.002692 min_lr: 0.002692 loss: 3.1717 (3.1954) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [189] [120/156] eta: 0:00:25 lr: 0.002689 min_lr: 0.002689 loss: 3.1299 (3.1845) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [189] [130/156] eta: 0:00:17 lr: 0.002687 min_lr: 0.002687 loss: 3.0536 (3.1764) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0011 max mem: 55573 Epoch: [189] [140/156] eta: 0:00:10 lr: 0.002684 min_lr: 0.002684 loss: 2.9250 (3.1613) weight_decay: 0.0500 (0.0500) time: 0.5876 data: 0.0009 max mem: 55573 Epoch: [189] [150/156] eta: 0:00:04 lr: 0.002681 min_lr: 0.002681 loss: 3.0987 (3.1745) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [189] [155/156] eta: 0:00:00 lr: 0.002680 min_lr: 0.002680 loss: 3.3348 (3.1828) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0002 max mem: 55573 Epoch: [189] Total time: 0:01:45 (0.6787 s / it) Averaged stats: lr: 0.002680 min_lr: 0.002680 loss: 3.3348 (3.2260) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0570 (1.0570) acc1: 81.5104 (81.5104) acc5: 96.4844 (96.4844) time: 6.8337 data: 6.5957 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1187 (1.1703) acc1: 79.2969 (76.2880) acc5: 92.7083 (93.8880) time: 1.5134 data: 1.3192 max mem: 55573 Test: Total time: 0:00:07 (1.5543 s / it) * Acc@1 77.166 Acc@5 93.948 loss 1.130 Accuracy of the model on the 50000 test images: 77.2% Max accuracy: 77.17% Test: [0/5] eta: 0:00:34 loss: 1.1464 (1.1464) acc1: 71.8750 (71.8750) acc5: 89.8438 (89.8438) time: 6.8328 data: 6.5968 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1851 (1.1941) acc1: 69.7917 (67.2000) acc5: 89.8438 (87.7120) time: 1.5121 data: 1.3195 max mem: 55573 Test: Total time: 0:00:07 (1.5264 s / it) * Acc@1 67.294 Acc@5 88.330 loss 1.226 Accuracy of the model EMA on 50000 test images: 67.3% Max EMA accuracy: 67.29% Epoch: [190] [ 0/156] eta: 0:33:36 lr: 0.002680 min_lr: 0.002680 loss: 3.1399 (3.1399) weight_decay: 0.0500 (0.0500) time: 12.9246 data: 9.3370 max mem: 55573 Epoch: [190] [ 10/156] eta: 0:04:11 lr: 0.002677 min_lr: 0.002677 loss: 3.2426 (3.2261) weight_decay: 0.0500 (0.0500) time: 1.7202 data: 0.8493 max mem: 55573 Epoch: [190] [ 20/156] eta: 0:02:40 lr: 0.002674 min_lr: 0.002674 loss: 3.2092 (3.1566) weight_decay: 0.0500 (0.0500) time: 0.5954 data: 0.0005 max mem: 55573 Epoch: [190] [ 30/156] eta: 0:02:04 lr: 0.002671 min_lr: 0.002671 loss: 3.3151 (3.2010) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [190] [ 40/156] eta: 0:01:43 lr: 0.002669 min_lr: 0.002669 loss: 3.3151 (3.2313) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [190] [ 50/156] eta: 0:01:28 lr: 0.002666 min_lr: 0.002666 loss: 3.3165 (3.2510) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [190] [ 60/156] eta: 0:01:16 lr: 0.002663 min_lr: 0.002663 loss: 3.4989 (3.2658) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [190] [ 70/156] eta: 0:01:06 lr: 0.002661 min_lr: 0.002661 loss: 3.3541 (3.2537) weight_decay: 0.0500 (0.0500) time: 0.5971 data: 0.0004 max mem: 55573 Epoch: [190] [ 80/156] eta: 0:00:56 lr: 0.002658 min_lr: 0.002658 loss: 3.3283 (3.2591) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.0004 max mem: 55573 Epoch: [190] [ 90/156] eta: 0:00:48 lr: 0.002655 min_lr: 0.002655 loss: 3.2152 (3.2452) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [190] [100/156] eta: 0:00:40 lr: 0.002652 min_lr: 0.002652 loss: 3.1488 (3.2335) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [190] [110/156] eta: 0:00:32 lr: 0.002650 min_lr: 0.002650 loss: 3.2181 (3.2294) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [190] [120/156] eta: 0:00:24 lr: 0.002647 min_lr: 0.002647 loss: 3.2196 (3.2238) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [190] [130/156] eta: 0:00:17 lr: 0.002644 min_lr: 0.002644 loss: 3.3169 (3.2262) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0010 max mem: 55573 Epoch: [190] [140/156] eta: 0:00:10 lr: 0.002642 min_lr: 0.002642 loss: 3.0724 (3.2010) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0008 max mem: 55573 Epoch: [190] [150/156] eta: 0:00:04 lr: 0.002639 min_lr: 0.002639 loss: 3.0698 (3.2103) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [190] [155/156] eta: 0:00:00 lr: 0.002638 min_lr: 0.002638 loss: 3.1872 (3.2135) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [190] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.002638 min_lr: 0.002638 loss: 3.1872 (3.2202) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9742 (0.9742) acc1: 79.0365 (79.0365) acc5: 96.0938 (96.0938) time: 6.9862 data: 6.7488 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0895 (1.0707) acc1: 76.5625 (75.4240) acc5: 94.3396 (93.8560) time: 1.5438 data: 1.3498 max mem: 55573 Test: Total time: 0:00:07 (1.5881 s / it) * Acc@1 76.704 Acc@5 93.926 loss 1.045 Accuracy of the model on the 50000 test images: 76.7% Max accuracy: 77.17% Test: [0/5] eta: 0:00:35 loss: 1.1415 (1.1415) acc1: 71.7448 (71.7448) acc5: 90.2344 (90.2344) time: 7.0130 data: 6.7764 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1796 (1.1891) acc1: 70.0521 (67.3280) acc5: 90.2344 (87.8400) time: 1.5482 data: 1.3554 max mem: 55573 Test: Total time: 0:00:07 (1.5684 s / it) * Acc@1 67.472 Acc@5 88.434 loss 1.221 Accuracy of the model EMA on 50000 test images: 67.5% Max EMA accuracy: 67.47% Epoch: [191] [ 0/156] eta: 0:39:07 lr: 0.002637 min_lr: 0.002637 loss: 3.6013 (3.6013) weight_decay: 0.0500 (0.0500) time: 15.0451 data: 12.1409 max mem: 55573 Epoch: [191] [ 10/156] eta: 0:04:37 lr: 0.002635 min_lr: 0.002635 loss: 3.3266 (3.3452) weight_decay: 0.0500 (0.0500) time: 1.9023 data: 1.1040 max mem: 55573 Epoch: [191] [ 20/156] eta: 0:02:53 lr: 0.002632 min_lr: 0.002632 loss: 3.3266 (3.2858) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [191] [ 30/156] eta: 0:02:13 lr: 0.002629 min_lr: 0.002629 loss: 3.4833 (3.2951) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [191] [ 40/156] eta: 0:01:49 lr: 0.002626 min_lr: 0.002626 loss: 3.4182 (3.2816) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [191] [ 50/156] eta: 0:01:32 lr: 0.002624 min_lr: 0.002624 loss: 3.3461 (3.2842) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0005 max mem: 55573 Epoch: [191] [ 60/156] eta: 0:01:19 lr: 0.002621 min_lr: 0.002621 loss: 3.3426 (3.2708) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [191] [ 70/156] eta: 0:01:08 lr: 0.002618 min_lr: 0.002618 loss: 3.3053 (3.2586) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [191] [ 80/156] eta: 0:00:58 lr: 0.002616 min_lr: 0.002616 loss: 3.2522 (3.2523) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [191] [ 90/156] eta: 0:00:49 lr: 0.002613 min_lr: 0.002613 loss: 3.0390 (3.2256) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [191] [100/156] eta: 0:00:41 lr: 0.002610 min_lr: 0.002610 loss: 2.9266 (3.2170) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [191] [110/156] eta: 0:00:33 lr: 0.002608 min_lr: 0.002608 loss: 3.3290 (3.2178) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [191] [120/156] eta: 0:00:25 lr: 0.002605 min_lr: 0.002605 loss: 3.3273 (3.2095) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [191] [130/156] eta: 0:00:18 lr: 0.002602 min_lr: 0.002602 loss: 3.1458 (3.2040) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0011 max mem: 55573 Epoch: [191] [140/156] eta: 0:00:11 lr: 0.002599 min_lr: 0.002599 loss: 3.1147 (3.1950) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0009 max mem: 55573 Epoch: [191] [150/156] eta: 0:00:04 lr: 0.002597 min_lr: 0.002597 loss: 3.2805 (3.2008) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [191] [155/156] eta: 0:00:00 lr: 0.002595 min_lr: 0.002595 loss: 3.4516 (3.2145) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [191] Total time: 0:01:47 (0.6896 s / it) Averaged stats: lr: 0.002595 min_lr: 0.002595 loss: 3.4516 (3.2164) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 1.0109 (1.0109) acc1: 79.1667 (79.1667) acc5: 95.7031 (95.7031) time: 6.7377 data: 6.5008 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0695 (1.0803) acc1: 78.5156 (76.2880) acc5: 95.5729 (93.9840) time: 1.4940 data: 1.3002 max mem: 55573 Test: Total time: 0:00:07 (1.5410 s / it) * Acc@1 77.460 Acc@5 94.098 loss 1.058 Accuracy of the model on the 50000 test images: 77.5% Max accuracy: 77.46% Test: [0/5] eta: 0:00:33 loss: 1.1365 (1.1365) acc1: 71.7448 (71.7448) acc5: 90.2344 (90.2344) time: 6.6712 data: 6.4352 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1739 (1.1839) acc1: 70.1823 (67.3600) acc5: 90.2344 (87.8400) time: 1.4798 data: 1.2872 max mem: 55573 Test: Total time: 0:00:07 (1.5160 s / it) * Acc@1 67.598 Acc@5 88.512 loss 1.215 Accuracy of the model EMA on 50000 test images: 67.6% Max EMA accuracy: 67.60% Epoch: [192] [ 0/156] eta: 0:33:56 lr: 0.002595 min_lr: 0.002595 loss: 3.8815 (3.8815) weight_decay: 0.0500 (0.0500) time: 13.0549 data: 12.3962 max mem: 55573 Epoch: [192] [ 10/156] eta: 0:04:12 lr: 0.002592 min_lr: 0.002592 loss: 3.5053 (3.3257) weight_decay: 0.0500 (0.0500) time: 1.7325 data: 1.1273 max mem: 55573 Epoch: [192] [ 20/156] eta: 0:02:41 lr: 0.002590 min_lr: 0.002590 loss: 3.4964 (3.3681) weight_decay: 0.0500 (0.0500) time: 0.5946 data: 0.0004 max mem: 55573 Epoch: [192] [ 30/156] eta: 0:02:05 lr: 0.002587 min_lr: 0.002587 loss: 3.4487 (3.2427) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [192] [ 40/156] eta: 0:01:44 lr: 0.002584 min_lr: 0.002584 loss: 3.3523 (3.2497) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [192] [ 50/156] eta: 0:01:28 lr: 0.002582 min_lr: 0.002582 loss: 3.3523 (3.2641) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [192] [ 60/156] eta: 0:01:16 lr: 0.002579 min_lr: 0.002579 loss: 3.2216 (3.2604) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [192] [ 70/156] eta: 0:01:06 lr: 0.002576 min_lr: 0.002576 loss: 3.3593 (3.2548) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [192] [ 80/156] eta: 0:00:56 lr: 0.002574 min_lr: 0.002574 loss: 3.2720 (3.2417) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [192] [ 90/156] eta: 0:00:48 lr: 0.002571 min_lr: 0.002571 loss: 3.0868 (3.2237) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [192] [100/156] eta: 0:00:40 lr: 0.002568 min_lr: 0.002568 loss: 3.3129 (3.2274) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0005 max mem: 55573 Epoch: [192] [110/156] eta: 0:00:32 lr: 0.002566 min_lr: 0.002566 loss: 3.3813 (3.2407) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [192] [120/156] eta: 0:00:24 lr: 0.002563 min_lr: 0.002563 loss: 3.3388 (3.2259) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [192] [130/156] eta: 0:00:17 lr: 0.002560 min_lr: 0.002560 loss: 3.0472 (3.2104) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0010 max mem: 55573 Epoch: [192] [140/156] eta: 0:00:10 lr: 0.002558 min_lr: 0.002558 loss: 3.3000 (3.2088) weight_decay: 0.0500 (0.0500) time: 0.5872 data: 0.0009 max mem: 55573 Epoch: [192] [150/156] eta: 0:00:04 lr: 0.002555 min_lr: 0.002555 loss: 3.4346 (3.2284) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [192] [155/156] eta: 0:00:00 lr: 0.002554 min_lr: 0.002554 loss: 3.3399 (3.2217) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [192] Total time: 0:01:45 (0.6764 s / it) Averaged stats: lr: 0.002554 min_lr: 0.002554 loss: 3.3399 (3.2187) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9590 (0.9590) acc1: 80.4688 (80.4688) acc5: 95.5729 (95.5729) time: 6.9760 data: 6.7386 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0488 (1.0613) acc1: 78.5156 (76.4480) acc5: 94.6615 (93.9200) time: 1.5417 data: 1.3478 max mem: 55573 Test: Total time: 0:00:07 (1.5853 s / it) * Acc@1 77.262 Acc@5 93.948 loss 1.052 Accuracy of the model on the 50000 test images: 77.3% Max accuracy: 77.46% Test: [0/5] eta: 0:00:35 loss: 1.1318 (1.1318) acc1: 71.8750 (71.8750) acc5: 90.4948 (90.4948) time: 7.0321 data: 6.7960 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1685 (1.1790) acc1: 70.1823 (67.4880) acc5: 90.4948 (88.1920) time: 1.5518 data: 1.3593 max mem: 55573 Test: Total time: 0:00:07 (1.5724 s / it) * Acc@1 67.768 Acc@5 88.606 loss 1.210 Accuracy of the model EMA on 50000 test images: 67.8% Max EMA accuracy: 67.77% Epoch: [193] [ 0/156] eta: 0:31:22 lr: 0.002553 min_lr: 0.002553 loss: 3.7320 (3.7320) weight_decay: 0.0500 (0.0500) time: 12.0658 data: 7.4114 max mem: 55573 Epoch: [193] [ 10/156] eta: 0:04:19 lr: 0.002551 min_lr: 0.002551 loss: 3.4360 (3.4249) weight_decay: 0.0500 (0.0500) time: 1.7740 data: 0.8031 max mem: 55573 Epoch: [193] [ 20/156] eta: 0:02:44 lr: 0.002548 min_lr: 0.002548 loss: 3.4360 (3.3710) weight_decay: 0.0500 (0.0500) time: 0.6682 data: 0.0713 max mem: 55573 Epoch: [193] [ 30/156] eta: 0:02:07 lr: 0.002545 min_lr: 0.002545 loss: 3.4057 (3.3568) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [193] [ 40/156] eta: 0:01:45 lr: 0.002543 min_lr: 0.002543 loss: 3.3367 (3.3031) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [193] [ 50/156] eta: 0:01:29 lr: 0.002540 min_lr: 0.002540 loss: 3.3447 (3.3000) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [193] [ 60/156] eta: 0:01:17 lr: 0.002537 min_lr: 0.002537 loss: 3.3896 (3.2692) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [193] [ 70/156] eta: 0:01:06 lr: 0.002534 min_lr: 0.002534 loss: 3.3669 (3.2821) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [193] [ 80/156] eta: 0:00:57 lr: 0.002532 min_lr: 0.002532 loss: 3.4105 (3.2809) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [193] [ 90/156] eta: 0:00:48 lr: 0.002529 min_lr: 0.002529 loss: 3.3024 (3.2780) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0005 max mem: 55573 Epoch: [193] [100/156] eta: 0:00:40 lr: 0.002526 min_lr: 0.002526 loss: 3.3058 (3.2895) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [193] [110/156] eta: 0:00:32 lr: 0.002524 min_lr: 0.002524 loss: 3.4545 (3.2766) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [193] [120/156] eta: 0:00:25 lr: 0.002521 min_lr: 0.002521 loss: 2.9233 (3.2300) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [193] [130/156] eta: 0:00:17 lr: 0.002518 min_lr: 0.002518 loss: 2.9402 (3.2338) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [193] [140/156] eta: 0:00:10 lr: 0.002516 min_lr: 0.002516 loss: 3.2175 (3.2314) weight_decay: 0.0500 (0.0500) time: 0.5855 data: 0.0009 max mem: 55573 Epoch: [193] [150/156] eta: 0:00:04 lr: 0.002513 min_lr: 0.002513 loss: 3.3746 (3.2458) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [193] [155/156] eta: 0:00:00 lr: 0.002512 min_lr: 0.002512 loss: 3.3966 (3.2483) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [193] Total time: 0:01:46 (0.6799 s / it) Averaged stats: lr: 0.002512 min_lr: 0.002512 loss: 3.3966 (3.2096) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0452 (1.0452) acc1: 80.2083 (80.2083) acc5: 96.0938 (96.0938) time: 6.8145 data: 6.5771 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1297 (1.1634) acc1: 78.5156 (75.9040) acc5: 94.3396 (94.3680) time: 1.5093 data: 1.3155 max mem: 55573 Test: Total time: 0:00:07 (1.5525 s / it) * Acc@1 77.184 Acc@5 93.912 loss 1.141 Accuracy of the model on the 50000 test images: 77.2% Max accuracy: 77.46% Test: [0/5] eta: 0:00:35 loss: 1.1272 (1.1272) acc1: 72.0052 (72.0052) acc5: 90.4948 (90.4948) time: 7.0445 data: 6.8084 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1632 (1.1740) acc1: 70.0521 (67.6160) acc5: 90.4948 (88.2560) time: 1.5542 data: 1.3618 max mem: 55573 Test: Total time: 0:00:07 (1.5748 s / it) * Acc@1 67.868 Acc@5 88.692 loss 1.204 Accuracy of the model EMA on 50000 test images: 67.9% Max EMA accuracy: 67.87% Epoch: [194] [ 0/156] eta: 0:33:01 lr: 0.002512 min_lr: 0.002512 loss: 3.1631 (3.1631) weight_decay: 0.0500 (0.0500) time: 12.7024 data: 11.9698 max mem: 55573 Epoch: [194] [ 10/156] eta: 0:04:12 lr: 0.002509 min_lr: 0.002509 loss: 3.2741 (3.1570) weight_decay: 0.0500 (0.0500) time: 1.7299 data: 1.0885 max mem: 55573 Epoch: [194] [ 20/156] eta: 0:02:41 lr: 0.002506 min_lr: 0.002506 loss: 3.2381 (3.1009) weight_decay: 0.0500 (0.0500) time: 0.6130 data: 0.0004 max mem: 55573 Epoch: [194] [ 30/156] eta: 0:02:05 lr: 0.002504 min_lr: 0.002504 loss: 3.1094 (3.0951) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [194] [ 40/156] eta: 0:01:44 lr: 0.002501 min_lr: 0.002501 loss: 3.3718 (3.1512) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [194] [ 50/156] eta: 0:01:28 lr: 0.002498 min_lr: 0.002498 loss: 3.3873 (3.1628) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [194] [ 60/156] eta: 0:01:16 lr: 0.002496 min_lr: 0.002496 loss: 3.2858 (3.1807) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [194] [ 70/156] eta: 0:01:06 lr: 0.002493 min_lr: 0.002493 loss: 3.2858 (3.1829) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [194] [ 80/156] eta: 0:00:56 lr: 0.002490 min_lr: 0.002490 loss: 3.4258 (3.1924) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [194] [ 90/156] eta: 0:00:48 lr: 0.002488 min_lr: 0.002488 loss: 3.2744 (3.1802) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [194] [100/156] eta: 0:00:40 lr: 0.002485 min_lr: 0.002485 loss: 3.3547 (3.2005) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [194] [110/156] eta: 0:00:32 lr: 0.002482 min_lr: 0.002482 loss: 3.2581 (3.1896) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [194] [120/156] eta: 0:00:25 lr: 0.002480 min_lr: 0.002480 loss: 3.1892 (3.1862) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [194] [130/156] eta: 0:00:17 lr: 0.002477 min_lr: 0.002477 loss: 3.2395 (3.1917) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0011 max mem: 55573 Epoch: [194] [140/156] eta: 0:00:10 lr: 0.002474 min_lr: 0.002474 loss: 3.2573 (3.1784) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0009 max mem: 55573 Epoch: [194] [150/156] eta: 0:00:04 lr: 0.002472 min_lr: 0.002472 loss: 2.8678 (3.1621) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [194] [155/156] eta: 0:00:00 lr: 0.002470 min_lr: 0.002470 loss: 2.9706 (3.1749) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [194] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.002470 min_lr: 0.002470 loss: 2.9706 (3.1925) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9751 (0.9751) acc1: 81.3802 (81.3802) acc5: 95.3125 (95.3125) time: 6.8338 data: 6.5962 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1542 (1.1502) acc1: 75.0000 (75.3920) acc5: 94.3396 (93.7280) time: 1.5132 data: 1.3193 max mem: 55573 Test: Total time: 0:00:07 (1.5601 s / it) * Acc@1 77.064 Acc@5 93.736 loss 1.116 Accuracy of the model on the 50000 test images: 77.1% Max accuracy: 77.46% Test: [0/5] eta: 0:00:34 loss: 1.1228 (1.1228) acc1: 71.8750 (71.8750) acc5: 90.7552 (90.7552) time: 6.8604 data: 6.6241 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1583 (1.1695) acc1: 70.1823 (67.6480) acc5: 90.7552 (88.4160) time: 1.5175 data: 1.3249 max mem: 55573 Test: Total time: 0:00:07 (1.5345 s / it) * Acc@1 68.000 Acc@5 88.784 loss 1.199 Accuracy of the model EMA on 50000 test images: 68.0% Max EMA accuracy: 68.00% Epoch: [195] [ 0/156] eta: 0:31:43 lr: 0.002470 min_lr: 0.002470 loss: 3.4368 (3.4368) weight_decay: 0.0500 (0.0500) time: 12.2019 data: 10.9759 max mem: 55573 Epoch: [195] [ 10/156] eta: 0:04:13 lr: 0.002467 min_lr: 0.002467 loss: 3.2946 (3.2547) weight_decay: 0.0500 (0.0500) time: 1.7367 data: 1.0902 max mem: 55573 Epoch: [195] [ 20/156] eta: 0:02:41 lr: 0.002465 min_lr: 0.002465 loss: 3.2979 (3.2534) weight_decay: 0.0500 (0.0500) time: 0.6403 data: 0.0511 max mem: 55573 Epoch: [195] [ 30/156] eta: 0:02:05 lr: 0.002462 min_lr: 0.002462 loss: 3.2979 (3.1595) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [195] [ 40/156] eta: 0:01:44 lr: 0.002459 min_lr: 0.002459 loss: 3.1676 (3.1580) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [195] [ 50/156] eta: 0:01:28 lr: 0.002457 min_lr: 0.002457 loss: 3.3173 (3.1854) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [195] [ 60/156] eta: 0:01:16 lr: 0.002454 min_lr: 0.002454 loss: 3.4038 (3.2061) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [195] [ 70/156] eta: 0:01:06 lr: 0.002451 min_lr: 0.002451 loss: 3.4667 (3.2372) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [195] [ 80/156] eta: 0:00:56 lr: 0.002449 min_lr: 0.002449 loss: 3.3488 (3.2289) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [195] [ 90/156] eta: 0:00:48 lr: 0.002446 min_lr: 0.002446 loss: 3.0412 (3.2064) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [195] [100/156] eta: 0:00:40 lr: 0.002443 min_lr: 0.002443 loss: 2.8850 (3.1887) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [195] [110/156] eta: 0:00:32 lr: 0.002441 min_lr: 0.002441 loss: 2.8376 (3.1585) weight_decay: 0.0500 (0.0500) time: 0.5946 data: 0.0005 max mem: 55573 Epoch: [195] [120/156] eta: 0:00:25 lr: 0.002438 min_lr: 0.002438 loss: 2.8376 (3.1663) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [195] [130/156] eta: 0:00:17 lr: 0.002435 min_lr: 0.002435 loss: 3.3049 (3.1690) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0011 max mem: 55573 Epoch: [195] [140/156] eta: 0:00:10 lr: 0.002433 min_lr: 0.002433 loss: 3.0706 (3.1571) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0009 max mem: 55573 Epoch: [195] [150/156] eta: 0:00:04 lr: 0.002430 min_lr: 0.002430 loss: 3.0581 (3.1466) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [195] [155/156] eta: 0:00:00 lr: 0.002429 min_lr: 0.002429 loss: 3.0182 (3.1430) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [195] Total time: 0:01:45 (0.6778 s / it) Averaged stats: lr: 0.002429 min_lr: 0.002429 loss: 3.0182 (3.1971) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0491 (1.0491) acc1: 80.3385 (80.3385) acc5: 95.4427 (95.4427) time: 6.8767 data: 6.6393 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0591 (1.0708) acc1: 78.9062 (77.1840) acc5: 95.4427 (94.2400) time: 1.5218 data: 1.3279 max mem: 55573 Test: Total time: 0:00:07 (1.5663 s / it) * Acc@1 77.902 Acc@5 94.064 loss 1.069 Accuracy of the model on the 50000 test images: 77.9% Max accuracy: 77.90% Test: [0/5] eta: 0:00:33 loss: 1.1186 (1.1186) acc1: 71.8750 (71.8750) acc5: 90.7552 (90.7552) time: 6.7053 data: 6.4693 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1535 (1.1648) acc1: 70.1823 (67.7120) acc5: 90.7552 (88.5440) time: 1.4864 data: 1.2940 max mem: 55573 Test: Total time: 0:00:07 (1.5025 s / it) * Acc@1 68.124 Acc@5 88.874 loss 1.194 Accuracy of the model EMA on 50000 test images: 68.1% Max EMA accuracy: 68.12% Epoch: [196] [ 0/156] eta: 0:34:18 lr: 0.002429 min_lr: 0.002429 loss: 3.4004 (3.4004) weight_decay: 0.0500 (0.0500) time: 13.1935 data: 7.8697 max mem: 55573 Epoch: [196] [ 10/156] eta: 0:04:14 lr: 0.002426 min_lr: 0.002426 loss: 3.2792 (3.1829) weight_decay: 0.0500 (0.0500) time: 1.7418 data: 0.7164 max mem: 55573 Epoch: [196] [ 20/156] eta: 0:02:42 lr: 0.002423 min_lr: 0.002423 loss: 3.2792 (3.2306) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0008 max mem: 55573 Epoch: [196] [ 30/156] eta: 0:02:05 lr: 0.002421 min_lr: 0.002421 loss: 3.3236 (3.2092) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [196] [ 40/156] eta: 0:01:44 lr: 0.002418 min_lr: 0.002418 loss: 3.3154 (3.2165) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [196] [ 50/156] eta: 0:01:28 lr: 0.002415 min_lr: 0.002415 loss: 3.3744 (3.2319) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [196] [ 60/156] eta: 0:01:16 lr: 0.002413 min_lr: 0.002413 loss: 3.2714 (3.2016) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [196] [ 70/156] eta: 0:01:06 lr: 0.002410 min_lr: 0.002410 loss: 3.1755 (3.1955) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [196] [ 80/156] eta: 0:00:56 lr: 0.002407 min_lr: 0.002407 loss: 3.4343 (3.2301) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [196] [ 90/156] eta: 0:00:48 lr: 0.002405 min_lr: 0.002405 loss: 3.4393 (3.2346) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [196] [100/156] eta: 0:00:40 lr: 0.002402 min_lr: 0.002402 loss: 3.3632 (3.2146) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [196] [110/156] eta: 0:00:32 lr: 0.002400 min_lr: 0.002400 loss: 2.7672 (3.1717) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [196] [120/156] eta: 0:00:25 lr: 0.002397 min_lr: 0.002397 loss: 2.8517 (3.1718) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [196] [130/156] eta: 0:00:17 lr: 0.002394 min_lr: 0.002394 loss: 3.3091 (3.1677) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0024 max mem: 55573 Epoch: [196] [140/156] eta: 0:00:10 lr: 0.002392 min_lr: 0.002392 loss: 3.3511 (3.1792) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0022 max mem: 55573 Epoch: [196] [150/156] eta: 0:00:04 lr: 0.002389 min_lr: 0.002389 loss: 3.3389 (3.1857) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [196] [155/156] eta: 0:00:00 lr: 0.002388 min_lr: 0.002388 loss: 3.2413 (3.1877) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [196] Total time: 0:01:45 (0.6772 s / it) Averaged stats: lr: 0.002388 min_lr: 0.002388 loss: 3.2413 (3.1814) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0246 (1.0246) acc1: 79.9479 (79.9479) acc5: 96.4844 (96.4844) time: 7.0283 data: 6.7908 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1213 (1.0939) acc1: 78.5156 (76.4160) acc5: 95.3125 (94.2400) time: 1.5522 data: 1.3583 max mem: 55573 Test: Total time: 0:00:07 (1.5969 s / it) * Acc@1 77.768 Acc@5 94.232 loss 1.083 Accuracy of the model on the 50000 test images: 77.8% Max accuracy: 77.90% Test: [0/5] eta: 0:00:36 loss: 1.1147 (1.1147) acc1: 72.0052 (72.0052) acc5: 90.7552 (90.7552) time: 7.3874 data: 7.1514 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1491 (1.1606) acc1: 70.0521 (67.6800) acc5: 90.7552 (88.5120) time: 1.6228 data: 1.4304 max mem: 55573 Test: Total time: 0:00:08 (1.6435 s / it) * Acc@1 68.252 Acc@5 88.930 loss 1.189 Accuracy of the model EMA on 50000 test images: 68.3% Max EMA accuracy: 68.25% Epoch: [197] [ 0/156] eta: 0:33:04 lr: 0.002387 min_lr: 0.002387 loss: 2.9719 (2.9719) weight_decay: 0.0500 (0.0500) time: 12.7199 data: 11.2353 max mem: 55573 Epoch: [197] [ 10/156] eta: 0:04:09 lr: 0.002385 min_lr: 0.002385 loss: 3.2471 (3.1098) weight_decay: 0.0500 (0.0500) time: 1.7113 data: 1.0217 max mem: 55573 Epoch: [197] [ 20/156] eta: 0:02:40 lr: 0.002382 min_lr: 0.002382 loss: 3.2734 (3.1847) weight_decay: 0.0500 (0.0500) time: 0.5998 data: 0.0004 max mem: 55573 Epoch: [197] [ 30/156] eta: 0:02:04 lr: 0.002380 min_lr: 0.002380 loss: 3.4286 (3.2111) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [197] [ 40/156] eta: 0:01:43 lr: 0.002377 min_lr: 0.002377 loss: 3.4139 (3.2442) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [197] [ 50/156] eta: 0:01:28 lr: 0.002374 min_lr: 0.002374 loss: 3.4073 (3.2395) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [197] [ 60/156] eta: 0:01:16 lr: 0.002372 min_lr: 0.002372 loss: 3.2567 (3.2557) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [197] [ 70/156] eta: 0:01:05 lr: 0.002369 min_lr: 0.002369 loss: 3.3944 (3.2669) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [197] [ 80/156] eta: 0:00:56 lr: 0.002366 min_lr: 0.002366 loss: 3.4803 (3.2894) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [197] [ 90/156] eta: 0:00:47 lr: 0.002364 min_lr: 0.002364 loss: 3.4791 (3.3069) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [197] [100/156] eta: 0:00:39 lr: 0.002361 min_lr: 0.002361 loss: 3.4712 (3.3115) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [197] [110/156] eta: 0:00:32 lr: 0.002359 min_lr: 0.002359 loss: 3.4712 (3.3022) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [197] [120/156] eta: 0:00:24 lr: 0.002356 min_lr: 0.002356 loss: 3.3071 (3.3000) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [197] [130/156] eta: 0:00:17 lr: 0.002353 min_lr: 0.002353 loss: 3.3538 (3.3043) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0010 max mem: 55573 Epoch: [197] [140/156] eta: 0:00:10 lr: 0.002351 min_lr: 0.002351 loss: 3.4081 (3.3140) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0009 max mem: 55573 Epoch: [197] [150/156] eta: 0:00:04 lr: 0.002348 min_lr: 0.002348 loss: 3.3769 (3.3015) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [197] [155/156] eta: 0:00:00 lr: 0.002347 min_lr: 0.002347 loss: 3.3586 (3.3020) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [197] Total time: 0:01:45 (0.6749 s / it) Averaged stats: lr: 0.002347 min_lr: 0.002347 loss: 3.3586 (3.1811) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9462 (0.9462) acc1: 80.5990 (80.5990) acc5: 96.2240 (96.2240) time: 6.8391 data: 6.6019 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0211 (1.0297) acc1: 77.4740 (76.2880) acc5: 95.1823 (93.7600) time: 1.5143 data: 1.3205 max mem: 55573 Test: Total time: 0:00:07 (1.5577 s / it) * Acc@1 77.788 Acc@5 94.078 loss 0.991 Accuracy of the model on the 50000 test images: 77.8% Max accuracy: 77.90% Test: [0/5] eta: 0:00:35 loss: 1.1108 (1.1108) acc1: 72.1354 (72.1354) acc5: 90.7552 (90.7552) time: 7.1176 data: 6.8816 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1446 (1.1563) acc1: 70.0521 (67.7760) acc5: 90.7552 (88.5760) time: 1.5689 data: 1.3764 max mem: 55573 Test: Total time: 0:00:07 (1.5985 s / it) * Acc@1 68.368 Acc@5 89.012 loss 1.185 Accuracy of the model EMA on 50000 test images: 68.4% Max EMA accuracy: 68.37% Epoch: [198] [ 0/156] eta: 0:35:41 lr: 0.002346 min_lr: 0.002346 loss: 3.2688 (3.2688) weight_decay: 0.0500 (0.0500) time: 13.7250 data: 7.8232 max mem: 55573 Epoch: [198] [ 10/156] eta: 0:04:20 lr: 0.002344 min_lr: 0.002344 loss: 3.1401 (3.0362) weight_decay: 0.0500 (0.0500) time: 1.7840 data: 0.7116 max mem: 55573 Epoch: [198] [ 20/156] eta: 0:02:45 lr: 0.002341 min_lr: 0.002341 loss: 3.1947 (3.1191) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [198] [ 30/156] eta: 0:02:07 lr: 0.002339 min_lr: 0.002339 loss: 3.3506 (3.1763) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [198] [ 40/156] eta: 0:01:45 lr: 0.002336 min_lr: 0.002336 loss: 3.2985 (3.1718) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [198] [ 50/156] eta: 0:01:29 lr: 0.002333 min_lr: 0.002333 loss: 3.3139 (3.1798) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [198] [ 60/156] eta: 0:01:17 lr: 0.002331 min_lr: 0.002331 loss: 3.3942 (3.1651) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [198] [ 70/156] eta: 0:01:06 lr: 0.002328 min_lr: 0.002328 loss: 3.2112 (3.1412) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [198] [ 80/156] eta: 0:00:57 lr: 0.002326 min_lr: 0.002326 loss: 3.1987 (3.1262) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [198] [ 90/156] eta: 0:00:48 lr: 0.002323 min_lr: 0.002323 loss: 3.2460 (3.1424) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [198] [100/156] eta: 0:00:40 lr: 0.002320 min_lr: 0.002320 loss: 3.2617 (3.1391) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [198] [110/156] eta: 0:00:32 lr: 0.002318 min_lr: 0.002318 loss: 3.2389 (3.1393) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [198] [120/156] eta: 0:00:25 lr: 0.002315 min_lr: 0.002315 loss: 3.1864 (3.1355) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [198] [130/156] eta: 0:00:17 lr: 0.002312 min_lr: 0.002312 loss: 3.2383 (3.1416) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0009 max mem: 55573 Epoch: [198] [140/156] eta: 0:00:10 lr: 0.002310 min_lr: 0.002310 loss: 3.2370 (3.1436) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0008 max mem: 55573 Epoch: [198] [150/156] eta: 0:00:04 lr: 0.002307 min_lr: 0.002307 loss: 3.1694 (3.1485) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [198] [155/156] eta: 0:00:00 lr: 0.002306 min_lr: 0.002306 loss: 3.3500 (3.1642) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [198] Total time: 0:01:46 (0.6804 s / it) Averaged stats: lr: 0.002306 min_lr: 0.002306 loss: 3.3500 (3.1597) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9912 (0.9912) acc1: 79.4271 (79.4271) acc5: 95.7031 (95.7031) time: 6.6440 data: 6.4056 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0772 (1.1039) acc1: 78.3854 (76.4480) acc5: 95.7031 (94.0160) time: 1.4756 data: 1.2813 max mem: 55573 Test: Total time: 0:00:07 (1.5187 s / it) * Acc@1 77.674 Acc@5 93.964 loss 1.090 Accuracy of the model on the 50000 test images: 77.7% Max accuracy: 77.90% Test: [0/5] eta: 0:00:36 loss: 1.1075 (1.1075) acc1: 72.3958 (72.3958) acc5: 90.7552 (90.7552) time: 7.3137 data: 7.0777 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1406 (1.1525) acc1: 69.9219 (68.1280) acc5: 90.7552 (88.7040) time: 1.6081 data: 1.4157 max mem: 55573 Test: Total time: 0:00:08 (1.6280 s / it) * Acc@1 68.484 Acc@5 89.088 loss 1.180 Accuracy of the model EMA on 50000 test images: 68.5% Max EMA accuracy: 68.48% Epoch: [199] [ 0/156] eta: 0:34:45 lr: 0.002306 min_lr: 0.002306 loss: 3.4276 (3.4276) weight_decay: 0.0500 (0.0500) time: 13.3662 data: 8.4396 max mem: 55573 Epoch: [199] [ 10/156] eta: 0:04:17 lr: 0.002303 min_lr: 0.002303 loss: 3.4276 (3.3329) weight_decay: 0.0500 (0.0500) time: 1.7630 data: 0.7676 max mem: 55573 Epoch: [199] [ 20/156] eta: 0:02:43 lr: 0.002301 min_lr: 0.002301 loss: 3.4383 (3.3307) weight_decay: 0.0500 (0.0500) time: 0.5959 data: 0.0004 max mem: 55573 Epoch: [199] [ 30/156] eta: 0:02:06 lr: 0.002298 min_lr: 0.002298 loss: 3.4018 (3.2858) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [199] [ 40/156] eta: 0:01:44 lr: 0.002295 min_lr: 0.002295 loss: 3.3156 (3.3120) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [199] [ 50/156] eta: 0:01:29 lr: 0.002293 min_lr: 0.002293 loss: 3.3797 (3.3227) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [199] [ 60/156] eta: 0:01:16 lr: 0.002290 min_lr: 0.002290 loss: 3.3797 (3.3268) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [199] [ 70/156] eta: 0:01:06 lr: 0.002288 min_lr: 0.002288 loss: 3.3458 (3.3219) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [199] [ 80/156] eta: 0:00:56 lr: 0.002285 min_lr: 0.002285 loss: 3.3432 (3.3103) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [199] [ 90/156] eta: 0:00:48 lr: 0.002282 min_lr: 0.002282 loss: 3.3707 (3.3204) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [199] [100/156] eta: 0:00:40 lr: 0.002280 min_lr: 0.002280 loss: 3.4297 (3.3212) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [199] [110/156] eta: 0:00:32 lr: 0.002277 min_lr: 0.002277 loss: 3.4297 (3.3163) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [199] [120/156] eta: 0:00:25 lr: 0.002275 min_lr: 0.002275 loss: 3.4113 (3.2954) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [199] [130/156] eta: 0:00:17 lr: 0.002272 min_lr: 0.002272 loss: 3.2418 (3.2922) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0010 max mem: 55573 Epoch: [199] [140/156] eta: 0:00:10 lr: 0.002269 min_lr: 0.002269 loss: 3.1603 (3.2741) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0009 max mem: 55573 Epoch: [199] [150/156] eta: 0:00:04 lr: 0.002267 min_lr: 0.002267 loss: 3.0150 (3.2542) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [199] [155/156] eta: 0:00:00 lr: 0.002265 min_lr: 0.002265 loss: 3.0823 (3.2528) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [199] Total time: 0:01:45 (0.6788 s / it) Averaged stats: lr: 0.002265 min_lr: 0.002265 loss: 3.0823 (3.1837) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9412 (0.9412) acc1: 82.0312 (82.0312) acc5: 96.8750 (96.8750) time: 6.7189 data: 6.4819 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0488 (1.0516) acc1: 77.9948 (76.6080) acc5: 95.3125 (94.1440) time: 1.4904 data: 1.2965 max mem: 55573 Test: Total time: 0:00:07 (1.5329 s / it) * Acc@1 77.810 Acc@5 94.264 loss 1.044 Accuracy of the model on the 50000 test images: 77.8% Max accuracy: 77.90% Test: [0/5] eta: 0:00:34 loss: 1.1036 (1.1036) acc1: 72.3958 (72.3958) acc5: 90.7552 (90.7552) time: 6.9047 data: 6.6687 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1362 (1.1484) acc1: 69.9219 (68.1600) acc5: 90.7552 (88.7680) time: 1.5270 data: 1.3346 max mem: 55573 Test: Total time: 0:00:07 (1.5496 s / it) * Acc@1 68.600 Acc@5 89.140 loss 1.176 Accuracy of the model EMA on 50000 test images: 68.6% Max EMA accuracy: 68.60% Epoch: [200] [ 0/156] eta: 0:35:51 lr: 0.002265 min_lr: 0.002265 loss: 2.3705 (2.3705) weight_decay: 0.0500 (0.0500) time: 13.7893 data: 10.5216 max mem: 55573 Epoch: [200] [ 10/156] eta: 0:04:23 lr: 0.002263 min_lr: 0.002263 loss: 3.0911 (2.9640) weight_decay: 0.0500 (0.0500) time: 1.8024 data: 0.9568 max mem: 55573 Epoch: [200] [ 20/156] eta: 0:02:46 lr: 0.002260 min_lr: 0.002260 loss: 3.1619 (3.0533) weight_decay: 0.0500 (0.0500) time: 0.5994 data: 0.0004 max mem: 55573 Epoch: [200] [ 30/156] eta: 0:02:08 lr: 0.002257 min_lr: 0.002257 loss: 3.2936 (3.1186) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [200] [ 40/156] eta: 0:01:46 lr: 0.002255 min_lr: 0.002255 loss: 3.3707 (3.1640) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [200] [ 50/156] eta: 0:01:30 lr: 0.002252 min_lr: 0.002252 loss: 3.3865 (3.1818) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [200] [ 60/156] eta: 0:01:17 lr: 0.002250 min_lr: 0.002250 loss: 3.3963 (3.1715) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [200] [ 70/156] eta: 0:01:06 lr: 0.002247 min_lr: 0.002247 loss: 3.2504 (3.1693) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [200] [ 80/156] eta: 0:00:57 lr: 0.002244 min_lr: 0.002244 loss: 3.3572 (3.1834) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [200] [ 90/156] eta: 0:00:48 lr: 0.002242 min_lr: 0.002242 loss: 3.3492 (3.1818) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [200] [100/156] eta: 0:00:40 lr: 0.002239 min_lr: 0.002239 loss: 2.8973 (3.1462) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [200] [110/156] eta: 0:00:32 lr: 0.002237 min_lr: 0.002237 loss: 3.2032 (3.1548) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [200] [120/156] eta: 0:00:25 lr: 0.002234 min_lr: 0.002234 loss: 3.2032 (3.1324) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [200] [130/156] eta: 0:00:18 lr: 0.002232 min_lr: 0.002232 loss: 3.1666 (3.1448) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0010 max mem: 55573 Epoch: [200] [140/156] eta: 0:00:10 lr: 0.002229 min_lr: 0.002229 loss: 3.3805 (3.1680) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0009 max mem: 55573 Epoch: [200] [150/156] eta: 0:00:04 lr: 0.002226 min_lr: 0.002226 loss: 3.3074 (3.1576) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [200] [155/156] eta: 0:00:00 lr: 0.002225 min_lr: 0.002225 loss: 3.2587 (3.1537) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [200] Total time: 0:01:46 (0.6822 s / it) Averaged stats: lr: 0.002225 min_lr: 0.002225 loss: 3.2587 (3.1742) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.9264 (0.9264) acc1: 83.0729 (83.0729) acc5: 96.4844 (96.4844) time: 7.2012 data: 6.9638 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0494 (1.0349) acc1: 78.3854 (76.8640) acc5: 96.4844 (94.4000) time: 1.5868 data: 1.3928 max mem: 55573 Test: Total time: 0:00:08 (1.6308 s / it) * Acc@1 78.102 Acc@5 94.266 loss 1.036 Accuracy of the model on the 50000 test images: 78.1% Max accuracy: 78.10% Test: [0/5] eta: 0:00:33 loss: 1.0998 (1.0998) acc1: 72.1354 (72.1354) acc5: 90.8854 (90.8854) time: 6.7816 data: 6.5454 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1323 (1.1447) acc1: 69.9219 (68.1280) acc5: 90.8854 (88.8640) time: 1.5016 data: 1.3092 max mem: 55573 Test: Total time: 0:00:07 (1.5187 s / it) * Acc@1 68.728 Acc@5 89.206 loss 1.172 Accuracy of the model EMA on 50000 test images: 68.7% Max EMA accuracy: 68.73% Epoch: [201] [ 0/156] eta: 0:33:15 lr: 0.002225 min_lr: 0.002225 loss: 3.0937 (3.0937) weight_decay: 0.0500 (0.0500) time: 12.7918 data: 8.3029 max mem: 55573 Epoch: [201] [ 10/156] eta: 0:04:17 lr: 0.002222 min_lr: 0.002222 loss: 3.4455 (3.1910) weight_decay: 0.0500 (0.0500) time: 1.7648 data: 0.8070 max mem: 55573 Epoch: [201] [ 20/156] eta: 0:02:43 lr: 0.002220 min_lr: 0.002220 loss: 3.3113 (3.2003) weight_decay: 0.0500 (0.0500) time: 0.6263 data: 0.0289 max mem: 55573 Epoch: [201] [ 30/156] eta: 0:02:06 lr: 0.002217 min_lr: 0.002217 loss: 3.3113 (3.2429) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [201] [ 40/156] eta: 0:01:45 lr: 0.002215 min_lr: 0.002215 loss: 3.3717 (3.2091) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [201] [ 50/156] eta: 0:01:29 lr: 0.002212 min_lr: 0.002212 loss: 3.2744 (3.2015) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [201] [ 60/156] eta: 0:01:17 lr: 0.002209 min_lr: 0.002209 loss: 3.1519 (3.1913) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0005 max mem: 55573 Epoch: [201] [ 70/156] eta: 0:01:06 lr: 0.002207 min_lr: 0.002207 loss: 3.4164 (3.2306) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [201] [ 80/156] eta: 0:00:57 lr: 0.002204 min_lr: 0.002204 loss: 3.5206 (3.2336) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0004 max mem: 55573 Epoch: [201] [ 90/156] eta: 0:00:48 lr: 0.002202 min_lr: 0.002202 loss: 3.3027 (3.2144) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0004 max mem: 55573 Epoch: [201] [100/156] eta: 0:00:40 lr: 0.002199 min_lr: 0.002199 loss: 3.1643 (3.2063) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [201] [110/156] eta: 0:00:32 lr: 0.002197 min_lr: 0.002197 loss: 3.3621 (3.2173) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [201] [120/156] eta: 0:00:25 lr: 0.002194 min_lr: 0.002194 loss: 3.4017 (3.2256) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [201] [130/156] eta: 0:00:17 lr: 0.002191 min_lr: 0.002191 loss: 3.3310 (3.2204) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [201] [140/156] eta: 0:00:10 lr: 0.002189 min_lr: 0.002189 loss: 3.3310 (3.2139) weight_decay: 0.0500 (0.0500) time: 0.5854 data: 0.0009 max mem: 55573 Epoch: [201] [150/156] eta: 0:00:04 lr: 0.002186 min_lr: 0.002186 loss: 3.3787 (3.2119) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [201] [155/156] eta: 0:00:00 lr: 0.002185 min_lr: 0.002185 loss: 3.3466 (3.2048) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [201] Total time: 0:01:46 (0.6801 s / it) Averaged stats: lr: 0.002185 min_lr: 0.002185 loss: 3.3466 (3.1602) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0282 (1.0282) acc1: 80.2083 (80.2083) acc5: 95.5729 (95.5729) time: 7.0999 data: 6.8625 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1335 (1.1146) acc1: 77.6042 (76.8640) acc5: 95.5729 (94.2080) time: 1.5667 data: 1.3726 max mem: 55573 Test: Total time: 0:00:08 (1.6139 s / it) * Acc@1 77.604 Acc@5 94.058 loss 1.109 Accuracy of the model on the 50000 test images: 77.6% Max accuracy: 78.10% Test: [0/5] eta: 0:00:35 loss: 1.0958 (1.0958) acc1: 72.3958 (72.3958) acc5: 90.8854 (90.8854) time: 7.1427 data: 6.9064 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1281 (1.1406) acc1: 70.0521 (68.3200) acc5: 90.8854 (88.9280) time: 1.5738 data: 1.3814 max mem: 55573 Test: Total time: 0:00:07 (1.5953 s / it) * Acc@1 68.836 Acc@5 89.284 loss 1.168 Accuracy of the model EMA on 50000 test images: 68.8% Max EMA accuracy: 68.84% Epoch: [202] [ 0/156] eta: 0:37:03 lr: 0.002185 min_lr: 0.002185 loss: 3.0723 (3.0723) weight_decay: 0.0500 (0.0500) time: 14.2536 data: 13.6746 max mem: 55573 Epoch: [202] [ 10/156] eta: 0:04:29 lr: 0.002182 min_lr: 0.002182 loss: 3.3788 (3.3455) weight_decay: 0.0500 (0.0500) time: 1.8453 data: 1.2435 max mem: 55573 Epoch: [202] [ 20/156] eta: 0:02:49 lr: 0.002180 min_lr: 0.002180 loss: 3.2982 (3.2663) weight_decay: 0.0500 (0.0500) time: 0.5985 data: 0.0004 max mem: 55573 Epoch: [202] [ 30/156] eta: 0:02:10 lr: 0.002177 min_lr: 0.002177 loss: 3.2272 (3.1804) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [202] [ 40/156] eta: 0:01:47 lr: 0.002175 min_lr: 0.002175 loss: 2.6154 (3.1181) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [202] [ 50/156] eta: 0:01:31 lr: 0.002172 min_lr: 0.002172 loss: 3.1227 (3.1118) weight_decay: 0.0500 (0.0500) time: 0.5945 data: 0.0005 max mem: 55573 Epoch: [202] [ 60/156] eta: 0:01:18 lr: 0.002169 min_lr: 0.002169 loss: 3.2846 (3.1393) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.0005 max mem: 55573 Epoch: [202] [ 70/156] eta: 0:01:07 lr: 0.002167 min_lr: 0.002167 loss: 3.3999 (3.1712) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [202] [ 80/156] eta: 0:00:57 lr: 0.002164 min_lr: 0.002164 loss: 3.4238 (3.1899) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [202] [ 90/156] eta: 0:00:49 lr: 0.002162 min_lr: 0.002162 loss: 3.3560 (3.1787) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [202] [100/156] eta: 0:00:40 lr: 0.002159 min_lr: 0.002159 loss: 2.9044 (3.1548) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [202] [110/156] eta: 0:00:32 lr: 0.002157 min_lr: 0.002157 loss: 2.8731 (3.1531) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [202] [120/156] eta: 0:00:25 lr: 0.002154 min_lr: 0.002154 loss: 3.1289 (3.1470) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [202] [130/156] eta: 0:00:18 lr: 0.002152 min_lr: 0.002152 loss: 3.2330 (3.1584) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0012 max mem: 55573 Epoch: [202] [140/156] eta: 0:00:11 lr: 0.002149 min_lr: 0.002149 loss: 3.3453 (3.1697) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0011 max mem: 55573 Epoch: [202] [150/156] eta: 0:00:04 lr: 0.002146 min_lr: 0.002146 loss: 3.3241 (3.1612) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [202] [155/156] eta: 0:00:00 lr: 0.002145 min_lr: 0.002145 loss: 3.2790 (3.1641) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [202] Total time: 0:01:46 (0.6853 s / it) Averaged stats: lr: 0.002145 min_lr: 0.002145 loss: 3.2790 (3.1660) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9807 (0.9807) acc1: 79.6875 (79.6875) acc5: 96.3542 (96.3542) time: 6.9257 data: 6.6884 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0838 (1.0768) acc1: 76.6927 (76.6080) acc5: 95.7031 (94.5600) time: 1.5316 data: 1.3378 max mem: 55573 Test: Total time: 0:00:07 (1.5747 s / it) * Acc@1 78.048 Acc@5 94.368 loss 1.054 Accuracy of the model on the 50000 test images: 78.0% Max accuracy: 78.10% Test: [0/5] eta: 0:00:36 loss: 1.0923 (1.0923) acc1: 72.5260 (72.5260) acc5: 90.8854 (90.8854) time: 7.3976 data: 7.1616 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1241 (1.1368) acc1: 70.0521 (68.4160) acc5: 90.8854 (88.9600) time: 1.6248 data: 1.4324 max mem: 55573 Test: Total time: 0:00:08 (1.6442 s / it) * Acc@1 68.938 Acc@5 89.372 loss 1.163 Accuracy of the model EMA on 50000 test images: 68.9% Max EMA accuracy: 68.94% Epoch: [203] [ 0/156] eta: 0:30:52 lr: 0.002145 min_lr: 0.002145 loss: 3.5272 (3.5272) weight_decay: 0.0500 (0.0500) time: 11.8759 data: 8.9975 max mem: 55573 Epoch: [203] [ 10/156] eta: 0:04:08 lr: 0.002142 min_lr: 0.002142 loss: 3.3233 (3.2705) weight_decay: 0.0500 (0.0500) time: 1.7038 data: 0.8836 max mem: 55573 Epoch: [203] [ 20/156] eta: 0:02:39 lr: 0.002140 min_lr: 0.002140 loss: 3.3233 (3.2598) weight_decay: 0.0500 (0.0500) time: 0.6383 data: 0.0363 max mem: 55573 Epoch: [203] [ 30/156] eta: 0:02:04 lr: 0.002137 min_lr: 0.002137 loss: 3.4116 (3.2300) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [203] [ 40/156] eta: 0:01:43 lr: 0.002135 min_lr: 0.002135 loss: 3.4116 (3.2541) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [203] [ 50/156] eta: 0:01:28 lr: 0.002132 min_lr: 0.002132 loss: 3.3588 (3.2331) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [203] [ 60/156] eta: 0:01:16 lr: 0.002130 min_lr: 0.002130 loss: 3.3588 (3.2334) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [203] [ 70/156] eta: 0:01:05 lr: 0.002127 min_lr: 0.002127 loss: 3.1398 (3.1937) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [203] [ 80/156] eta: 0:00:56 lr: 0.002125 min_lr: 0.002125 loss: 3.0675 (3.1899) weight_decay: 0.0500 (0.0500) time: 0.6004 data: 0.0004 max mem: 55573 Epoch: [203] [ 90/156] eta: 0:00:48 lr: 0.002122 min_lr: 0.002122 loss: 3.2544 (3.1950) weight_decay: 0.0500 (0.0500) time: 0.5997 data: 0.0005 max mem: 55573 Epoch: [203] [100/156] eta: 0:00:39 lr: 0.002119 min_lr: 0.002119 loss: 3.4657 (3.2157) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [203] [110/156] eta: 0:00:32 lr: 0.002117 min_lr: 0.002117 loss: 3.4873 (3.2119) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [203] [120/156] eta: 0:00:24 lr: 0.002114 min_lr: 0.002114 loss: 3.3537 (3.2139) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [203] [130/156] eta: 0:00:17 lr: 0.002112 min_lr: 0.002112 loss: 3.3537 (3.2334) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0011 max mem: 55573 Epoch: [203] [140/156] eta: 0:00:10 lr: 0.002109 min_lr: 0.002109 loss: 3.2723 (3.2190) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0009 max mem: 55573 Epoch: [203] [150/156] eta: 0:00:04 lr: 0.002107 min_lr: 0.002107 loss: 3.2779 (3.2158) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [203] [155/156] eta: 0:00:00 lr: 0.002106 min_lr: 0.002106 loss: 3.2736 (3.2152) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [203] Total time: 0:01:45 (0.6758 s / it) Averaged stats: lr: 0.002106 min_lr: 0.002106 loss: 3.2736 (3.1672) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 0.9962 (0.9962) acc1: 81.7708 (81.7708) acc5: 96.2240 (96.2240) time: 6.4186 data: 6.1816 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0960 (1.0894) acc1: 78.5156 (76.5760) acc5: 94.3396 (94.5280) time: 1.4794 data: 1.2867 max mem: 55573 Test: Total time: 0:00:07 (1.5125 s / it) * Acc@1 77.496 Acc@5 94.088 loss 1.075 Accuracy of the model on the 50000 test images: 77.5% Max accuracy: 78.10% Test: [0/5] eta: 0:00:37 loss: 1.0890 (1.0890) acc1: 72.6562 (72.6562) acc5: 90.8854 (90.8854) time: 7.4994 data: 7.2633 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1202 (1.1335) acc1: 70.0521 (68.5440) acc5: 90.8854 (88.9920) time: 1.6453 data: 1.4528 max mem: 55573 Test: Total time: 0:00:08 (1.6656 s / it) * Acc@1 69.032 Acc@5 89.416 loss 1.160 Accuracy of the model EMA on 50000 test images: 69.0% Max EMA accuracy: 69.03% Epoch: [204] [ 0/156] eta: 0:34:36 lr: 0.002105 min_lr: 0.002105 loss: 3.7590 (3.7590) weight_decay: 0.0500 (0.0500) time: 13.3097 data: 12.7007 max mem: 55573 Epoch: [204] [ 10/156] eta: 0:04:16 lr: 0.002103 min_lr: 0.002103 loss: 3.4372 (3.2891) weight_decay: 0.0500 (0.0500) time: 1.7576 data: 1.1552 max mem: 55573 Epoch: [204] [ 20/156] eta: 0:02:43 lr: 0.002100 min_lr: 0.002100 loss: 3.1794 (3.1846) weight_decay: 0.0500 (0.0500) time: 0.5962 data: 0.0006 max mem: 55573 Epoch: [204] [ 30/156] eta: 0:02:06 lr: 0.002098 min_lr: 0.002098 loss: 3.0461 (3.1003) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [204] [ 40/156] eta: 0:01:44 lr: 0.002095 min_lr: 0.002095 loss: 3.2007 (3.1128) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [204] [ 50/156] eta: 0:01:29 lr: 0.002093 min_lr: 0.002093 loss: 3.2854 (3.1286) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [204] [ 60/156] eta: 0:01:16 lr: 0.002090 min_lr: 0.002090 loss: 3.1414 (3.1182) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [204] [ 70/156] eta: 0:01:06 lr: 0.002088 min_lr: 0.002088 loss: 3.0275 (3.1139) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [204] [ 80/156] eta: 0:00:57 lr: 0.002085 min_lr: 0.002085 loss: 3.0275 (3.1173) weight_decay: 0.0500 (0.0500) time: 0.5953 data: 0.0004 max mem: 55573 Epoch: [204] [ 90/156] eta: 0:00:48 lr: 0.002083 min_lr: 0.002083 loss: 3.2065 (3.1264) weight_decay: 0.0500 (0.0500) time: 0.5957 data: 0.0004 max mem: 55573 Epoch: [204] [100/156] eta: 0:00:40 lr: 0.002080 min_lr: 0.002080 loss: 3.3588 (3.1433) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [204] [110/156] eta: 0:00:32 lr: 0.002077 min_lr: 0.002077 loss: 3.3307 (3.1532) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [204] [120/156] eta: 0:00:25 lr: 0.002075 min_lr: 0.002075 loss: 3.3036 (3.1600) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [204] [130/156] eta: 0:00:17 lr: 0.002072 min_lr: 0.002072 loss: 3.4102 (3.1702) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0010 max mem: 55573 Epoch: [204] [140/156] eta: 0:00:10 lr: 0.002070 min_lr: 0.002070 loss: 3.2683 (3.1586) weight_decay: 0.0500 (0.0500) time: 0.5870 data: 0.0009 max mem: 55573 Epoch: [204] [150/156] eta: 0:00:04 lr: 0.002067 min_lr: 0.002067 loss: 3.2109 (3.1530) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [204] [155/156] eta: 0:00:00 lr: 0.002066 min_lr: 0.002066 loss: 3.2109 (3.1439) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [204] Total time: 0:01:45 (0.6787 s / it) Averaged stats: lr: 0.002066 min_lr: 0.002066 loss: 3.2109 (3.1467) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9725 (0.9725) acc1: 80.7292 (80.7292) acc5: 96.2240 (96.2240) time: 6.9708 data: 6.7333 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0318 (1.0763) acc1: 79.1667 (76.8960) acc5: 95.5729 (94.3040) time: 1.5406 data: 1.3467 max mem: 55573 Test: Total time: 0:00:07 (1.5830 s / it) * Acc@1 77.786 Acc@5 94.210 loss 1.035 Accuracy of the model on the 50000 test images: 77.8% Max accuracy: 78.10% Test: [0/5] eta: 0:00:34 loss: 1.0851 (1.0851) acc1: 72.7865 (72.7865) acc5: 91.0156 (91.0156) time: 6.9418 data: 6.7057 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1158 (1.1296) acc1: 70.0521 (68.6080) acc5: 91.0156 (89.1840) time: 1.5340 data: 1.3414 max mem: 55573 Test: Total time: 0:00:07 (1.5559 s / it) * Acc@1 69.116 Acc@5 89.504 loss 1.155 Accuracy of the model EMA on 50000 test images: 69.1% Max EMA accuracy: 69.12% Epoch: [205] [ 0/156] eta: 0:37:45 lr: 0.002066 min_lr: 0.002066 loss: 3.4646 (3.4646) weight_decay: 0.0500 (0.0500) time: 14.5196 data: 10.9171 max mem: 55573 Epoch: [205] [ 10/156] eta: 0:04:30 lr: 0.002063 min_lr: 0.002063 loss: 3.1164 (3.0108) weight_decay: 0.0500 (0.0500) time: 1.8536 data: 0.9928 max mem: 55573 Epoch: [205] [ 20/156] eta: 0:02:50 lr: 0.002061 min_lr: 0.002061 loss: 2.8680 (3.0119) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [205] [ 30/156] eta: 0:02:11 lr: 0.002058 min_lr: 0.002058 loss: 3.2336 (3.0582) weight_decay: 0.0500 (0.0500) time: 0.5953 data: 0.0004 max mem: 55573 Epoch: [205] [ 40/156] eta: 0:01:48 lr: 0.002056 min_lr: 0.002056 loss: 3.2762 (3.0697) weight_decay: 0.0500 (0.0500) time: 0.5952 data: 0.0005 max mem: 55573 Epoch: [205] [ 50/156] eta: 0:01:31 lr: 0.002053 min_lr: 0.002053 loss: 3.3293 (3.0735) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [205] [ 60/156] eta: 0:01:18 lr: 0.002051 min_lr: 0.002051 loss: 2.9210 (3.0615) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [205] [ 70/156] eta: 0:01:07 lr: 0.002048 min_lr: 0.002048 loss: 3.0254 (3.0577) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [205] [ 80/156] eta: 0:00:58 lr: 0.002046 min_lr: 0.002046 loss: 3.1423 (3.0812) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [205] [ 90/156] eta: 0:00:49 lr: 0.002043 min_lr: 0.002043 loss: 3.2512 (3.0993) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [205] [100/156] eta: 0:00:40 lr: 0.002041 min_lr: 0.002041 loss: 3.2651 (3.1159) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0005 max mem: 55573 Epoch: [205] [110/156] eta: 0:00:32 lr: 0.002038 min_lr: 0.002038 loss: 3.2519 (3.1061) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [205] [120/156] eta: 0:00:25 lr: 0.002036 min_lr: 0.002036 loss: 3.2011 (3.1157) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [205] [130/156] eta: 0:00:18 lr: 0.002033 min_lr: 0.002033 loss: 3.3224 (3.1317) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0011 max mem: 55573 Epoch: [205] [140/156] eta: 0:00:11 lr: 0.002031 min_lr: 0.002031 loss: 3.3333 (3.1461) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0009 max mem: 55573 Epoch: [205] [150/156] eta: 0:00:04 lr: 0.002028 min_lr: 0.002028 loss: 3.3472 (3.1558) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [205] [155/156] eta: 0:00:00 lr: 0.002027 min_lr: 0.002027 loss: 3.2980 (3.1512) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [205] Total time: 0:01:47 (0.6864 s / it) Averaged stats: lr: 0.002027 min_lr: 0.002027 loss: 3.2980 (3.1566) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.9824 (0.9824) acc1: 81.5104 (81.5104) acc5: 96.3542 (96.3542) time: 7.2348 data: 6.9975 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0644 (1.0918) acc1: 79.0365 (77.0240) acc5: 94.3396 (94.3040) time: 1.5935 data: 1.3996 max mem: 55573 Test: Total time: 0:00:08 (1.6326 s / it) * Acc@1 78.154 Acc@5 94.342 loss 1.049 Accuracy of the model on the 50000 test images: 78.2% Max accuracy: 78.15% Test: [0/5] eta: 0:00:34 loss: 1.0817 (1.0817) acc1: 72.9167 (72.9167) acc5: 91.1458 (91.1458) time: 6.9899 data: 6.7538 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1121 (1.1262) acc1: 70.0521 (68.6080) acc5: 91.1458 (89.2160) time: 1.5435 data: 1.3509 max mem: 55573 Test: Total time: 0:00:07 (1.5622 s / it) * Acc@1 69.186 Acc@5 89.534 loss 1.152 Accuracy of the model EMA on 50000 test images: 69.2% Max EMA accuracy: 69.19% Epoch: [206] [ 0/156] eta: 0:34:33 lr: 0.002027 min_lr: 0.002027 loss: 3.7574 (3.7574) weight_decay: 0.0500 (0.0500) time: 13.2889 data: 9.4575 max mem: 55573 Epoch: [206] [ 10/156] eta: 0:04:15 lr: 0.002024 min_lr: 0.002024 loss: 3.4241 (3.3171) weight_decay: 0.0500 (0.0500) time: 1.7493 data: 0.8602 max mem: 55573 Epoch: [206] [ 20/156] eta: 0:02:42 lr: 0.002022 min_lr: 0.002022 loss: 3.4327 (3.2802) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0004 max mem: 55573 Epoch: [206] [ 30/156] eta: 0:02:06 lr: 0.002019 min_lr: 0.002019 loss: 3.3849 (3.2853) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [206] [ 40/156] eta: 0:01:44 lr: 0.002017 min_lr: 0.002017 loss: 3.3220 (3.2793) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [206] [ 50/156] eta: 0:01:29 lr: 0.002014 min_lr: 0.002014 loss: 3.3745 (3.2727) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [206] [ 60/156] eta: 0:01:16 lr: 0.002012 min_lr: 0.002012 loss: 3.3022 (3.2625) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [206] [ 70/156] eta: 0:01:06 lr: 0.002009 min_lr: 0.002009 loss: 3.2462 (3.2454) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [206] [ 80/156] eta: 0:00:56 lr: 0.002007 min_lr: 0.002007 loss: 3.2408 (3.2474) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [206] [ 90/156] eta: 0:00:48 lr: 0.002004 min_lr: 0.002004 loss: 3.3793 (3.2468) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [206] [100/156] eta: 0:00:40 lr: 0.002002 min_lr: 0.002002 loss: 3.2542 (3.2177) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [206] [110/156] eta: 0:00:32 lr: 0.001999 min_lr: 0.001999 loss: 3.3020 (3.2220) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [206] [120/156] eta: 0:00:25 lr: 0.001997 min_lr: 0.001997 loss: 3.3054 (3.2139) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [206] [130/156] eta: 0:00:17 lr: 0.001994 min_lr: 0.001994 loss: 3.3054 (3.2141) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0010 max mem: 55573 Epoch: [206] [140/156] eta: 0:00:10 lr: 0.001992 min_lr: 0.001992 loss: 3.3563 (3.2151) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0008 max mem: 55573 Epoch: [206] [150/156] eta: 0:00:04 lr: 0.001989 min_lr: 0.001989 loss: 3.3269 (3.2046) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [206] [155/156] eta: 0:00:00 lr: 0.001988 min_lr: 0.001988 loss: 2.9841 (3.1999) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [206] Total time: 0:01:45 (0.6776 s / it) Averaged stats: lr: 0.001988 min_lr: 0.001988 loss: 2.9841 (3.1443) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8816 (0.8816) acc1: 81.6406 (81.6406) acc5: 96.8750 (96.8750) time: 6.9754 data: 6.7380 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9718 (1.0324) acc1: 79.4271 (77.9520) acc5: 94.3396 (94.7200) time: 1.5415 data: 1.3477 max mem: 55573 Test: Total time: 0:00:07 (1.5884 s / it) * Acc@1 78.614 Acc@5 94.450 loss 0.999 Accuracy of the model on the 50000 test images: 78.6% Max accuracy: 78.61% Test: [0/5] eta: 0:00:34 loss: 1.0791 (1.0791) acc1: 73.0469 (73.0469) acc5: 91.2760 (91.2760) time: 6.8999 data: 6.6639 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1090 (1.1232) acc1: 69.9219 (68.6080) acc5: 91.2760 (89.3120) time: 1.5254 data: 1.3329 max mem: 55573 Test: Total time: 0:00:07 (1.5451 s / it) * Acc@1 69.286 Acc@5 89.598 loss 1.148 Accuracy of the model EMA on 50000 test images: 69.3% Max EMA accuracy: 69.29% Epoch: [207] [ 0/156] eta: 0:28:51 lr: 0.001988 min_lr: 0.001988 loss: 3.4963 (3.4963) weight_decay: 0.0500 (0.0500) time: 11.1023 data: 10.3627 max mem: 55573 Epoch: [207] [ 10/156] eta: 0:04:06 lr: 0.001985 min_lr: 0.001985 loss: 3.4446 (3.2823) weight_decay: 0.0500 (0.0500) time: 1.6873 data: 0.9479 max mem: 55573 Epoch: [207] [ 20/156] eta: 0:02:38 lr: 0.001983 min_lr: 0.001983 loss: 3.3179 (3.2431) weight_decay: 0.0500 (0.0500) time: 0.6695 data: 0.0034 max mem: 55573 Epoch: [207] [ 30/156] eta: 0:02:03 lr: 0.001980 min_lr: 0.001980 loss: 3.4041 (3.3167) weight_decay: 0.0500 (0.0500) time: 0.5943 data: 0.0004 max mem: 55573 Epoch: [207] [ 40/156] eta: 0:01:42 lr: 0.001978 min_lr: 0.001978 loss: 3.5015 (3.2918) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [207] [ 50/156] eta: 0:01:27 lr: 0.001975 min_lr: 0.001975 loss: 3.3859 (3.2985) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [207] [ 60/156] eta: 0:01:15 lr: 0.001973 min_lr: 0.001973 loss: 3.3883 (3.2791) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [207] [ 70/156] eta: 0:01:05 lr: 0.001970 min_lr: 0.001970 loss: 3.1001 (3.2644) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [207] [ 80/156] eta: 0:00:56 lr: 0.001968 min_lr: 0.001968 loss: 3.0928 (3.2259) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [207] [ 90/156] eta: 0:00:47 lr: 0.001965 min_lr: 0.001965 loss: 3.1793 (3.2205) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [207] [100/156] eta: 0:00:39 lr: 0.001963 min_lr: 0.001963 loss: 3.2989 (3.2172) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [207] [110/156] eta: 0:00:32 lr: 0.001961 min_lr: 0.001961 loss: 3.2936 (3.2037) weight_decay: 0.0500 (0.0500) time: 0.5950 data: 0.0005 max mem: 55573 Epoch: [207] [120/156] eta: 0:00:24 lr: 0.001958 min_lr: 0.001958 loss: 3.3338 (3.2107) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0004 max mem: 55573 Epoch: [207] [130/156] eta: 0:00:17 lr: 0.001956 min_lr: 0.001956 loss: 3.3746 (3.2022) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0012 max mem: 55573 Epoch: [207] [140/156] eta: 0:00:10 lr: 0.001953 min_lr: 0.001953 loss: 3.0355 (3.1881) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0010 max mem: 55573 Epoch: [207] [150/156] eta: 0:00:04 lr: 0.001951 min_lr: 0.001951 loss: 3.3647 (3.2095) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [207] [155/156] eta: 0:00:00 lr: 0.001949 min_lr: 0.001949 loss: 3.4030 (3.1998) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [207] Total time: 0:01:45 (0.6742 s / it) Averaged stats: lr: 0.001949 min_lr: 0.001949 loss: 3.4030 (3.1422) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9650 (0.9650) acc1: 81.2500 (81.2500) acc5: 95.9635 (95.9635) time: 6.6668 data: 6.4299 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0851 (1.0908) acc1: 78.7760 (77.0560) acc5: 95.9635 (94.4960) time: 1.4796 data: 1.2861 max mem: 55573 Test: Total time: 0:00:07 (1.5250 s / it) * Acc@1 78.116 Acc@5 94.318 loss 1.081 Accuracy of the model on the 50000 test images: 78.1% Max accuracy: 78.61% Test: [0/5] eta: 0:00:35 loss: 1.0763 (1.0763) acc1: 73.0469 (73.0469) acc5: 91.4062 (91.4062) time: 7.0868 data: 6.8509 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1058 (1.1203) acc1: 70.0521 (68.6080) acc5: 91.4062 (89.3760) time: 1.5627 data: 1.3703 max mem: 55573 Test: Total time: 0:00:07 (1.5815 s / it) * Acc@1 69.332 Acc@5 89.640 loss 1.145 Accuracy of the model EMA on 50000 test images: 69.3% Max EMA accuracy: 69.33% Epoch: [208] [ 0/156] eta: 0:33:29 lr: 0.001949 min_lr: 0.001949 loss: 2.4708 (2.4708) weight_decay: 0.0500 (0.0500) time: 12.8799 data: 11.1613 max mem: 55573 Epoch: [208] [ 10/156] eta: 0:04:09 lr: 0.001947 min_lr: 0.001947 loss: 2.9779 (3.0305) weight_decay: 0.0500 (0.0500) time: 1.7082 data: 1.0151 max mem: 55573 Epoch: [208] [ 20/156] eta: 0:02:39 lr: 0.001944 min_lr: 0.001944 loss: 3.0252 (3.0875) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [208] [ 30/156] eta: 0:02:04 lr: 0.001942 min_lr: 0.001942 loss: 3.4010 (3.1195) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [208] [ 40/156] eta: 0:01:43 lr: 0.001939 min_lr: 0.001939 loss: 3.3063 (3.0873) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [208] [ 50/156] eta: 0:01:28 lr: 0.001937 min_lr: 0.001937 loss: 2.8788 (3.0705) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [208] [ 60/156] eta: 0:01:15 lr: 0.001934 min_lr: 0.001934 loss: 3.0866 (3.0880) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [208] [ 70/156] eta: 0:01:05 lr: 0.001932 min_lr: 0.001932 loss: 3.0608 (3.0864) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [208] [ 80/156] eta: 0:00:56 lr: 0.001929 min_lr: 0.001929 loss: 3.0562 (3.0834) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [208] [ 90/156] eta: 0:00:47 lr: 0.001927 min_lr: 0.001927 loss: 3.1270 (3.0818) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [208] [100/156] eta: 0:00:39 lr: 0.001925 min_lr: 0.001925 loss: 3.1270 (3.0738) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [208] [110/156] eta: 0:00:32 lr: 0.001922 min_lr: 0.001922 loss: 3.0974 (3.0586) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [208] [120/156] eta: 0:00:24 lr: 0.001920 min_lr: 0.001920 loss: 3.2718 (3.0817) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [208] [130/156] eta: 0:00:17 lr: 0.001917 min_lr: 0.001917 loss: 3.3758 (3.0969) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0011 max mem: 55573 Epoch: [208] [140/156] eta: 0:00:10 lr: 0.001915 min_lr: 0.001915 loss: 3.3332 (3.1032) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0009 max mem: 55573 Epoch: [208] [150/156] eta: 0:00:04 lr: 0.001912 min_lr: 0.001912 loss: 3.1068 (3.0982) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [208] [155/156] eta: 0:00:00 lr: 0.001911 min_lr: 0.001911 loss: 2.9517 (3.0930) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [208] Total time: 0:01:45 (0.6744 s / it) Averaged stats: lr: 0.001911 min_lr: 0.001911 loss: 2.9517 (3.1491) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9631 (0.9631) acc1: 81.2500 (81.2500) acc5: 95.5729 (95.5729) time: 6.8543 data: 6.6172 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9761 (1.0095) acc1: 80.0781 (77.6640) acc5: 95.5729 (94.2080) time: 1.5173 data: 1.3235 max mem: 55573 Test: Total time: 0:00:07 (1.5645 s / it) * Acc@1 78.166 Acc@5 94.404 loss 0.998 Accuracy of the model on the 50000 test images: 78.2% Max accuracy: 78.61% Test: [0/5] eta: 0:00:36 loss: 1.0734 (1.0734) acc1: 73.0469 (73.0469) acc5: 91.5365 (91.5365) time: 7.2293 data: 6.9934 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1026 (1.1173) acc1: 70.1823 (68.6080) acc5: 91.5365 (89.4080) time: 1.5911 data: 1.3988 max mem: 55573 Test: Total time: 0:00:08 (1.6162 s / it) * Acc@1 69.408 Acc@5 89.692 loss 1.142 Accuracy of the model EMA on 50000 test images: 69.4% Max EMA accuracy: 69.41% Epoch: [209] [ 0/156] eta: 0:34:21 lr: 0.001911 min_lr: 0.001911 loss: 3.6690 (3.6690) weight_decay: 0.0500 (0.0500) time: 13.2140 data: 10.4374 max mem: 55573 Epoch: [209] [ 10/156] eta: 0:04:15 lr: 0.001908 min_lr: 0.001908 loss: 3.2353 (3.1254) weight_decay: 0.0500 (0.0500) time: 1.7511 data: 0.9493 max mem: 55573 Epoch: [209] [ 20/156] eta: 0:02:42 lr: 0.001906 min_lr: 0.001906 loss: 3.0032 (3.0763) weight_decay: 0.0500 (0.0500) time: 0.5970 data: 0.0004 max mem: 55573 Epoch: [209] [ 30/156] eta: 0:02:06 lr: 0.001903 min_lr: 0.001903 loss: 3.2847 (3.1201) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [209] [ 40/156] eta: 0:01:44 lr: 0.001901 min_lr: 0.001901 loss: 3.4727 (3.1834) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [209] [ 50/156] eta: 0:01:29 lr: 0.001899 min_lr: 0.001899 loss: 3.3518 (3.1506) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [209] [ 60/156] eta: 0:01:16 lr: 0.001896 min_lr: 0.001896 loss: 3.1544 (3.1475) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [209] [ 70/156] eta: 0:01:06 lr: 0.001894 min_lr: 0.001894 loss: 3.2288 (3.1499) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [209] [ 80/156] eta: 0:00:56 lr: 0.001891 min_lr: 0.001891 loss: 3.3405 (3.1569) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [209] [ 90/156] eta: 0:00:48 lr: 0.001889 min_lr: 0.001889 loss: 3.0906 (3.1289) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [209] [100/156] eta: 0:00:40 lr: 0.001886 min_lr: 0.001886 loss: 2.9302 (3.1016) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [209] [110/156] eta: 0:00:32 lr: 0.001884 min_lr: 0.001884 loss: 3.2180 (3.1239) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [209] [120/156] eta: 0:00:25 lr: 0.001881 min_lr: 0.001881 loss: 3.3017 (3.1314) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [209] [130/156] eta: 0:00:17 lr: 0.001879 min_lr: 0.001879 loss: 3.3347 (3.1563) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0024 max mem: 55573 Epoch: [209] [140/156] eta: 0:00:10 lr: 0.001877 min_lr: 0.001877 loss: 3.2794 (3.1434) weight_decay: 0.0500 (0.0500) time: 0.5871 data: 0.0022 max mem: 55573 Epoch: [209] [150/156] eta: 0:00:04 lr: 0.001874 min_lr: 0.001874 loss: 3.0019 (3.1330) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [209] [155/156] eta: 0:00:00 lr: 0.001873 min_lr: 0.001873 loss: 3.0067 (3.1443) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [209] Total time: 0:01:45 (0.6779 s / it) Averaged stats: lr: 0.001873 min_lr: 0.001873 loss: 3.0067 (3.1496) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9831 (0.9831) acc1: 80.0781 (80.0781) acc5: 96.0938 (96.0938) time: 6.6074 data: 6.3692 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0735 (1.1035) acc1: 78.2552 (76.9920) acc5: 94.3396 (94.2720) time: 1.4669 data: 1.2740 max mem: 55573 Test: Total time: 0:00:07 (1.5141 s / it) * Acc@1 77.966 Acc@5 94.366 loss 1.072 Accuracy of the model on the 50000 test images: 78.0% Max accuracy: 78.61% Test: [0/5] eta: 0:00:32 loss: 1.0708 (1.0708) acc1: 73.0469 (73.0469) acc5: 91.5365 (91.5365) time: 6.5380 data: 6.3019 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0996 (1.1146) acc1: 70.5729 (68.7360) acc5: 91.5365 (89.5360) time: 1.5080 data: 1.3157 max mem: 55573 Test: Total time: 0:00:07 (1.5269 s / it) * Acc@1 69.514 Acc@5 89.734 loss 1.138 Accuracy of the model EMA on 50000 test images: 69.5% Max EMA accuracy: 69.51% Epoch: [210] [ 0/156] eta: 0:29:41 lr: 0.001873 min_lr: 0.001873 loss: 2.0959 (2.0959) weight_decay: 0.0500 (0.0500) time: 11.4205 data: 9.1617 max mem: 55573 Epoch: [210] [ 10/156] eta: 0:04:04 lr: 0.001870 min_lr: 0.001870 loss: 3.3309 (3.0669) weight_decay: 0.0500 (0.0500) time: 1.6769 data: 0.9129 max mem: 55573 Epoch: [210] [ 20/156] eta: 0:02:37 lr: 0.001868 min_lr: 0.001868 loss: 3.3289 (3.1228) weight_decay: 0.0500 (0.0500) time: 0.6471 data: 0.0442 max mem: 55573 Epoch: [210] [ 30/156] eta: 0:02:03 lr: 0.001865 min_lr: 0.001865 loss: 3.1541 (3.1505) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [210] [ 40/156] eta: 0:01:42 lr: 0.001863 min_lr: 0.001863 loss: 3.3196 (3.1643) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [210] [ 50/156] eta: 0:01:27 lr: 0.001860 min_lr: 0.001860 loss: 3.3196 (3.1454) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [210] [ 60/156] eta: 0:01:15 lr: 0.001858 min_lr: 0.001858 loss: 3.2585 (3.1659) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [210] [ 70/156] eta: 0:01:05 lr: 0.001856 min_lr: 0.001856 loss: 3.3563 (3.2044) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [210] [ 80/156] eta: 0:00:56 lr: 0.001853 min_lr: 0.001853 loss: 3.3563 (3.1772) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [210] [ 90/156] eta: 0:00:47 lr: 0.001851 min_lr: 0.001851 loss: 3.3470 (3.2091) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [210] [100/156] eta: 0:00:39 lr: 0.001848 min_lr: 0.001848 loss: 3.3504 (3.2131) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [210] [110/156] eta: 0:00:32 lr: 0.001846 min_lr: 0.001846 loss: 3.3256 (3.2012) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [210] [120/156] eta: 0:00:24 lr: 0.001843 min_lr: 0.001843 loss: 3.0427 (3.1804) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [210] [130/156] eta: 0:00:17 lr: 0.001841 min_lr: 0.001841 loss: 3.0427 (3.1736) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0011 max mem: 55573 Epoch: [210] [140/156] eta: 0:00:10 lr: 0.001839 min_lr: 0.001839 loss: 3.1942 (3.1638) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [210] [150/156] eta: 0:00:04 lr: 0.001836 min_lr: 0.001836 loss: 3.1942 (3.1618) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [210] [155/156] eta: 0:00:00 lr: 0.001835 min_lr: 0.001835 loss: 3.1942 (3.1635) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [210] Total time: 0:01:45 (0.6735 s / it) Averaged stats: lr: 0.001835 min_lr: 0.001835 loss: 3.1942 (3.1191) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0920 (1.0920) acc1: 80.9896 (80.9896) acc5: 96.7448 (96.7448) time: 6.9672 data: 6.7297 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1419 (1.1153) acc1: 79.2969 (77.6640) acc5: 95.8333 (94.7200) time: 1.5396 data: 1.3460 max mem: 55573 Test: Total time: 0:00:07 (1.5849 s / it) * Acc@1 78.314 Acc@5 94.408 loss 1.106 Accuracy of the model on the 50000 test images: 78.3% Max accuracy: 78.61% Test: [0/5] eta: 0:00:33 loss: 1.0685 (1.0685) acc1: 73.0469 (73.0469) acc5: 91.6667 (91.6667) time: 6.6596 data: 6.4235 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0971 (1.1121) acc1: 70.5729 (68.8320) acc5: 91.6667 (89.6320) time: 1.4773 data: 1.2848 max mem: 55573 Test: Total time: 0:00:07 (1.4923 s / it) * Acc@1 69.586 Acc@5 89.764 loss 1.136 Accuracy of the model EMA on 50000 test images: 69.6% Max EMA accuracy: 69.59% Epoch: [211] [ 0/156] eta: 0:35:57 lr: 0.001835 min_lr: 0.001835 loss: 2.3913 (2.3913) weight_decay: 0.0500 (0.0500) time: 13.8286 data: 12.7412 max mem: 55573 Epoch: [211] [ 10/156] eta: 0:04:22 lr: 0.001832 min_lr: 0.001832 loss: 3.1645 (3.0139) weight_decay: 0.0500 (0.0500) time: 1.7997 data: 1.1586 max mem: 55573 Epoch: [211] [ 20/156] eta: 0:02:46 lr: 0.001830 min_lr: 0.001830 loss: 3.2544 (3.0907) weight_decay: 0.0500 (0.0500) time: 0.5952 data: 0.0004 max mem: 55573 Epoch: [211] [ 30/156] eta: 0:02:08 lr: 0.001828 min_lr: 0.001828 loss: 3.3390 (3.1218) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [211] [ 40/156] eta: 0:01:46 lr: 0.001825 min_lr: 0.001825 loss: 3.1527 (3.1032) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [211] [ 50/156] eta: 0:01:30 lr: 0.001823 min_lr: 0.001823 loss: 3.0350 (3.0817) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [211] [ 60/156] eta: 0:01:17 lr: 0.001820 min_lr: 0.001820 loss: 3.1466 (3.0876) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0005 max mem: 55573 Epoch: [211] [ 70/156] eta: 0:01:06 lr: 0.001818 min_lr: 0.001818 loss: 2.9999 (3.0465) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [211] [ 80/156] eta: 0:00:57 lr: 0.001815 min_lr: 0.001815 loss: 2.9878 (3.0590) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [211] [ 90/156] eta: 0:00:48 lr: 0.001813 min_lr: 0.001813 loss: 3.1868 (3.0660) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [211] [100/156] eta: 0:00:40 lr: 0.001811 min_lr: 0.001811 loss: 3.1868 (3.0688) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [211] [110/156] eta: 0:00:32 lr: 0.001808 min_lr: 0.001808 loss: 3.3567 (3.0996) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [211] [120/156] eta: 0:00:25 lr: 0.001806 min_lr: 0.001806 loss: 3.3516 (3.1045) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [211] [130/156] eta: 0:00:17 lr: 0.001803 min_lr: 0.001803 loss: 3.3195 (3.1233) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0011 max mem: 55573 Epoch: [211] [140/156] eta: 0:00:10 lr: 0.001801 min_lr: 0.001801 loss: 3.3195 (3.1383) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.0010 max mem: 55573 Epoch: [211] [150/156] eta: 0:00:04 lr: 0.001799 min_lr: 0.001799 loss: 3.2773 (3.1448) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [211] [155/156] eta: 0:00:00 lr: 0.001797 min_lr: 0.001797 loss: 3.1939 (3.1315) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [211] Total time: 0:01:46 (0.6813 s / it) Averaged stats: lr: 0.001797 min_lr: 0.001797 loss: 3.1939 (3.1266) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9255 (0.9255) acc1: 81.7708 (81.7708) acc5: 96.7448 (96.7448) time: 6.9882 data: 6.7502 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9730 (0.9559) acc1: 78.9062 (77.3120) acc5: 95.5729 (94.6560) time: 1.5441 data: 1.3501 max mem: 55573 Test: Total time: 0:00:07 (1.5872 s / it) * Acc@1 78.652 Acc@5 94.518 loss 0.955 Accuracy of the model on the 50000 test images: 78.7% Max accuracy: 78.65% Test: [0/5] eta: 0:00:35 loss: 1.0660 (1.0660) acc1: 73.1771 (73.1771) acc5: 91.9271 (91.9271) time: 7.0900 data: 6.8539 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0944 (1.1095) acc1: 70.8333 (68.9920) acc5: 91.9271 (89.7280) time: 1.5634 data: 1.3709 max mem: 55573 Test: Total time: 0:00:07 (1.5853 s / it) * Acc@1 69.664 Acc@5 89.812 loss 1.133 Accuracy of the model EMA on 50000 test images: 69.7% Max EMA accuracy: 69.66% Epoch: [212] [ 0/156] eta: 0:36:33 lr: 0.001797 min_lr: 0.001797 loss: 2.7085 (2.7085) weight_decay: 0.0500 (0.0500) time: 14.0600 data: 12.6390 max mem: 55573 Epoch: [212] [ 10/156] eta: 0:04:26 lr: 0.001795 min_lr: 0.001795 loss: 3.2344 (3.2011) weight_decay: 0.0500 (0.0500) time: 1.8266 data: 1.1493 max mem: 55573 Epoch: [212] [ 20/156] eta: 0:02:48 lr: 0.001792 min_lr: 0.001792 loss: 3.3377 (3.2170) weight_decay: 0.0500 (0.0500) time: 0.5973 data: 0.0004 max mem: 55573 Epoch: [212] [ 30/156] eta: 0:02:09 lr: 0.001790 min_lr: 0.001790 loss: 3.2983 (3.1623) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [212] [ 40/156] eta: 0:01:46 lr: 0.001788 min_lr: 0.001788 loss: 3.3371 (3.2168) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [212] [ 50/156] eta: 0:01:30 lr: 0.001785 min_lr: 0.001785 loss: 3.3583 (3.2174) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [212] [ 60/156] eta: 0:01:18 lr: 0.001783 min_lr: 0.001783 loss: 3.2825 (3.2037) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [212] [ 70/156] eta: 0:01:07 lr: 0.001780 min_lr: 0.001780 loss: 2.9533 (3.1330) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [212] [ 80/156] eta: 0:00:57 lr: 0.001778 min_lr: 0.001778 loss: 2.8816 (3.1092) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [212] [ 90/156] eta: 0:00:48 lr: 0.001776 min_lr: 0.001776 loss: 3.1878 (3.1240) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [212] [100/156] eta: 0:00:40 lr: 0.001773 min_lr: 0.001773 loss: 3.1878 (3.1286) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [212] [110/156] eta: 0:00:32 lr: 0.001771 min_lr: 0.001771 loss: 3.2324 (3.1239) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [212] [120/156] eta: 0:00:25 lr: 0.001768 min_lr: 0.001768 loss: 3.3208 (3.1313) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [212] [130/156] eta: 0:00:18 lr: 0.001766 min_lr: 0.001766 loss: 3.2068 (3.1336) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0010 max mem: 55573 Epoch: [212] [140/156] eta: 0:00:10 lr: 0.001764 min_lr: 0.001764 loss: 3.1470 (3.1226) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0009 max mem: 55573 Epoch: [212] [150/156] eta: 0:00:04 lr: 0.001761 min_lr: 0.001761 loss: 3.0337 (3.1111) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [212] [155/156] eta: 0:00:00 lr: 0.001760 min_lr: 0.001760 loss: 2.9615 (3.1158) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [212] Total time: 0:01:46 (0.6831 s / it) Averaged stats: lr: 0.001760 min_lr: 0.001760 loss: 2.9615 (3.1152) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.9168 (0.9168) acc1: 81.6406 (81.6406) acc5: 96.0938 (96.0938) time: 7.1023 data: 6.8649 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0173 (1.0017) acc1: 79.5573 (77.7920) acc5: 96.0938 (94.8480) time: 1.5670 data: 1.3730 max mem: 55573 Test: Total time: 0:00:08 (1.6102 s / it) * Acc@1 78.838 Acc@5 94.634 loss 0.985 Accuracy of the model on the 50000 test images: 78.8% Max accuracy: 78.84% Test: [0/5] eta: 0:00:33 loss: 1.0634 (1.0634) acc1: 73.1771 (73.1771) acc5: 91.9271 (91.9271) time: 6.7499 data: 6.5133 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0916 (1.1070) acc1: 70.8333 (68.9920) acc5: 91.9271 (89.6960) time: 1.4960 data: 1.3029 max mem: 55573 Test: Total time: 0:00:07 (1.5145 s / it) * Acc@1 69.732 Acc@5 89.836 loss 1.130 Accuracy of the model EMA on 50000 test images: 69.7% Max EMA accuracy: 69.73% Epoch: [213] [ 0/156] eta: 0:33:36 lr: 0.001760 min_lr: 0.001760 loss: 3.0784 (3.0784) weight_decay: 0.0500 (0.0500) time: 12.9233 data: 9.3686 max mem: 55573 Epoch: [213] [ 10/156] eta: 0:04:14 lr: 0.001757 min_lr: 0.001757 loss: 3.1413 (3.1599) weight_decay: 0.0500 (0.0500) time: 1.7406 data: 0.8520 max mem: 55573 Epoch: [213] [ 20/156] eta: 0:02:42 lr: 0.001755 min_lr: 0.001755 loss: 3.3922 (3.2307) weight_decay: 0.0500 (0.0500) time: 0.6062 data: 0.0004 max mem: 55573 Epoch: [213] [ 30/156] eta: 0:02:05 lr: 0.001753 min_lr: 0.001753 loss: 3.3571 (3.1660) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [213] [ 40/156] eta: 0:01:44 lr: 0.001750 min_lr: 0.001750 loss: 3.1094 (3.1780) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [213] [ 50/156] eta: 0:01:28 lr: 0.001748 min_lr: 0.001748 loss: 3.1200 (3.1489) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [213] [ 60/156] eta: 0:01:16 lr: 0.001746 min_lr: 0.001746 loss: 3.2226 (3.1607) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [213] [ 70/156] eta: 0:01:06 lr: 0.001743 min_lr: 0.001743 loss: 3.3075 (3.1746) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [213] [ 80/156] eta: 0:00:56 lr: 0.001741 min_lr: 0.001741 loss: 3.2598 (3.1758) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [213] [ 90/156] eta: 0:00:48 lr: 0.001738 min_lr: 0.001738 loss: 3.2812 (3.1849) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [213] [100/156] eta: 0:00:40 lr: 0.001736 min_lr: 0.001736 loss: 3.3209 (3.1903) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [213] [110/156] eta: 0:00:32 lr: 0.001734 min_lr: 0.001734 loss: 3.2692 (3.1919) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [213] [120/156] eta: 0:00:25 lr: 0.001731 min_lr: 0.001731 loss: 3.1429 (3.1840) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [213] [130/156] eta: 0:00:17 lr: 0.001729 min_lr: 0.001729 loss: 3.0150 (3.1823) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0011 max mem: 55573 Epoch: [213] [140/156] eta: 0:00:10 lr: 0.001727 min_lr: 0.001727 loss: 3.0771 (3.1649) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0009 max mem: 55573 Epoch: [213] [150/156] eta: 0:00:04 lr: 0.001724 min_lr: 0.001724 loss: 3.0788 (3.1627) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [213] [155/156] eta: 0:00:00 lr: 0.001723 min_lr: 0.001723 loss: 3.1482 (3.1652) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [213] Total time: 0:01:45 (0.6764 s / it) Averaged stats: lr: 0.001723 min_lr: 0.001723 loss: 3.1482 (3.1216) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.9814 (0.9814) acc1: 80.8594 (80.8594) acc5: 96.6146 (96.6146) time: 7.2083 data: 6.9709 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0367 (1.0449) acc1: 79.8177 (77.0560) acc5: 95.9635 (94.6240) time: 1.5881 data: 1.3943 max mem: 55573 Test: Total time: 0:00:08 (1.6284 s / it) * Acc@1 78.454 Acc@5 94.612 loss 1.015 Accuracy of the model on the 50000 test images: 78.5% Max accuracy: 78.84% Test: [0/5] eta: 0:00:34 loss: 1.0611 (1.0611) acc1: 73.3073 (73.3073) acc5: 91.9271 (91.9271) time: 6.9508 data: 6.7147 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0893 (1.1048) acc1: 70.8333 (69.0560) acc5: 91.9271 (89.6960) time: 1.5354 data: 1.3430 max mem: 55573 Test: Total time: 0:00:07 (1.5543 s / it) * Acc@1 69.828 Acc@5 89.870 loss 1.127 Accuracy of the model EMA on 50000 test images: 69.8% Max EMA accuracy: 69.83% Epoch: [214] [ 0/156] eta: 0:38:02 lr: 0.001723 min_lr: 0.001723 loss: 3.5207 (3.5207) weight_decay: 0.0500 (0.0500) time: 14.6312 data: 9.5946 max mem: 55573 Epoch: [214] [ 10/156] eta: 0:04:33 lr: 0.001720 min_lr: 0.001720 loss: 3.2970 (3.1041) weight_decay: 0.0500 (0.0500) time: 1.8717 data: 0.8726 max mem: 55573 Epoch: [214] [ 20/156] eta: 0:02:51 lr: 0.001718 min_lr: 0.001718 loss: 3.1082 (3.0622) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0003 max mem: 55573 Epoch: [214] [ 30/156] eta: 0:02:11 lr: 0.001716 min_lr: 0.001716 loss: 2.8791 (2.9601) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [214] [ 40/156] eta: 0:01:48 lr: 0.001713 min_lr: 0.001713 loss: 3.0819 (2.9951) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [214] [ 50/156] eta: 0:01:31 lr: 0.001711 min_lr: 0.001711 loss: 3.1240 (2.9942) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [214] [ 60/156] eta: 0:01:18 lr: 0.001709 min_lr: 0.001709 loss: 3.0816 (3.0036) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [214] [ 70/156] eta: 0:01:07 lr: 0.001706 min_lr: 0.001706 loss: 3.1293 (3.0124) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [214] [ 80/156] eta: 0:00:58 lr: 0.001704 min_lr: 0.001704 loss: 3.1293 (3.0294) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [214] [ 90/156] eta: 0:00:49 lr: 0.001702 min_lr: 0.001702 loss: 3.2887 (3.0675) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [214] [100/156] eta: 0:00:40 lr: 0.001699 min_lr: 0.001699 loss: 3.3691 (3.0988) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [214] [110/156] eta: 0:00:32 lr: 0.001697 min_lr: 0.001697 loss: 3.3346 (3.0873) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [214] [120/156] eta: 0:00:25 lr: 0.001695 min_lr: 0.001695 loss: 3.3139 (3.1101) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [214] [130/156] eta: 0:00:18 lr: 0.001692 min_lr: 0.001692 loss: 3.3611 (3.1234) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0010 max mem: 55573 Epoch: [214] [140/156] eta: 0:00:11 lr: 0.001690 min_lr: 0.001690 loss: 3.3584 (3.1303) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0008 max mem: 55573 Epoch: [214] [150/156] eta: 0:00:04 lr: 0.001688 min_lr: 0.001688 loss: 3.3108 (3.1370) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [214] [155/156] eta: 0:00:00 lr: 0.001686 min_lr: 0.001686 loss: 3.3009 (3.1334) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [214] Total time: 0:01:47 (0.6862 s / it) Averaged stats: lr: 0.001686 min_lr: 0.001686 loss: 3.3009 (3.0996) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.9133 (0.9133) acc1: 81.1198 (81.1198) acc5: 96.3542 (96.3542) time: 7.0429 data: 6.8054 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9660 (0.9932) acc1: 79.6875 (77.4720) acc5: 96.2264 (94.5280) time: 1.5550 data: 1.3612 max mem: 55573 Test: Total time: 0:00:08 (1.6169 s / it) * Acc@1 78.810 Acc@5 94.690 loss 0.990 Accuracy of the model on the 50000 test images: 78.8% Max accuracy: 78.84% Test: [0/5] eta: 0:00:36 loss: 1.0587 (1.0587) acc1: 73.4375 (73.4375) acc5: 92.0573 (92.0573) time: 7.3359 data: 7.0998 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0868 (1.1025) acc1: 71.2240 (69.2160) acc5: 92.0573 (89.7280) time: 1.6126 data: 1.4201 max mem: 55573 Test: Total time: 0:00:08 (1.6332 s / it) * Acc@1 69.906 Acc@5 89.906 loss 1.125 Accuracy of the model EMA on 50000 test images: 69.9% Max EMA accuracy: 69.91% Epoch: [215] [ 0/156] eta: 0:30:23 lr: 0.001686 min_lr: 0.001686 loss: 3.4968 (3.4968) weight_decay: 0.0500 (0.0500) time: 11.6875 data: 9.7937 max mem: 55573 Epoch: [215] [ 10/156] eta: 0:04:16 lr: 0.001684 min_lr: 0.001684 loss: 3.4175 (3.1414) weight_decay: 0.0500 (0.0500) time: 1.7590 data: 0.8907 max mem: 55573 Epoch: [215] [ 20/156] eta: 0:02:43 lr: 0.001681 min_lr: 0.001681 loss: 3.3409 (3.2307) weight_decay: 0.0500 (0.0500) time: 0.6792 data: 0.0004 max mem: 55573 Epoch: [215] [ 30/156] eta: 0:02:06 lr: 0.001679 min_lr: 0.001679 loss: 3.3023 (3.1564) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [215] [ 40/156] eta: 0:01:44 lr: 0.001677 min_lr: 0.001677 loss: 3.1451 (3.1838) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [215] [ 50/156] eta: 0:01:29 lr: 0.001674 min_lr: 0.001674 loss: 3.2846 (3.1691) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [215] [ 60/156] eta: 0:01:16 lr: 0.001672 min_lr: 0.001672 loss: 3.2832 (3.1867) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [215] [ 70/156] eta: 0:01:06 lr: 0.001670 min_lr: 0.001670 loss: 3.3194 (3.1957) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [215] [ 80/156] eta: 0:00:56 lr: 0.001667 min_lr: 0.001667 loss: 3.2864 (3.1868) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [215] [ 90/156] eta: 0:00:48 lr: 0.001665 min_lr: 0.001665 loss: 3.4295 (3.2122) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [215] [100/156] eta: 0:00:40 lr: 0.001663 min_lr: 0.001663 loss: 3.3292 (3.1779) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [215] [110/156] eta: 0:00:32 lr: 0.001660 min_lr: 0.001660 loss: 2.7526 (3.1438) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [215] [120/156] eta: 0:00:25 lr: 0.001658 min_lr: 0.001658 loss: 3.1280 (3.1461) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [215] [130/156] eta: 0:00:17 lr: 0.001656 min_lr: 0.001656 loss: 3.0126 (3.1309) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0010 max mem: 55573 Epoch: [215] [140/156] eta: 0:00:10 lr: 0.001653 min_lr: 0.001653 loss: 3.1092 (3.1377) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0009 max mem: 55573 Epoch: [215] [150/156] eta: 0:00:04 lr: 0.001651 min_lr: 0.001651 loss: 3.1092 (3.1202) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [215] [155/156] eta: 0:00:00 lr: 0.001650 min_lr: 0.001650 loss: 3.0807 (3.1247) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [215] Total time: 0:01:45 (0.6784 s / it) Averaged stats: lr: 0.001650 min_lr: 0.001650 loss: 3.0807 (3.1125) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9190 (0.9190) acc1: 82.5521 (82.5521) acc5: 95.9635 (95.9635) time: 6.9906 data: 6.7533 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0183 (1.0213) acc1: 79.4271 (77.8240) acc5: 94.3396 (94.4640) time: 1.5445 data: 1.3507 max mem: 55573 Test: Total time: 0:00:07 (1.5843 s / it) * Acc@1 78.648 Acc@5 94.648 loss 1.006 Accuracy of the model on the 50000 test images: 78.6% Max accuracy: 78.84% Test: [0/5] eta: 0:00:35 loss: 1.0564 (1.0564) acc1: 73.4375 (73.4375) acc5: 92.1875 (92.1875) time: 7.0016 data: 6.7656 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0846 (1.1003) acc1: 71.2240 (69.2160) acc5: 92.1875 (89.7600) time: 1.5890 data: 1.3966 max mem: 55573 Test: Total time: 0:00:08 (1.6108 s / it) * Acc@1 69.992 Acc@5 89.942 loss 1.122 Accuracy of the model EMA on 50000 test images: 70.0% Max EMA accuracy: 69.99% Epoch: [216] [ 0/156] eta: 0:31:46 lr: 0.001650 min_lr: 0.001650 loss: 3.3019 (3.3019) weight_decay: 0.0500 (0.0500) time: 12.2226 data: 11.6346 max mem: 55573 Epoch: [216] [ 10/156] eta: 0:04:03 lr: 0.001647 min_lr: 0.001647 loss: 3.3019 (3.0478) weight_decay: 0.0500 (0.0500) time: 1.6683 data: 1.0581 max mem: 55573 Epoch: [216] [ 20/156] eta: 0:02:37 lr: 0.001645 min_lr: 0.001645 loss: 3.2251 (3.0778) weight_decay: 0.0500 (0.0500) time: 0.6049 data: 0.0004 max mem: 55573 Epoch: [216] [ 30/156] eta: 0:02:02 lr: 0.001643 min_lr: 0.001643 loss: 2.9045 (2.9681) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0004 max mem: 55573 Epoch: [216] [ 40/156] eta: 0:01:42 lr: 0.001640 min_lr: 0.001640 loss: 2.8963 (2.9995) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [216] [ 50/156] eta: 0:01:27 lr: 0.001638 min_lr: 0.001638 loss: 3.3549 (3.0728) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [216] [ 60/156] eta: 0:01:15 lr: 0.001636 min_lr: 0.001636 loss: 3.1543 (3.0433) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [216] [ 70/156] eta: 0:01:05 lr: 0.001633 min_lr: 0.001633 loss: 2.9654 (3.0307) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [216] [ 80/156] eta: 0:00:56 lr: 0.001631 min_lr: 0.001631 loss: 3.0545 (3.0375) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [216] [ 90/156] eta: 0:00:47 lr: 0.001629 min_lr: 0.001629 loss: 3.0796 (3.0214) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [216] [100/156] eta: 0:00:39 lr: 0.001626 min_lr: 0.001626 loss: 3.0796 (3.0181) weight_decay: 0.0500 (0.0500) time: 0.6030 data: 0.0004 max mem: 55573 Epoch: [216] [110/156] eta: 0:00:32 lr: 0.001624 min_lr: 0.001624 loss: 3.3750 (3.0569) weight_decay: 0.0500 (0.0500) time: 0.6025 data: 0.0004 max mem: 55573 Epoch: [216] [120/156] eta: 0:00:24 lr: 0.001622 min_lr: 0.001622 loss: 3.4004 (3.0658) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [216] [130/156] eta: 0:00:17 lr: 0.001620 min_lr: 0.001620 loss: 3.2722 (3.0725) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0010 max mem: 55573 Epoch: [216] [140/156] eta: 0:00:10 lr: 0.001617 min_lr: 0.001617 loss: 3.3209 (3.0755) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0009 max mem: 55573 Epoch: [216] [150/156] eta: 0:00:04 lr: 0.001615 min_lr: 0.001615 loss: 3.3481 (3.0819) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [216] [155/156] eta: 0:00:00 lr: 0.001614 min_lr: 0.001614 loss: 3.3474 (3.0902) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [216] Total time: 0:01:45 (0.6737 s / it) Averaged stats: lr: 0.001614 min_lr: 0.001614 loss: 3.3474 (3.0937) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 1.0484 (1.0484) acc1: 81.7708 (81.7708) acc5: 96.7448 (96.7448) time: 7.0718 data: 6.8343 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.1035 (1.1054) acc1: 79.4271 (78.0800) acc5: 96.2264 (95.1680) time: 1.5608 data: 1.3670 max mem: 55573 Test: Total time: 0:00:08 (1.6048 s / it) * Acc@1 79.098 Acc@5 94.884 loss 1.082 Accuracy of the model on the 50000 test images: 79.1% Max accuracy: 79.10% Test: [0/5] eta: 0:00:33 loss: 1.0542 (1.0542) acc1: 73.4375 (73.4375) acc5: 92.1875 (92.1875) time: 6.6600 data: 6.4238 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0825 (1.0982) acc1: 71.3542 (69.2800) acc5: 92.1875 (89.8240) time: 1.4774 data: 1.2849 max mem: 55573 Test: Total time: 0:00:07 (1.4976 s / it) * Acc@1 70.066 Acc@5 90.000 loss 1.120 Accuracy of the model EMA on 50000 test images: 70.1% Max EMA accuracy: 70.07% Epoch: [217] [ 0/156] eta: 0:31:28 lr: 0.001613 min_lr: 0.001613 loss: 2.9173 (2.9173) weight_decay: 0.0500 (0.0500) time: 12.1040 data: 9.1032 max mem: 55573 Epoch: [217] [ 10/156] eta: 0:04:09 lr: 0.001611 min_lr: 0.001611 loss: 3.3209 (3.2667) weight_decay: 0.0500 (0.0500) time: 1.7105 data: 0.9016 max mem: 55573 Epoch: [217] [ 20/156] eta: 0:02:40 lr: 0.001609 min_lr: 0.001609 loss: 3.3010 (3.2396) weight_decay: 0.0500 (0.0500) time: 0.6301 data: 0.0410 max mem: 55573 Epoch: [217] [ 30/156] eta: 0:02:04 lr: 0.001607 min_lr: 0.001607 loss: 3.2626 (3.2161) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [217] [ 40/156] eta: 0:01:43 lr: 0.001604 min_lr: 0.001604 loss: 3.1680 (3.1901) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [217] [ 50/156] eta: 0:01:28 lr: 0.001602 min_lr: 0.001602 loss: 3.2731 (3.1772) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0007 max mem: 55573 Epoch: [217] [ 60/156] eta: 0:01:16 lr: 0.001600 min_lr: 0.001600 loss: 3.1267 (3.1162) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0007 max mem: 55573 Epoch: [217] [ 70/156] eta: 0:01:05 lr: 0.001597 min_lr: 0.001597 loss: 2.8833 (3.0758) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [217] [ 80/156] eta: 0:00:56 lr: 0.001595 min_lr: 0.001595 loss: 3.0442 (3.0974) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [217] [ 90/156] eta: 0:00:47 lr: 0.001593 min_lr: 0.001593 loss: 3.2931 (3.0884) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [217] [100/156] eta: 0:00:39 lr: 0.001590 min_lr: 0.001590 loss: 2.7470 (3.0515) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [217] [110/156] eta: 0:00:32 lr: 0.001588 min_lr: 0.001588 loss: 2.7427 (3.0537) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [217] [120/156] eta: 0:00:24 lr: 0.001586 min_lr: 0.001586 loss: 3.0582 (3.0505) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [217] [130/156] eta: 0:00:17 lr: 0.001584 min_lr: 0.001584 loss: 3.1775 (3.0656) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0015 max mem: 55573 Epoch: [217] [140/156] eta: 0:00:10 lr: 0.001581 min_lr: 0.001581 loss: 3.2510 (3.0581) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.0014 max mem: 55573 Epoch: [217] [150/156] eta: 0:00:04 lr: 0.001579 min_lr: 0.001579 loss: 3.2061 (3.0598) weight_decay: 0.0500 (0.0500) time: 0.5834 data: 0.0002 max mem: 55573 Epoch: [217] [155/156] eta: 0:00:00 lr: 0.001578 min_lr: 0.001578 loss: 3.2061 (3.0627) weight_decay: 0.0500 (0.0500) time: 0.5833 data: 0.0001 max mem: 55573 Epoch: [217] Total time: 0:01:45 (0.6750 s / it) Averaged stats: lr: 0.001578 min_lr: 0.001578 loss: 3.2061 (3.1009) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8942 (0.8942) acc1: 81.7708 (81.7708) acc5: 96.3542 (96.3542) time: 6.7488 data: 6.5114 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9953 (1.0009) acc1: 80.8594 (78.1120) acc5: 95.8333 (94.4640) time: 1.4953 data: 1.3024 max mem: 55573 Test: Total time: 0:00:07 (1.5144 s / it) * Acc@1 78.890 Acc@5 94.752 loss 0.986 Accuracy of the model on the 50000 test images: 78.9% Max accuracy: 79.10% Test: [0/5] eta: 0:00:34 loss: 1.0521 (1.0521) acc1: 73.4375 (73.4375) acc5: 92.1875 (92.1875) time: 6.8657 data: 6.6295 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0804 (1.0961) acc1: 71.3542 (69.3440) acc5: 92.1875 (89.8560) time: 1.5183 data: 1.3260 max mem: 55573 Test: Total time: 0:00:07 (1.5358 s / it) * Acc@1 70.124 Acc@5 90.054 loss 1.118 Accuracy of the model EMA on 50000 test images: 70.1% Max EMA accuracy: 70.12% Epoch: [218] [ 0/156] eta: 0:36:41 lr: 0.001578 min_lr: 0.001578 loss: 2.4926 (2.4926) weight_decay: 0.0500 (0.0500) time: 14.1109 data: 9.5639 max mem: 55573 Epoch: [218] [ 10/156] eta: 0:04:25 lr: 0.001575 min_lr: 0.001575 loss: 3.2472 (3.0367) weight_decay: 0.0500 (0.0500) time: 1.8205 data: 0.8699 max mem: 55573 Epoch: [218] [ 20/156] eta: 0:02:47 lr: 0.001573 min_lr: 0.001573 loss: 3.1608 (3.0535) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [218] [ 30/156] eta: 0:02:09 lr: 0.001571 min_lr: 0.001571 loss: 3.1470 (3.0806) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [218] [ 40/156] eta: 0:01:46 lr: 0.001568 min_lr: 0.001568 loss: 3.2326 (3.0822) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [218] [ 50/156] eta: 0:01:30 lr: 0.001566 min_lr: 0.001566 loss: 3.1881 (3.0642) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [218] [ 60/156] eta: 0:01:17 lr: 0.001564 min_lr: 0.001564 loss: 3.1698 (3.0881) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [218] [ 70/156] eta: 0:01:07 lr: 0.001562 min_lr: 0.001562 loss: 3.2108 (3.0966) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [218] [ 80/156] eta: 0:00:57 lr: 0.001559 min_lr: 0.001559 loss: 2.9678 (3.0782) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [218] [ 90/156] eta: 0:00:48 lr: 0.001557 min_lr: 0.001557 loss: 3.1357 (3.0962) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0005 max mem: 55573 Epoch: [218] [100/156] eta: 0:00:40 lr: 0.001555 min_lr: 0.001555 loss: 3.0928 (3.0710) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0007 max mem: 55573 Epoch: [218] [110/156] eta: 0:00:32 lr: 0.001553 min_lr: 0.001553 loss: 3.0385 (3.0724) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0007 max mem: 55573 Epoch: [218] [120/156] eta: 0:00:25 lr: 0.001550 min_lr: 0.001550 loss: 3.1632 (3.0741) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [218] [130/156] eta: 0:00:18 lr: 0.001548 min_lr: 0.001548 loss: 3.2455 (3.0906) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0017 max mem: 55573 Epoch: [218] [140/156] eta: 0:00:10 lr: 0.001546 min_lr: 0.001546 loss: 3.1546 (3.0755) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0016 max mem: 55573 Epoch: [218] [150/156] eta: 0:00:04 lr: 0.001543 min_lr: 0.001543 loss: 3.0195 (3.0836) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [218] [155/156] eta: 0:00:00 lr: 0.001542 min_lr: 0.001542 loss: 3.0396 (3.0868) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [218] Total time: 0:01:46 (0.6828 s / it) Averaged stats: lr: 0.001542 min_lr: 0.001542 loss: 3.0396 (3.0910) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9451 (0.9451) acc1: 83.0729 (83.0729) acc5: 96.7448 (96.7448) time: 6.7343 data: 6.4972 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0424 (1.0551) acc1: 79.5573 (78.2080) acc5: 96.2264 (94.7520) time: 1.4927 data: 1.2996 max mem: 55573 Test: Total time: 0:00:07 (1.5245 s / it) * Acc@1 79.088 Acc@5 94.710 loss 1.049 Accuracy of the model on the 50000 test images: 79.1% Max accuracy: 79.10% Test: [0/5] eta: 0:00:36 loss: 1.0501 (1.0501) acc1: 73.5677 (73.5677) acc5: 92.1875 (92.1875) time: 7.3706 data: 7.1345 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0784 (1.0942) acc1: 71.3542 (69.3760) acc5: 92.1875 (89.8880) time: 1.6195 data: 1.4270 max mem: 55573 Test: Total time: 0:00:08 (1.6351 s / it) * Acc@1 70.186 Acc@5 90.096 loss 1.116 Accuracy of the model EMA on 50000 test images: 70.2% Max EMA accuracy: 70.19% Epoch: [219] [ 0/156] eta: 0:31:28 lr: 0.001542 min_lr: 0.001542 loss: 3.4998 (3.4998) weight_decay: 0.0500 (0.0500) time: 12.1065 data: 8.2062 max mem: 55573 Epoch: [219] [ 10/156] eta: 0:04:16 lr: 0.001540 min_lr: 0.001540 loss: 2.9813 (2.9555) weight_decay: 0.0500 (0.0500) time: 1.7572 data: 0.8639 max mem: 55573 Epoch: [219] [ 20/156] eta: 0:02:43 lr: 0.001538 min_lr: 0.001538 loss: 3.1987 (3.0181) weight_decay: 0.0500 (0.0500) time: 0.6561 data: 0.0650 max mem: 55573 Epoch: [219] [ 30/156] eta: 0:02:06 lr: 0.001535 min_lr: 0.001535 loss: 3.1987 (3.0151) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [219] [ 40/156] eta: 0:01:44 lr: 0.001533 min_lr: 0.001533 loss: 3.0810 (2.9436) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [219] [ 50/156] eta: 0:01:29 lr: 0.001531 min_lr: 0.001531 loss: 2.7775 (2.9590) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [219] [ 60/156] eta: 0:01:16 lr: 0.001529 min_lr: 0.001529 loss: 2.7869 (2.9537) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [219] [ 70/156] eta: 0:01:06 lr: 0.001526 min_lr: 0.001526 loss: 2.9095 (2.9490) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [219] [ 80/156] eta: 0:00:56 lr: 0.001524 min_lr: 0.001524 loss: 3.3059 (2.9711) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [219] [ 90/156] eta: 0:00:48 lr: 0.001522 min_lr: 0.001522 loss: 3.2101 (2.9744) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [219] [100/156] eta: 0:00:40 lr: 0.001519 min_lr: 0.001519 loss: 3.1955 (2.9901) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [219] [110/156] eta: 0:00:32 lr: 0.001517 min_lr: 0.001517 loss: 3.2248 (3.0062) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [219] [120/156] eta: 0:00:25 lr: 0.001515 min_lr: 0.001515 loss: 3.2248 (3.0185) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [219] [130/156] eta: 0:00:17 lr: 0.001513 min_lr: 0.001513 loss: 3.1717 (3.0236) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0011 max mem: 55573 Epoch: [219] [140/156] eta: 0:00:10 lr: 0.001510 min_lr: 0.001510 loss: 3.1013 (3.0370) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0010 max mem: 55573 Epoch: [219] [150/156] eta: 0:00:04 lr: 0.001508 min_lr: 0.001508 loss: 3.1013 (3.0368) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [219] [155/156] eta: 0:00:00 lr: 0.001507 min_lr: 0.001507 loss: 3.3211 (3.0485) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [219] Total time: 0:01:45 (0.6783 s / it) Averaged stats: lr: 0.001507 min_lr: 0.001507 loss: 3.3211 (3.0949) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.9831 (0.9831) acc1: 81.9010 (81.9010) acc5: 96.7448 (96.7448) time: 7.1023 data: 6.8649 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0688 (1.0889) acc1: 79.8177 (77.9840) acc5: 95.7031 (94.6880) time: 1.5666 data: 1.3731 max mem: 55573 Test: Total time: 0:00:08 (1.6180 s / it) * Acc@1 78.978 Acc@5 94.882 loss 1.068 Accuracy of the model on the 50000 test images: 79.0% Max accuracy: 79.10% Test: [0/5] eta: 0:00:35 loss: 1.0484 (1.0484) acc1: 73.5677 (73.5677) acc5: 92.1875 (92.1875) time: 7.1107 data: 6.8745 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0767 (1.0926) acc1: 71.3542 (69.3760) acc5: 92.1875 (89.9200) time: 1.5675 data: 1.3750 max mem: 55573 Test: Total time: 0:00:07 (1.5904 s / it) * Acc@1 70.276 Acc@5 90.134 loss 1.114 Accuracy of the model EMA on 50000 test images: 70.3% Max EMA accuracy: 70.28% Epoch: [220] [ 0/156] eta: 0:35:38 lr: 0.001507 min_lr: 0.001507 loss: 3.3132 (3.3132) weight_decay: 0.0500 (0.0500) time: 13.7105 data: 13.0661 max mem: 55573 Epoch: [220] [ 10/156] eta: 0:04:20 lr: 0.001505 min_lr: 0.001505 loss: 3.3232 (3.2094) weight_decay: 0.0500 (0.0500) time: 1.7850 data: 1.1881 max mem: 55573 Epoch: [220] [ 20/156] eta: 0:02:45 lr: 0.001502 min_lr: 0.001502 loss: 3.2477 (3.0088) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [220] [ 30/156] eta: 0:02:07 lr: 0.001500 min_lr: 0.001500 loss: 2.8550 (3.0047) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [220] [ 40/156] eta: 0:01:45 lr: 0.001498 min_lr: 0.001498 loss: 3.2623 (3.0668) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [220] [ 50/156] eta: 0:01:30 lr: 0.001496 min_lr: 0.001496 loss: 3.3678 (3.1127) weight_decay: 0.0500 (0.0500) time: 0.5937 data: 0.0004 max mem: 55573 Epoch: [220] [ 60/156] eta: 0:01:17 lr: 0.001493 min_lr: 0.001493 loss: 3.2868 (3.1285) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [220] [ 70/156] eta: 0:01:06 lr: 0.001491 min_lr: 0.001491 loss: 3.1371 (3.1170) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [220] [ 80/156] eta: 0:00:57 lr: 0.001489 min_lr: 0.001489 loss: 3.0428 (3.1251) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [220] [ 90/156] eta: 0:00:48 lr: 0.001487 min_lr: 0.001487 loss: 3.2204 (3.1360) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [220] [100/156] eta: 0:00:40 lr: 0.001484 min_lr: 0.001484 loss: 3.1594 (3.1136) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [220] [110/156] eta: 0:00:32 lr: 0.001482 min_lr: 0.001482 loss: 2.9661 (3.1010) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [220] [120/156] eta: 0:00:25 lr: 0.001480 min_lr: 0.001480 loss: 3.0293 (3.1001) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [220] [130/156] eta: 0:00:17 lr: 0.001478 min_lr: 0.001478 loss: 3.2821 (3.1044) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0011 max mem: 55573 Epoch: [220] [140/156] eta: 0:00:10 lr: 0.001475 min_lr: 0.001475 loss: 3.0675 (3.0942) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0009 max mem: 55573 Epoch: [220] [150/156] eta: 0:00:04 lr: 0.001473 min_lr: 0.001473 loss: 2.9366 (3.0823) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [220] [155/156] eta: 0:00:00 lr: 0.001472 min_lr: 0.001472 loss: 3.0675 (3.0907) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [220] Total time: 0:01:46 (0.6808 s / it) Averaged stats: lr: 0.001472 min_lr: 0.001472 loss: 3.0675 (3.0775) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8588 (0.8588) acc1: 81.9010 (81.9010) acc5: 97.0052 (97.0052) time: 6.7872 data: 6.5502 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9503 (0.9631) acc1: 79.2969 (78.2080) acc5: 95.9635 (94.9760) time: 1.5040 data: 1.3101 max mem: 55573 Test: Total time: 0:00:07 (1.5509 s / it) * Acc@1 79.362 Acc@5 94.986 loss 0.950 Accuracy of the model on the 50000 test images: 79.4% Max accuracy: 79.36% Test: [0/5] eta: 0:00:34 loss: 1.0465 (1.0465) acc1: 73.5677 (73.5677) acc5: 92.1875 (92.1875) time: 6.8669 data: 6.6308 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0747 (1.0908) acc1: 71.3542 (69.3440) acc5: 92.1875 (89.9200) time: 1.5187 data: 1.3263 max mem: 55573 Test: Total time: 0:00:07 (1.5362 s / it) * Acc@1 70.312 Acc@5 90.154 loss 1.113 Accuracy of the model EMA on 50000 test images: 70.3% Max EMA accuracy: 70.31% Epoch: [221] [ 0/156] eta: 0:32:15 lr: 0.001472 min_lr: 0.001472 loss: 3.0050 (3.0050) weight_decay: 0.0500 (0.0500) time: 12.4081 data: 10.9137 max mem: 55573 Epoch: [221] [ 10/156] eta: 0:04:05 lr: 0.001470 min_lr: 0.001470 loss: 3.3590 (3.2396) weight_decay: 0.0500 (0.0500) time: 1.6841 data: 0.9926 max mem: 55573 Epoch: [221] [ 20/156] eta: 0:02:38 lr: 0.001467 min_lr: 0.001467 loss: 3.2841 (3.1706) weight_decay: 0.0500 (0.0500) time: 0.6014 data: 0.0004 max mem: 55573 Epoch: [221] [ 30/156] eta: 0:02:03 lr: 0.001465 min_lr: 0.001465 loss: 3.2090 (3.1463) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [221] [ 40/156] eta: 0:01:42 lr: 0.001463 min_lr: 0.001463 loss: 3.1260 (3.1055) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [221] [ 50/156] eta: 0:01:27 lr: 0.001461 min_lr: 0.001461 loss: 3.0490 (3.0919) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [221] [ 60/156] eta: 0:01:15 lr: 0.001459 min_lr: 0.001459 loss: 3.0490 (3.0787) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [221] [ 70/156] eta: 0:01:05 lr: 0.001456 min_lr: 0.001456 loss: 2.9179 (3.0591) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [221] [ 80/156] eta: 0:00:56 lr: 0.001454 min_lr: 0.001454 loss: 3.2821 (3.0723) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [221] [ 90/156] eta: 0:00:47 lr: 0.001452 min_lr: 0.001452 loss: 3.2060 (3.0731) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [221] [100/156] eta: 0:00:39 lr: 0.001450 min_lr: 0.001450 loss: 3.1849 (3.0763) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [221] [110/156] eta: 0:00:32 lr: 0.001447 min_lr: 0.001447 loss: 3.1874 (3.0836) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [221] [120/156] eta: 0:00:24 lr: 0.001445 min_lr: 0.001445 loss: 3.1119 (3.0664) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [221] [130/156] eta: 0:00:17 lr: 0.001443 min_lr: 0.001443 loss: 3.0463 (3.0730) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0011 max mem: 55573 Epoch: [221] [140/156] eta: 0:00:10 lr: 0.001441 min_lr: 0.001441 loss: 3.0765 (3.0510) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0009 max mem: 55573 Epoch: [221] [150/156] eta: 0:00:04 lr: 0.001439 min_lr: 0.001439 loss: 3.0765 (3.0417) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [221] [155/156] eta: 0:00:00 lr: 0.001438 min_lr: 0.001438 loss: 3.2079 (3.0461) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [221] Total time: 0:01:44 (0.6728 s / it) Averaged stats: lr: 0.001438 min_lr: 0.001438 loss: 3.2079 (3.0881) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8578 (0.8578) acc1: 82.2917 (82.2917) acc5: 96.3542 (96.3542) time: 6.9172 data: 6.6799 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9290 (0.9376) acc1: 81.2500 (78.4640) acc5: 96.3542 (95.0400) time: 1.5300 data: 1.3361 max mem: 55573 Test: Total time: 0:00:07 (1.5730 s / it) * Acc@1 79.484 Acc@5 95.018 loss 0.919 Accuracy of the model on the 50000 test images: 79.5% Max accuracy: 79.48% Test: [0/5] eta: 0:00:34 loss: 1.0451 (1.0451) acc1: 73.5677 (73.5677) acc5: 92.0573 (92.0573) time: 6.9333 data: 6.6972 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0733 (1.0894) acc1: 71.3542 (69.3440) acc5: 92.0573 (89.9200) time: 1.5320 data: 1.3395 max mem: 55573 Test: Total time: 0:00:07 (1.5491 s / it) * Acc@1 70.354 Acc@5 90.172 loss 1.111 Accuracy of the model EMA on 50000 test images: 70.4% Max EMA accuracy: 70.35% Epoch: [222] [ 0/156] eta: 0:29:38 lr: 0.001437 min_lr: 0.001437 loss: 3.4519 (3.4519) weight_decay: 0.0500 (0.0500) time: 11.4030 data: 10.8178 max mem: 55573 Epoch: [222] [ 10/156] eta: 0:04:20 lr: 0.001435 min_lr: 0.001435 loss: 2.8442 (2.9033) weight_decay: 0.0500 (0.0500) time: 1.7815 data: 1.1869 max mem: 55573 Epoch: [222] [ 20/156] eta: 0:02:45 lr: 0.001433 min_lr: 0.001433 loss: 2.8442 (2.9453) weight_decay: 0.0500 (0.0500) time: 0.7044 data: 0.1121 max mem: 55573 Epoch: [222] [ 30/156] eta: 0:02:07 lr: 0.001431 min_lr: 0.001431 loss: 3.0235 (2.9628) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [222] [ 40/156] eta: 0:01:45 lr: 0.001428 min_lr: 0.001428 loss: 3.1882 (3.0432) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [222] [ 50/156] eta: 0:01:29 lr: 0.001426 min_lr: 0.001426 loss: 3.2110 (3.0052) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [222] [ 60/156] eta: 0:01:17 lr: 0.001424 min_lr: 0.001424 loss: 3.2533 (3.0781) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [222] [ 70/156] eta: 0:01:06 lr: 0.001422 min_lr: 0.001422 loss: 3.3662 (3.1116) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [222] [ 80/156] eta: 0:00:57 lr: 0.001420 min_lr: 0.001420 loss: 3.3522 (3.1463) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [222] [ 90/156] eta: 0:00:48 lr: 0.001417 min_lr: 0.001417 loss: 3.2783 (3.1372) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [222] [100/156] eta: 0:00:40 lr: 0.001415 min_lr: 0.001415 loss: 3.1423 (3.1371) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [222] [110/156] eta: 0:00:32 lr: 0.001413 min_lr: 0.001413 loss: 2.9690 (3.1148) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [222] [120/156] eta: 0:00:25 lr: 0.001411 min_lr: 0.001411 loss: 3.0090 (3.1178) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [222] [130/156] eta: 0:00:17 lr: 0.001409 min_lr: 0.001409 loss: 3.2359 (3.1222) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0012 max mem: 55573 Epoch: [222] [140/156] eta: 0:00:10 lr: 0.001407 min_lr: 0.001407 loss: 3.3590 (3.1319) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0011 max mem: 55573 Epoch: [222] [150/156] eta: 0:00:04 lr: 0.001404 min_lr: 0.001404 loss: 3.3879 (3.1378) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [222] [155/156] eta: 0:00:00 lr: 0.001403 min_lr: 0.001403 loss: 3.1612 (3.1244) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [222] Total time: 0:01:46 (0.6804 s / it) Averaged stats: lr: 0.001403 min_lr: 0.001403 loss: 3.1612 (3.0765) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9204 (0.9204) acc1: 83.4635 (83.4635) acc5: 96.6146 (96.6146) time: 6.7965 data: 6.5584 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0008 (0.9924) acc1: 80.8594 (78.8160) acc5: 96.3542 (95.0400) time: 1.5057 data: 1.3118 max mem: 55573 Test: Total time: 0:00:07 (1.5521 s / it) * Acc@1 79.414 Acc@5 94.944 loss 0.991 Accuracy of the model on the 50000 test images: 79.4% Max accuracy: 79.48% Test: [0/5] eta: 0:00:34 loss: 1.0431 (1.0431) acc1: 73.8281 (73.8281) acc5: 92.1875 (92.1875) time: 6.9747 data: 6.7386 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0714 (1.0877) acc1: 71.3542 (69.5040) acc5: 92.1875 (89.9200) time: 1.5406 data: 1.3478 max mem: 55573 Test: Total time: 0:00:07 (1.5630 s / it) * Acc@1 70.426 Acc@5 90.202 loss 1.109 Accuracy of the model EMA on 50000 test images: 70.4% Max EMA accuracy: 70.43% Epoch: [223] [ 0/156] eta: 0:36:54 lr: 0.001403 min_lr: 0.001403 loss: 3.1219 (3.1219) weight_decay: 0.0500 (0.0500) time: 14.1944 data: 7.4531 max mem: 55573 Epoch: [223] [ 10/156] eta: 0:04:27 lr: 0.001401 min_lr: 0.001401 loss: 3.1041 (2.9581) weight_decay: 0.0500 (0.0500) time: 1.8297 data: 0.6782 max mem: 55573 Epoch: [223] [ 20/156] eta: 0:02:48 lr: 0.001399 min_lr: 0.001399 loss: 3.2144 (3.0248) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0006 max mem: 55573 Epoch: [223] [ 30/156] eta: 0:02:09 lr: 0.001396 min_lr: 0.001396 loss: 3.2817 (3.0663) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [223] [ 40/156] eta: 0:01:47 lr: 0.001394 min_lr: 0.001394 loss: 3.0579 (3.0489) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [223] [ 50/156] eta: 0:01:30 lr: 0.001392 min_lr: 0.001392 loss: 3.0579 (3.0716) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [223] [ 60/156] eta: 0:01:18 lr: 0.001390 min_lr: 0.001390 loss: 3.2482 (3.0608) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [223] [ 70/156] eta: 0:01:07 lr: 0.001388 min_lr: 0.001388 loss: 3.1407 (3.0718) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [223] [ 80/156] eta: 0:00:57 lr: 0.001386 min_lr: 0.001386 loss: 3.2461 (3.0884) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [223] [ 90/156] eta: 0:00:48 lr: 0.001383 min_lr: 0.001383 loss: 3.3058 (3.1108) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [223] [100/156] eta: 0:00:40 lr: 0.001381 min_lr: 0.001381 loss: 3.2627 (3.0787) weight_decay: 0.0500 (0.0500) time: 0.5882 data: 0.0004 max mem: 55573 Epoch: [223] [110/156] eta: 0:00:32 lr: 0.001379 min_lr: 0.001379 loss: 2.9406 (3.0698) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [223] [120/156] eta: 0:00:25 lr: 0.001377 min_lr: 0.001377 loss: 3.1233 (3.0809) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [223] [130/156] eta: 0:00:18 lr: 0.001375 min_lr: 0.001375 loss: 3.2467 (3.0865) weight_decay: 0.0500 (0.0500) time: 0.5879 data: 0.0011 max mem: 55573 Epoch: [223] [140/156] eta: 0:00:10 lr: 0.001373 min_lr: 0.001373 loss: 3.0770 (3.0908) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0009 max mem: 55573 Epoch: [223] [150/156] eta: 0:00:04 lr: 0.001370 min_lr: 0.001370 loss: 3.1268 (3.0883) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [223] [155/156] eta: 0:00:00 lr: 0.001369 min_lr: 0.001369 loss: 3.0422 (3.0906) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [223] Total time: 0:01:46 (0.6834 s / it) Averaged stats: lr: 0.001369 min_lr: 0.001369 loss: 3.0422 (3.0706) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9181 (0.9181) acc1: 82.5521 (82.5521) acc5: 96.3542 (96.3542) time: 6.9728 data: 6.7353 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9862 (0.9869) acc1: 80.0781 (78.4320) acc5: 96.3542 (95.1360) time: 1.5411 data: 1.3471 max mem: 55573 Test: Total time: 0:00:07 (1.5791 s / it) * Acc@1 79.408 Acc@5 94.888 loss 0.982 Accuracy of the model on the 50000 test images: 79.4% Max accuracy: 79.48% Test: [0/5] eta: 0:00:35 loss: 1.0416 (1.0416) acc1: 73.6979 (73.6979) acc5: 92.1875 (92.1875) time: 7.0293 data: 6.7932 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0700 (1.0863) acc1: 71.4844 (69.4720) acc5: 92.1875 (90.0160) time: 1.5512 data: 1.3588 max mem: 55573 Test: Total time: 0:00:07 (1.5708 s / it) * Acc@1 70.462 Acc@5 90.234 loss 1.107 Accuracy of the model EMA on 50000 test images: 70.5% Max EMA accuracy: 70.46% Epoch: [224] [ 0/156] eta: 0:37:03 lr: 0.001369 min_lr: 0.001369 loss: 3.2471 (3.2471) weight_decay: 0.0500 (0.0500) time: 14.2553 data: 13.6460 max mem: 55573 Epoch: [224] [ 10/156] eta: 0:04:27 lr: 0.001367 min_lr: 0.001367 loss: 3.3013 (3.2853) weight_decay: 0.0500 (0.0500) time: 1.8322 data: 1.2409 max mem: 55573 Epoch: [224] [ 20/156] eta: 0:02:48 lr: 0.001365 min_lr: 0.001365 loss: 3.2880 (3.2296) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0003 max mem: 55573 Epoch: [224] [ 30/156] eta: 0:02:09 lr: 0.001363 min_lr: 0.001363 loss: 3.2424 (3.1973) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [224] [ 40/156] eta: 0:01:47 lr: 0.001360 min_lr: 0.001360 loss: 2.9306 (3.0832) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [224] [ 50/156] eta: 0:01:30 lr: 0.001358 min_lr: 0.001358 loss: 2.7687 (3.0651) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [224] [ 60/156] eta: 0:01:18 lr: 0.001356 min_lr: 0.001356 loss: 3.1587 (3.0831) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [224] [ 70/156] eta: 0:01:07 lr: 0.001354 min_lr: 0.001354 loss: 3.1459 (3.0551) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [224] [ 80/156] eta: 0:00:57 lr: 0.001352 min_lr: 0.001352 loss: 2.7833 (3.0464) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [224] [ 90/156] eta: 0:00:48 lr: 0.001350 min_lr: 0.001350 loss: 3.0762 (3.0444) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [224] [100/156] eta: 0:00:40 lr: 0.001347 min_lr: 0.001347 loss: 3.1884 (3.0404) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [224] [110/156] eta: 0:00:32 lr: 0.001345 min_lr: 0.001345 loss: 3.1281 (3.0455) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [224] [120/156] eta: 0:00:25 lr: 0.001343 min_lr: 0.001343 loss: 3.0520 (3.0404) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [224] [130/156] eta: 0:00:18 lr: 0.001341 min_lr: 0.001341 loss: 3.1804 (3.0539) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0010 max mem: 55573 Epoch: [224] [140/156] eta: 0:00:10 lr: 0.001339 min_lr: 0.001339 loss: 3.0952 (3.0371) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0008 max mem: 55573 Epoch: [224] [150/156] eta: 0:00:04 lr: 0.001337 min_lr: 0.001337 loss: 3.0372 (3.0520) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [224] [155/156] eta: 0:00:00 lr: 0.001336 min_lr: 0.001336 loss: 3.2438 (3.0449) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [224] Total time: 0:01:46 (0.6839 s / it) Averaged stats: lr: 0.001336 min_lr: 0.001336 loss: 3.2438 (3.0661) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.9301 (0.9301) acc1: 82.6823 (82.6823) acc5: 96.0938 (96.0938) time: 7.1031 data: 6.8657 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9707 (0.9777) acc1: 81.2500 (78.8480) acc5: 96.0938 (94.9120) time: 1.5672 data: 1.3732 max mem: 55573 Test: Total time: 0:00:08 (1.6143 s / it) * Acc@1 79.518 Acc@5 94.962 loss 0.966 Accuracy of the model on the 50000 test images: 79.5% Max accuracy: 79.52% Test: [0/5] eta: 0:00:34 loss: 1.0401 (1.0401) acc1: 73.8281 (73.8281) acc5: 92.1875 (92.1875) time: 6.8706 data: 6.6343 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0685 (1.0850) acc1: 71.3542 (69.4720) acc5: 92.1875 (90.0800) time: 1.5195 data: 1.3270 max mem: 55573 Test: Total time: 0:00:07 (1.5392 s / it) * Acc@1 70.498 Acc@5 90.268 loss 1.106 Accuracy of the model EMA on 50000 test images: 70.5% Max EMA accuracy: 70.50% Epoch: [225] [ 0/156] eta: 0:35:30 lr: 0.001335 min_lr: 0.001335 loss: 3.3241 (3.3241) weight_decay: 0.0500 (0.0500) time: 13.6564 data: 7.5853 max mem: 55573 Epoch: [225] [ 10/156] eta: 0:04:20 lr: 0.001333 min_lr: 0.001333 loss: 3.1505 (2.9794) weight_decay: 0.0500 (0.0500) time: 1.7873 data: 0.6900 max mem: 55573 Epoch: [225] [ 20/156] eta: 0:02:45 lr: 0.001331 min_lr: 0.001331 loss: 3.1901 (3.1242) weight_decay: 0.0500 (0.0500) time: 0.5973 data: 0.0005 max mem: 55573 Epoch: [225] [ 30/156] eta: 0:02:08 lr: 0.001329 min_lr: 0.001329 loss: 3.3561 (3.1914) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [225] [ 40/156] eta: 0:01:45 lr: 0.001327 min_lr: 0.001327 loss: 3.3561 (3.2060) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [225] [ 50/156] eta: 0:01:30 lr: 0.001325 min_lr: 0.001325 loss: 3.2305 (3.1625) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [225] [ 60/156] eta: 0:01:17 lr: 0.001323 min_lr: 0.001323 loss: 3.2365 (3.1558) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [225] [ 70/156] eta: 0:01:06 lr: 0.001320 min_lr: 0.001320 loss: 3.3337 (3.1561) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [225] [ 80/156] eta: 0:00:57 lr: 0.001318 min_lr: 0.001318 loss: 3.3068 (3.1695) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [225] [ 90/156] eta: 0:00:48 lr: 0.001316 min_lr: 0.001316 loss: 3.3269 (3.1836) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [225] [100/156] eta: 0:00:40 lr: 0.001314 min_lr: 0.001314 loss: 3.3269 (3.1910) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [225] [110/156] eta: 0:00:32 lr: 0.001312 min_lr: 0.001312 loss: 3.2622 (3.1901) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [225] [120/156] eta: 0:00:25 lr: 0.001310 min_lr: 0.001310 loss: 3.2622 (3.1814) weight_decay: 0.0500 (0.0500) time: 0.5882 data: 0.0005 max mem: 55573 Epoch: [225] [130/156] eta: 0:00:17 lr: 0.001308 min_lr: 0.001308 loss: 3.1079 (3.1802) weight_decay: 0.0500 (0.0500) time: 0.5878 data: 0.0010 max mem: 55573 Epoch: [225] [140/156] eta: 0:00:10 lr: 0.001306 min_lr: 0.001306 loss: 3.0948 (3.1692) weight_decay: 0.0500 (0.0500) time: 0.5845 data: 0.0009 max mem: 55573 Epoch: [225] [150/156] eta: 0:00:04 lr: 0.001303 min_lr: 0.001303 loss: 3.3313 (3.1857) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [225] [155/156] eta: 0:00:00 lr: 0.001302 min_lr: 0.001302 loss: 3.3361 (3.1848) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [225] Total time: 0:01:46 (0.6804 s / it) Averaged stats: lr: 0.001302 min_lr: 0.001302 loss: 3.3361 (3.0759) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9752 (0.9752) acc1: 82.2917 (82.2917) acc5: 96.7448 (96.7448) time: 6.9131 data: 6.6761 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9865 (1.0339) acc1: 81.5104 (78.9120) acc5: 96.2264 (95.1680) time: 1.5289 data: 1.3353 max mem: 55573 Test: Total time: 0:00:07 (1.5752 s / it) * Acc@1 79.286 Acc@5 94.990 loss 1.014 Accuracy of the model on the 50000 test images: 79.3% Max accuracy: 79.52% Test: [0/5] eta: 0:00:35 loss: 1.0388 (1.0388) acc1: 73.8281 (73.8281) acc5: 92.0573 (92.0573) time: 7.1785 data: 6.9424 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0670 (1.0837) acc1: 71.6146 (69.4720) acc5: 92.0573 (90.0480) time: 1.5810 data: 1.3886 max mem: 55573 Test: Total time: 0:00:08 (1.6007 s / it) * Acc@1 70.554 Acc@5 90.284 loss 1.105 Accuracy of the model EMA on 50000 test images: 70.6% Max EMA accuracy: 70.55% Epoch: [226] [ 0/156] eta: 0:32:34 lr: 0.001302 min_lr: 0.001302 loss: 3.4337 (3.4337) weight_decay: 0.0500 (0.0500) time: 12.5261 data: 8.3005 max mem: 55573 Epoch: [226] [ 10/156] eta: 0:04:05 lr: 0.001300 min_lr: 0.001300 loss: 3.1413 (3.0090) weight_decay: 0.0500 (0.0500) time: 1.6790 data: 0.7550 max mem: 55573 Epoch: [226] [ 20/156] eta: 0:02:37 lr: 0.001298 min_lr: 0.001298 loss: 3.0162 (2.9641) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0004 max mem: 55573 Epoch: [226] [ 30/156] eta: 0:02:03 lr: 0.001296 min_lr: 0.001296 loss: 2.9583 (2.9477) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [226] [ 40/156] eta: 0:01:42 lr: 0.001294 min_lr: 0.001294 loss: 2.9671 (2.9474) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [226] [ 50/156] eta: 0:01:27 lr: 0.001292 min_lr: 0.001292 loss: 2.9671 (2.9600) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [226] [ 60/156] eta: 0:01:15 lr: 0.001289 min_lr: 0.001289 loss: 3.0280 (2.9996) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [226] [ 70/156] eta: 0:01:05 lr: 0.001287 min_lr: 0.001287 loss: 3.0280 (2.9961) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [226] [ 80/156] eta: 0:00:56 lr: 0.001285 min_lr: 0.001285 loss: 3.2521 (3.0028) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [226] [ 90/156] eta: 0:00:47 lr: 0.001283 min_lr: 0.001283 loss: 3.0709 (2.9998) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [226] [100/156] eta: 0:00:39 lr: 0.001281 min_lr: 0.001281 loss: 2.8813 (2.9864) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [226] [110/156] eta: 0:00:32 lr: 0.001279 min_lr: 0.001279 loss: 2.8813 (2.9860) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [226] [120/156] eta: 0:00:24 lr: 0.001277 min_lr: 0.001277 loss: 2.9877 (2.9839) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [226] [130/156] eta: 0:00:17 lr: 0.001275 min_lr: 0.001275 loss: 3.0524 (2.9850) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0010 max mem: 55573 Epoch: [226] [140/156] eta: 0:00:10 lr: 0.001273 min_lr: 0.001273 loss: 2.7070 (2.9606) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0008 max mem: 55573 Epoch: [226] [150/156] eta: 0:00:04 lr: 0.001270 min_lr: 0.001270 loss: 2.7070 (2.9644) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [226] [155/156] eta: 0:00:00 lr: 0.001269 min_lr: 0.001269 loss: 3.0981 (2.9662) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [226] Total time: 0:01:45 (0.6733 s / it) Averaged stats: lr: 0.001269 min_lr: 0.001269 loss: 3.0981 (3.0601) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8513 (0.8513) acc1: 80.9896 (80.9896) acc5: 97.3958 (97.3958) time: 6.9844 data: 6.7471 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9092 (0.9486) acc1: 80.9896 (78.5280) acc5: 96.2264 (95.3920) time: 1.5431 data: 1.3495 max mem: 55573 Test: Total time: 0:00:07 (1.5880 s / it) * Acc@1 79.620 Acc@5 95.104 loss 0.933 Accuracy of the model on the 50000 test images: 79.6% Max accuracy: 79.62% Test: [0/5] eta: 0:00:34 loss: 1.0373 (1.0373) acc1: 73.8281 (73.8281) acc5: 92.0573 (92.0573) time: 6.8620 data: 6.6262 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0654 (1.0823) acc1: 71.7448 (69.6000) acc5: 92.0573 (90.1120) time: 1.5177 data: 1.3253 max mem: 55573 Test: Total time: 0:00:07 (1.5330 s / it) * Acc@1 70.616 Acc@5 90.306 loss 1.103 Accuracy of the model EMA on 50000 test images: 70.6% Max EMA accuracy: 70.62% Epoch: [227] [ 0/156] eta: 0:32:46 lr: 0.001269 min_lr: 0.001269 loss: 2.8163 (2.8163) weight_decay: 0.0500 (0.0500) time: 12.6084 data: 8.7523 max mem: 55573 Epoch: [227] [ 10/156] eta: 0:04:12 lr: 0.001267 min_lr: 0.001267 loss: 3.0555 (3.0883) weight_decay: 0.0500 (0.0500) time: 1.7294 data: 0.7961 max mem: 55573 Epoch: [227] [ 20/156] eta: 0:02:41 lr: 0.001265 min_lr: 0.001265 loss: 3.1923 (3.1120) weight_decay: 0.0500 (0.0500) time: 0.6154 data: 0.0005 max mem: 55573 Epoch: [227] [ 30/156] eta: 0:02:05 lr: 0.001263 min_lr: 0.001263 loss: 3.1501 (3.0868) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [227] [ 40/156] eta: 0:01:43 lr: 0.001261 min_lr: 0.001261 loss: 3.0520 (3.0497) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [227] [ 50/156] eta: 0:01:28 lr: 0.001259 min_lr: 0.001259 loss: 2.9510 (3.0307) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [227] [ 60/156] eta: 0:01:16 lr: 0.001257 min_lr: 0.001257 loss: 3.2136 (3.0595) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [227] [ 70/156] eta: 0:01:05 lr: 0.001255 min_lr: 0.001255 loss: 3.3157 (3.0850) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [227] [ 80/156] eta: 0:00:56 lr: 0.001252 min_lr: 0.001252 loss: 3.2110 (3.0716) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [227] [ 90/156] eta: 0:00:48 lr: 0.001250 min_lr: 0.001250 loss: 3.2026 (3.0709) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [227] [100/156] eta: 0:00:40 lr: 0.001248 min_lr: 0.001248 loss: 3.2314 (3.0657) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [227] [110/156] eta: 0:00:32 lr: 0.001246 min_lr: 0.001246 loss: 3.2205 (3.0602) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [227] [120/156] eta: 0:00:25 lr: 0.001244 min_lr: 0.001244 loss: 3.0966 (3.0397) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [227] [130/156] eta: 0:00:17 lr: 0.001242 min_lr: 0.001242 loss: 2.7140 (3.0326) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0010 max mem: 55573 Epoch: [227] [140/156] eta: 0:00:10 lr: 0.001240 min_lr: 0.001240 loss: 2.9966 (3.0287) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [227] [150/156] eta: 0:00:04 lr: 0.001238 min_lr: 0.001238 loss: 3.0855 (3.0380) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [227] [155/156] eta: 0:00:00 lr: 0.001237 min_lr: 0.001237 loss: 3.2267 (3.0457) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [227] Total time: 0:01:45 (0.6768 s / it) Averaged stats: lr: 0.001237 min_lr: 0.001237 loss: 3.2267 (3.0449) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.9990 (0.9990) acc1: 81.1198 (81.1198) acc5: 96.4844 (96.4844) time: 7.0983 data: 6.8608 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0250 (1.0608) acc1: 80.8594 (78.5920) acc5: 94.3396 (94.8480) time: 1.5660 data: 1.3722 max mem: 55573 Test: Total time: 0:00:08 (1.6094 s / it) * Acc@1 79.512 Acc@5 95.066 loss 1.041 Accuracy of the model on the 50000 test images: 79.5% Max accuracy: 79.62% Test: [0/5] eta: 0:00:35 loss: 1.0358 (1.0358) acc1: 74.0885 (74.0885) acc5: 92.0573 (92.0573) time: 7.0268 data: 6.7908 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0638 (1.0809) acc1: 72.0052 (69.7600) acc5: 92.0573 (90.0480) time: 1.5511 data: 1.3584 max mem: 55573 Test: Total time: 0:00:07 (1.5748 s / it) * Acc@1 70.666 Acc@5 90.320 loss 1.102 Accuracy of the model EMA on 50000 test images: 70.7% Max EMA accuracy: 70.67% Epoch: [228] [ 0/156] eta: 0:31:34 lr: 0.001237 min_lr: 0.001237 loss: 2.5131 (2.5131) weight_decay: 0.0500 (0.0500) time: 12.1455 data: 11.5528 max mem: 55573 Epoch: [228] [ 10/156] eta: 0:04:07 lr: 0.001235 min_lr: 0.001235 loss: 3.1729 (3.0734) weight_decay: 0.0500 (0.0500) time: 1.6980 data: 1.0506 max mem: 55573 Epoch: [228] [ 20/156] eta: 0:02:39 lr: 0.001232 min_lr: 0.001232 loss: 3.3168 (3.0791) weight_decay: 0.0500 (0.0500) time: 0.6218 data: 0.0004 max mem: 55573 Epoch: [228] [ 30/156] eta: 0:02:03 lr: 0.001230 min_lr: 0.001230 loss: 3.3114 (3.1195) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [228] [ 40/156] eta: 0:01:42 lr: 0.001228 min_lr: 0.001228 loss: 3.3497 (3.1793) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [228] [ 50/156] eta: 0:01:27 lr: 0.001226 min_lr: 0.001226 loss: 3.2987 (3.1422) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [228] [ 60/156] eta: 0:01:15 lr: 0.001224 min_lr: 0.001224 loss: 3.2287 (3.1787) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [228] [ 70/156] eta: 0:01:05 lr: 0.001222 min_lr: 0.001222 loss: 3.2703 (3.1558) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [228] [ 80/156] eta: 0:00:56 lr: 0.001220 min_lr: 0.001220 loss: 2.9027 (3.1039) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [228] [ 90/156] eta: 0:00:47 lr: 0.001218 min_lr: 0.001218 loss: 3.1989 (3.1175) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [228] [100/156] eta: 0:00:39 lr: 0.001216 min_lr: 0.001216 loss: 3.1617 (3.0806) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [228] [110/156] eta: 0:00:32 lr: 0.001214 min_lr: 0.001214 loss: 2.7729 (3.0710) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [228] [120/156] eta: 0:00:24 lr: 0.001212 min_lr: 0.001212 loss: 3.2370 (3.0598) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [228] [130/156] eta: 0:00:17 lr: 0.001210 min_lr: 0.001210 loss: 3.2465 (3.0675) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0010 max mem: 55573 Epoch: [228] [140/156] eta: 0:00:10 lr: 0.001208 min_lr: 0.001208 loss: 3.2872 (3.0655) weight_decay: 0.0500 (0.0500) time: 0.5848 data: 0.0008 max mem: 55573 Epoch: [228] [150/156] eta: 0:00:04 lr: 0.001206 min_lr: 0.001206 loss: 2.8732 (3.0375) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [228] [155/156] eta: 0:00:00 lr: 0.001205 min_lr: 0.001205 loss: 3.0347 (3.0441) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [228] Total time: 0:01:45 (0.6742 s / it) Averaged stats: lr: 0.001205 min_lr: 0.001205 loss: 3.0347 (3.0475) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8680 (0.8680) acc1: 82.5521 (82.5521) acc5: 96.6146 (96.6146) time: 6.8899 data: 6.6524 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9535 (0.9494) acc1: 80.4688 (79.2320) acc5: 95.9635 (94.8800) time: 1.5243 data: 1.3306 max mem: 55573 Test: Total time: 0:00:07 (1.5614 s / it) * Acc@1 79.762 Acc@5 95.160 loss 0.932 Accuracy of the model on the 50000 test images: 79.8% Max accuracy: 79.76% Test: [0/5] eta: 0:00:33 loss: 1.0345 (1.0345) acc1: 74.0885 (74.0885) acc5: 91.9271 (91.9271) time: 6.7594 data: 6.5232 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0625 (1.0797) acc1: 72.0052 (69.7600) acc5: 91.9271 (90.0160) time: 1.4972 data: 1.3048 max mem: 55573 Test: Total time: 0:00:07 (1.5146 s / it) * Acc@1 70.680 Acc@5 90.332 loss 1.101 Accuracy of the model EMA on 50000 test images: 70.7% Max EMA accuracy: 70.68% Epoch: [229] [ 0/156] eta: 0:31:30 lr: 0.001204 min_lr: 0.001204 loss: 2.5137 (2.5137) weight_decay: 0.0500 (0.0500) time: 12.1189 data: 11.5339 max mem: 55573 Epoch: [229] [ 10/156] eta: 0:04:02 lr: 0.001202 min_lr: 0.001202 loss: 3.0896 (2.9449) weight_decay: 0.0500 (0.0500) time: 1.6592 data: 1.0489 max mem: 55573 Epoch: [229] [ 20/156] eta: 0:02:36 lr: 0.001200 min_lr: 0.001200 loss: 3.0896 (2.9743) weight_decay: 0.0500 (0.0500) time: 0.6023 data: 0.0004 max mem: 55573 Epoch: [229] [ 30/156] eta: 0:02:02 lr: 0.001198 min_lr: 0.001198 loss: 3.2674 (3.0803) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [229] [ 40/156] eta: 0:01:41 lr: 0.001196 min_lr: 0.001196 loss: 3.1836 (3.0064) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [229] [ 50/156] eta: 0:01:26 lr: 0.001194 min_lr: 0.001194 loss: 3.1144 (2.9876) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [229] [ 60/156] eta: 0:01:15 lr: 0.001192 min_lr: 0.001192 loss: 2.9827 (2.9846) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [229] [ 70/156] eta: 0:01:04 lr: 0.001190 min_lr: 0.001190 loss: 2.9855 (2.9972) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [229] [ 80/156] eta: 0:00:55 lr: 0.001188 min_lr: 0.001188 loss: 3.2722 (3.0290) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [229] [ 90/156] eta: 0:00:47 lr: 0.001186 min_lr: 0.001186 loss: 3.1640 (3.0102) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [229] [100/156] eta: 0:00:39 lr: 0.001184 min_lr: 0.001184 loss: 2.9256 (3.0158) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [229] [110/156] eta: 0:00:32 lr: 0.001182 min_lr: 0.001182 loss: 3.2222 (3.0250) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.0004 max mem: 55573 Epoch: [229] [120/156] eta: 0:00:24 lr: 0.001180 min_lr: 0.001180 loss: 3.2183 (3.0377) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0005 max mem: 55573 Epoch: [229] [130/156] eta: 0:00:17 lr: 0.001178 min_lr: 0.001178 loss: 3.0658 (3.0213) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0011 max mem: 55573 Epoch: [229] [140/156] eta: 0:00:10 lr: 0.001176 min_lr: 0.001176 loss: 3.1340 (3.0235) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0009 max mem: 55573 Epoch: [229] [150/156] eta: 0:00:04 lr: 0.001174 min_lr: 0.001174 loss: 3.1340 (3.0159) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0002 max mem: 55573 Epoch: [229] [155/156] eta: 0:00:00 lr: 0.001173 min_lr: 0.001173 loss: 3.1264 (3.0129) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0002 max mem: 55573 Epoch: [229] Total time: 0:01:44 (0.6715 s / it) Averaged stats: lr: 0.001173 min_lr: 0.001173 loss: 3.1264 (3.0354) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8798 (0.8798) acc1: 82.2917 (82.2917) acc5: 96.8750 (96.8750) time: 6.9883 data: 6.7512 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9321 (0.9354) acc1: 80.8594 (78.8160) acc5: 96.7448 (95.3920) time: 1.5441 data: 1.3503 max mem: 55573 Test: Total time: 0:00:07 (1.5870 s / it) * Acc@1 79.666 Acc@5 95.112 loss 0.933 Accuracy of the model on the 50000 test images: 79.7% Max accuracy: 79.76% Test: [0/5] eta: 0:00:36 loss: 1.0333 (1.0333) acc1: 74.0885 (74.0885) acc5: 92.0573 (92.0573) time: 7.2720 data: 7.0359 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0613 (1.0786) acc1: 72.0052 (69.8880) acc5: 92.0573 (90.0480) time: 1.5997 data: 1.4073 max mem: 55573 Test: Total time: 0:00:08 (1.6207 s / it) * Acc@1 70.700 Acc@5 90.368 loss 1.099 Accuracy of the model EMA on 50000 test images: 70.7% Max EMA accuracy: 70.70% Epoch: [230] [ 0/156] eta: 0:34:28 lr: 0.001172 min_lr: 0.001172 loss: 2.4362 (2.4362) weight_decay: 0.0500 (0.0500) time: 13.2608 data: 7.6964 max mem: 55573 Epoch: [230] [ 10/156] eta: 0:04:14 lr: 0.001170 min_lr: 0.001170 loss: 2.4871 (2.7191) weight_decay: 0.0500 (0.0500) time: 1.7424 data: 0.7001 max mem: 55573 Epoch: [230] [ 20/156] eta: 0:02:42 lr: 0.001168 min_lr: 0.001168 loss: 3.0234 (2.8231) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [230] [ 30/156] eta: 0:02:05 lr: 0.001166 min_lr: 0.001166 loss: 3.1610 (2.9107) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [230] [ 40/156] eta: 0:01:44 lr: 0.001164 min_lr: 0.001164 loss: 3.2064 (2.9292) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [230] [ 50/156] eta: 0:01:28 lr: 0.001162 min_lr: 0.001162 loss: 2.9443 (2.9042) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0004 max mem: 55573 Epoch: [230] [ 60/156] eta: 0:01:16 lr: 0.001160 min_lr: 0.001160 loss: 3.0950 (2.9596) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [230] [ 70/156] eta: 0:01:06 lr: 0.001158 min_lr: 0.001158 loss: 3.1091 (2.9711) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [230] [ 80/156] eta: 0:00:56 lr: 0.001156 min_lr: 0.001156 loss: 3.2346 (3.0126) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0004 max mem: 55573 Epoch: [230] [ 90/156] eta: 0:00:48 lr: 0.001154 min_lr: 0.001154 loss: 3.2346 (2.9881) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [230] [100/156] eta: 0:00:40 lr: 0.001152 min_lr: 0.001152 loss: 2.4890 (2.9512) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [230] [110/156] eta: 0:00:32 lr: 0.001150 min_lr: 0.001150 loss: 3.0217 (2.9715) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [230] [120/156] eta: 0:00:25 lr: 0.001148 min_lr: 0.001148 loss: 3.1734 (2.9591) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [230] [130/156] eta: 0:00:17 lr: 0.001146 min_lr: 0.001146 loss: 2.9279 (2.9611) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0012 max mem: 55573 Epoch: [230] [140/156] eta: 0:00:10 lr: 0.001144 min_lr: 0.001144 loss: 3.0540 (2.9512) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0010 max mem: 55573 Epoch: [230] [150/156] eta: 0:00:04 lr: 0.001142 min_lr: 0.001142 loss: 3.0668 (2.9725) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [230] [155/156] eta: 0:00:00 lr: 0.001141 min_lr: 0.001141 loss: 3.1935 (2.9815) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [230] Total time: 0:01:45 (0.6770 s / it) Averaged stats: lr: 0.001141 min_lr: 0.001141 loss: 3.1935 (3.0356) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 1.0018 (1.0018) acc1: 81.5104 (81.5104) acc5: 96.6146 (96.6146) time: 6.9034 data: 6.6664 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0474 (1.0620) acc1: 79.6875 (78.4320) acc5: 95.8333 (95.0400) time: 1.5270 data: 1.3334 max mem: 55573 Test: Total time: 0:00:07 (1.5746 s / it) * Acc@1 79.730 Acc@5 95.038 loss 1.042 Accuracy of the model on the 50000 test images: 79.7% Max accuracy: 79.76% Test: [0/5] eta: 0:00:35 loss: 1.0322 (1.0322) acc1: 74.0885 (74.0885) acc5: 92.1875 (92.1875) time: 7.0832 data: 6.8471 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0603 (1.0777) acc1: 72.1354 (69.9200) acc5: 92.1875 (90.0480) time: 1.5620 data: 1.3695 max mem: 55573 Test: Total time: 0:00:07 (1.5783 s / it) * Acc@1 70.746 Acc@5 90.372 loss 1.098 Accuracy of the model EMA on 50000 test images: 70.7% Max EMA accuracy: 70.75% Epoch: [231] [ 0/156] eta: 0:35:41 lr: 0.001141 min_lr: 0.001141 loss: 2.1948 (2.1948) weight_decay: 0.0500 (0.0500) time: 13.7244 data: 11.6071 max mem: 55573 Epoch: [231] [ 10/156] eta: 0:04:21 lr: 0.001139 min_lr: 0.001139 loss: 3.1036 (3.0557) weight_decay: 0.0500 (0.0500) time: 1.7925 data: 1.0556 max mem: 55573 Epoch: [231] [ 20/156] eta: 0:02:45 lr: 0.001137 min_lr: 0.001137 loss: 3.0947 (2.9992) weight_decay: 0.0500 (0.0500) time: 0.5950 data: 0.0004 max mem: 55573 Epoch: [231] [ 30/156] eta: 0:02:08 lr: 0.001135 min_lr: 0.001135 loss: 3.0947 (3.0492) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [231] [ 40/156] eta: 0:01:45 lr: 0.001133 min_lr: 0.001133 loss: 3.0802 (3.0235) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [231] [ 50/156] eta: 0:01:30 lr: 0.001131 min_lr: 0.001131 loss: 3.0516 (2.9942) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [231] [ 60/156] eta: 0:01:17 lr: 0.001129 min_lr: 0.001129 loss: 3.0704 (3.0170) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [231] [ 70/156] eta: 0:01:06 lr: 0.001127 min_lr: 0.001127 loss: 3.0323 (2.9788) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [231] [ 80/156] eta: 0:00:57 lr: 0.001125 min_lr: 0.001125 loss: 2.8985 (2.9756) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [231] [ 90/156] eta: 0:00:48 lr: 0.001123 min_lr: 0.001123 loss: 3.0773 (2.9819) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [231] [100/156] eta: 0:00:40 lr: 0.001121 min_lr: 0.001121 loss: 3.0843 (2.9699) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [231] [110/156] eta: 0:00:32 lr: 0.001119 min_lr: 0.001119 loss: 3.0843 (2.9733) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [231] [120/156] eta: 0:00:25 lr: 0.001117 min_lr: 0.001117 loss: 3.1105 (2.9933) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [231] [130/156] eta: 0:00:17 lr: 0.001115 min_lr: 0.001115 loss: 3.2245 (2.9992) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0010 max mem: 55573 Epoch: [231] [140/156] eta: 0:00:10 lr: 0.001113 min_lr: 0.001113 loss: 3.0775 (3.0024) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0009 max mem: 55573 Epoch: [231] [150/156] eta: 0:00:04 lr: 0.001111 min_lr: 0.001111 loss: 2.9683 (2.9983) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0002 max mem: 55573 Epoch: [231] [155/156] eta: 0:00:00 lr: 0.001110 min_lr: 0.001110 loss: 2.9946 (3.0056) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [231] Total time: 0:01:46 (0.6805 s / it) Averaged stats: lr: 0.001110 min_lr: 0.001110 loss: 2.9946 (3.0454) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9001 (0.9001) acc1: 82.1615 (82.1615) acc5: 97.0052 (97.0052) time: 6.7945 data: 6.5570 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9975 (1.0032) acc1: 80.3385 (79.2000) acc5: 96.0938 (95.3280) time: 1.5055 data: 1.3115 max mem: 55573 Test: Total time: 0:00:07 (1.5488 s / it) * Acc@1 79.838 Acc@5 95.056 loss 0.999 Accuracy of the model on the 50000 test images: 79.8% Max accuracy: 79.84% Test: [0/5] eta: 0:00:34 loss: 1.0308 (1.0308) acc1: 74.2188 (74.2188) acc5: 92.3177 (92.3177) time: 6.8288 data: 6.5927 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0589 (1.0765) acc1: 72.1354 (70.0480) acc5: 92.3177 (90.1120) time: 1.5111 data: 1.3187 max mem: 55573 Test: Total time: 0:00:07 (1.5289 s / it) * Acc@1 70.798 Acc@5 90.398 loss 1.097 Accuracy of the model EMA on 50000 test images: 70.8% Max EMA accuracy: 70.80% Epoch: [232] [ 0/156] eta: 0:32:47 lr: 0.001110 min_lr: 0.001110 loss: 3.1348 (3.1348) weight_decay: 0.0500 (0.0500) time: 12.6143 data: 11.1174 max mem: 55573 Epoch: [232] [ 10/156] eta: 0:04:16 lr: 0.001108 min_lr: 0.001108 loss: 2.9863 (2.8919) weight_decay: 0.0500 (0.0500) time: 1.7558 data: 1.0786 max mem: 55573 Epoch: [232] [ 20/156] eta: 0:02:43 lr: 0.001106 min_lr: 0.001106 loss: 2.9863 (2.9978) weight_decay: 0.0500 (0.0500) time: 0.6306 data: 0.0375 max mem: 55573 Epoch: [232] [ 30/156] eta: 0:02:06 lr: 0.001104 min_lr: 0.001104 loss: 3.1702 (3.0366) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [232] [ 40/156] eta: 0:01:44 lr: 0.001102 min_lr: 0.001102 loss: 3.0676 (2.9968) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [232] [ 50/156] eta: 0:01:29 lr: 0.001100 min_lr: 0.001100 loss: 3.0995 (3.0099) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [232] [ 60/156] eta: 0:01:17 lr: 0.001098 min_lr: 0.001098 loss: 3.1183 (3.0083) weight_decay: 0.0500 (0.0500) time: 0.5932 data: 0.0005 max mem: 55573 Epoch: [232] [ 70/156] eta: 0:01:06 lr: 0.001096 min_lr: 0.001096 loss: 3.1462 (3.0368) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [232] [ 80/156] eta: 0:00:56 lr: 0.001094 min_lr: 0.001094 loss: 3.2811 (3.0603) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [232] [ 90/156] eta: 0:00:48 lr: 0.001092 min_lr: 0.001092 loss: 3.1541 (3.0707) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [232] [100/156] eta: 0:00:40 lr: 0.001090 min_lr: 0.001090 loss: 3.1141 (3.0644) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [232] [110/156] eta: 0:00:32 lr: 0.001088 min_lr: 0.001088 loss: 3.2576 (3.0642) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0005 max mem: 55573 Epoch: [232] [120/156] eta: 0:00:25 lr: 0.001086 min_lr: 0.001086 loss: 2.9095 (3.0466) weight_decay: 0.0500 (0.0500) time: 0.5941 data: 0.0005 max mem: 55573 Epoch: [232] [130/156] eta: 0:00:17 lr: 0.001084 min_lr: 0.001084 loss: 2.7648 (3.0308) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0011 max mem: 55573 Epoch: [232] [140/156] eta: 0:00:10 lr: 0.001082 min_lr: 0.001082 loss: 2.8807 (3.0271) weight_decay: 0.0500 (0.0500) time: 0.5867 data: 0.0009 max mem: 55573 Epoch: [232] [150/156] eta: 0:00:04 lr: 0.001080 min_lr: 0.001080 loss: 2.8746 (3.0065) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [232] [155/156] eta: 0:00:00 lr: 0.001079 min_lr: 0.001079 loss: 2.5499 (3.0025) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [232] Total time: 0:01:45 (0.6791 s / it) Averaged stats: lr: 0.001079 min_lr: 0.001079 loss: 2.5499 (3.0233) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 0.8828 (0.8828) acc1: 81.6406 (81.6406) acc5: 96.6146 (96.6146) time: 6.5959 data: 6.3588 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9684 (0.9651) acc1: 81.6406 (79.2640) acc5: 96.0938 (95.2000) time: 1.4656 data: 1.2719 max mem: 55573 Test: Total time: 0:00:07 (1.5205 s / it) * Acc@1 80.096 Acc@5 95.214 loss 0.941 Accuracy of the model on the 50000 test images: 80.1% Max accuracy: 80.10% Test: [0/5] eta: 0:00:33 loss: 1.0298 (1.0298) acc1: 74.4792 (74.4792) acc5: 92.3177 (92.3177) time: 6.7706 data: 6.5346 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0580 (1.0756) acc1: 72.1354 (70.1120) acc5: 92.3177 (90.1440) time: 1.4995 data: 1.3070 max mem: 55573 Test: Total time: 0:00:07 (1.5164 s / it) * Acc@1 70.844 Acc@5 90.408 loss 1.096 Accuracy of the model EMA on 50000 test images: 70.8% Max EMA accuracy: 70.84% Epoch: [233] [ 0/156] eta: 0:35:07 lr: 0.001079 min_lr: 0.001079 loss: 3.3505 (3.3505) weight_decay: 0.0500 (0.0500) time: 13.5114 data: 8.3274 max mem: 55573 Epoch: [233] [ 10/156] eta: 0:04:17 lr: 0.001077 min_lr: 0.001077 loss: 3.1566 (3.0847) weight_decay: 0.0500 (0.0500) time: 1.7659 data: 0.7575 max mem: 55573 Epoch: [233] [ 20/156] eta: 0:02:44 lr: 0.001075 min_lr: 0.001075 loss: 3.1239 (3.0590) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [233] [ 30/156] eta: 0:02:07 lr: 0.001073 min_lr: 0.001073 loss: 3.1379 (3.0525) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [233] [ 40/156] eta: 0:01:45 lr: 0.001071 min_lr: 0.001071 loss: 3.0343 (3.0085) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [233] [ 50/156] eta: 0:01:29 lr: 0.001069 min_lr: 0.001069 loss: 3.1093 (3.0417) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [233] [ 60/156] eta: 0:01:17 lr: 0.001067 min_lr: 0.001067 loss: 3.2798 (3.0550) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0005 max mem: 55573 Epoch: [233] [ 70/156] eta: 0:01:06 lr: 0.001065 min_lr: 0.001065 loss: 3.2160 (3.0241) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [233] [ 80/156] eta: 0:00:56 lr: 0.001063 min_lr: 0.001063 loss: 3.0363 (3.0229) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [233] [ 90/156] eta: 0:00:48 lr: 0.001061 min_lr: 0.001061 loss: 3.2310 (3.0523) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [233] [100/156] eta: 0:00:40 lr: 0.001059 min_lr: 0.001059 loss: 3.2948 (3.0594) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [233] [110/156] eta: 0:00:32 lr: 0.001057 min_lr: 0.001057 loss: 3.0505 (3.0512) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [233] [120/156] eta: 0:00:25 lr: 0.001055 min_lr: 0.001055 loss: 2.9290 (3.0287) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [233] [130/156] eta: 0:00:17 lr: 0.001053 min_lr: 0.001053 loss: 2.8015 (3.0151) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0011 max mem: 55573 Epoch: [233] [140/156] eta: 0:00:10 lr: 0.001052 min_lr: 0.001052 loss: 3.1661 (3.0193) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0010 max mem: 55573 Epoch: [233] [150/156] eta: 0:00:04 lr: 0.001050 min_lr: 0.001050 loss: 3.2512 (3.0276) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [233] [155/156] eta: 0:00:00 lr: 0.001049 min_lr: 0.001049 loss: 3.1582 (3.0221) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [233] Total time: 0:01:45 (0.6787 s / it) Averaged stats: lr: 0.001049 min_lr: 0.001049 loss: 3.1582 (3.0307) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8899 (0.8899) acc1: 82.4219 (82.4219) acc5: 96.2240 (96.2240) time: 7.1450 data: 6.9076 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9545 (0.9798) acc1: 79.9479 (79.0080) acc5: 96.2240 (95.2640) time: 1.5755 data: 1.3816 max mem: 55573 Test: Total time: 0:00:08 (1.6167 s / it) * Acc@1 79.954 Acc@5 95.242 loss 0.955 Accuracy of the model on the 50000 test images: 80.0% Max accuracy: 80.10% Test: [0/5] eta: 0:00:35 loss: 1.0286 (1.0286) acc1: 74.4792 (74.4792) acc5: 92.3177 (92.3177) time: 7.1729 data: 6.9368 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0568 (1.0744) acc1: 72.0052 (70.1440) acc5: 92.3177 (90.1760) time: 1.5800 data: 1.3875 max mem: 55573 Test: Total time: 0:00:08 (1.6004 s / it) * Acc@1 70.886 Acc@5 90.436 loss 1.095 Accuracy of the model EMA on 50000 test images: 70.9% Max EMA accuracy: 70.89% Epoch: [234] [ 0/156] eta: 0:32:31 lr: 0.001048 min_lr: 0.001048 loss: 3.6643 (3.6643) weight_decay: 0.0500 (0.0500) time: 12.5071 data: 11.8951 max mem: 55573 Epoch: [234] [ 10/156] eta: 0:04:14 lr: 0.001046 min_lr: 0.001046 loss: 3.2827 (3.0863) weight_decay: 0.0500 (0.0500) time: 1.7423 data: 1.1217 max mem: 55573 Epoch: [234] [ 20/156] eta: 0:02:42 lr: 0.001045 min_lr: 0.001045 loss: 3.2365 (3.1307) weight_decay: 0.0500 (0.0500) time: 0.6276 data: 0.0223 max mem: 55573 Epoch: [234] [ 30/156] eta: 0:02:05 lr: 0.001043 min_lr: 0.001043 loss: 3.1483 (3.0319) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [234] [ 40/156] eta: 0:01:44 lr: 0.001041 min_lr: 0.001041 loss: 3.0355 (3.0514) weight_decay: 0.0500 (0.0500) time: 0.5943 data: 0.0004 max mem: 55573 Epoch: [234] [ 50/156] eta: 0:01:29 lr: 0.001039 min_lr: 0.001039 loss: 3.0355 (3.0374) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0005 max mem: 55573 Epoch: [234] [ 60/156] eta: 0:01:16 lr: 0.001037 min_lr: 0.001037 loss: 2.9630 (3.0060) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [234] [ 70/156] eta: 0:01:06 lr: 0.001035 min_lr: 0.001035 loss: 3.1970 (3.0364) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [234] [ 80/156] eta: 0:00:56 lr: 0.001033 min_lr: 0.001033 loss: 3.2481 (3.0202) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [234] [ 90/156] eta: 0:00:48 lr: 0.001031 min_lr: 0.001031 loss: 3.2919 (3.0354) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [234] [100/156] eta: 0:00:40 lr: 0.001029 min_lr: 0.001029 loss: 3.2919 (3.0575) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [234] [110/156] eta: 0:00:32 lr: 0.001027 min_lr: 0.001027 loss: 3.2299 (3.0542) weight_decay: 0.0500 (0.0500) time: 0.5943 data: 0.0004 max mem: 55573 Epoch: [234] [120/156] eta: 0:00:25 lr: 0.001025 min_lr: 0.001025 loss: 3.0328 (3.0440) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0006 max mem: 55573 Epoch: [234] [130/156] eta: 0:00:17 lr: 0.001023 min_lr: 0.001023 loss: 2.9037 (3.0245) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0012 max mem: 55573 Epoch: [234] [140/156] eta: 0:00:10 lr: 0.001021 min_lr: 0.001021 loss: 2.8980 (3.0180) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0009 max mem: 55573 Epoch: [234] [150/156] eta: 0:00:04 lr: 0.001019 min_lr: 0.001019 loss: 3.2015 (3.0274) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [234] [155/156] eta: 0:00:00 lr: 0.001018 min_lr: 0.001018 loss: 3.2258 (3.0377) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [234] Total time: 0:01:45 (0.6780 s / it) Averaged stats: lr: 0.001018 min_lr: 0.001018 loss: 3.2258 (3.0291) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.9435 (0.9435) acc1: 81.7708 (81.7708) acc5: 96.6146 (96.6146) time: 6.6616 data: 6.4245 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0479 (1.0338) acc1: 80.2083 (78.7840) acc5: 96.2264 (95.1680) time: 1.4789 data: 1.2850 max mem: 55573 Test: Total time: 0:00:07 (1.5195 s / it) * Acc@1 79.936 Acc@5 95.278 loss 1.019 Accuracy of the model on the 50000 test images: 79.9% Max accuracy: 80.10% Test: [0/5] eta: 0:00:35 loss: 1.0276 (1.0276) acc1: 74.6094 (74.6094) acc5: 92.3177 (92.3177) time: 7.0079 data: 6.7719 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0560 (1.0737) acc1: 72.2656 (70.2720) acc5: 92.3177 (90.1440) time: 1.5469 data: 1.3545 max mem: 55573 Test: Total time: 0:00:07 (1.5731 s / it) * Acc@1 70.946 Acc@5 90.462 loss 1.094 Accuracy of the model EMA on 50000 test images: 70.9% Max EMA accuracy: 70.95% Epoch: [235] [ 0/156] eta: 0:34:39 lr: 0.001018 min_lr: 0.001018 loss: 3.4048 (3.4048) weight_decay: 0.0500 (0.0500) time: 13.3312 data: 8.8833 max mem: 55573 Epoch: [235] [ 10/156] eta: 0:04:16 lr: 0.001016 min_lr: 0.001016 loss: 3.1525 (3.1223) weight_decay: 0.0500 (0.0500) time: 1.7580 data: 0.8080 max mem: 55573 Epoch: [235] [ 20/156] eta: 0:02:43 lr: 0.001014 min_lr: 0.001014 loss: 3.1307 (3.0070) weight_decay: 0.0500 (0.0500) time: 0.5951 data: 0.0004 max mem: 55573 Epoch: [235] [ 30/156] eta: 0:02:06 lr: 0.001013 min_lr: 0.001013 loss: 3.0792 (2.9973) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [235] [ 40/156] eta: 0:01:44 lr: 0.001011 min_lr: 0.001011 loss: 2.8781 (2.9628) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [235] [ 50/156] eta: 0:01:29 lr: 0.001009 min_lr: 0.001009 loss: 3.0797 (2.9758) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [235] [ 60/156] eta: 0:01:16 lr: 0.001007 min_lr: 0.001007 loss: 3.0853 (2.9616) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [235] [ 70/156] eta: 0:01:06 lr: 0.001005 min_lr: 0.001005 loss: 2.7940 (2.9535) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [235] [ 80/156] eta: 0:00:56 lr: 0.001003 min_lr: 0.001003 loss: 3.0046 (2.9548) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [235] [ 90/156] eta: 0:00:48 lr: 0.001001 min_lr: 0.001001 loss: 3.1267 (2.9713) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [235] [100/156] eta: 0:00:40 lr: 0.000999 min_lr: 0.000999 loss: 3.3019 (3.0080) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [235] [110/156] eta: 0:00:32 lr: 0.000997 min_lr: 0.000997 loss: 3.3027 (3.0113) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [235] [120/156] eta: 0:00:25 lr: 0.000995 min_lr: 0.000995 loss: 3.0620 (3.0142) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [235] [130/156] eta: 0:00:17 lr: 0.000994 min_lr: 0.000994 loss: 3.1379 (3.0108) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0010 max mem: 55573 Epoch: [235] [140/156] eta: 0:00:10 lr: 0.000992 min_lr: 0.000992 loss: 2.9519 (2.9977) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0008 max mem: 55573 Epoch: [235] [150/156] eta: 0:00:04 lr: 0.000990 min_lr: 0.000990 loss: 2.9103 (2.9934) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [235] [155/156] eta: 0:00:00 lr: 0.000989 min_lr: 0.000989 loss: 2.9103 (2.9817) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [235] Total time: 0:01:45 (0.6778 s / it) Averaged stats: lr: 0.000989 min_lr: 0.000989 loss: 2.9103 (3.0192) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.8577 (0.8577) acc1: 82.5521 (82.5521) acc5: 96.3542 (96.3542) time: 7.3086 data: 7.0713 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9383 (0.9481) acc1: 80.9896 (79.2960) acc5: 96.3542 (95.2960) time: 1.6079 data: 1.4143 max mem: 55573 Test: Total time: 0:00:08 (1.6606 s / it) * Acc@1 80.202 Acc@5 95.328 loss 0.933 Accuracy of the model on the 50000 test images: 80.2% Max accuracy: 80.20% Test: [0/5] eta: 0:00:34 loss: 1.0267 (1.0267) acc1: 74.6094 (74.6094) acc5: 92.3177 (92.3177) time: 6.8274 data: 6.5914 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0550 (1.0727) acc1: 72.2656 (70.3040) acc5: 92.3177 (90.1440) time: 1.5108 data: 1.3184 max mem: 55573 Test: Total time: 0:00:07 (1.5290 s / it) * Acc@1 70.984 Acc@5 90.470 loss 1.093 Accuracy of the model EMA on 50000 test images: 71.0% Max EMA accuracy: 70.98% Epoch: [236] [ 0/156] eta: 0:32:40 lr: 0.000989 min_lr: 0.000989 loss: 3.3979 (3.3979) weight_decay: 0.0500 (0.0500) time: 12.5651 data: 11.0964 max mem: 55573 Epoch: [236] [ 10/156] eta: 0:04:09 lr: 0.000987 min_lr: 0.000987 loss: 3.2041 (3.0381) weight_decay: 0.0500 (0.0500) time: 1.7075 data: 1.0092 max mem: 55573 Epoch: [236] [ 20/156] eta: 0:02:40 lr: 0.000985 min_lr: 0.000985 loss: 3.0033 (2.9300) weight_decay: 0.0500 (0.0500) time: 0.6074 data: 0.0004 max mem: 55573 Epoch: [236] [ 30/156] eta: 0:02:04 lr: 0.000983 min_lr: 0.000983 loss: 3.1248 (2.9986) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [236] [ 40/156] eta: 0:01:43 lr: 0.000981 min_lr: 0.000981 loss: 3.1631 (2.9727) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [236] [ 50/156] eta: 0:01:28 lr: 0.000979 min_lr: 0.000979 loss: 3.0382 (3.0047) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [236] [ 60/156] eta: 0:01:16 lr: 0.000977 min_lr: 0.000977 loss: 3.1222 (3.0162) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [236] [ 70/156] eta: 0:01:05 lr: 0.000975 min_lr: 0.000975 loss: 3.1222 (3.0223) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [236] [ 80/156] eta: 0:00:56 lr: 0.000973 min_lr: 0.000973 loss: 3.1311 (3.0301) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [236] [ 90/156] eta: 0:00:47 lr: 0.000972 min_lr: 0.000972 loss: 3.1311 (3.0132) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [236] [100/156] eta: 0:00:39 lr: 0.000970 min_lr: 0.000970 loss: 3.2032 (3.0327) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [236] [110/156] eta: 0:00:32 lr: 0.000968 min_lr: 0.000968 loss: 3.1602 (3.0115) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [236] [120/156] eta: 0:00:24 lr: 0.000966 min_lr: 0.000966 loss: 3.1795 (3.0460) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [236] [130/156] eta: 0:00:17 lr: 0.000964 min_lr: 0.000964 loss: 3.2821 (3.0281) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0013 max mem: 55573 Epoch: [236] [140/156] eta: 0:00:10 lr: 0.000962 min_lr: 0.000962 loss: 2.8089 (3.0255) weight_decay: 0.0500 (0.0500) time: 0.5849 data: 0.0011 max mem: 55573 Epoch: [236] [150/156] eta: 0:00:04 lr: 0.000960 min_lr: 0.000960 loss: 3.1401 (3.0390) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [236] [155/156] eta: 0:00:00 lr: 0.000959 min_lr: 0.000959 loss: 3.2851 (3.0365) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [236] Total time: 0:01:45 (0.6746 s / it) Averaged stats: lr: 0.000959 min_lr: 0.000959 loss: 3.2851 (3.0004) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.9586 (0.9586) acc1: 82.2917 (82.2917) acc5: 96.8750 (96.8750) time: 7.3154 data: 7.0780 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0117 (1.0045) acc1: 79.0365 (78.8480) acc5: 96.7448 (95.3920) time: 1.6096 data: 1.4157 max mem: 55573 Test: Total time: 0:00:08 (1.6522 s / it) * Acc@1 79.952 Acc@5 95.234 loss 0.997 Accuracy of the model on the 50000 test images: 80.0% Max accuracy: 80.20% Test: [0/5] eta: 0:00:37 loss: 1.0259 (1.0259) acc1: 74.3490 (74.3490) acc5: 92.3177 (92.3177) time: 7.4771 data: 7.2405 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0542 (1.0719) acc1: 72.6562 (70.4000) acc5: 92.3177 (90.1760) time: 1.6407 data: 1.4482 max mem: 55573 Test: Total time: 0:00:08 (1.6598 s / it) * Acc@1 71.026 Acc@5 90.482 loss 1.092 Accuracy of the model EMA on 50000 test images: 71.0% Max EMA accuracy: 71.03% Epoch: [237] [ 0/156] eta: 0:36:57 lr: 0.000959 min_lr: 0.000959 loss: 2.1999 (2.1999) weight_decay: 0.0500 (0.0500) time: 14.2171 data: 13.6382 max mem: 55573 Epoch: [237] [ 10/156] eta: 0:04:26 lr: 0.000957 min_lr: 0.000957 loss: 3.1159 (2.9943) weight_decay: 0.0500 (0.0500) time: 1.8286 data: 1.2402 max mem: 55573 Epoch: [237] [ 20/156] eta: 0:02:48 lr: 0.000956 min_lr: 0.000956 loss: 3.2697 (3.1308) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [237] [ 30/156] eta: 0:02:09 lr: 0.000954 min_lr: 0.000954 loss: 3.2153 (3.1283) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [237] [ 40/156] eta: 0:01:46 lr: 0.000952 min_lr: 0.000952 loss: 3.1319 (3.0684) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [237] [ 50/156] eta: 0:01:30 lr: 0.000950 min_lr: 0.000950 loss: 2.8899 (3.0427) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [237] [ 60/156] eta: 0:01:18 lr: 0.000948 min_lr: 0.000948 loss: 3.0631 (3.0271) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [237] [ 70/156] eta: 0:01:07 lr: 0.000946 min_lr: 0.000946 loss: 2.9541 (3.0115) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [237] [ 80/156] eta: 0:00:57 lr: 0.000944 min_lr: 0.000944 loss: 2.8422 (2.9865) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [237] [ 90/156] eta: 0:00:48 lr: 0.000943 min_lr: 0.000943 loss: 3.1695 (3.0058) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [237] [100/156] eta: 0:00:40 lr: 0.000941 min_lr: 0.000941 loss: 3.1814 (2.9975) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [237] [110/156] eta: 0:00:32 lr: 0.000939 min_lr: 0.000939 loss: 3.0767 (3.0043) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [237] [120/156] eta: 0:00:25 lr: 0.000937 min_lr: 0.000937 loss: 2.9704 (2.9870) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [237] [130/156] eta: 0:00:18 lr: 0.000935 min_lr: 0.000935 loss: 2.8874 (2.9940) weight_decay: 0.0500 (0.0500) time: 0.5883 data: 0.0011 max mem: 55573 Epoch: [237] [140/156] eta: 0:00:10 lr: 0.000933 min_lr: 0.000933 loss: 3.0969 (2.9994) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0009 max mem: 55573 Epoch: [237] [150/156] eta: 0:00:04 lr: 0.000931 min_lr: 0.000931 loss: 3.0171 (2.9866) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [237] [155/156] eta: 0:00:00 lr: 0.000930 min_lr: 0.000930 loss: 3.0171 (2.9824) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [237] Total time: 0:01:46 (0.6830 s / it) Averaged stats: lr: 0.000930 min_lr: 0.000930 loss: 3.0171 (3.0080) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8175 (0.8175) acc1: 83.9844 (83.9844) acc5: 96.4844 (96.4844) time: 6.8026 data: 6.5654 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9224 (0.9133) acc1: 80.3385 (79.5520) acc5: 96.3542 (94.9760) time: 1.5070 data: 1.3132 max mem: 55573 Test: Total time: 0:00:07 (1.5566 s / it) * Acc@1 80.360 Acc@5 95.348 loss 0.883 Accuracy of the model on the 50000 test images: 80.4% Max accuracy: 80.36% Test: [0/5] eta: 0:00:34 loss: 1.0250 (1.0250) acc1: 74.3490 (74.3490) acc5: 92.5781 (92.5781) time: 6.8909 data: 6.6548 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0532 (1.0709) acc1: 72.5260 (70.4000) acc5: 92.5781 (90.2400) time: 1.5235 data: 1.3311 max mem: 55573 Test: Total time: 0:00:07 (1.5460 s / it) * Acc@1 71.046 Acc@5 90.494 loss 1.092 Accuracy of the model EMA on 50000 test images: 71.0% Max EMA accuracy: 71.05% Epoch: [238] [ 0/156] eta: 0:28:21 lr: 0.000930 min_lr: 0.000930 loss: 3.3425 (3.3425) weight_decay: 0.0500 (0.0500) time: 10.9044 data: 9.0314 max mem: 55573 Epoch: [238] [ 10/156] eta: 0:03:58 lr: 0.000928 min_lr: 0.000928 loss: 3.3451 (3.1910) weight_decay: 0.0500 (0.0500) time: 1.6321 data: 0.8587 max mem: 55573 Epoch: [238] [ 20/156] eta: 0:02:34 lr: 0.000927 min_lr: 0.000927 loss: 3.2249 (3.0664) weight_decay: 0.0500 (0.0500) time: 0.6501 data: 0.0209 max mem: 55573 Epoch: [238] [ 30/156] eta: 0:02:01 lr: 0.000925 min_lr: 0.000925 loss: 3.0973 (3.0900) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [238] [ 40/156] eta: 0:01:41 lr: 0.000923 min_lr: 0.000923 loss: 3.0731 (3.0330) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [238] [ 50/156] eta: 0:01:26 lr: 0.000921 min_lr: 0.000921 loss: 3.0731 (3.0753) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [238] [ 60/156] eta: 0:01:14 lr: 0.000919 min_lr: 0.000919 loss: 3.2486 (3.0574) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [238] [ 70/156] eta: 0:01:04 lr: 0.000917 min_lr: 0.000917 loss: 2.9300 (3.0168) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [238] [ 80/156] eta: 0:00:55 lr: 0.000916 min_lr: 0.000916 loss: 2.8110 (2.9884) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [238] [ 90/156] eta: 0:00:47 lr: 0.000914 min_lr: 0.000914 loss: 2.9812 (3.0042) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [238] [100/156] eta: 0:00:39 lr: 0.000912 min_lr: 0.000912 loss: 3.2001 (3.0058) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [238] [110/156] eta: 0:00:31 lr: 0.000910 min_lr: 0.000910 loss: 3.0272 (3.0090) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [238] [120/156] eta: 0:00:24 lr: 0.000908 min_lr: 0.000908 loss: 3.0263 (2.9980) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [238] [130/156] eta: 0:00:17 lr: 0.000906 min_lr: 0.000906 loss: 2.7779 (2.9961) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0010 max mem: 55573 Epoch: [238] [140/156] eta: 0:00:10 lr: 0.000905 min_lr: 0.000905 loss: 3.0270 (2.9993) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0008 max mem: 55573 Epoch: [238] [150/156] eta: 0:00:03 lr: 0.000903 min_lr: 0.000903 loss: 3.1117 (3.0027) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [238] [155/156] eta: 0:00:00 lr: 0.000902 min_lr: 0.000902 loss: 3.1117 (3.0084) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [238] Total time: 0:01:44 (0.6697 s / it) Averaged stats: lr: 0.000902 min_lr: 0.000902 loss: 3.1117 (3.0070) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9160 (0.9160) acc1: 83.0729 (83.0729) acc5: 96.8750 (96.8750) time: 6.8712 data: 6.6341 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0254 (1.0157) acc1: 80.8594 (79.1680) acc5: 96.2240 (95.2320) time: 1.5205 data: 1.3269 max mem: 55573 Test: Total time: 0:00:07 (1.5738 s / it) * Acc@1 80.090 Acc@5 95.288 loss 1.010 Accuracy of the model on the 50000 test images: 80.1% Max accuracy: 80.36% Test: [0/5] eta: 0:00:35 loss: 1.0242 (1.0242) acc1: 74.4792 (74.4792) acc5: 92.5781 (92.5781) time: 7.0003 data: 6.7641 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0524 (1.0702) acc1: 72.6562 (70.5280) acc5: 92.5781 (90.2720) time: 1.5454 data: 1.3529 max mem: 55573 Test: Total time: 0:00:07 (1.5633 s / it) * Acc@1 71.062 Acc@5 90.510 loss 1.091 Accuracy of the model EMA on 50000 test images: 71.1% Max EMA accuracy: 71.06% Epoch: [239] [ 0/156] eta: 0:34:09 lr: 0.000902 min_lr: 0.000902 loss: 3.3187 (3.3187) weight_decay: 0.0500 (0.0500) time: 13.1366 data: 9.9603 max mem: 55573 Epoch: [239] [ 10/156] eta: 0:04:12 lr: 0.000900 min_lr: 0.000900 loss: 3.2554 (2.9897) weight_decay: 0.0500 (0.0500) time: 1.7317 data: 0.9060 max mem: 55573 Epoch: [239] [ 20/156] eta: 0:02:41 lr: 0.000898 min_lr: 0.000898 loss: 2.9940 (2.9762) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [239] [ 30/156] eta: 0:02:05 lr: 0.000896 min_lr: 0.000896 loss: 3.0741 (3.0542) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [239] [ 40/156] eta: 0:01:44 lr: 0.000894 min_lr: 0.000894 loss: 3.2853 (3.0703) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [239] [ 50/156] eta: 0:01:28 lr: 0.000893 min_lr: 0.000893 loss: 3.1620 (3.0719) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [239] [ 60/156] eta: 0:01:16 lr: 0.000891 min_lr: 0.000891 loss: 3.0742 (3.0418) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [239] [ 70/156] eta: 0:01:06 lr: 0.000889 min_lr: 0.000889 loss: 3.0710 (3.0474) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [239] [ 80/156] eta: 0:00:56 lr: 0.000887 min_lr: 0.000887 loss: 3.2591 (3.0641) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [239] [ 90/156] eta: 0:00:48 lr: 0.000885 min_lr: 0.000885 loss: 2.9150 (3.0357) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [239] [100/156] eta: 0:00:40 lr: 0.000884 min_lr: 0.000884 loss: 2.8181 (3.0337) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [239] [110/156] eta: 0:00:32 lr: 0.000882 min_lr: 0.000882 loss: 3.2674 (3.0424) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [239] [120/156] eta: 0:00:25 lr: 0.000880 min_lr: 0.000880 loss: 2.9939 (3.0285) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [239] [130/156] eta: 0:00:17 lr: 0.000878 min_lr: 0.000878 loss: 2.9811 (3.0303) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0011 max mem: 55573 Epoch: [239] [140/156] eta: 0:00:10 lr: 0.000876 min_lr: 0.000876 loss: 3.1075 (3.0321) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0010 max mem: 55573 Epoch: [239] [150/156] eta: 0:00:04 lr: 0.000875 min_lr: 0.000875 loss: 3.0747 (3.0241) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [239] [155/156] eta: 0:00:00 lr: 0.000874 min_lr: 0.000874 loss: 3.0205 (3.0238) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [239] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.000874 min_lr: 0.000874 loss: 3.0205 (3.0013) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8734 (0.8734) acc1: 82.9427 (82.9427) acc5: 96.8750 (96.8750) time: 6.8741 data: 6.6367 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9880 (0.9537) acc1: 81.5104 (80.0320) acc5: 95.8333 (95.3920) time: 1.5213 data: 1.3274 max mem: 55573 Test: Total time: 0:00:07 (1.5623 s / it) * Acc@1 80.312 Acc@5 95.354 loss 0.954 Accuracy of the model on the 50000 test images: 80.3% Max accuracy: 80.36% Test: [0/5] eta: 0:00:34 loss: 1.0231 (1.0231) acc1: 74.3490 (74.3490) acc5: 92.5781 (92.5781) time: 6.8800 data: 6.6439 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0515 (1.0692) acc1: 72.6562 (70.4320) acc5: 92.5781 (90.2720) time: 1.5240 data: 1.3317 max mem: 55573 Test: Total time: 0:00:07 (1.5415 s / it) * Acc@1 71.088 Acc@5 90.538 loss 1.090 Accuracy of the model EMA on 50000 test images: 71.1% Max EMA accuracy: 71.09% Epoch: [240] [ 0/156] eta: 0:38:12 lr: 0.000874 min_lr: 0.000874 loss: 3.3980 (3.3980) weight_decay: 0.0500 (0.0500) time: 14.6971 data: 8.7712 max mem: 55573 Epoch: [240] [ 10/156] eta: 0:04:33 lr: 0.000872 min_lr: 0.000872 loss: 2.9356 (2.9409) weight_decay: 0.0500 (0.0500) time: 1.8700 data: 0.7977 max mem: 55573 Epoch: [240] [ 20/156] eta: 0:02:51 lr: 0.000870 min_lr: 0.000870 loss: 2.9447 (2.9405) weight_decay: 0.0500 (0.0500) time: 0.5881 data: 0.0005 max mem: 55573 Epoch: [240] [ 30/156] eta: 0:02:11 lr: 0.000868 min_lr: 0.000868 loss: 3.1113 (2.9153) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [240] [ 40/156] eta: 0:01:48 lr: 0.000866 min_lr: 0.000866 loss: 3.0330 (2.9265) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [240] [ 50/156] eta: 0:01:31 lr: 0.000865 min_lr: 0.000865 loss: 2.9506 (2.9259) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [240] [ 60/156] eta: 0:01:18 lr: 0.000863 min_lr: 0.000863 loss: 2.9410 (2.9047) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [240] [ 70/156] eta: 0:01:07 lr: 0.000861 min_lr: 0.000861 loss: 3.0100 (2.9196) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [240] [ 80/156] eta: 0:00:58 lr: 0.000859 min_lr: 0.000859 loss: 2.9925 (2.9168) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [240] [ 90/156] eta: 0:00:49 lr: 0.000857 min_lr: 0.000857 loss: 3.0491 (2.9331) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [240] [100/156] eta: 0:00:40 lr: 0.000856 min_lr: 0.000856 loss: 3.2209 (2.9435) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [240] [110/156] eta: 0:00:32 lr: 0.000854 min_lr: 0.000854 loss: 3.1894 (2.9450) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [240] [120/156] eta: 0:00:25 lr: 0.000852 min_lr: 0.000852 loss: 3.0388 (2.9504) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [240] [130/156] eta: 0:00:18 lr: 0.000850 min_lr: 0.000850 loss: 3.0539 (2.9530) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0026 max mem: 55573 Epoch: [240] [140/156] eta: 0:00:11 lr: 0.000849 min_lr: 0.000849 loss: 3.0896 (2.9631) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0024 max mem: 55573 Epoch: [240] [150/156] eta: 0:00:04 lr: 0.000847 min_lr: 0.000847 loss: 3.1142 (2.9701) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [240] [155/156] eta: 0:00:00 lr: 0.000846 min_lr: 0.000846 loss: 3.1142 (2.9636) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [240] Total time: 0:01:47 (0.6863 s / it) Averaged stats: lr: 0.000846 min_lr: 0.000846 loss: 3.1142 (2.9940) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8776 (0.8776) acc1: 82.9427 (82.9427) acc5: 96.7448 (96.7448) time: 6.8378 data: 6.6001 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9537 (0.9608) acc1: 80.5990 (79.0400) acc5: 96.7448 (95.6160) time: 1.5144 data: 1.3201 max mem: 55573 Test: Total time: 0:00:07 (1.5628 s / it) * Acc@1 80.326 Acc@5 95.380 loss 0.954 Accuracy of the model on the 50000 test images: 80.3% Max accuracy: 80.36% Test: [0/5] eta: 0:00:36 loss: 1.0224 (1.0224) acc1: 74.3490 (74.3490) acc5: 92.5781 (92.5781) time: 7.3874 data: 7.1513 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0509 (1.0684) acc1: 72.6562 (70.4320) acc5: 92.5781 (90.2720) time: 1.6228 data: 1.4303 max mem: 55573 Test: Total time: 0:00:08 (1.6483 s / it) * Acc@1 71.120 Acc@5 90.564 loss 1.089 Accuracy of the model EMA on 50000 test images: 71.1% Max EMA accuracy: 71.12% Epoch: [241] [ 0/156] eta: 0:36:18 lr: 0.000846 min_lr: 0.000846 loss: 3.1426 (3.1426) weight_decay: 0.0500 (0.0500) time: 13.9625 data: 13.3665 max mem: 55573 Epoch: [241] [ 10/156] eta: 0:04:23 lr: 0.000844 min_lr: 0.000844 loss: 3.1426 (2.9547) weight_decay: 0.0500 (0.0500) time: 1.8048 data: 1.2155 max mem: 55573 Epoch: [241] [ 20/156] eta: 0:02:46 lr: 0.000842 min_lr: 0.000842 loss: 3.0177 (2.9242) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [241] [ 30/156] eta: 0:02:08 lr: 0.000840 min_lr: 0.000840 loss: 3.0799 (2.9111) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [241] [ 40/156] eta: 0:01:46 lr: 0.000839 min_lr: 0.000839 loss: 3.1768 (2.9651) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [241] [ 50/156] eta: 0:01:30 lr: 0.000837 min_lr: 0.000837 loss: 3.2439 (3.0067) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [241] [ 60/156] eta: 0:01:17 lr: 0.000835 min_lr: 0.000835 loss: 3.2378 (3.0225) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [241] [ 70/156] eta: 0:01:06 lr: 0.000833 min_lr: 0.000833 loss: 3.1312 (3.0286) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [241] [ 80/156] eta: 0:00:57 lr: 0.000832 min_lr: 0.000832 loss: 2.9947 (3.0092) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [241] [ 90/156] eta: 0:00:48 lr: 0.000830 min_lr: 0.000830 loss: 2.8429 (3.0064) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [241] [100/156] eta: 0:00:40 lr: 0.000828 min_lr: 0.000828 loss: 3.0716 (3.0118) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [241] [110/156] eta: 0:00:32 lr: 0.000826 min_lr: 0.000826 loss: 3.2231 (3.0323) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [241] [120/156] eta: 0:00:25 lr: 0.000825 min_lr: 0.000825 loss: 3.2362 (3.0376) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [241] [130/156] eta: 0:00:17 lr: 0.000823 min_lr: 0.000823 loss: 3.0454 (3.0267) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0010 max mem: 55573 Epoch: [241] [140/156] eta: 0:00:10 lr: 0.000821 min_lr: 0.000821 loss: 3.1446 (3.0322) weight_decay: 0.0500 (0.0500) time: 0.5847 data: 0.0009 max mem: 55573 Epoch: [241] [150/156] eta: 0:00:04 lr: 0.000819 min_lr: 0.000819 loss: 2.9341 (3.0014) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [241] [155/156] eta: 0:00:00 lr: 0.000819 min_lr: 0.000819 loss: 2.5782 (2.9863) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [241] Total time: 0:01:46 (0.6815 s / it) Averaged stats: lr: 0.000819 min_lr: 0.000819 loss: 2.5782 (2.9824) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8157 (0.8157) acc1: 83.4635 (83.4635) acc5: 96.8750 (96.8750) time: 6.9084 data: 6.6708 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9151 (0.8980) acc1: 79.5573 (79.3920) acc5: 96.8750 (95.6160) time: 1.5282 data: 1.3343 max mem: 55573 Test: Total time: 0:00:07 (1.5833 s / it) * Acc@1 80.422 Acc@5 95.600 loss 0.890 Accuracy of the model on the 50000 test images: 80.4% Max accuracy: 80.42% Test: [0/5] eta: 0:00:33 loss: 1.0214 (1.0214) acc1: 74.3490 (74.3490) acc5: 92.4479 (92.4479) time: 6.7254 data: 6.4893 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0500 (1.0676) acc1: 72.9167 (70.5280) acc5: 92.4479 (90.2400) time: 1.4904 data: 1.2980 max mem: 55573 Test: Total time: 0:00:07 (1.5079 s / it) * Acc@1 71.148 Acc@5 90.576 loss 1.088 Accuracy of the model EMA on 50000 test images: 71.1% Max EMA accuracy: 71.15% Epoch: [242] [ 0/156] eta: 0:31:25 lr: 0.000818 min_lr: 0.000818 loss: 2.7945 (2.7945) weight_decay: 0.0500 (0.0500) time: 12.0856 data: 8.9755 max mem: 55573 Epoch: [242] [ 10/156] eta: 0:04:02 lr: 0.000817 min_lr: 0.000817 loss: 3.2077 (3.0510) weight_decay: 0.0500 (0.0500) time: 1.6632 data: 0.8297 max mem: 55573 Epoch: [242] [ 20/156] eta: 0:02:36 lr: 0.000815 min_lr: 0.000815 loss: 3.2077 (3.0223) weight_decay: 0.0500 (0.0500) time: 0.6051 data: 0.0078 max mem: 55573 Epoch: [242] [ 30/156] eta: 0:02:02 lr: 0.000813 min_lr: 0.000813 loss: 2.8132 (3.0070) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [242] [ 40/156] eta: 0:01:41 lr: 0.000811 min_lr: 0.000811 loss: 2.9504 (2.9950) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [242] [ 50/156] eta: 0:01:27 lr: 0.000810 min_lr: 0.000810 loss: 3.0321 (3.0186) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [242] [ 60/156] eta: 0:01:15 lr: 0.000808 min_lr: 0.000808 loss: 3.0670 (2.9828) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [242] [ 70/156] eta: 0:01:04 lr: 0.000806 min_lr: 0.000806 loss: 2.9167 (2.9859) weight_decay: 0.0500 (0.0500) time: 0.5882 data: 0.0005 max mem: 55573 Epoch: [242] [ 80/156] eta: 0:00:55 lr: 0.000805 min_lr: 0.000805 loss: 2.9167 (2.9679) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0005 max mem: 55573 Epoch: [242] [ 90/156] eta: 0:00:47 lr: 0.000803 min_lr: 0.000803 loss: 2.8917 (2.9524) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [242] [100/156] eta: 0:00:39 lr: 0.000801 min_lr: 0.000801 loss: 3.0962 (2.9727) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [242] [110/156] eta: 0:00:32 lr: 0.000799 min_lr: 0.000799 loss: 3.1545 (2.9711) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [242] [120/156] eta: 0:00:24 lr: 0.000798 min_lr: 0.000798 loss: 3.1514 (2.9780) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [242] [130/156] eta: 0:00:17 lr: 0.000796 min_lr: 0.000796 loss: 3.2966 (2.9971) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0010 max mem: 55573 Epoch: [242] [140/156] eta: 0:00:10 lr: 0.000794 min_lr: 0.000794 loss: 3.2589 (2.9956) weight_decay: 0.0500 (0.0500) time: 0.5844 data: 0.0008 max mem: 55573 Epoch: [242] [150/156] eta: 0:00:03 lr: 0.000792 min_lr: 0.000792 loss: 3.1391 (2.9873) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [242] [155/156] eta: 0:00:00 lr: 0.000792 min_lr: 0.000792 loss: 3.1605 (2.9902) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [242] Total time: 0:01:44 (0.6705 s / it) Averaged stats: lr: 0.000792 min_lr: 0.000792 loss: 3.1605 (2.9962) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9076 (0.9076) acc1: 82.1615 (82.1615) acc5: 96.6146 (96.6146) time: 6.8576 data: 6.6203 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9609 (0.9641) acc1: 81.2500 (79.2320) acc5: 96.6146 (95.5840) time: 1.5179 data: 1.3241 max mem: 55573 Test: Total time: 0:00:07 (1.5607 s / it) * Acc@1 80.460 Acc@5 95.506 loss 0.952 Accuracy of the model on the 50000 test images: 80.5% Max accuracy: 80.46% Test: [0/5] eta: 0:00:33 loss: 1.0207 (1.0207) acc1: 74.4792 (74.4792) acc5: 92.4479 (92.4479) time: 6.7189 data: 6.4829 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0494 (1.0670) acc1: 72.9167 (70.4960) acc5: 92.4479 (90.2400) time: 1.4891 data: 1.2967 max mem: 55573 Test: Total time: 0:00:07 (1.5063 s / it) * Acc@1 71.186 Acc@5 90.580 loss 1.087 Accuracy of the model EMA on 50000 test images: 71.2% Max EMA accuracy: 71.19% Epoch: [243] [ 0/156] eta: 0:32:13 lr: 0.000791 min_lr: 0.000791 loss: 2.8609 (2.8609) weight_decay: 0.0500 (0.0500) time: 12.3911 data: 11.6840 max mem: 55573 Epoch: [243] [ 10/156] eta: 0:04:04 lr: 0.000790 min_lr: 0.000790 loss: 3.1559 (2.9092) weight_decay: 0.0500 (0.0500) time: 1.6719 data: 1.0627 max mem: 55573 Epoch: [243] [ 20/156] eta: 0:02:37 lr: 0.000788 min_lr: 0.000788 loss: 3.1859 (2.9610) weight_decay: 0.0500 (0.0500) time: 0.5949 data: 0.0005 max mem: 55573 Epoch: [243] [ 30/156] eta: 0:02:02 lr: 0.000786 min_lr: 0.000786 loss: 3.0720 (2.9136) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [243] [ 40/156] eta: 0:01:42 lr: 0.000785 min_lr: 0.000785 loss: 2.7843 (2.8974) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [243] [ 50/156] eta: 0:01:27 lr: 0.000783 min_lr: 0.000783 loss: 2.9405 (2.8930) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [243] [ 60/156] eta: 0:01:15 lr: 0.000781 min_lr: 0.000781 loss: 3.0915 (2.9048) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [243] [ 70/156] eta: 0:01:05 lr: 0.000779 min_lr: 0.000779 loss: 3.0915 (2.9151) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [243] [ 80/156] eta: 0:00:56 lr: 0.000778 min_lr: 0.000778 loss: 2.8911 (2.8938) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [243] [ 90/156] eta: 0:00:47 lr: 0.000776 min_lr: 0.000776 loss: 3.0735 (2.9255) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [243] [100/156] eta: 0:00:39 lr: 0.000774 min_lr: 0.000774 loss: 3.0735 (2.9047) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [243] [110/156] eta: 0:00:32 lr: 0.000773 min_lr: 0.000773 loss: 2.9475 (2.9097) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [243] [120/156] eta: 0:00:24 lr: 0.000771 min_lr: 0.000771 loss: 2.9738 (2.9108) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [243] [130/156] eta: 0:00:17 lr: 0.000769 min_lr: 0.000769 loss: 2.9585 (2.9037) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0012 max mem: 55573 Epoch: [243] [140/156] eta: 0:00:10 lr: 0.000768 min_lr: 0.000768 loss: 3.1787 (2.9112) weight_decay: 0.0500 (0.0500) time: 0.5852 data: 0.0010 max mem: 55573 Epoch: [243] [150/156] eta: 0:00:04 lr: 0.000766 min_lr: 0.000766 loss: 2.9596 (2.9108) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [243] [155/156] eta: 0:00:00 lr: 0.000765 min_lr: 0.000765 loss: 2.9596 (2.9114) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [243] Total time: 0:01:44 (0.6715 s / it) Averaged stats: lr: 0.000765 min_lr: 0.000765 loss: 2.9596 (2.9675) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7999 (0.7999) acc1: 84.6354 (84.6354) acc5: 97.2656 (97.2656) time: 6.9459 data: 6.7085 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9187 (0.8826) acc1: 81.1198 (80.1600) acc5: 95.9635 (95.6160) time: 1.5356 data: 1.3418 max mem: 55573 Test: Total time: 0:00:07 (1.5738 s / it) * Acc@1 80.684 Acc@5 95.530 loss 0.874 Accuracy of the model on the 50000 test images: 80.7% Max accuracy: 80.68% Test: [0/5] eta: 0:00:34 loss: 1.0196 (1.0196) acc1: 74.4792 (74.4792) acc5: 92.5781 (92.5781) time: 6.8698 data: 6.6338 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0485 (1.0660) acc1: 72.9167 (70.4320) acc5: 92.5781 (90.2720) time: 1.5194 data: 1.3269 max mem: 55573 Test: Total time: 0:00:07 (1.5341 s / it) * Acc@1 71.206 Acc@5 90.590 loss 1.086 Accuracy of the model EMA on 50000 test images: 71.2% Max EMA accuracy: 71.21% Epoch: [244] [ 0/156] eta: 0:37:17 lr: 0.000765 min_lr: 0.000765 loss: 3.3999 (3.3999) weight_decay: 0.0500 (0.0500) time: 14.3454 data: 10.2577 max mem: 55573 Epoch: [244] [ 10/156] eta: 0:04:29 lr: 0.000763 min_lr: 0.000763 loss: 3.2380 (2.9095) weight_decay: 0.0500 (0.0500) time: 1.8445 data: 0.9329 max mem: 55573 Epoch: [244] [ 20/156] eta: 0:02:49 lr: 0.000761 min_lr: 0.000761 loss: 2.8900 (2.8631) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0004 max mem: 55573 Epoch: [244] [ 30/156] eta: 0:02:10 lr: 0.000760 min_lr: 0.000760 loss: 2.9914 (2.9104) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0004 max mem: 55573 Epoch: [244] [ 40/156] eta: 0:01:47 lr: 0.000758 min_lr: 0.000758 loss: 3.1919 (2.9976) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [244] [ 50/156] eta: 0:01:31 lr: 0.000756 min_lr: 0.000756 loss: 3.1919 (3.0007) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [244] [ 60/156] eta: 0:01:18 lr: 0.000755 min_lr: 0.000755 loss: 3.2072 (3.0139) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [244] [ 70/156] eta: 0:01:07 lr: 0.000753 min_lr: 0.000753 loss: 3.2063 (3.0056) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [244] [ 80/156] eta: 0:00:57 lr: 0.000751 min_lr: 0.000751 loss: 3.1858 (3.0098) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [244] [ 90/156] eta: 0:00:49 lr: 0.000750 min_lr: 0.000750 loss: 3.2160 (3.0297) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [244] [100/156] eta: 0:00:40 lr: 0.000748 min_lr: 0.000748 loss: 3.2142 (3.0297) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [244] [110/156] eta: 0:00:32 lr: 0.000746 min_lr: 0.000746 loss: 3.0204 (3.0280) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [244] [120/156] eta: 0:00:25 lr: 0.000745 min_lr: 0.000745 loss: 2.9828 (3.0102) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [244] [130/156] eta: 0:00:18 lr: 0.000743 min_lr: 0.000743 loss: 2.9828 (3.0103) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0011 max mem: 55573 Epoch: [244] [140/156] eta: 0:00:11 lr: 0.000741 min_lr: 0.000741 loss: 3.0697 (3.0116) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [244] [150/156] eta: 0:00:04 lr: 0.000740 min_lr: 0.000740 loss: 3.2031 (3.0209) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [244] [155/156] eta: 0:00:00 lr: 0.000739 min_lr: 0.000739 loss: 3.1393 (3.0113) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [244] Total time: 0:01:46 (0.6852 s / it) Averaged stats: lr: 0.000739 min_lr: 0.000739 loss: 3.1393 (2.9743) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8708 (0.8708) acc1: 83.2031 (83.2031) acc5: 96.3542 (96.3542) time: 7.0651 data: 6.8277 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9747 (0.9723) acc1: 81.2500 (79.9680) acc5: 96.3542 (95.6480) time: 1.5593 data: 1.3656 max mem: 55573 Test: Total time: 0:00:08 (1.6038 s / it) * Acc@1 80.662 Acc@5 95.568 loss 0.961 Accuracy of the model on the 50000 test images: 80.7% Max accuracy: 80.68% Test: [0/5] eta: 0:00:35 loss: 1.0190 (1.0190) acc1: 74.4792 (74.4792) acc5: 92.5781 (92.5781) time: 7.0807 data: 6.8446 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0479 (1.0654) acc1: 72.9167 (70.4960) acc5: 92.5781 (90.2400) time: 1.5614 data: 1.3690 max mem: 55573 Test: Total time: 0:00:07 (1.5852 s / it) * Acc@1 71.222 Acc@5 90.616 loss 1.086 Accuracy of the model EMA on 50000 test images: 71.2% Max EMA accuracy: 71.22% Epoch: [245] [ 0/156] eta: 0:35:58 lr: 0.000739 min_lr: 0.000739 loss: 3.3691 (3.3691) weight_decay: 0.0500 (0.0500) time: 13.8396 data: 11.0724 max mem: 55573 Epoch: [245] [ 10/156] eta: 0:04:23 lr: 0.000737 min_lr: 0.000737 loss: 3.0555 (2.9300) weight_decay: 0.0500 (0.0500) time: 1.8048 data: 1.0069 max mem: 55573 Epoch: [245] [ 20/156] eta: 0:02:46 lr: 0.000735 min_lr: 0.000735 loss: 3.1110 (3.0389) weight_decay: 0.0500 (0.0500) time: 0.5964 data: 0.0004 max mem: 55573 Epoch: [245] [ 30/156] eta: 0:02:08 lr: 0.000734 min_lr: 0.000734 loss: 3.1110 (2.9886) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [245] [ 40/156] eta: 0:01:46 lr: 0.000732 min_lr: 0.000732 loss: 3.0408 (3.0518) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [245] [ 50/156] eta: 0:01:30 lr: 0.000730 min_lr: 0.000730 loss: 3.2732 (3.0539) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [245] [ 60/156] eta: 0:01:17 lr: 0.000729 min_lr: 0.000729 loss: 3.2088 (3.0512) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [245] [ 70/156] eta: 0:01:06 lr: 0.000727 min_lr: 0.000727 loss: 3.0619 (3.0374) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [245] [ 80/156] eta: 0:00:57 lr: 0.000725 min_lr: 0.000725 loss: 2.9237 (3.0093) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [245] [ 90/156] eta: 0:00:48 lr: 0.000724 min_lr: 0.000724 loss: 2.8219 (2.9697) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [245] [100/156] eta: 0:00:40 lr: 0.000722 min_lr: 0.000722 loss: 2.8943 (2.9644) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [245] [110/156] eta: 0:00:32 lr: 0.000720 min_lr: 0.000720 loss: 2.9016 (2.9546) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [245] [120/156] eta: 0:00:25 lr: 0.000719 min_lr: 0.000719 loss: 2.9137 (2.9537) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [245] [130/156] eta: 0:00:18 lr: 0.000717 min_lr: 0.000717 loss: 3.0493 (2.9458) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0013 max mem: 55573 Epoch: [245] [140/156] eta: 0:00:10 lr: 0.000716 min_lr: 0.000716 loss: 2.9955 (2.9513) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0012 max mem: 55573 Epoch: [245] [150/156] eta: 0:00:04 lr: 0.000714 min_lr: 0.000714 loss: 3.0130 (2.9527) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [245] [155/156] eta: 0:00:00 lr: 0.000713 min_lr: 0.000713 loss: 3.0130 (2.9489) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [245] Total time: 0:01:46 (0.6814 s / it) Averaged stats: lr: 0.000713 min_lr: 0.000713 loss: 3.0130 (2.9844) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8974 (0.8974) acc1: 83.3333 (83.3333) acc5: 96.4844 (96.4844) time: 6.6254 data: 6.3882 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9883 (0.9832) acc1: 80.8594 (80.2880) acc5: 96.3542 (95.3280) time: 1.4703 data: 1.2777 max mem: 55573 Test: Total time: 0:00:07 (1.4867 s / it) * Acc@1 80.540 Acc@5 95.402 loss 0.979 Accuracy of the model on the 50000 test images: 80.5% Max accuracy: 80.68% Test: [0/5] eta: 0:00:35 loss: 1.0181 (1.0181) acc1: 74.3490 (74.3490) acc5: 92.5781 (92.5781) time: 7.1249 data: 6.8885 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0471 (1.0646) acc1: 73.0469 (70.4960) acc5: 92.5781 (90.2400) time: 1.5702 data: 1.3778 max mem: 55573 Test: Total time: 0:00:07 (1.5937 s / it) * Acc@1 71.258 Acc@5 90.636 loss 1.085 Accuracy of the model EMA on 50000 test images: 71.3% Max EMA accuracy: 71.26% Epoch: [246] [ 0/156] eta: 0:33:02 lr: 0.000713 min_lr: 0.000713 loss: 2.6407 (2.6407) weight_decay: 0.0500 (0.0500) time: 12.7065 data: 9.8186 max mem: 55573 Epoch: [246] [ 10/156] eta: 0:04:08 lr: 0.000711 min_lr: 0.000711 loss: 3.1739 (2.9320) weight_decay: 0.0500 (0.0500) time: 1.7017 data: 0.8931 max mem: 55573 Epoch: [246] [ 20/156] eta: 0:02:39 lr: 0.000710 min_lr: 0.000710 loss: 3.1739 (2.9681) weight_decay: 0.0500 (0.0500) time: 0.5973 data: 0.0005 max mem: 55573 Epoch: [246] [ 30/156] eta: 0:02:04 lr: 0.000708 min_lr: 0.000708 loss: 3.1449 (2.9013) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [246] [ 40/156] eta: 0:01:43 lr: 0.000706 min_lr: 0.000706 loss: 2.8731 (2.8855) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [246] [ 50/156] eta: 0:01:28 lr: 0.000705 min_lr: 0.000705 loss: 3.0080 (2.9249) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [246] [ 60/156] eta: 0:01:15 lr: 0.000703 min_lr: 0.000703 loss: 3.2292 (2.9537) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [246] [ 70/156] eta: 0:01:05 lr: 0.000701 min_lr: 0.000701 loss: 3.0743 (2.9324) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [246] [ 80/156] eta: 0:00:56 lr: 0.000700 min_lr: 0.000700 loss: 2.9237 (2.9351) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [246] [ 90/156] eta: 0:00:47 lr: 0.000698 min_lr: 0.000698 loss: 2.9237 (2.9314) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [246] [100/156] eta: 0:00:39 lr: 0.000697 min_lr: 0.000697 loss: 3.0877 (2.9437) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [246] [110/156] eta: 0:00:32 lr: 0.000695 min_lr: 0.000695 loss: 2.9590 (2.9354) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [246] [120/156] eta: 0:00:24 lr: 0.000693 min_lr: 0.000693 loss: 2.8653 (2.9331) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [246] [130/156] eta: 0:00:17 lr: 0.000692 min_lr: 0.000692 loss: 3.1678 (2.9456) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0010 max mem: 55573 Epoch: [246] [140/156] eta: 0:00:10 lr: 0.000690 min_lr: 0.000690 loss: 3.2163 (2.9557) weight_decay: 0.0500 (0.0500) time: 0.5872 data: 0.0009 max mem: 55573 Epoch: [246] [150/156] eta: 0:00:04 lr: 0.000689 min_lr: 0.000689 loss: 3.1388 (2.9570) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [246] [155/156] eta: 0:00:00 lr: 0.000688 min_lr: 0.000688 loss: 3.1392 (2.9550) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0002 max mem: 55573 Epoch: [246] Total time: 0:01:45 (0.6748 s / it) Averaged stats: lr: 0.000688 min_lr: 0.000688 loss: 3.1392 (2.9773) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 0.8925 (0.8925) acc1: 83.3333 (83.3333) acc5: 95.9635 (95.9635) time: 6.5925 data: 6.3554 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9951 (0.9806) acc1: 81.5104 (79.8080) acc5: 95.9635 (95.4240) time: 1.4638 data: 1.2712 max mem: 55573 Test: Total time: 0:00:07 (1.4982 s / it) * Acc@1 80.752 Acc@5 95.538 loss 0.972 Accuracy of the model on the 50000 test images: 80.8% Max accuracy: 80.75% Test: [0/5] eta: 0:00:34 loss: 1.0175 (1.0175) acc1: 74.4792 (74.4792) acc5: 92.5781 (92.5781) time: 6.8623 data: 6.6263 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0464 (1.0640) acc1: 72.9167 (70.4960) acc5: 92.5781 (90.2720) time: 1.5177 data: 1.3254 max mem: 55573 Test: Total time: 0:00:07 (1.5398 s / it) * Acc@1 71.284 Acc@5 90.654 loss 1.084 Accuracy of the model EMA on 50000 test images: 71.3% Max EMA accuracy: 71.28% Epoch: [247] [ 0/156] eta: 0:33:39 lr: 0.000688 min_lr: 0.000688 loss: 2.8782 (2.8782) weight_decay: 0.0500 (0.0500) time: 12.9427 data: 12.3420 max mem: 55573 Epoch: [247] [ 10/156] eta: 0:04:10 lr: 0.000686 min_lr: 0.000686 loss: 3.2311 (2.9350) weight_decay: 0.0500 (0.0500) time: 1.7158 data: 1.1225 max mem: 55573 Epoch: [247] [ 20/156] eta: 0:02:40 lr: 0.000684 min_lr: 0.000684 loss: 2.7026 (2.8100) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [247] [ 30/156] eta: 0:02:04 lr: 0.000683 min_lr: 0.000683 loss: 2.8381 (2.8364) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [247] [ 40/156] eta: 0:01:43 lr: 0.000681 min_lr: 0.000681 loss: 2.9502 (2.8785) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [247] [ 50/156] eta: 0:01:28 lr: 0.000680 min_lr: 0.000680 loss: 3.0865 (2.9118) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [247] [ 60/156] eta: 0:01:16 lr: 0.000678 min_lr: 0.000678 loss: 3.0022 (2.8722) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [247] [ 70/156] eta: 0:01:05 lr: 0.000676 min_lr: 0.000676 loss: 2.9411 (2.8970) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [247] [ 80/156] eta: 0:00:56 lr: 0.000675 min_lr: 0.000675 loss: 3.0902 (2.8944) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [247] [ 90/156] eta: 0:00:47 lr: 0.000673 min_lr: 0.000673 loss: 2.9805 (2.9011) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [247] [100/156] eta: 0:00:39 lr: 0.000672 min_lr: 0.000672 loss: 2.9805 (2.8947) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [247] [110/156] eta: 0:00:32 lr: 0.000670 min_lr: 0.000670 loss: 2.9990 (2.8989) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [247] [120/156] eta: 0:00:24 lr: 0.000668 min_lr: 0.000668 loss: 2.9809 (2.8979) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [247] [130/156] eta: 0:00:17 lr: 0.000667 min_lr: 0.000667 loss: 2.9809 (2.9025) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0010 max mem: 55573 Epoch: [247] [140/156] eta: 0:00:10 lr: 0.000665 min_lr: 0.000665 loss: 3.1077 (2.9055) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0009 max mem: 55573 Epoch: [247] [150/156] eta: 0:00:04 lr: 0.000664 min_lr: 0.000664 loss: 3.0374 (2.9037) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [247] [155/156] eta: 0:00:00 lr: 0.000663 min_lr: 0.000663 loss: 3.0729 (2.9079) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [247] Total time: 0:01:45 (0.6750 s / it) Averaged stats: lr: 0.000663 min_lr: 0.000663 loss: 3.0729 (2.9592) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8610 (0.8610) acc1: 83.8542 (83.8542) acc5: 96.7448 (96.7448) time: 6.9495 data: 6.7121 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9372 (0.9487) acc1: 81.3802 (79.8080) acc5: 96.6146 (95.5520) time: 1.5364 data: 1.3425 max mem: 55573 Test: Total time: 0:00:07 (1.5826 s / it) * Acc@1 80.926 Acc@5 95.540 loss 0.929 Accuracy of the model on the 50000 test images: 80.9% Max accuracy: 80.93% Test: [0/5] eta: 0:00:32 loss: 1.0165 (1.0165) acc1: 74.6094 (74.6094) acc5: 92.9688 (92.9688) time: 6.5870 data: 6.3509 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0455 (1.0631) acc1: 72.9167 (70.5920) acc5: 92.9688 (90.3680) time: 1.4631 data: 1.2703 max mem: 55573 Test: Total time: 0:00:07 (1.4784 s / it) * Acc@1 71.322 Acc@5 90.690 loss 1.084 Accuracy of the model EMA on 50000 test images: 71.3% Max EMA accuracy: 71.32% Epoch: [248] [ 0/156] eta: 0:31:02 lr: 0.000663 min_lr: 0.000663 loss: 2.1806 (2.1806) weight_decay: 0.0500 (0.0500) time: 11.9395 data: 10.6523 max mem: 55573 Epoch: [248] [ 10/156] eta: 0:04:08 lr: 0.000661 min_lr: 0.000661 loss: 2.9802 (2.9472) weight_decay: 0.0500 (0.0500) time: 1.7016 data: 0.9688 max mem: 55573 Epoch: [248] [ 20/156] eta: 0:02:39 lr: 0.000659 min_lr: 0.000659 loss: 2.9802 (2.9646) weight_decay: 0.0500 (0.0500) time: 0.6342 data: 0.0004 max mem: 55573 Epoch: [248] [ 30/156] eta: 0:02:04 lr: 0.000658 min_lr: 0.000658 loss: 3.1493 (2.9802) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [248] [ 40/156] eta: 0:01:43 lr: 0.000656 min_lr: 0.000656 loss: 3.2330 (2.9500) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [248] [ 50/156] eta: 0:01:27 lr: 0.000655 min_lr: 0.000655 loss: 3.2184 (3.0073) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [248] [ 60/156] eta: 0:01:15 lr: 0.000653 min_lr: 0.000653 loss: 3.1678 (2.9907) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [248] [ 70/156] eta: 0:01:05 lr: 0.000652 min_lr: 0.000652 loss: 3.0659 (2.9861) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [248] [ 80/156] eta: 0:00:56 lr: 0.000650 min_lr: 0.000650 loss: 2.8058 (2.9423) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [248] [ 90/156] eta: 0:00:47 lr: 0.000648 min_lr: 0.000648 loss: 2.8035 (2.9423) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [248] [100/156] eta: 0:00:39 lr: 0.000647 min_lr: 0.000647 loss: 3.0561 (2.9374) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [248] [110/156] eta: 0:00:32 lr: 0.000645 min_lr: 0.000645 loss: 3.1550 (2.9463) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [248] [120/156] eta: 0:00:24 lr: 0.000644 min_lr: 0.000644 loss: 3.1226 (2.9425) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [248] [130/156] eta: 0:00:17 lr: 0.000642 min_lr: 0.000642 loss: 2.8107 (2.9193) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0010 max mem: 55573 Epoch: [248] [140/156] eta: 0:00:10 lr: 0.000641 min_lr: 0.000641 loss: 2.5655 (2.8975) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0008 max mem: 55573 Epoch: [248] [150/156] eta: 0:00:04 lr: 0.000639 min_lr: 0.000639 loss: 2.6729 (2.8937) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [248] [155/156] eta: 0:00:00 lr: 0.000638 min_lr: 0.000638 loss: 2.6904 (2.8941) weight_decay: 0.0500 (0.0500) time: 0.5831 data: 0.0001 max mem: 55573 Epoch: [248] Total time: 0:01:45 (0.6744 s / it) Averaged stats: lr: 0.000638 min_lr: 0.000638 loss: 2.6904 (2.9557) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8047 (0.8047) acc1: 83.7240 (83.7240) acc5: 96.3542 (96.3542) time: 6.7582 data: 6.5212 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8819 (0.8884) acc1: 81.2500 (79.7440) acc5: 96.3542 (95.4240) time: 1.4980 data: 1.3043 max mem: 55573 Test: Total time: 0:00:07 (1.5450 s / it) * Acc@1 80.788 Acc@5 95.544 loss 0.869 Accuracy of the model on the 50000 test images: 80.8% Max accuracy: 80.93% Test: [0/5] eta: 0:00:36 loss: 1.0159 (1.0159) acc1: 74.7396 (74.7396) acc5: 92.8385 (92.8385) time: 7.2882 data: 7.0522 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0448 (1.0624) acc1: 72.9167 (70.5920) acc5: 92.8385 (90.4000) time: 1.6028 data: 1.4105 max mem: 55573 Test: Total time: 0:00:08 (1.6242 s / it) * Acc@1 71.350 Acc@5 90.690 loss 1.083 Accuracy of the model EMA on 50000 test images: 71.4% Max EMA accuracy: 71.35% Epoch: [249] [ 0/156] eta: 0:34:06 lr: 0.000638 min_lr: 0.000638 loss: 3.1490 (3.1490) weight_decay: 0.0500 (0.0500) time: 13.1209 data: 9.6996 max mem: 55573 Epoch: [249] [ 10/156] eta: 0:04:12 lr: 0.000637 min_lr: 0.000637 loss: 2.9597 (2.7495) weight_decay: 0.0500 (0.0500) time: 1.7328 data: 0.8822 max mem: 55573 Epoch: [249] [ 20/156] eta: 0:02:41 lr: 0.000635 min_lr: 0.000635 loss: 2.9597 (2.8714) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [249] [ 30/156] eta: 0:02:05 lr: 0.000633 min_lr: 0.000633 loss: 3.0604 (2.9169) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [249] [ 40/156] eta: 0:01:44 lr: 0.000632 min_lr: 0.000632 loss: 3.1562 (2.9564) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [249] [ 50/156] eta: 0:01:28 lr: 0.000630 min_lr: 0.000630 loss: 3.2065 (3.0046) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [249] [ 60/156] eta: 0:01:16 lr: 0.000629 min_lr: 0.000629 loss: 3.1668 (3.0245) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [249] [ 70/156] eta: 0:01:06 lr: 0.000627 min_lr: 0.000627 loss: 3.1482 (3.0206) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0005 max mem: 55573 Epoch: [249] [ 80/156] eta: 0:00:56 lr: 0.000626 min_lr: 0.000626 loss: 3.0365 (2.9783) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [249] [ 90/156] eta: 0:00:48 lr: 0.000624 min_lr: 0.000624 loss: 2.9328 (2.9896) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [249] [100/156] eta: 0:00:40 lr: 0.000623 min_lr: 0.000623 loss: 3.0039 (2.9723) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [249] [110/156] eta: 0:00:32 lr: 0.000621 min_lr: 0.000621 loss: 3.0492 (2.9870) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0005 max mem: 55573 Epoch: [249] [120/156] eta: 0:00:25 lr: 0.000620 min_lr: 0.000620 loss: 3.2353 (2.9849) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [249] [130/156] eta: 0:00:17 lr: 0.000618 min_lr: 0.000618 loss: 2.9726 (2.9711) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0010 max mem: 55573 Epoch: [249] [140/156] eta: 0:00:10 lr: 0.000616 min_lr: 0.000616 loss: 2.9929 (2.9847) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0009 max mem: 55573 Epoch: [249] [150/156] eta: 0:00:04 lr: 0.000615 min_lr: 0.000615 loss: 3.1881 (2.9904) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [249] [155/156] eta: 0:00:00 lr: 0.000614 min_lr: 0.000614 loss: 3.0254 (2.9835) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [249] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.000614 min_lr: 0.000614 loss: 3.0254 (2.9614) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8010 (0.8010) acc1: 82.4219 (82.4219) acc5: 96.2240 (96.2240) time: 6.8848 data: 6.6453 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8894 (0.8971) acc1: 81.1198 (79.6480) acc5: 96.2240 (95.2640) time: 1.5238 data: 1.3292 max mem: 55573 Test: Total time: 0:00:07 (1.5682 s / it) * Acc@1 80.894 Acc@5 95.680 loss 0.881 Accuracy of the model on the 50000 test images: 80.9% Max accuracy: 80.93% Test: [0/5] eta: 0:00:33 loss: 1.0154 (1.0154) acc1: 74.7396 (74.7396) acc5: 92.8385 (92.8385) time: 6.7184 data: 6.4823 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0444 (1.0619) acc1: 73.0469 (70.5920) acc5: 92.8385 (90.4000) time: 1.4890 data: 1.2966 max mem: 55573 Test: Total time: 0:00:07 (1.5152 s / it) * Acc@1 71.372 Acc@5 90.708 loss 1.082 Accuracy of the model EMA on 50000 test images: 71.4% Max EMA accuracy: 71.37% Epoch: [250] [ 0/156] eta: 0:33:51 lr: 0.000614 min_lr: 0.000614 loss: 3.3445 (3.3445) weight_decay: 0.0500 (0.0500) time: 13.0222 data: 8.0548 max mem: 55573 Epoch: [250] [ 10/156] eta: 0:04:12 lr: 0.000612 min_lr: 0.000612 loss: 2.8357 (2.8452) weight_decay: 0.0500 (0.0500) time: 1.7300 data: 0.7327 max mem: 55573 Epoch: [250] [ 20/156] eta: 0:02:41 lr: 0.000611 min_lr: 0.000611 loss: 2.6911 (2.7816) weight_decay: 0.0500 (0.0500) time: 0.5948 data: 0.0004 max mem: 55573 Epoch: [250] [ 30/156] eta: 0:02:05 lr: 0.000609 min_lr: 0.000609 loss: 2.9856 (2.8439) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [250] [ 40/156] eta: 0:01:43 lr: 0.000608 min_lr: 0.000608 loss: 3.1567 (2.8695) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [250] [ 50/156] eta: 0:01:28 lr: 0.000606 min_lr: 0.000606 loss: 3.2278 (2.9350) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [250] [ 60/156] eta: 0:01:16 lr: 0.000605 min_lr: 0.000605 loss: 3.1864 (2.9150) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [250] [ 70/156] eta: 0:01:05 lr: 0.000603 min_lr: 0.000603 loss: 3.1155 (2.9388) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [250] [ 80/156] eta: 0:00:56 lr: 0.000602 min_lr: 0.000602 loss: 3.1273 (2.9613) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [250] [ 90/156] eta: 0:00:47 lr: 0.000600 min_lr: 0.000600 loss: 3.1423 (2.9734) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [250] [100/156] eta: 0:00:39 lr: 0.000599 min_lr: 0.000599 loss: 3.1060 (2.9617) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [250] [110/156] eta: 0:00:32 lr: 0.000597 min_lr: 0.000597 loss: 3.1479 (2.9794) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [250] [120/156] eta: 0:00:24 lr: 0.000596 min_lr: 0.000596 loss: 3.1479 (2.9812) weight_decay: 0.0500 (0.0500) time: 0.5958 data: 0.0004 max mem: 55573 Epoch: [250] [130/156] eta: 0:00:17 lr: 0.000594 min_lr: 0.000594 loss: 3.1344 (2.9759) weight_decay: 0.0500 (0.0500) time: 0.5943 data: 0.0010 max mem: 55573 Epoch: [250] [140/156] eta: 0:00:10 lr: 0.000593 min_lr: 0.000593 loss: 3.0377 (2.9668) weight_decay: 0.0500 (0.0500) time: 0.5844 data: 0.0008 max mem: 55573 Epoch: [250] [150/156] eta: 0:00:04 lr: 0.000591 min_lr: 0.000591 loss: 2.7942 (2.9538) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [250] [155/156] eta: 0:00:00 lr: 0.000591 min_lr: 0.000591 loss: 3.0777 (2.9530) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [250] Total time: 0:01:45 (0.6759 s / it) Averaged stats: lr: 0.000591 min_lr: 0.000591 loss: 3.0777 (2.9624) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 0.7916 (0.7916) acc1: 84.7656 (84.7656) acc5: 97.3958 (97.3958) time: 6.5721 data: 6.3350 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8921 (0.8898) acc1: 80.9896 (80.5760) acc5: 96.6146 (95.6800) time: 1.4610 data: 1.2671 max mem: 55573 Test: Total time: 0:00:07 (1.5100 s / it) * Acc@1 81.088 Acc@5 95.628 loss 0.885 Accuracy of the model on the 50000 test images: 81.1% Max accuracy: 81.09% Test: [0/5] eta: 0:00:35 loss: 1.0143 (1.0143) acc1: 74.7396 (74.7396) acc5: 92.8385 (92.8385) time: 7.0628 data: 6.8267 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0433 (1.0611) acc1: 73.1771 (70.7200) acc5: 92.8385 (90.4640) time: 1.5579 data: 1.3655 max mem: 55573 Test: Total time: 0:00:07 (1.5745 s / it) * Acc@1 71.402 Acc@5 90.720 loss 1.081 Accuracy of the model EMA on 50000 test images: 71.4% Max EMA accuracy: 71.40% Epoch: [251] [ 0/156] eta: 0:36:03 lr: 0.000590 min_lr: 0.000590 loss: 2.2402 (2.2402) weight_decay: 0.0500 (0.0500) time: 13.8672 data: 8.5797 max mem: 55573 Epoch: [251] [ 10/156] eta: 0:04:24 lr: 0.000589 min_lr: 0.000589 loss: 2.8069 (2.7955) weight_decay: 0.0500 (0.0500) time: 1.8110 data: 0.7804 max mem: 55573 Epoch: [251] [ 20/156] eta: 0:02:47 lr: 0.000587 min_lr: 0.000587 loss: 3.1535 (2.9577) weight_decay: 0.0500 (0.0500) time: 0.5984 data: 0.0004 max mem: 55573 Epoch: [251] [ 30/156] eta: 0:02:09 lr: 0.000586 min_lr: 0.000586 loss: 3.1535 (2.9770) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [251] [ 40/156] eta: 0:01:46 lr: 0.000584 min_lr: 0.000584 loss: 3.0687 (2.9922) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [251] [ 50/156] eta: 0:01:30 lr: 0.000583 min_lr: 0.000583 loss: 2.9991 (2.9743) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [251] [ 60/156] eta: 0:01:17 lr: 0.000581 min_lr: 0.000581 loss: 3.2008 (2.9932) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [251] [ 70/156] eta: 0:01:07 lr: 0.000580 min_lr: 0.000580 loss: 3.2239 (2.9755) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [251] [ 80/156] eta: 0:00:57 lr: 0.000578 min_lr: 0.000578 loss: 3.2272 (2.9888) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [251] [ 90/156] eta: 0:00:48 lr: 0.000577 min_lr: 0.000577 loss: 3.0009 (2.9643) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [251] [100/156] eta: 0:00:40 lr: 0.000575 min_lr: 0.000575 loss: 2.9556 (2.9702) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [251] [110/156] eta: 0:00:32 lr: 0.000574 min_lr: 0.000574 loss: 3.1446 (2.9798) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [251] [120/156] eta: 0:00:25 lr: 0.000572 min_lr: 0.000572 loss: 2.9726 (2.9667) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [251] [130/156] eta: 0:00:18 lr: 0.000571 min_lr: 0.000571 loss: 2.8723 (2.9538) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0010 max mem: 55573 Epoch: [251] [140/156] eta: 0:00:10 lr: 0.000569 min_lr: 0.000569 loss: 2.7077 (2.9352) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [251] [150/156] eta: 0:00:04 lr: 0.000568 min_lr: 0.000568 loss: 2.8233 (2.9453) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [251] [155/156] eta: 0:00:00 lr: 0.000567 min_lr: 0.000567 loss: 2.9071 (2.9431) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [251] Total time: 0:01:46 (0.6820 s / it) Averaged stats: lr: 0.000567 min_lr: 0.000567 loss: 2.9071 (2.9658) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8097 (0.8097) acc1: 84.3750 (84.3750) acc5: 96.6146 (96.6146) time: 7.0996 data: 6.8622 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9013 (0.8999) acc1: 81.6406 (80.1920) acc5: 96.6146 (95.6480) time: 1.5664 data: 1.3725 max mem: 55573 Test: Total time: 0:00:08 (1.6097 s / it) * Acc@1 80.938 Acc@5 95.652 loss 0.891 Accuracy of the model on the 50000 test images: 80.9% Max accuracy: 81.09% Test: [0/5] eta: 0:00:35 loss: 1.0137 (1.0137) acc1: 74.7396 (74.7396) acc5: 92.8385 (92.8385) time: 7.0821 data: 6.8460 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0427 (1.0605) acc1: 73.1771 (70.7200) acc5: 92.8385 (90.4960) time: 1.5681 data: 1.3757 max mem: 55573 Test: Total time: 0:00:07 (1.5912 s / it) * Acc@1 71.446 Acc@5 90.728 loss 1.081 Accuracy of the model EMA on 50000 test images: 71.4% Max EMA accuracy: 71.45% Epoch: [252] [ 0/156] eta: 0:33:14 lr: 0.000567 min_lr: 0.000567 loss: 2.9682 (2.9682) weight_decay: 0.0500 (0.0500) time: 12.7873 data: 8.0248 max mem: 55573 Epoch: [252] [ 10/156] eta: 0:04:08 lr: 0.000566 min_lr: 0.000566 loss: 3.0019 (3.0869) weight_decay: 0.0500 (0.0500) time: 1.6992 data: 0.7299 max mem: 55573 Epoch: [252] [ 20/156] eta: 0:02:39 lr: 0.000564 min_lr: 0.000564 loss: 2.9468 (2.8639) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [252] [ 30/156] eta: 0:02:03 lr: 0.000563 min_lr: 0.000563 loss: 2.6308 (2.8387) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [252] [ 40/156] eta: 0:01:43 lr: 0.000561 min_lr: 0.000561 loss: 2.8804 (2.8960) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [252] [ 50/156] eta: 0:01:27 lr: 0.000560 min_lr: 0.000560 loss: 3.0213 (2.9132) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [252] [ 60/156] eta: 0:01:15 lr: 0.000558 min_lr: 0.000558 loss: 2.9626 (2.9070) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [252] [ 70/156] eta: 0:01:05 lr: 0.000557 min_lr: 0.000557 loss: 3.1091 (2.9156) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [252] [ 80/156] eta: 0:00:56 lr: 0.000555 min_lr: 0.000555 loss: 3.1207 (2.9101) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [252] [ 90/156] eta: 0:00:47 lr: 0.000554 min_lr: 0.000554 loss: 3.1694 (2.9471) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [252] [100/156] eta: 0:00:39 lr: 0.000552 min_lr: 0.000552 loss: 3.1284 (2.9413) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [252] [110/156] eta: 0:00:32 lr: 0.000551 min_lr: 0.000551 loss: 3.1274 (2.9527) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [252] [120/156] eta: 0:00:24 lr: 0.000550 min_lr: 0.000550 loss: 3.1646 (2.9492) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [252] [130/156] eta: 0:00:17 lr: 0.000548 min_lr: 0.000548 loss: 2.9703 (2.9407) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0010 max mem: 55573 Epoch: [252] [140/156] eta: 0:00:10 lr: 0.000547 min_lr: 0.000547 loss: 3.0675 (2.9483) weight_decay: 0.0500 (0.0500) time: 0.5873 data: 0.0008 max mem: 55573 Epoch: [252] [150/156] eta: 0:00:04 lr: 0.000545 min_lr: 0.000545 loss: 3.1353 (2.9470) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [252] [155/156] eta: 0:00:00 lr: 0.000544 min_lr: 0.000544 loss: 2.9998 (2.9456) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [252] Total time: 0:01:45 (0.6743 s / it) Averaged stats: lr: 0.000544 min_lr: 0.000544 loss: 2.9998 (2.9441) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8492 (0.8492) acc1: 84.2448 (84.2448) acc5: 96.7448 (96.7448) time: 6.8070 data: 6.5699 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9133 (0.9162) acc1: 82.2917 (80.0640) acc5: 96.7448 (95.7440) time: 1.5077 data: 1.3141 max mem: 55573 Test: Total time: 0:00:07 (1.5599 s / it) * Acc@1 81.040 Acc@5 95.684 loss 0.907 Accuracy of the model on the 50000 test images: 81.0% Max accuracy: 81.09% Test: [0/5] eta: 0:00:35 loss: 1.0131 (1.0131) acc1: 74.8698 (74.8698) acc5: 92.8385 (92.8385) time: 7.1325 data: 6.8964 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0419 (1.0599) acc1: 73.1771 (70.7520) acc5: 92.8385 (90.4960) time: 1.5718 data: 1.3794 max mem: 55573 Test: Total time: 0:00:08 (1.6027 s / it) * Acc@1 71.464 Acc@5 90.734 loss 1.080 Accuracy of the model EMA on 50000 test images: 71.5% Max EMA accuracy: 71.46% Epoch: [253] [ 0/156] eta: 0:33:25 lr: 0.000544 min_lr: 0.000544 loss: 3.4221 (3.4221) weight_decay: 0.0500 (0.0500) time: 12.8526 data: 12.2643 max mem: 55573 Epoch: [253] [ 10/156] eta: 0:04:11 lr: 0.000543 min_lr: 0.000543 loss: 3.1473 (2.9906) weight_decay: 0.0500 (0.0500) time: 1.7233 data: 1.1155 max mem: 55573 Epoch: [253] [ 20/156] eta: 0:02:40 lr: 0.000541 min_lr: 0.000541 loss: 3.0944 (3.0063) weight_decay: 0.0500 (0.0500) time: 0.6001 data: 0.0007 max mem: 55573 Epoch: [253] [ 30/156] eta: 0:02:05 lr: 0.000540 min_lr: 0.000540 loss: 3.0308 (2.9520) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0007 max mem: 55573 Epoch: [253] [ 40/156] eta: 0:01:43 lr: 0.000539 min_lr: 0.000539 loss: 3.1300 (3.0006) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0007 max mem: 55573 Epoch: [253] [ 50/156] eta: 0:01:28 lr: 0.000537 min_lr: 0.000537 loss: 3.2214 (3.0107) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0006 max mem: 55573 Epoch: [253] [ 60/156] eta: 0:01:16 lr: 0.000536 min_lr: 0.000536 loss: 3.1802 (3.0262) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0006 max mem: 55573 Epoch: [253] [ 70/156] eta: 0:01:05 lr: 0.000534 min_lr: 0.000534 loss: 3.1178 (2.9938) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0006 max mem: 55573 Epoch: [253] [ 80/156] eta: 0:00:56 lr: 0.000533 min_lr: 0.000533 loss: 2.8278 (2.9713) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0006 max mem: 55573 Epoch: [253] [ 90/156] eta: 0:00:48 lr: 0.000531 min_lr: 0.000531 loss: 2.8278 (2.9503) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [253] [100/156] eta: 0:00:40 lr: 0.000530 min_lr: 0.000530 loss: 2.9784 (2.9393) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [253] [110/156] eta: 0:00:32 lr: 0.000529 min_lr: 0.000529 loss: 3.1659 (2.9627) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [253] [120/156] eta: 0:00:24 lr: 0.000527 min_lr: 0.000527 loss: 3.1787 (2.9729) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0005 max mem: 55573 Epoch: [253] [130/156] eta: 0:00:17 lr: 0.000526 min_lr: 0.000526 loss: 3.1232 (2.9736) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0022 max mem: 55573 Epoch: [253] [140/156] eta: 0:00:10 lr: 0.000524 min_lr: 0.000524 loss: 3.1073 (2.9703) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0020 max mem: 55573 Epoch: [253] [150/156] eta: 0:00:04 lr: 0.000523 min_lr: 0.000523 loss: 3.0690 (2.9697) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0002 max mem: 55573 Epoch: [253] [155/156] eta: 0:00:00 lr: 0.000522 min_lr: 0.000522 loss: 3.0690 (2.9754) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0002 max mem: 55573 Epoch: [253] Total time: 0:01:45 (0.6762 s / it) Averaged stats: lr: 0.000522 min_lr: 0.000522 loss: 3.0690 (2.9484) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8757 (0.8757) acc1: 83.9844 (83.9844) acc5: 97.1354 (97.1354) time: 6.9042 data: 6.6668 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9822 (0.9628) acc1: 81.2500 (79.7440) acc5: 96.4844 (95.5520) time: 1.5274 data: 1.3334 max mem: 55573 Test: Total time: 0:00:07 (1.5685 s / it) * Acc@1 80.950 Acc@5 95.638 loss 0.952 Accuracy of the model on the 50000 test images: 81.0% Max accuracy: 81.09% Test: [0/5] eta: 0:00:33 loss: 1.0123 (1.0123) acc1: 74.8698 (74.8698) acc5: 92.8385 (92.8385) time: 6.7825 data: 6.5463 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0412 (1.0594) acc1: 73.1771 (70.7200) acc5: 92.8385 (90.4960) time: 1.5024 data: 1.3096 max mem: 55573 Test: Total time: 0:00:07 (1.5227 s / it) * Acc@1 71.494 Acc@5 90.752 loss 1.079 Accuracy of the model EMA on 50000 test images: 71.5% Max EMA accuracy: 71.49% Epoch: [254] [ 0/156] eta: 0:34:59 lr: 0.000522 min_lr: 0.000522 loss: 2.8639 (2.8639) weight_decay: 0.0500 (0.0500) time: 13.4556 data: 9.2297 max mem: 55573 Epoch: [254] [ 10/156] eta: 0:04:17 lr: 0.000521 min_lr: 0.000521 loss: 2.8639 (2.9365) weight_decay: 0.0500 (0.0500) time: 1.7646 data: 0.8394 max mem: 55573 Epoch: [254] [ 20/156] eta: 0:02:43 lr: 0.000519 min_lr: 0.000519 loss: 3.0973 (2.9797) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [254] [ 30/156] eta: 0:02:06 lr: 0.000518 min_lr: 0.000518 loss: 3.1396 (2.9560) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [254] [ 40/156] eta: 0:01:45 lr: 0.000516 min_lr: 0.000516 loss: 2.9000 (2.9217) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [254] [ 50/156] eta: 0:01:29 lr: 0.000515 min_lr: 0.000515 loss: 3.0022 (2.9422) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [254] [ 60/156] eta: 0:01:17 lr: 0.000513 min_lr: 0.000513 loss: 3.1607 (2.9574) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [254] [ 70/156] eta: 0:01:06 lr: 0.000512 min_lr: 0.000512 loss: 3.1571 (2.9691) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [254] [ 80/156] eta: 0:00:56 lr: 0.000511 min_lr: 0.000511 loss: 3.0491 (2.9606) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [254] [ 90/156] eta: 0:00:48 lr: 0.000509 min_lr: 0.000509 loss: 3.0129 (2.9561) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [254] [100/156] eta: 0:00:40 lr: 0.000508 min_lr: 0.000508 loss: 3.1075 (2.9717) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [254] [110/156] eta: 0:00:32 lr: 0.000506 min_lr: 0.000506 loss: 3.1075 (2.9697) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [254] [120/156] eta: 0:00:25 lr: 0.000505 min_lr: 0.000505 loss: 3.1044 (2.9692) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [254] [130/156] eta: 0:00:17 lr: 0.000504 min_lr: 0.000504 loss: 3.1654 (2.9672) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0011 max mem: 55573 Epoch: [254] [140/156] eta: 0:00:10 lr: 0.000502 min_lr: 0.000502 loss: 3.1147 (2.9587) weight_decay: 0.0500 (0.0500) time: 0.5880 data: 0.0009 max mem: 55573 Epoch: [254] [150/156] eta: 0:00:04 lr: 0.000501 min_lr: 0.000501 loss: 2.9490 (2.9527) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [254] [155/156] eta: 0:00:00 lr: 0.000500 min_lr: 0.000500 loss: 2.8677 (2.9457) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [254] Total time: 0:01:45 (0.6793 s / it) Averaged stats: lr: 0.000500 min_lr: 0.000500 loss: 2.8677 (2.9402) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8205 (0.8205) acc1: 83.4635 (83.4635) acc5: 96.8750 (96.8750) time: 6.8401 data: 6.6029 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9191 (0.9095) acc1: 80.0781 (79.3920) acc5: 96.4844 (95.2960) time: 1.5143 data: 1.3207 max mem: 55573 Test: Total time: 0:00:07 (1.5626 s / it) * Acc@1 81.148 Acc@5 95.688 loss 0.889 Accuracy of the model on the 50000 test images: 81.1% Max accuracy: 81.15% Test: [0/5] eta: 0:00:35 loss: 1.0116 (1.0116) acc1: 74.8698 (74.8698) acc5: 92.8385 (92.8385) time: 7.0763 data: 6.8402 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0406 (1.0587) acc1: 73.1771 (70.7200) acc5: 92.8385 (90.4960) time: 1.5606 data: 1.3681 max mem: 55573 Test: Total time: 0:00:07 (1.5762 s / it) * Acc@1 71.520 Acc@5 90.762 loss 1.079 Accuracy of the model EMA on 50000 test images: 71.5% Max EMA accuracy: 71.52% Epoch: [255] [ 0/156] eta: 0:34:35 lr: 0.000500 min_lr: 0.000500 loss: 2.9535 (2.9535) weight_decay: 0.0500 (0.0500) time: 13.3055 data: 12.1994 max mem: 55573 Epoch: [255] [ 10/156] eta: 0:04:14 lr: 0.000499 min_lr: 0.000499 loss: 2.9525 (2.8866) weight_decay: 0.0500 (0.0500) time: 1.7463 data: 1.1101 max mem: 55573 Epoch: [255] [ 20/156] eta: 0:02:42 lr: 0.000497 min_lr: 0.000497 loss: 2.9538 (2.8507) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0008 max mem: 55573 Epoch: [255] [ 30/156] eta: 0:02:06 lr: 0.000496 min_lr: 0.000496 loss: 3.1051 (2.8840) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0005 max mem: 55573 Epoch: [255] [ 40/156] eta: 0:01:44 lr: 0.000494 min_lr: 0.000494 loss: 3.1073 (2.9318) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [255] [ 50/156] eta: 0:01:29 lr: 0.000493 min_lr: 0.000493 loss: 3.1127 (2.9452) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [255] [ 60/156] eta: 0:01:16 lr: 0.000492 min_lr: 0.000492 loss: 3.0280 (2.9338) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [255] [ 70/156] eta: 0:01:06 lr: 0.000490 min_lr: 0.000490 loss: 2.9144 (2.9130) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [255] [ 80/156] eta: 0:00:56 lr: 0.000489 min_lr: 0.000489 loss: 2.9144 (2.9254) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [255] [ 90/156] eta: 0:00:48 lr: 0.000488 min_lr: 0.000488 loss: 3.0562 (2.9446) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [255] [100/156] eta: 0:00:40 lr: 0.000486 min_lr: 0.000486 loss: 3.0562 (2.9399) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [255] [110/156] eta: 0:00:32 lr: 0.000485 min_lr: 0.000485 loss: 2.7112 (2.9092) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [255] [120/156] eta: 0:00:25 lr: 0.000483 min_lr: 0.000483 loss: 2.7353 (2.9191) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [255] [130/156] eta: 0:00:17 lr: 0.000482 min_lr: 0.000482 loss: 3.0040 (2.9082) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0011 max mem: 55573 Epoch: [255] [140/156] eta: 0:00:10 lr: 0.000481 min_lr: 0.000481 loss: 3.0637 (2.9178) weight_decay: 0.0500 (0.0500) time: 0.5876 data: 0.0010 max mem: 55573 Epoch: [255] [150/156] eta: 0:00:04 lr: 0.000479 min_lr: 0.000479 loss: 2.9804 (2.9109) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [255] [155/156] eta: 0:00:00 lr: 0.000479 min_lr: 0.000479 loss: 2.9681 (2.9084) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [255] Total time: 0:01:45 (0.6777 s / it) Averaged stats: lr: 0.000479 min_lr: 0.000479 loss: 2.9681 (2.9234) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.7980 (0.7980) acc1: 85.5469 (85.5469) acc5: 97.0052 (97.0052) time: 7.0333 data: 6.7960 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8964 (0.8791) acc1: 81.5104 (80.4160) acc5: 96.8750 (95.7760) time: 1.5527 data: 1.3593 max mem: 55573 Test: Total time: 0:00:08 (1.6005 s / it) * Acc@1 81.160 Acc@5 95.680 loss 0.870 Accuracy of the model on the 50000 test images: 81.2% Max accuracy: 81.16% Test: [0/5] eta: 0:00:34 loss: 1.0106 (1.0106) acc1: 75.0000 (75.0000) acc5: 92.9688 (92.9688) time: 6.9980 data: 6.7620 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0398 (1.0579) acc1: 73.0469 (70.7200) acc5: 92.9688 (90.5280) time: 1.5449 data: 1.3525 max mem: 55573 Test: Total time: 0:00:08 (1.6040 s / it) * Acc@1 71.526 Acc@5 90.774 loss 1.078 Accuracy of the model EMA on 50000 test images: 71.5% Max EMA accuracy: 71.53% Epoch: [256] [ 0/156] eta: 0:33:49 lr: 0.000479 min_lr: 0.000479 loss: 3.3918 (3.3918) weight_decay: 0.0500 (0.0500) time: 13.0121 data: 10.7335 max mem: 55573 Epoch: [256] [ 10/156] eta: 0:04:11 lr: 0.000477 min_lr: 0.000477 loss: 3.2328 (3.1635) weight_decay: 0.0500 (0.0500) time: 1.7217 data: 0.9762 max mem: 55573 Epoch: [256] [ 20/156] eta: 0:02:40 lr: 0.000476 min_lr: 0.000476 loss: 3.1182 (3.0441) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [256] [ 30/156] eta: 0:02:05 lr: 0.000474 min_lr: 0.000474 loss: 2.8632 (2.9648) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [256] [ 40/156] eta: 0:01:43 lr: 0.000473 min_lr: 0.000473 loss: 2.8632 (2.9331) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [256] [ 50/156] eta: 0:01:28 lr: 0.000472 min_lr: 0.000472 loss: 3.1343 (2.9705) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [256] [ 60/156] eta: 0:01:16 lr: 0.000470 min_lr: 0.000470 loss: 3.1530 (2.9864) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [256] [ 70/156] eta: 0:01:05 lr: 0.000469 min_lr: 0.000469 loss: 3.1530 (2.9752) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [256] [ 80/156] eta: 0:00:56 lr: 0.000468 min_lr: 0.000468 loss: 2.8329 (2.9581) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [256] [ 90/156] eta: 0:00:48 lr: 0.000466 min_lr: 0.000466 loss: 2.6293 (2.9269) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [256] [100/156] eta: 0:00:39 lr: 0.000465 min_lr: 0.000465 loss: 2.6572 (2.9197) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [256] [110/156] eta: 0:00:32 lr: 0.000464 min_lr: 0.000464 loss: 2.8957 (2.9084) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [256] [120/156] eta: 0:00:24 lr: 0.000462 min_lr: 0.000462 loss: 2.9314 (2.9062) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0006 max mem: 55573 Epoch: [256] [130/156] eta: 0:00:17 lr: 0.000461 min_lr: 0.000461 loss: 3.0481 (2.9170) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0011 max mem: 55573 Epoch: [256] [140/156] eta: 0:00:10 lr: 0.000460 min_lr: 0.000460 loss: 3.1691 (2.9231) weight_decay: 0.0500 (0.0500) time: 0.5870 data: 0.0009 max mem: 55573 Epoch: [256] [150/156] eta: 0:00:04 lr: 0.000458 min_lr: 0.000458 loss: 3.1533 (2.9224) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [256] [155/156] eta: 0:00:00 lr: 0.000458 min_lr: 0.000458 loss: 3.2344 (2.9331) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [256] Total time: 0:01:45 (0.6760 s / it) Averaged stats: lr: 0.000458 min_lr: 0.000458 loss: 3.2344 (2.9279) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8586 (0.8586) acc1: 82.6823 (82.6823) acc5: 96.8750 (96.8750) time: 6.7346 data: 6.4975 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9384 (0.9228) acc1: 80.5990 (79.3920) acc5: 96.7448 (95.8400) time: 1.4934 data: 1.2996 max mem: 55573 Test: Total time: 0:00:07 (1.5451 s / it) * Acc@1 81.056 Acc@5 95.700 loss 0.910 Accuracy of the model on the 50000 test images: 81.1% Max accuracy: 81.16% Test: [0/5] eta: 0:00:35 loss: 1.0097 (1.0097) acc1: 75.0000 (75.0000) acc5: 92.9688 (92.9688) time: 7.1635 data: 6.9274 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0389 (1.0571) acc1: 72.9167 (70.7200) acc5: 92.9688 (90.5600) time: 1.5779 data: 1.3856 max mem: 55573 Test: Total time: 0:00:08 (1.6020 s / it) * Acc@1 71.578 Acc@5 90.808 loss 1.077 Accuracy of the model EMA on 50000 test images: 71.6% Max EMA accuracy: 71.58% Epoch: [257] [ 0/156] eta: 0:37:03 lr: 0.000458 min_lr: 0.000458 loss: 3.5251 (3.5251) weight_decay: 0.0500 (0.0500) time: 14.2519 data: 13.6495 max mem: 55573 Epoch: [257] [ 10/156] eta: 0:04:27 lr: 0.000456 min_lr: 0.000456 loss: 3.0016 (2.8137) weight_decay: 0.0500 (0.0500) time: 1.8300 data: 1.2411 max mem: 55573 Epoch: [257] [ 20/156] eta: 0:02:48 lr: 0.000455 min_lr: 0.000455 loss: 3.0507 (2.8634) weight_decay: 0.0500 (0.0500) time: 0.5884 data: 0.0004 max mem: 55573 Epoch: [257] [ 30/156] eta: 0:02:09 lr: 0.000454 min_lr: 0.000454 loss: 3.0507 (2.8281) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0004 max mem: 55573 Epoch: [257] [ 40/156] eta: 0:01:46 lr: 0.000452 min_lr: 0.000452 loss: 2.8542 (2.8531) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0004 max mem: 55573 Epoch: [257] [ 50/156] eta: 0:01:30 lr: 0.000451 min_lr: 0.000451 loss: 3.0111 (2.9003) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [257] [ 60/156] eta: 0:01:18 lr: 0.000450 min_lr: 0.000450 loss: 3.1115 (2.9130) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0004 max mem: 55573 Epoch: [257] [ 70/156] eta: 0:01:07 lr: 0.000448 min_lr: 0.000448 loss: 3.0531 (2.9105) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [257] [ 80/156] eta: 0:00:57 lr: 0.000447 min_lr: 0.000447 loss: 2.9950 (2.9250) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [257] [ 90/156] eta: 0:00:48 lr: 0.000446 min_lr: 0.000446 loss: 2.9439 (2.9291) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [257] [100/156] eta: 0:00:40 lr: 0.000444 min_lr: 0.000444 loss: 2.8294 (2.9148) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0004 max mem: 55573 Epoch: [257] [110/156] eta: 0:00:32 lr: 0.000443 min_lr: 0.000443 loss: 2.8769 (2.9223) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [257] [120/156] eta: 0:00:25 lr: 0.000442 min_lr: 0.000442 loss: 2.8769 (2.9109) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [257] [130/156] eta: 0:00:18 lr: 0.000440 min_lr: 0.000440 loss: 2.9277 (2.9182) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0010 max mem: 55573 Epoch: [257] [140/156] eta: 0:00:10 lr: 0.000439 min_lr: 0.000439 loss: 3.0314 (2.9226) weight_decay: 0.0500 (0.0500) time: 0.5840 data: 0.0009 max mem: 55573 Epoch: [257] [150/156] eta: 0:00:04 lr: 0.000438 min_lr: 0.000438 loss: 2.7961 (2.9055) weight_decay: 0.0500 (0.0500) time: 0.5811 data: 0.0001 max mem: 55573 Epoch: [257] [155/156] eta: 0:00:00 lr: 0.000437 min_lr: 0.000437 loss: 2.7961 (2.9028) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [257] Total time: 0:01:46 (0.6826 s / it) Averaged stats: lr: 0.000437 min_lr: 0.000437 loss: 2.7961 (2.9313) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7976 (0.7976) acc1: 83.7240 (83.7240) acc5: 96.7448 (96.7448) time: 6.8944 data: 6.6569 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8990 (0.8710) acc1: 81.1198 (79.6160) acc5: 96.7448 (95.6160) time: 1.5254 data: 1.3315 max mem: 55573 Test: Total time: 0:00:07 (1.5675 s / it) * Acc@1 81.212 Acc@5 95.762 loss 0.861 Accuracy of the model on the 50000 test images: 81.2% Max accuracy: 81.21% Test: [0/5] eta: 0:00:34 loss: 1.0088 (1.0088) acc1: 75.0000 (75.0000) acc5: 92.9688 (92.9688) time: 6.9383 data: 6.7022 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0380 (1.0563) acc1: 73.0469 (70.7520) acc5: 92.9688 (90.5600) time: 1.5330 data: 1.3405 max mem: 55573 Test: Total time: 0:00:07 (1.5502 s / it) * Acc@1 71.598 Acc@5 90.814 loss 1.076 Accuracy of the model EMA on 50000 test images: 71.6% Max EMA accuracy: 71.60% Epoch: [258] [ 0/156] eta: 0:35:04 lr: 0.000437 min_lr: 0.000437 loss: 3.1609 (3.1609) weight_decay: 0.0500 (0.0500) time: 13.4923 data: 12.8979 max mem: 55573 Epoch: [258] [ 10/156] eta: 0:04:18 lr: 0.000436 min_lr: 0.000436 loss: 3.0857 (3.0078) weight_decay: 0.0500 (0.0500) time: 1.7680 data: 1.1729 max mem: 55573 Epoch: [258] [ 20/156] eta: 0:02:44 lr: 0.000434 min_lr: 0.000434 loss: 3.0857 (3.0714) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [258] [ 30/156] eta: 0:02:07 lr: 0.000433 min_lr: 0.000433 loss: 3.0856 (3.0823) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [258] [ 40/156] eta: 0:01:45 lr: 0.000432 min_lr: 0.000432 loss: 3.1378 (3.1148) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [258] [ 50/156] eta: 0:01:29 lr: 0.000430 min_lr: 0.000430 loss: 3.1415 (3.1042) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [258] [ 60/156] eta: 0:01:17 lr: 0.000429 min_lr: 0.000429 loss: 3.2020 (3.1005) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [258] [ 70/156] eta: 0:01:06 lr: 0.000428 min_lr: 0.000428 loss: 3.0974 (3.0729) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [258] [ 80/156] eta: 0:00:57 lr: 0.000427 min_lr: 0.000427 loss: 2.8465 (3.0470) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [258] [ 90/156] eta: 0:00:48 lr: 0.000425 min_lr: 0.000425 loss: 2.8811 (3.0166) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [258] [100/156] eta: 0:00:40 lr: 0.000424 min_lr: 0.000424 loss: 2.8724 (2.9978) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [258] [110/156] eta: 0:00:32 lr: 0.000423 min_lr: 0.000423 loss: 2.9120 (2.9882) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [258] [120/156] eta: 0:00:25 lr: 0.000421 min_lr: 0.000421 loss: 2.9853 (2.9854) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [258] [130/156] eta: 0:00:17 lr: 0.000420 min_lr: 0.000420 loss: 2.9863 (2.9824) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0010 max mem: 55573 Epoch: [258] [140/156] eta: 0:00:10 lr: 0.000419 min_lr: 0.000419 loss: 3.0808 (2.9917) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0009 max mem: 55573 Epoch: [258] [150/156] eta: 0:00:04 lr: 0.000418 min_lr: 0.000418 loss: 3.0985 (2.9838) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [258] [155/156] eta: 0:00:00 lr: 0.000417 min_lr: 0.000417 loss: 3.0808 (2.9702) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [258] Total time: 0:01:46 (0.6798 s / it) Averaged stats: lr: 0.000417 min_lr: 0.000417 loss: 3.0808 (2.9329) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.7947 (0.7947) acc1: 83.9844 (83.9844) acc5: 97.2656 (97.2656) time: 6.7196 data: 6.4825 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8978 (0.8649) acc1: 82.1615 (80.5120) acc5: 96.3542 (95.6480) time: 1.4904 data: 1.2966 max mem: 55573 Test: Total time: 0:00:07 (1.5346 s / it) * Acc@1 81.234 Acc@5 95.776 loss 0.865 Accuracy of the model on the 50000 test images: 81.2% Max accuracy: 81.23% Test: [0/5] eta: 0:00:36 loss: 1.0078 (1.0078) acc1: 75.0000 (75.0000) acc5: 93.0990 (93.0990) time: 7.2707 data: 7.0346 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0371 (1.0555) acc1: 73.0469 (70.7200) acc5: 93.0990 (90.5600) time: 1.5995 data: 1.4070 max mem: 55573 Test: Total time: 0:00:08 (1.6168 s / it) * Acc@1 71.630 Acc@5 90.834 loss 1.075 Accuracy of the model EMA on 50000 test images: 71.6% Max EMA accuracy: 71.63% Epoch: [259] [ 0/156] eta: 0:31:16 lr: 0.000417 min_lr: 0.000417 loss: 2.7398 (2.7398) weight_decay: 0.0500 (0.0500) time: 12.0281 data: 10.3321 max mem: 55573 Epoch: [259] [ 10/156] eta: 0:04:00 lr: 0.000415 min_lr: 0.000415 loss: 2.8272 (2.8356) weight_decay: 0.0500 (0.0500) time: 1.6496 data: 0.9397 max mem: 55573 Epoch: [259] [ 20/156] eta: 0:02:35 lr: 0.000414 min_lr: 0.000414 loss: 2.9253 (2.9164) weight_decay: 0.0500 (0.0500) time: 0.6009 data: 0.0005 max mem: 55573 Epoch: [259] [ 30/156] eta: 0:02:01 lr: 0.000413 min_lr: 0.000413 loss: 2.9181 (2.8343) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [259] [ 40/156] eta: 0:01:41 lr: 0.000412 min_lr: 0.000412 loss: 2.8941 (2.8687) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [259] [ 50/156] eta: 0:01:26 lr: 0.000410 min_lr: 0.000410 loss: 3.1082 (2.9165) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [259] [ 60/156] eta: 0:01:15 lr: 0.000409 min_lr: 0.000409 loss: 3.1785 (2.9675) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [259] [ 70/156] eta: 0:01:04 lr: 0.000408 min_lr: 0.000408 loss: 3.1441 (2.9631) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [259] [ 80/156] eta: 0:00:55 lr: 0.000407 min_lr: 0.000407 loss: 3.0342 (2.9823) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [259] [ 90/156] eta: 0:00:47 lr: 0.000405 min_lr: 0.000405 loss: 3.0444 (2.9808) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [259] [100/156] eta: 0:00:39 lr: 0.000404 min_lr: 0.000404 loss: 2.9600 (2.9724) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [259] [110/156] eta: 0:00:31 lr: 0.000403 min_lr: 0.000403 loss: 3.0743 (2.9819) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [259] [120/156] eta: 0:00:24 lr: 0.000402 min_lr: 0.000402 loss: 3.1718 (2.9947) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0005 max mem: 55573 Epoch: [259] [130/156] eta: 0:00:17 lr: 0.000400 min_lr: 0.000400 loss: 3.0573 (2.9772) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0022 max mem: 55573 Epoch: [259] [140/156] eta: 0:00:10 lr: 0.000399 min_lr: 0.000399 loss: 2.7366 (2.9590) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0021 max mem: 55573 Epoch: [259] [150/156] eta: 0:00:03 lr: 0.000398 min_lr: 0.000398 loss: 2.7334 (2.9472) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [259] [155/156] eta: 0:00:00 lr: 0.000397 min_lr: 0.000397 loss: 2.6201 (2.9324) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [259] Total time: 0:01:44 (0.6706 s / it) Averaged stats: lr: 0.000397 min_lr: 0.000397 loss: 2.6201 (2.9241) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7550 (0.7550) acc1: 84.3750 (84.3750) acc5: 97.2656 (97.2656) time: 6.9502 data: 6.7128 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8701 (0.8482) acc1: 81.9010 (80.5760) acc5: 96.3542 (95.6800) time: 1.5365 data: 1.3426 max mem: 55573 Test: Total time: 0:00:07 (1.5875 s / it) * Acc@1 81.400 Acc@5 95.780 loss 0.844 Accuracy of the model on the 50000 test images: 81.4% Max accuracy: 81.40% Test: [0/5] eta: 0:00:33 loss: 1.0068 (1.0068) acc1: 75.0000 (75.0000) acc5: 93.0990 (93.0990) time: 6.7884 data: 6.5523 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0362 (1.0546) acc1: 73.1771 (70.7840) acc5: 93.0990 (90.5600) time: 1.5031 data: 1.3106 max mem: 55573 Test: Total time: 0:00:07 (1.5173 s / it) * Acc@1 71.670 Acc@5 90.862 loss 1.074 Accuracy of the model EMA on 50000 test images: 71.7% Max EMA accuracy: 71.67% Epoch: [260] [ 0/156] eta: 0:33:31 lr: 0.000397 min_lr: 0.000397 loss: 2.7067 (2.7067) weight_decay: 0.0500 (0.0500) time: 12.8913 data: 8.4963 max mem: 55573 Epoch: [260] [ 10/156] eta: 0:04:09 lr: 0.000396 min_lr: 0.000396 loss: 3.1695 (3.0916) weight_decay: 0.0500 (0.0500) time: 1.7093 data: 0.7728 max mem: 55573 Epoch: [260] [ 20/156] eta: 0:02:39 lr: 0.000395 min_lr: 0.000395 loss: 3.1695 (3.0567) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [260] [ 30/156] eta: 0:02:04 lr: 0.000393 min_lr: 0.000393 loss: 3.0272 (2.9721) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [260] [ 40/156] eta: 0:01:43 lr: 0.000392 min_lr: 0.000392 loss: 3.0272 (2.9480) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [260] [ 50/156] eta: 0:01:28 lr: 0.000391 min_lr: 0.000391 loss: 2.9547 (2.9194) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [260] [ 60/156] eta: 0:01:16 lr: 0.000390 min_lr: 0.000390 loss: 2.9547 (2.9192) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [260] [ 70/156] eta: 0:01:05 lr: 0.000388 min_lr: 0.000388 loss: 2.9820 (2.9082) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [260] [ 80/156] eta: 0:00:56 lr: 0.000387 min_lr: 0.000387 loss: 2.9820 (2.9017) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [260] [ 90/156] eta: 0:00:47 lr: 0.000386 min_lr: 0.000386 loss: 3.0424 (2.9050) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [260] [100/156] eta: 0:00:39 lr: 0.000385 min_lr: 0.000385 loss: 3.0424 (2.9086) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0005 max mem: 55573 Epoch: [260] [110/156] eta: 0:00:32 lr: 0.000383 min_lr: 0.000383 loss: 3.0122 (2.9271) weight_decay: 0.0500 (0.0500) time: 0.5952 data: 0.0004 max mem: 55573 Epoch: [260] [120/156] eta: 0:00:24 lr: 0.000382 min_lr: 0.000382 loss: 3.0122 (2.9194) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [260] [130/156] eta: 0:00:17 lr: 0.000381 min_lr: 0.000381 loss: 2.7703 (2.9040) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0010 max mem: 55573 Epoch: [260] [140/156] eta: 0:00:10 lr: 0.000380 min_lr: 0.000380 loss: 2.8755 (2.8947) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.0009 max mem: 55573 Epoch: [260] [150/156] eta: 0:00:04 lr: 0.000379 min_lr: 0.000379 loss: 2.9898 (2.8945) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [260] [155/156] eta: 0:00:00 lr: 0.000378 min_lr: 0.000378 loss: 2.9950 (2.9021) weight_decay: 0.0500 (0.0500) time: 0.5811 data: 0.0001 max mem: 55573 Epoch: [260] Total time: 0:01:45 (0.6753 s / it) Averaged stats: lr: 0.000378 min_lr: 0.000378 loss: 2.9950 (2.9215) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8271 (0.8271) acc1: 83.3333 (83.3333) acc5: 97.1354 (97.1354) time: 6.9527 data: 6.7152 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9396 (0.9142) acc1: 81.1198 (80.1280) acc5: 96.6146 (95.6160) time: 1.5370 data: 1.3431 max mem: 55573 Test: Total time: 0:00:07 (1.5838 s / it) * Acc@1 81.320 Acc@5 95.762 loss 0.904 Accuracy of the model on the 50000 test images: 81.3% Max accuracy: 81.40% Test: [0/5] eta: 0:00:35 loss: 1.0056 (1.0056) acc1: 75.1302 (75.1302) acc5: 93.0990 (93.0990) time: 7.0897 data: 6.8537 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0351 (1.0535) acc1: 73.4375 (70.9120) acc5: 93.0990 (90.5920) time: 1.5632 data: 1.3708 max mem: 55573 Test: Total time: 0:00:07 (1.5806 s / it) * Acc@1 71.690 Acc@5 90.872 loss 1.073 Accuracy of the model EMA on 50000 test images: 71.7% Max EMA accuracy: 71.69% Epoch: [261] [ 0/156] eta: 0:33:23 lr: 0.000378 min_lr: 0.000378 loss: 3.2199 (3.2199) weight_decay: 0.0500 (0.0500) time: 12.8413 data: 11.0221 max mem: 55573 Epoch: [261] [ 10/156] eta: 0:04:11 lr: 0.000377 min_lr: 0.000377 loss: 3.1778 (3.1154) weight_decay: 0.0500 (0.0500) time: 1.7212 data: 1.0024 max mem: 55573 Epoch: [261] [ 20/156] eta: 0:02:40 lr: 0.000375 min_lr: 0.000375 loss: 2.9262 (2.9330) weight_decay: 0.0500 (0.0500) time: 0.5998 data: 0.0004 max mem: 55573 Epoch: [261] [ 30/156] eta: 0:02:04 lr: 0.000374 min_lr: 0.000374 loss: 2.9090 (2.9269) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [261] [ 40/156] eta: 0:01:43 lr: 0.000373 min_lr: 0.000373 loss: 3.0872 (2.9015) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0004 max mem: 55573 Epoch: [261] [ 50/156] eta: 0:01:28 lr: 0.000372 min_lr: 0.000372 loss: 2.9542 (2.9061) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [261] [ 60/156] eta: 0:01:16 lr: 0.000371 min_lr: 0.000371 loss: 2.7454 (2.8846) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [261] [ 70/156] eta: 0:01:05 lr: 0.000369 min_lr: 0.000369 loss: 2.7431 (2.8717) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [261] [ 80/156] eta: 0:00:56 lr: 0.000368 min_lr: 0.000368 loss: 2.8727 (2.8687) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [261] [ 90/156] eta: 0:00:47 lr: 0.000367 min_lr: 0.000367 loss: 2.8727 (2.8630) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [261] [100/156] eta: 0:00:39 lr: 0.000366 min_lr: 0.000366 loss: 2.9614 (2.8657) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [261] [110/156] eta: 0:00:32 lr: 0.000365 min_lr: 0.000365 loss: 2.8262 (2.8486) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [261] [120/156] eta: 0:00:24 lr: 0.000363 min_lr: 0.000363 loss: 2.6465 (2.8376) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [261] [130/156] eta: 0:00:17 lr: 0.000362 min_lr: 0.000362 loss: 2.7699 (2.8312) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0010 max mem: 55573 Epoch: [261] [140/156] eta: 0:00:10 lr: 0.000361 min_lr: 0.000361 loss: 2.8326 (2.8363) weight_decay: 0.0500 (0.0500) time: 0.5872 data: 0.0009 max mem: 55573 Epoch: [261] [150/156] eta: 0:00:04 lr: 0.000360 min_lr: 0.000360 loss: 3.0701 (2.8487) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0002 max mem: 55573 Epoch: [261] [155/156] eta: 0:00:00 lr: 0.000359 min_lr: 0.000359 loss: 3.2062 (2.8631) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0002 max mem: 55573 Epoch: [261] Total time: 0:01:45 (0.6762 s / it) Averaged stats: lr: 0.000359 min_lr: 0.000359 loss: 3.2062 (2.9025) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.9068 (0.9068) acc1: 83.8542 (83.8542) acc5: 97.2656 (97.2656) time: 6.8009 data: 6.5637 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9978 (0.9878) acc1: 81.1198 (79.9680) acc5: 96.4844 (95.8080) time: 1.5066 data: 1.3128 max mem: 55573 Test: Total time: 0:00:07 (1.5480 s / it) * Acc@1 81.202 Acc@5 95.678 loss 0.972 Accuracy of the model on the 50000 test images: 81.2% Max accuracy: 81.40% Test: [0/5] eta: 0:00:36 loss: 1.0044 (1.0044) acc1: 75.2604 (75.2604) acc5: 93.0990 (93.0990) time: 7.3110 data: 7.0751 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0341 (1.0524) acc1: 73.3073 (70.9760) acc5: 93.0990 (90.5920) time: 1.6075 data: 1.4151 max mem: 55573 Test: Total time: 0:00:08 (1.6259 s / it) * Acc@1 71.738 Acc@5 90.894 loss 1.072 Accuracy of the model EMA on 50000 test images: 71.7% Max EMA accuracy: 71.74% Epoch: [262] [ 0/156] eta: 0:32:12 lr: 0.000359 min_lr: 0.000359 loss: 2.1975 (2.1975) weight_decay: 0.0500 (0.0500) time: 12.3863 data: 11.7987 max mem: 55573 Epoch: [262] [ 10/156] eta: 0:04:05 lr: 0.000358 min_lr: 0.000358 loss: 2.9014 (2.8406) weight_decay: 0.0500 (0.0500) time: 1.6807 data: 1.0730 max mem: 55573 Epoch: [262] [ 20/156] eta: 0:02:38 lr: 0.000357 min_lr: 0.000357 loss: 2.9014 (2.7601) weight_decay: 0.0500 (0.0500) time: 0.6012 data: 0.0004 max mem: 55573 Epoch: [262] [ 30/156] eta: 0:02:03 lr: 0.000355 min_lr: 0.000355 loss: 2.9793 (2.8640) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [262] [ 40/156] eta: 0:01:42 lr: 0.000354 min_lr: 0.000354 loss: 3.1412 (2.8176) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [262] [ 50/156] eta: 0:01:27 lr: 0.000353 min_lr: 0.000353 loss: 2.8914 (2.8544) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [262] [ 60/156] eta: 0:01:15 lr: 0.000352 min_lr: 0.000352 loss: 2.9434 (2.8632) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [262] [ 70/156] eta: 0:01:05 lr: 0.000351 min_lr: 0.000351 loss: 2.7523 (2.8384) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [262] [ 80/156] eta: 0:00:56 lr: 0.000350 min_lr: 0.000350 loss: 2.8386 (2.8401) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [262] [ 90/156] eta: 0:00:47 lr: 0.000348 min_lr: 0.000348 loss: 2.9707 (2.8454) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [262] [100/156] eta: 0:00:39 lr: 0.000347 min_lr: 0.000347 loss: 2.9872 (2.8512) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [262] [110/156] eta: 0:00:32 lr: 0.000346 min_lr: 0.000346 loss: 2.9872 (2.8652) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [262] [120/156] eta: 0:00:24 lr: 0.000345 min_lr: 0.000345 loss: 3.0972 (2.8735) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [262] [130/156] eta: 0:00:17 lr: 0.000344 min_lr: 0.000344 loss: 3.1143 (2.8736) weight_decay: 0.0500 (0.0500) time: 0.5891 data: 0.0010 max mem: 55573 Epoch: [262] [140/156] eta: 0:00:10 lr: 0.000343 min_lr: 0.000343 loss: 3.0647 (2.8810) weight_decay: 0.0500 (0.0500) time: 0.5851 data: 0.0009 max mem: 55573 Epoch: [262] [150/156] eta: 0:00:04 lr: 0.000341 min_lr: 0.000341 loss: 3.0451 (2.8887) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [262] [155/156] eta: 0:00:00 lr: 0.000341 min_lr: 0.000341 loss: 3.0713 (2.8903) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [262] Total time: 0:01:44 (0.6725 s / it) Averaged stats: lr: 0.000341 min_lr: 0.000341 loss: 3.0713 (2.8898) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.8446 (0.8446) acc1: 83.9844 (83.9844) acc5: 97.0052 (97.0052) time: 7.2412 data: 7.0037 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9370 (0.9247) acc1: 81.2500 (80.0640) acc5: 96.6146 (95.8400) time: 1.5946 data: 1.4008 max mem: 55573 Test: Total time: 0:00:08 (1.6372 s / it) * Acc@1 81.370 Acc@5 95.744 loss 0.909 Accuracy of the model on the 50000 test images: 81.4% Max accuracy: 81.40% Test: [0/5] eta: 0:00:34 loss: 1.0033 (1.0033) acc1: 75.2604 (75.2604) acc5: 93.0990 (93.0990) time: 6.8980 data: 6.6619 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0330 (1.0513) acc1: 73.0469 (70.9120) acc5: 93.0990 (90.6240) time: 1.5249 data: 1.3325 max mem: 55573 Test: Total time: 0:00:07 (1.5474 s / it) * Acc@1 71.746 Acc@5 90.910 loss 1.071 Accuracy of the model EMA on 50000 test images: 71.7% Max EMA accuracy: 71.75% Epoch: [263] [ 0/156] eta: 0:33:46 lr: 0.000341 min_lr: 0.000341 loss: 3.1876 (3.1876) weight_decay: 0.0500 (0.0500) time: 12.9881 data: 9.0971 max mem: 55573 Epoch: [263] [ 10/156] eta: 0:04:14 lr: 0.000340 min_lr: 0.000340 loss: 3.0054 (2.9130) weight_decay: 0.0500 (0.0500) time: 1.7419 data: 0.8274 max mem: 55573 Epoch: [263] [ 20/156] eta: 0:02:42 lr: 0.000338 min_lr: 0.000338 loss: 3.0456 (3.0095) weight_decay: 0.0500 (0.0500) time: 0.6041 data: 0.0004 max mem: 55573 Epoch: [263] [ 30/156] eta: 0:02:05 lr: 0.000337 min_lr: 0.000337 loss: 3.0223 (2.9204) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [263] [ 40/156] eta: 0:01:44 lr: 0.000336 min_lr: 0.000336 loss: 3.0071 (2.9674) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [263] [ 50/156] eta: 0:01:28 lr: 0.000335 min_lr: 0.000335 loss: 3.1295 (2.9658) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0004 max mem: 55573 Epoch: [263] [ 60/156] eta: 0:01:16 lr: 0.000334 min_lr: 0.000334 loss: 2.9793 (2.9590) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [263] [ 70/156] eta: 0:01:06 lr: 0.000333 min_lr: 0.000333 loss: 3.1265 (2.9743) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [263] [ 80/156] eta: 0:00:56 lr: 0.000331 min_lr: 0.000331 loss: 3.0273 (2.9453) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [263] [ 90/156] eta: 0:00:48 lr: 0.000330 min_lr: 0.000330 loss: 3.1652 (2.9685) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [263] [100/156] eta: 0:00:40 lr: 0.000329 min_lr: 0.000329 loss: 3.2533 (2.9676) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [263] [110/156] eta: 0:00:32 lr: 0.000328 min_lr: 0.000328 loss: 3.1120 (2.9849) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [263] [120/156] eta: 0:00:25 lr: 0.000327 min_lr: 0.000327 loss: 3.1710 (2.9950) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [263] [130/156] eta: 0:00:17 lr: 0.000326 min_lr: 0.000326 loss: 3.0840 (2.9880) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0010 max mem: 55573 Epoch: [263] [140/156] eta: 0:00:10 lr: 0.000325 min_lr: 0.000325 loss: 2.8981 (2.9735) weight_decay: 0.0500 (0.0500) time: 0.5857 data: 0.0008 max mem: 55573 Epoch: [263] [150/156] eta: 0:00:04 lr: 0.000324 min_lr: 0.000324 loss: 2.8909 (2.9727) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [263] [155/156] eta: 0:00:00 lr: 0.000323 min_lr: 0.000323 loss: 3.0125 (2.9750) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [263] Total time: 0:01:45 (0.6771 s / it) Averaged stats: lr: 0.000323 min_lr: 0.000323 loss: 3.0125 (2.8937) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8238 (0.8238) acc1: 85.1562 (85.1562) acc5: 97.3958 (97.3958) time: 7.1451 data: 6.9077 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9314 (0.9102) acc1: 81.3802 (80.4800) acc5: 96.2240 (95.8080) time: 1.5754 data: 1.3816 max mem: 55573 Test: Total time: 0:00:08 (1.6197 s / it) * Acc@1 81.438 Acc@5 95.792 loss 0.892 Accuracy of the model on the 50000 test images: 81.4% Max accuracy: 81.44% Test: [0/5] eta: 0:00:33 loss: 1.0021 (1.0021) acc1: 75.2604 (75.2604) acc5: 93.0990 (93.0990) time: 6.7269 data: 6.4907 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0318 (1.0502) acc1: 73.1771 (70.9760) acc5: 93.0990 (90.6880) time: 1.4906 data: 1.2982 max mem: 55573 Test: Total time: 0:00:07 (1.5088 s / it) * Acc@1 71.770 Acc@5 90.936 loss 1.070 Accuracy of the model EMA on 50000 test images: 71.8% Max EMA accuracy: 71.77% Epoch: [264] [ 0/156] eta: 0:34:55 lr: 0.000323 min_lr: 0.000323 loss: 2.1688 (2.1688) weight_decay: 0.0500 (0.0500) time: 13.4342 data: 12.2308 max mem: 55573 Epoch: [264] [ 10/156] eta: 0:04:18 lr: 0.000322 min_lr: 0.000322 loss: 3.0480 (2.8455) weight_decay: 0.0500 (0.0500) time: 1.7705 data: 1.1125 max mem: 55573 Epoch: [264] [ 20/156] eta: 0:02:44 lr: 0.000321 min_lr: 0.000321 loss: 3.0487 (2.8925) weight_decay: 0.0500 (0.0500) time: 0.5970 data: 0.0006 max mem: 55573 Epoch: [264] [ 30/156] eta: 0:02:07 lr: 0.000319 min_lr: 0.000319 loss: 3.0600 (2.9023) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [264] [ 40/156] eta: 0:01:45 lr: 0.000318 min_lr: 0.000318 loss: 2.7512 (2.8595) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [264] [ 50/156] eta: 0:01:29 lr: 0.000317 min_lr: 0.000317 loss: 2.7512 (2.8438) weight_decay: 0.0500 (0.0500) time: 0.5978 data: 0.0004 max mem: 55573 Epoch: [264] [ 60/156] eta: 0:01:17 lr: 0.000316 min_lr: 0.000316 loss: 2.9402 (2.8437) weight_decay: 0.0500 (0.0500) time: 0.5971 data: 0.0004 max mem: 55573 Epoch: [264] [ 70/156] eta: 0:01:06 lr: 0.000315 min_lr: 0.000315 loss: 2.9885 (2.8372) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0004 max mem: 55573 Epoch: [264] [ 80/156] eta: 0:00:57 lr: 0.000314 min_lr: 0.000314 loss: 2.8966 (2.8333) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [264] [ 90/156] eta: 0:00:48 lr: 0.000313 min_lr: 0.000313 loss: 2.9865 (2.8574) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [264] [100/156] eta: 0:00:40 lr: 0.000312 min_lr: 0.000312 loss: 3.1369 (2.8870) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [264] [110/156] eta: 0:00:32 lr: 0.000311 min_lr: 0.000311 loss: 3.1578 (2.8992) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [264] [120/156] eta: 0:00:25 lr: 0.000309 min_lr: 0.000309 loss: 3.0884 (2.9003) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [264] [130/156] eta: 0:00:17 lr: 0.000308 min_lr: 0.000308 loss: 3.0144 (2.9003) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0011 max mem: 55573 Epoch: [264] [140/156] eta: 0:00:10 lr: 0.000307 min_lr: 0.000307 loss: 2.9853 (2.8949) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [264] [150/156] eta: 0:00:04 lr: 0.000306 min_lr: 0.000306 loss: 2.9853 (2.8901) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [264] [155/156] eta: 0:00:00 lr: 0.000306 min_lr: 0.000306 loss: 2.9865 (2.8790) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [264] Total time: 0:01:46 (0.6801 s / it) Averaged stats: lr: 0.000306 min_lr: 0.000306 loss: 2.9865 (2.9031) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.7493 (0.7493) acc1: 85.4167 (85.4167) acc5: 97.5260 (97.5260) time: 6.6617 data: 6.4246 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8910 (0.8612) acc1: 80.9896 (80.7040) acc5: 97.2656 (95.9680) time: 1.4787 data: 1.2850 max mem: 55573 Test: Total time: 0:00:07 (1.5255 s / it) * Acc@1 81.594 Acc@5 95.906 loss 0.846 Accuracy of the model on the 50000 test images: 81.6% Max accuracy: 81.59% Test: [0/5] eta: 0:00:36 loss: 1.0008 (1.0008) acc1: 75.2604 (75.2604) acc5: 93.0990 (93.0990) time: 7.2770 data: 7.0409 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0306 (1.0492) acc1: 73.1771 (70.9440) acc5: 93.0990 (90.6880) time: 1.6008 data: 1.4083 max mem: 55573 Test: Total time: 0:00:08 (1.6206 s / it) * Acc@1 71.792 Acc@5 90.954 loss 1.069 Accuracy of the model EMA on 50000 test images: 71.8% Max EMA accuracy: 71.79% Epoch: [265] [ 0/156] eta: 0:31:33 lr: 0.000305 min_lr: 0.000305 loss: 2.0269 (2.0269) weight_decay: 0.0500 (0.0500) time: 12.1368 data: 10.6787 max mem: 55573 Epoch: [265] [ 10/156] eta: 0:04:08 lr: 0.000304 min_lr: 0.000304 loss: 2.9988 (2.8009) weight_decay: 0.0500 (0.0500) time: 1.7023 data: 1.0080 max mem: 55573 Epoch: [265] [ 20/156] eta: 0:02:39 lr: 0.000303 min_lr: 0.000303 loss: 2.9988 (2.8437) weight_decay: 0.0500 (0.0500) time: 0.6239 data: 0.0207 max mem: 55573 Epoch: [265] [ 30/156] eta: 0:02:04 lr: 0.000302 min_lr: 0.000302 loss: 3.1459 (2.8635) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [265] [ 40/156] eta: 0:01:43 lr: 0.000301 min_lr: 0.000301 loss: 3.1344 (2.9092) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [265] [ 50/156] eta: 0:01:27 lr: 0.000300 min_lr: 0.000300 loss: 2.9664 (2.9137) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [265] [ 60/156] eta: 0:01:15 lr: 0.000299 min_lr: 0.000299 loss: 2.9664 (2.9160) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [265] [ 70/156] eta: 0:01:05 lr: 0.000298 min_lr: 0.000298 loss: 3.0165 (2.9094) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [265] [ 80/156] eta: 0:00:56 lr: 0.000297 min_lr: 0.000297 loss: 2.8801 (2.8881) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [265] [ 90/156] eta: 0:00:47 lr: 0.000296 min_lr: 0.000296 loss: 2.8796 (2.8977) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [265] [100/156] eta: 0:00:39 lr: 0.000295 min_lr: 0.000295 loss: 2.8727 (2.8801) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [265] [110/156] eta: 0:00:32 lr: 0.000293 min_lr: 0.000293 loss: 2.9514 (2.8811) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [265] [120/156] eta: 0:00:24 lr: 0.000292 min_lr: 0.000292 loss: 3.0595 (2.8849) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [265] [130/156] eta: 0:00:17 lr: 0.000291 min_lr: 0.000291 loss: 3.0083 (2.8724) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [265] [140/156] eta: 0:00:10 lr: 0.000290 min_lr: 0.000290 loss: 3.0046 (2.8720) weight_decay: 0.0500 (0.0500) time: 0.5853 data: 0.0009 max mem: 55573 Epoch: [265] [150/156] eta: 0:00:04 lr: 0.000289 min_lr: 0.000289 loss: 3.0046 (2.8720) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [265] [155/156] eta: 0:00:00 lr: 0.000289 min_lr: 0.000289 loss: 3.0046 (2.8728) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [265] Total time: 0:01:45 (0.6742 s / it) Averaged stats: lr: 0.000289 min_lr: 0.000289 loss: 3.0046 (2.9060) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.7970 (0.7970) acc1: 84.2448 (84.2448) acc5: 97.5260 (97.5260) time: 7.0308 data: 6.7933 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9168 (0.9011) acc1: 80.4688 (80.1600) acc5: 97.1354 (96.1920) time: 1.5526 data: 1.3587 max mem: 55573 Test: Total time: 0:00:08 (1.6007 s / it) * Acc@1 81.428 Acc@5 95.764 loss 0.888 Accuracy of the model on the 50000 test images: 81.4% Max accuracy: 81.59% Test: [0/5] eta: 0:00:34 loss: 0.9991 (0.9991) acc1: 75.2604 (75.2604) acc5: 93.2292 (93.2292) time: 6.8546 data: 6.6185 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0291 (1.0477) acc1: 73.1771 (71.0080) acc5: 93.2292 (90.7200) time: 1.5163 data: 1.3238 max mem: 55573 Test: Total time: 0:00:07 (1.5341 s / it) * Acc@1 71.820 Acc@5 90.992 loss 1.067 Accuracy of the model EMA on 50000 test images: 71.8% Max EMA accuracy: 71.82% Epoch: [266] [ 0/156] eta: 0:34:10 lr: 0.000289 min_lr: 0.000289 loss: 1.9500 (1.9500) weight_decay: 0.0500 (0.0500) time: 13.1437 data: 7.9771 max mem: 55573 Epoch: [266] [ 10/156] eta: 0:04:14 lr: 0.000287 min_lr: 0.000287 loss: 2.9130 (2.6579) weight_decay: 0.0500 (0.0500) time: 1.7448 data: 0.7376 max mem: 55573 Epoch: [266] [ 20/156] eta: 0:02:42 lr: 0.000286 min_lr: 0.000286 loss: 3.0565 (2.8202) weight_decay: 0.0500 (0.0500) time: 0.5978 data: 0.0070 max mem: 55573 Epoch: [266] [ 30/156] eta: 0:02:05 lr: 0.000285 min_lr: 0.000285 loss: 3.0629 (2.7700) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [266] [ 40/156] eta: 0:01:44 lr: 0.000284 min_lr: 0.000284 loss: 2.9586 (2.7967) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [266] [ 50/156] eta: 0:01:28 lr: 0.000283 min_lr: 0.000283 loss: 2.9686 (2.7991) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [266] [ 60/156] eta: 0:01:16 lr: 0.000282 min_lr: 0.000282 loss: 2.9586 (2.8085) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [266] [ 70/156] eta: 0:01:06 lr: 0.000281 min_lr: 0.000281 loss: 3.1175 (2.8643) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [266] [ 80/156] eta: 0:00:56 lr: 0.000280 min_lr: 0.000280 loss: 3.0374 (2.8778) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [266] [ 90/156] eta: 0:00:48 lr: 0.000279 min_lr: 0.000279 loss: 3.0374 (2.8947) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [266] [100/156] eta: 0:00:40 lr: 0.000278 min_lr: 0.000278 loss: 3.0375 (2.9006) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [266] [110/156] eta: 0:00:32 lr: 0.000277 min_lr: 0.000277 loss: 2.8785 (2.8771) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [266] [120/156] eta: 0:00:25 lr: 0.000276 min_lr: 0.000276 loss: 2.8723 (2.8759) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [266] [130/156] eta: 0:00:17 lr: 0.000275 min_lr: 0.000275 loss: 2.9773 (2.8775) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0011 max mem: 55573 Epoch: [266] [140/156] eta: 0:00:10 lr: 0.000274 min_lr: 0.000274 loss: 2.8768 (2.8637) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0009 max mem: 55573 Epoch: [266] [150/156] eta: 0:00:04 lr: 0.000273 min_lr: 0.000273 loss: 2.7045 (2.8467) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [266] [155/156] eta: 0:00:00 lr: 0.000272 min_lr: 0.000272 loss: 2.7601 (2.8534) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [266] Total time: 0:01:45 (0.6774 s / it) Averaged stats: lr: 0.000272 min_lr: 0.000272 loss: 2.7601 (2.8738) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.7754 (0.7754) acc1: 84.8958 (84.8958) acc5: 97.3958 (97.3958) time: 7.0181 data: 6.7806 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8946 (0.8679) acc1: 81.7708 (80.9600) acc5: 96.2240 (95.8080) time: 1.5500 data: 1.3562 max mem: 55573 Test: Total time: 0:00:07 (1.5962 s / it) * Acc@1 81.570 Acc@5 95.860 loss 0.850 Accuracy of the model on the 50000 test images: 81.6% Max accuracy: 81.59% Test: [0/5] eta: 0:00:35 loss: 0.9978 (0.9978) acc1: 75.5208 (75.5208) acc5: 93.2292 (93.2292) time: 7.1616 data: 6.9254 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0280 (1.0465) acc1: 73.3073 (71.1360) acc5: 93.2292 (90.7200) time: 1.5777 data: 1.3852 max mem: 55573 Test: Total time: 0:00:07 (1.5943 s / it) * Acc@1 71.876 Acc@5 91.004 loss 1.066 Accuracy of the model EMA on 50000 test images: 71.9% Max EMA accuracy: 71.88% Epoch: [267] [ 0/156] eta: 0:32:56 lr: 0.000272 min_lr: 0.000272 loss: 3.1005 (3.1005) weight_decay: 0.0500 (0.0500) time: 12.6680 data: 12.0716 max mem: 55573 Epoch: [267] [ 10/156] eta: 0:04:09 lr: 0.000271 min_lr: 0.000271 loss: 3.0704 (2.8159) weight_decay: 0.0500 (0.0500) time: 1.7089 data: 1.0978 max mem: 55573 Epoch: [267] [ 20/156] eta: 0:02:40 lr: 0.000270 min_lr: 0.000270 loss: 3.0704 (2.8178) weight_decay: 0.0500 (0.0500) time: 0.6028 data: 0.0005 max mem: 55573 Epoch: [267] [ 30/156] eta: 0:02:04 lr: 0.000269 min_lr: 0.000269 loss: 3.0643 (2.8176) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0006 max mem: 55573 Epoch: [267] [ 40/156] eta: 0:01:43 lr: 0.000268 min_lr: 0.000268 loss: 2.8956 (2.8432) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [267] [ 50/156] eta: 0:01:28 lr: 0.000267 min_lr: 0.000267 loss: 2.7596 (2.8139) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [267] [ 60/156] eta: 0:01:16 lr: 0.000266 min_lr: 0.000266 loss: 2.8994 (2.8390) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [267] [ 70/156] eta: 0:01:05 lr: 0.000265 min_lr: 0.000265 loss: 2.8301 (2.8214) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [267] [ 80/156] eta: 0:00:56 lr: 0.000264 min_lr: 0.000264 loss: 2.8301 (2.8196) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0005 max mem: 55573 Epoch: [267] [ 90/156] eta: 0:00:47 lr: 0.000263 min_lr: 0.000263 loss: 2.9773 (2.8299) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [267] [100/156] eta: 0:00:39 lr: 0.000262 min_lr: 0.000262 loss: 2.9307 (2.8252) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [267] [110/156] eta: 0:00:32 lr: 0.000261 min_lr: 0.000261 loss: 3.0279 (2.8372) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [267] [120/156] eta: 0:00:24 lr: 0.000260 min_lr: 0.000260 loss: 2.9123 (2.8265) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [267] [130/156] eta: 0:00:17 lr: 0.000259 min_lr: 0.000259 loss: 2.9123 (2.8322) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0013 max mem: 55573 Epoch: [267] [140/156] eta: 0:00:10 lr: 0.000258 min_lr: 0.000258 loss: 3.1719 (2.8425) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0012 max mem: 55573 Epoch: [267] [150/156] eta: 0:00:04 lr: 0.000257 min_lr: 0.000257 loss: 2.9450 (2.8435) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0002 max mem: 55573 Epoch: [267] [155/156] eta: 0:00:00 lr: 0.000256 min_lr: 0.000256 loss: 2.8096 (2.8360) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [267] Total time: 0:01:45 (0.6751 s / it) Averaged stats: lr: 0.000256 min_lr: 0.000256 loss: 2.8096 (2.8918) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.7924 (0.7924) acc1: 84.8958 (84.8958) acc5: 97.5260 (97.5260) time: 7.0536 data: 6.8162 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8892 (0.8671) acc1: 82.8125 (80.9600) acc5: 96.2264 (96.0000) time: 1.5570 data: 1.3633 max mem: 55573 Test: Total time: 0:00:07 (1.5996 s / it) * Acc@1 81.624 Acc@5 95.834 loss 0.854 Accuracy of the model on the 50000 test images: 81.6% Max accuracy: 81.62% Test: [0/5] eta: 0:00:34 loss: 0.9963 (0.9963) acc1: 75.5208 (75.5208) acc5: 93.3594 (93.3594) time: 6.9512 data: 6.7153 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0268 (1.0453) acc1: 73.3073 (71.1360) acc5: 93.3594 (90.7520) time: 1.5356 data: 1.3432 max mem: 55573 Test: Total time: 0:00:07 (1.5553 s / it) * Acc@1 71.900 Acc@5 91.020 loss 1.065 Accuracy of the model EMA on 50000 test images: 71.9% Max EMA accuracy: 71.90% Epoch: [268] [ 0/156] eta: 0:35:04 lr: 0.000256 min_lr: 0.000256 loss: 3.2994 (3.2994) weight_decay: 0.0500 (0.0500) time: 13.4906 data: 12.0033 max mem: 55573 Epoch: [268] [ 10/156] eta: 0:04:24 lr: 0.000255 min_lr: 0.000255 loss: 2.9986 (2.8224) weight_decay: 0.0500 (0.0500) time: 1.8090 data: 1.1404 max mem: 55573 Epoch: [268] [ 20/156] eta: 0:02:47 lr: 0.000254 min_lr: 0.000254 loss: 2.9964 (2.7410) weight_decay: 0.0500 (0.0500) time: 0.6153 data: 0.0275 max mem: 55573 Epoch: [268] [ 30/156] eta: 0:02:08 lr: 0.000253 min_lr: 0.000253 loss: 2.5462 (2.7029) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0007 max mem: 55573 Epoch: [268] [ 40/156] eta: 0:01:46 lr: 0.000252 min_lr: 0.000252 loss: 2.8679 (2.7747) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0006 max mem: 55573 Epoch: [268] [ 50/156] eta: 0:01:30 lr: 0.000251 min_lr: 0.000251 loss: 3.0400 (2.7980) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0006 max mem: 55573 Epoch: [268] [ 60/156] eta: 0:01:17 lr: 0.000250 min_lr: 0.000250 loss: 3.0254 (2.8008) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0008 max mem: 55573 Epoch: [268] [ 70/156] eta: 0:01:07 lr: 0.000249 min_lr: 0.000249 loss: 3.0189 (2.8246) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0009 max mem: 55573 Epoch: [268] [ 80/156] eta: 0:00:57 lr: 0.000248 min_lr: 0.000248 loss: 3.0146 (2.8351) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0009 max mem: 55573 Epoch: [268] [ 90/156] eta: 0:00:48 lr: 0.000247 min_lr: 0.000247 loss: 3.0146 (2.8387) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0009 max mem: 55573 Epoch: [268] [100/156] eta: 0:00:40 lr: 0.000246 min_lr: 0.000246 loss: 3.1794 (2.8740) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0008 max mem: 55573 Epoch: [268] [110/156] eta: 0:00:32 lr: 0.000245 min_lr: 0.000245 loss: 3.1049 (2.8670) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0009 max mem: 55573 Epoch: [268] [120/156] eta: 0:00:25 lr: 0.000244 min_lr: 0.000244 loss: 2.8424 (2.8576) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0008 max mem: 55573 Epoch: [268] [130/156] eta: 0:00:18 lr: 0.000243 min_lr: 0.000243 loss: 2.8424 (2.8509) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0017 max mem: 55573 Epoch: [268] [140/156] eta: 0:00:10 lr: 0.000242 min_lr: 0.000242 loss: 2.9300 (2.8573) weight_decay: 0.0500 (0.0500) time: 0.5886 data: 0.0013 max mem: 55573 Epoch: [268] [150/156] eta: 0:00:04 lr: 0.000241 min_lr: 0.000241 loss: 2.9830 (2.8730) weight_decay: 0.0500 (0.0500) time: 0.5834 data: 0.0001 max mem: 55573 Epoch: [268] [155/156] eta: 0:00:00 lr: 0.000241 min_lr: 0.000241 loss: 2.9983 (2.8816) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [268] Total time: 0:01:46 (0.6822 s / it) Averaged stats: lr: 0.000241 min_lr: 0.000241 loss: 2.9983 (2.8931) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8810 (0.8810) acc1: 84.7656 (84.7656) acc5: 97.6562 (97.6562) time: 6.6305 data: 6.3934 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9746 (0.9619) acc1: 81.3802 (80.8000) acc5: 96.8750 (96.0000) time: 1.4726 data: 1.2788 max mem: 55573 Test: Total time: 0:00:07 (1.5256 s / it) * Acc@1 81.458 Acc@5 95.938 loss 0.945 Accuracy of the model on the 50000 test images: 81.5% Max accuracy: 81.62% Test: [0/5] eta: 0:00:34 loss: 0.9948 (0.9948) acc1: 75.5208 (75.5208) acc5: 93.3594 (93.3594) time: 6.9925 data: 6.7565 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0252 (1.0439) acc1: 73.4375 (71.2000) acc5: 93.3594 (90.7840) time: 1.5438 data: 1.3514 max mem: 55573 Test: Total time: 0:00:07 (1.5635 s / it) * Acc@1 71.944 Acc@5 91.040 loss 1.063 Accuracy of the model EMA on 50000 test images: 71.9% Max EMA accuracy: 71.94% Epoch: [269] [ 0/156] eta: 0:35:53 lr: 0.000240 min_lr: 0.000240 loss: 3.1547 (3.1547) weight_decay: 0.0500 (0.0500) time: 13.8031 data: 8.6513 max mem: 55573 Epoch: [269] [ 10/156] eta: 0:04:21 lr: 0.000240 min_lr: 0.000240 loss: 2.9185 (2.6801) weight_decay: 0.0500 (0.0500) time: 1.7915 data: 0.7868 max mem: 55573 Epoch: [269] [ 20/156] eta: 0:02:45 lr: 0.000239 min_lr: 0.000239 loss: 3.0589 (2.8956) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [269] [ 30/156] eta: 0:02:08 lr: 0.000238 min_lr: 0.000238 loss: 3.1496 (2.9056) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [269] [ 40/156] eta: 0:01:45 lr: 0.000237 min_lr: 0.000237 loss: 3.0737 (2.9178) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [269] [ 50/156] eta: 0:01:30 lr: 0.000236 min_lr: 0.000236 loss: 2.8693 (2.8944) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [269] [ 60/156] eta: 0:01:17 lr: 0.000235 min_lr: 0.000235 loss: 2.8160 (2.8732) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [269] [ 70/156] eta: 0:01:06 lr: 0.000234 min_lr: 0.000234 loss: 2.9254 (2.8828) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [269] [ 80/156] eta: 0:00:57 lr: 0.000233 min_lr: 0.000233 loss: 3.0773 (2.8880) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [269] [ 90/156] eta: 0:00:48 lr: 0.000232 min_lr: 0.000232 loss: 3.0324 (2.8974) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [269] [100/156] eta: 0:00:40 lr: 0.000231 min_lr: 0.000231 loss: 3.0407 (2.8925) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0004 max mem: 55573 Epoch: [269] [110/156] eta: 0:00:32 lr: 0.000230 min_lr: 0.000230 loss: 3.1261 (2.8965) weight_decay: 0.0500 (0.0500) time: 0.5933 data: 0.0004 max mem: 55573 Epoch: [269] [120/156] eta: 0:00:25 lr: 0.000229 min_lr: 0.000229 loss: 2.9741 (2.8878) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [269] [130/156] eta: 0:00:17 lr: 0.000228 min_lr: 0.000228 loss: 2.9741 (2.8669) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0010 max mem: 55573 Epoch: [269] [140/156] eta: 0:00:10 lr: 0.000227 min_lr: 0.000227 loss: 3.0449 (2.8755) weight_decay: 0.0500 (0.0500) time: 0.5874 data: 0.0009 max mem: 55573 Epoch: [269] [150/156] eta: 0:00:04 lr: 0.000226 min_lr: 0.000226 loss: 3.0082 (2.8824) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0002 max mem: 55573 Epoch: [269] [155/156] eta: 0:00:00 lr: 0.000226 min_lr: 0.000226 loss: 2.8860 (2.8853) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0002 max mem: 55573 Epoch: [269] Total time: 0:01:46 (0.6812 s / it) Averaged stats: lr: 0.000226 min_lr: 0.000226 loss: 2.8860 (2.8851) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8288 (0.8288) acc1: 85.0260 (85.0260) acc5: 97.3958 (97.3958) time: 6.9102 data: 6.6706 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9264 (0.9141) acc1: 81.5104 (80.8000) acc5: 96.8750 (95.8080) time: 1.5288 data: 1.3342 max mem: 55573 Test: Total time: 0:00:07 (1.5827 s / it) * Acc@1 81.536 Acc@5 95.834 loss 0.900 Accuracy of the model on the 50000 test images: 81.5% Max accuracy: 81.62% Test: [0/5] eta: 0:00:36 loss: 0.9933 (0.9933) acc1: 75.5208 (75.5208) acc5: 93.3594 (93.3594) time: 7.3407 data: 7.1047 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0239 (1.0425) acc1: 73.5677 (71.2960) acc5: 93.3594 (90.8480) time: 1.6134 data: 1.4210 max mem: 55573 Test: Total time: 0:00:08 (1.6356 s / it) * Acc@1 71.988 Acc@5 91.068 loss 1.062 Accuracy of the model EMA on 50000 test images: 72.0% Max EMA accuracy: 71.99% Epoch: [270] [ 0/156] eta: 0:33:30 lr: 0.000225 min_lr: 0.000225 loss: 3.3509 (3.3509) weight_decay: 0.0500 (0.0500) time: 12.8899 data: 11.8927 max mem: 55573 Epoch: [270] [ 10/156] eta: 0:04:16 lr: 0.000224 min_lr: 0.000224 loss: 2.8087 (2.7720) weight_decay: 0.0500 (0.0500) time: 1.7546 data: 1.1157 max mem: 55573 Epoch: [270] [ 20/156] eta: 0:02:43 lr: 0.000224 min_lr: 0.000224 loss: 2.8701 (2.9050) weight_decay: 0.0500 (0.0500) time: 0.6163 data: 0.0192 max mem: 55573 Epoch: [270] [ 30/156] eta: 0:02:06 lr: 0.000223 min_lr: 0.000223 loss: 2.9750 (2.8910) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [270] [ 40/156] eta: 0:01:44 lr: 0.000222 min_lr: 0.000222 loss: 2.8659 (2.8468) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0004 max mem: 55573 Epoch: [270] [ 50/156] eta: 0:01:29 lr: 0.000221 min_lr: 0.000221 loss: 2.8659 (2.8432) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [270] [ 60/156] eta: 0:01:16 lr: 0.000220 min_lr: 0.000220 loss: 3.1727 (2.8808) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0004 max mem: 55573 Epoch: [270] [ 70/156] eta: 0:01:06 lr: 0.000219 min_lr: 0.000219 loss: 3.1105 (2.8865) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0004 max mem: 55573 Epoch: [270] [ 80/156] eta: 0:00:56 lr: 0.000218 min_lr: 0.000218 loss: 3.1105 (2.9081) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [270] [ 90/156] eta: 0:00:48 lr: 0.000217 min_lr: 0.000217 loss: 3.0267 (2.8943) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [270] [100/156] eta: 0:00:40 lr: 0.000216 min_lr: 0.000216 loss: 2.9689 (2.9033) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [270] [110/156] eta: 0:00:32 lr: 0.000215 min_lr: 0.000215 loss: 2.9698 (2.9019) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [270] [120/156] eta: 0:00:25 lr: 0.000214 min_lr: 0.000214 loss: 3.0649 (2.9093) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [270] [130/156] eta: 0:00:17 lr: 0.000213 min_lr: 0.000213 loss: 2.9174 (2.8961) weight_decay: 0.0500 (0.0500) time: 0.5889 data: 0.0010 max mem: 55573 Epoch: [270] [140/156] eta: 0:00:10 lr: 0.000212 min_lr: 0.000212 loss: 3.0142 (2.9157) weight_decay: 0.0500 (0.0500) time: 0.5854 data: 0.0009 max mem: 55573 Epoch: [270] [150/156] eta: 0:00:04 lr: 0.000211 min_lr: 0.000211 loss: 2.9378 (2.8910) weight_decay: 0.0500 (0.0500) time: 0.5832 data: 0.0001 max mem: 55573 Epoch: [270] [155/156] eta: 0:00:00 lr: 0.000211 min_lr: 0.000211 loss: 2.7498 (2.8863) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [270] Total time: 0:01:45 (0.6789 s / it) Averaged stats: lr: 0.000211 min_lr: 0.000211 loss: 2.7498 (2.8759) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7849 (0.7849) acc1: 84.1146 (84.1146) acc5: 97.1354 (97.1354) time: 6.8922 data: 6.6548 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8878 (0.8763) acc1: 81.1198 (80.3840) acc5: 96.2264 (96.0320) time: 1.5250 data: 1.3310 max mem: 55573 Test: Total time: 0:00:07 (1.5696 s / it) * Acc@1 81.686 Acc@5 95.888 loss 0.860 Accuracy of the model on the 50000 test images: 81.7% Max accuracy: 81.69% Test: [0/5] eta: 0:00:35 loss: 0.9916 (0.9916) acc1: 75.6510 (75.6510) acc5: 93.3594 (93.3594) time: 7.0966 data: 6.8606 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0223 (1.0410) acc1: 73.6979 (71.3600) acc5: 93.3594 (90.8800) time: 1.5646 data: 1.3722 max mem: 55573 Test: Total time: 0:00:07 (1.5816 s / it) * Acc@1 72.024 Acc@5 91.088 loss 1.060 Accuracy of the model EMA on 50000 test images: 72.0% Max EMA accuracy: 72.02% Epoch: [271] [ 0/156] eta: 0:32:12 lr: 0.000211 min_lr: 0.000211 loss: 2.9731 (2.9731) weight_decay: 0.0500 (0.0500) time: 12.3903 data: 11.7915 max mem: 55573 Epoch: [271] [ 10/156] eta: 0:04:09 lr: 0.000210 min_lr: 0.000210 loss: 3.0375 (2.8147) weight_decay: 0.0500 (0.0500) time: 1.7093 data: 1.1157 max mem: 55573 Epoch: [271] [ 20/156] eta: 0:02:40 lr: 0.000209 min_lr: 0.000209 loss: 2.9679 (2.7672) weight_decay: 0.0500 (0.0500) time: 0.6163 data: 0.0243 max mem: 55573 Epoch: [271] [ 30/156] eta: 0:02:04 lr: 0.000208 min_lr: 0.000208 loss: 3.0723 (2.8565) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [271] [ 40/156] eta: 0:01:43 lr: 0.000207 min_lr: 0.000207 loss: 2.9674 (2.8729) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [271] [ 50/156] eta: 0:01:28 lr: 0.000206 min_lr: 0.000206 loss: 3.0335 (2.9276) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [271] [ 60/156] eta: 0:01:16 lr: 0.000205 min_lr: 0.000205 loss: 3.0697 (2.9339) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [271] [ 70/156] eta: 0:01:05 lr: 0.000204 min_lr: 0.000204 loss: 2.9706 (2.9379) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [271] [ 80/156] eta: 0:00:56 lr: 0.000204 min_lr: 0.000204 loss: 2.9828 (2.9189) weight_decay: 0.0500 (0.0500) time: 0.5885 data: 0.0005 max mem: 55573 Epoch: [271] [ 90/156] eta: 0:00:47 lr: 0.000203 min_lr: 0.000203 loss: 2.8744 (2.9080) weight_decay: 0.0500 (0.0500) time: 0.5887 data: 0.0004 max mem: 55573 Epoch: [271] [100/156] eta: 0:00:39 lr: 0.000202 min_lr: 0.000202 loss: 2.9927 (2.9161) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [271] [110/156] eta: 0:00:32 lr: 0.000201 min_lr: 0.000201 loss: 2.9831 (2.8890) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [271] [120/156] eta: 0:00:24 lr: 0.000200 min_lr: 0.000200 loss: 3.0192 (2.9066) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [271] [130/156] eta: 0:00:17 lr: 0.000199 min_lr: 0.000199 loss: 3.1113 (2.9054) weight_decay: 0.0500 (0.0500) time: 0.5888 data: 0.0012 max mem: 55573 Epoch: [271] [140/156] eta: 0:00:10 lr: 0.000198 min_lr: 0.000198 loss: 2.8702 (2.8951) weight_decay: 0.0500 (0.0500) time: 0.5850 data: 0.0010 max mem: 55573 Epoch: [271] [150/156] eta: 0:00:04 lr: 0.000197 min_lr: 0.000197 loss: 3.0250 (2.9004) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [271] [155/156] eta: 0:00:00 lr: 0.000197 min_lr: 0.000197 loss: 3.0069 (2.9007) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [271] Total time: 0:01:45 (0.6743 s / it) Averaged stats: lr: 0.000197 min_lr: 0.000197 loss: 3.0069 (2.8870) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8084 (0.8084) acc1: 84.1146 (84.1146) acc5: 97.5260 (97.5260) time: 6.8773 data: 6.6394 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9106 (0.8865) acc1: 81.5104 (80.7680) acc5: 97.2656 (96.1600) time: 1.5221 data: 1.3280 max mem: 55573 Test: Total time: 0:00:07 (1.5657 s / it) * Acc@1 81.602 Acc@5 95.970 loss 0.872 Accuracy of the model on the 50000 test images: 81.6% Max accuracy: 81.69% Test: [0/5] eta: 0:00:35 loss: 0.9901 (0.9901) acc1: 75.6510 (75.6510) acc5: 93.3594 (93.3594) time: 7.1664 data: 6.9304 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0208 (1.0396) acc1: 73.6979 (71.3600) acc5: 93.3594 (90.8800) time: 1.5786 data: 1.3862 max mem: 55573 Test: Total time: 0:00:07 (1.5958 s / it) * Acc@1 72.050 Acc@5 91.102 loss 1.059 Accuracy of the model EMA on 50000 test images: 72.1% Max EMA accuracy: 72.05% Epoch: [272] [ 0/156] eta: 0:35:23 lr: 0.000197 min_lr: 0.000197 loss: 3.0785 (3.0785) weight_decay: 0.0500 (0.0500) time: 13.6136 data: 9.8098 max mem: 55573 Epoch: [272] [ 10/156] eta: 0:04:19 lr: 0.000196 min_lr: 0.000196 loss: 2.8848 (2.7486) weight_decay: 0.0500 (0.0500) time: 1.7802 data: 0.8922 max mem: 55573 Epoch: [272] [ 20/156] eta: 0:02:44 lr: 0.000195 min_lr: 0.000195 loss: 2.8848 (2.8555) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [272] [ 30/156] eta: 0:02:07 lr: 0.000194 min_lr: 0.000194 loss: 3.0463 (2.8513) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [272] [ 40/156] eta: 0:01:45 lr: 0.000193 min_lr: 0.000193 loss: 3.0463 (2.8526) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [272] [ 50/156] eta: 0:01:29 lr: 0.000192 min_lr: 0.000192 loss: 3.0224 (2.8676) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0005 max mem: 55573 Epoch: [272] [ 60/156] eta: 0:01:17 lr: 0.000191 min_lr: 0.000191 loss: 3.0030 (2.8710) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [272] [ 70/156] eta: 0:01:06 lr: 0.000191 min_lr: 0.000191 loss: 3.1715 (2.9004) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [272] [ 80/156] eta: 0:00:57 lr: 0.000190 min_lr: 0.000190 loss: 3.1406 (2.8986) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [272] [ 90/156] eta: 0:00:48 lr: 0.000189 min_lr: 0.000189 loss: 2.9815 (2.9084) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [272] [100/156] eta: 0:00:40 lr: 0.000188 min_lr: 0.000188 loss: 2.8916 (2.8962) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [272] [110/156] eta: 0:00:32 lr: 0.000187 min_lr: 0.000187 loss: 2.9755 (2.9030) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [272] [120/156] eta: 0:00:25 lr: 0.000186 min_lr: 0.000186 loss: 3.0085 (2.8951) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [272] [130/156] eta: 0:00:17 lr: 0.000185 min_lr: 0.000185 loss: 2.4400 (2.8545) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0011 max mem: 55573 Epoch: [272] [140/156] eta: 0:00:10 lr: 0.000184 min_lr: 0.000184 loss: 2.8369 (2.8741) weight_decay: 0.0500 (0.0500) time: 0.5876 data: 0.0009 max mem: 55573 Epoch: [272] [150/156] eta: 0:00:04 lr: 0.000184 min_lr: 0.000184 loss: 3.0467 (2.8728) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [272] [155/156] eta: 0:00:00 lr: 0.000183 min_lr: 0.000183 loss: 3.0467 (2.8740) weight_decay: 0.0500 (0.0500) time: 0.5813 data: 0.0001 max mem: 55573 Epoch: [272] Total time: 0:01:46 (0.6798 s / it) Averaged stats: lr: 0.000183 min_lr: 0.000183 loss: 3.0467 (2.8696) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.7914 (0.7914) acc1: 84.7656 (84.7656) acc5: 97.1354 (97.1354) time: 7.0456 data: 6.8068 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8801 (0.8709) acc1: 81.6406 (80.6080) acc5: 96.8750 (96.0640) time: 1.5558 data: 1.3615 max mem: 55573 Test: Total time: 0:00:08 (1.6015 s / it) * Acc@1 81.722 Acc@5 95.936 loss 0.858 Accuracy of the model on the 50000 test images: 81.7% Max accuracy: 81.72% Test: [0/5] eta: 0:00:34 loss: 0.9883 (0.9883) acc1: 75.6510 (75.6510) acc5: 93.3594 (93.3594) time: 6.9464 data: 6.7103 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0190 (1.0380) acc1: 73.6979 (71.3600) acc5: 93.3594 (90.9440) time: 1.5346 data: 1.3422 max mem: 55573 Test: Total time: 0:00:07 (1.5537 s / it) * Acc@1 72.084 Acc@5 91.140 loss 1.057 Accuracy of the model EMA on 50000 test images: 72.1% Max EMA accuracy: 72.08% Epoch: [273] [ 0/156] eta: 0:35:44 lr: 0.000183 min_lr: 0.000183 loss: 3.0868 (3.0868) weight_decay: 0.0500 (0.0500) time: 13.7493 data: 8.6278 max mem: 55573 Epoch: [273] [ 10/156] eta: 0:04:21 lr: 0.000182 min_lr: 0.000182 loss: 2.9065 (2.7109) weight_decay: 0.0500 (0.0500) time: 1.7894 data: 0.7849 max mem: 55573 Epoch: [273] [ 20/156] eta: 0:02:45 lr: 0.000181 min_lr: 0.000181 loss: 2.9065 (2.7549) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0008 max mem: 55573 Epoch: [273] [ 30/156] eta: 0:02:08 lr: 0.000181 min_lr: 0.000181 loss: 2.9043 (2.7482) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0008 max mem: 55573 Epoch: [273] [ 40/156] eta: 0:01:45 lr: 0.000180 min_lr: 0.000180 loss: 2.9043 (2.7710) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [273] [ 50/156] eta: 0:01:30 lr: 0.000179 min_lr: 0.000179 loss: 3.0610 (2.8060) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [273] [ 60/156] eta: 0:01:17 lr: 0.000178 min_lr: 0.000178 loss: 3.0965 (2.8346) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [273] [ 70/156] eta: 0:01:06 lr: 0.000177 min_lr: 0.000177 loss: 3.0055 (2.8116) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [273] [ 80/156] eta: 0:00:57 lr: 0.000176 min_lr: 0.000176 loss: 3.0055 (2.8196) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [273] [ 90/156] eta: 0:00:48 lr: 0.000175 min_lr: 0.000175 loss: 3.1073 (2.8258) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [273] [100/156] eta: 0:00:40 lr: 0.000175 min_lr: 0.000175 loss: 3.0083 (2.8117) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [273] [110/156] eta: 0:00:32 lr: 0.000174 min_lr: 0.000174 loss: 2.7577 (2.8077) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0006 max mem: 55573 Epoch: [273] [120/156] eta: 0:00:25 lr: 0.000173 min_lr: 0.000173 loss: 2.7572 (2.8070) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [273] [130/156] eta: 0:00:17 lr: 0.000172 min_lr: 0.000172 loss: 2.7707 (2.8186) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0022 max mem: 55573 Epoch: [273] [140/156] eta: 0:00:10 lr: 0.000171 min_lr: 0.000171 loss: 2.9528 (2.8186) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0020 max mem: 55573 Epoch: [273] [150/156] eta: 0:00:04 lr: 0.000170 min_lr: 0.000170 loss: 3.0684 (2.8393) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [273] [155/156] eta: 0:00:00 lr: 0.000170 min_lr: 0.000170 loss: 3.1031 (2.8410) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [273] Total time: 0:01:46 (0.6808 s / it) Averaged stats: lr: 0.000170 min_lr: 0.000170 loss: 3.1031 (2.8716) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8527 (0.8527) acc1: 84.2448 (84.2448) acc5: 97.3958 (97.3958) time: 6.7890 data: 6.5518 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9505 (0.9288) acc1: 80.9896 (80.3840) acc5: 96.7448 (95.9680) time: 1.5046 data: 1.3105 max mem: 55573 Test: Total time: 0:00:07 (1.5489 s / it) * Acc@1 81.670 Acc@5 95.908 loss 0.916 Accuracy of the model on the 50000 test images: 81.7% Max accuracy: 81.72% Test: [0/5] eta: 0:00:35 loss: 0.9865 (0.9865) acc1: 75.6510 (75.6510) acc5: 93.4896 (93.4896) time: 7.1342 data: 6.8982 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0174 (1.0363) acc1: 73.6979 (71.3920) acc5: 93.4896 (90.9760) time: 1.5722 data: 1.3797 max mem: 55573 Test: Total time: 0:00:07 (1.5879 s / it) * Acc@1 72.158 Acc@5 91.170 loss 1.055 Accuracy of the model EMA on 50000 test images: 72.2% Max EMA accuracy: 72.16% Epoch: [274] [ 0/156] eta: 0:36:00 lr: 0.000170 min_lr: 0.000170 loss: 3.2319 (3.2319) weight_decay: 0.0500 (0.0500) time: 13.8499 data: 11.5596 max mem: 55573 Epoch: [274] [ 10/156] eta: 0:04:22 lr: 0.000169 min_lr: 0.000169 loss: 2.8645 (2.7537) weight_decay: 0.0500 (0.0500) time: 1.8004 data: 1.0512 max mem: 55573 Epoch: [274] [ 20/156] eta: 0:02:46 lr: 0.000168 min_lr: 0.000168 loss: 2.8645 (2.8375) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0004 max mem: 55573 Epoch: [274] [ 30/156] eta: 0:02:08 lr: 0.000168 min_lr: 0.000168 loss: 3.0200 (2.8232) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [274] [ 40/156] eta: 0:01:46 lr: 0.000167 min_lr: 0.000167 loss: 2.8389 (2.7958) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [274] [ 50/156] eta: 0:01:30 lr: 0.000166 min_lr: 0.000166 loss: 2.8389 (2.8168) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [274] [ 60/156] eta: 0:01:17 lr: 0.000165 min_lr: 0.000165 loss: 3.0415 (2.8577) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [274] [ 70/156] eta: 0:01:06 lr: 0.000164 min_lr: 0.000164 loss: 2.9775 (2.8330) weight_decay: 0.0500 (0.0500) time: 0.5892 data: 0.0005 max mem: 55573 Epoch: [274] [ 80/156] eta: 0:00:57 lr: 0.000163 min_lr: 0.000163 loss: 2.9775 (2.8612) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [274] [ 90/156] eta: 0:00:48 lr: 0.000163 min_lr: 0.000163 loss: 2.9621 (2.8533) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [274] [100/156] eta: 0:00:40 lr: 0.000162 min_lr: 0.000162 loss: 2.9614 (2.8601) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [274] [110/156] eta: 0:00:32 lr: 0.000161 min_lr: 0.000161 loss: 2.9966 (2.8536) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [274] [120/156] eta: 0:00:25 lr: 0.000160 min_lr: 0.000160 loss: 3.0455 (2.8590) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [274] [130/156] eta: 0:00:17 lr: 0.000159 min_lr: 0.000159 loss: 3.1109 (2.8694) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0011 max mem: 55573 Epoch: [274] [140/156] eta: 0:00:10 lr: 0.000159 min_lr: 0.000159 loss: 3.0928 (2.8641) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [274] [150/156] eta: 0:00:04 lr: 0.000158 min_lr: 0.000158 loss: 2.5622 (2.8460) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [274] [155/156] eta: 0:00:00 lr: 0.000157 min_lr: 0.000157 loss: 2.7618 (2.8504) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [274] Total time: 0:01:46 (0.6809 s / it) Averaged stats: lr: 0.000157 min_lr: 0.000157 loss: 2.7618 (2.8822) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8005 (0.8005) acc1: 84.3750 (84.3750) acc5: 97.1354 (97.1354) time: 7.0358 data: 6.7983 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8975 (0.8808) acc1: 81.1198 (80.8640) acc5: 97.1354 (96.0960) time: 1.5536 data: 1.3597 max mem: 55573 Test: Total time: 0:00:07 (1.5936 s / it) * Acc@1 81.718 Acc@5 95.984 loss 0.866 Accuracy of the model on the 50000 test images: 81.7% Max accuracy: 81.72% Test: [0/5] eta: 0:00:35 loss: 0.9846 (0.9846) acc1: 75.9115 (75.9115) acc5: 93.4896 (93.4896) time: 7.0228 data: 6.7864 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0154 (1.0345) acc1: 73.6979 (71.4880) acc5: 93.4896 (91.0720) time: 1.5499 data: 1.3574 max mem: 55573 Test: Total time: 0:00:07 (1.5710 s / it) * Acc@1 72.204 Acc@5 91.196 loss 1.053 Accuracy of the model EMA on 50000 test images: 72.2% Max EMA accuracy: 72.20% Epoch: [275] [ 0/156] eta: 0:30:31 lr: 0.000157 min_lr: 0.000157 loss: 2.6137 (2.6137) weight_decay: 0.0500 (0.0500) time: 11.7372 data: 8.6486 max mem: 55573 Epoch: [275] [ 10/156] eta: 0:04:00 lr: 0.000157 min_lr: 0.000157 loss: 2.8953 (2.8371) weight_decay: 0.0500 (0.0500) time: 1.6477 data: 0.7867 max mem: 55573 Epoch: [275] [ 20/156] eta: 0:02:35 lr: 0.000156 min_lr: 0.000156 loss: 2.8622 (2.8195) weight_decay: 0.0500 (0.0500) time: 0.6146 data: 0.0005 max mem: 55573 Epoch: [275] [ 30/156] eta: 0:02:01 lr: 0.000155 min_lr: 0.000155 loss: 2.8622 (2.8349) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [275] [ 40/156] eta: 0:01:41 lr: 0.000154 min_lr: 0.000154 loss: 2.6771 (2.7700) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0004 max mem: 55573 Epoch: [275] [ 50/156] eta: 0:01:26 lr: 0.000153 min_lr: 0.000153 loss: 2.5807 (2.7739) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0005 max mem: 55573 Epoch: [275] [ 60/156] eta: 0:01:15 lr: 0.000153 min_lr: 0.000153 loss: 2.7003 (2.7621) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0005 max mem: 55573 Epoch: [275] [ 70/156] eta: 0:01:04 lr: 0.000152 min_lr: 0.000152 loss: 2.9134 (2.7801) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [275] [ 80/156] eta: 0:00:55 lr: 0.000151 min_lr: 0.000151 loss: 2.9021 (2.7945) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [275] [ 90/156] eta: 0:00:47 lr: 0.000150 min_lr: 0.000150 loss: 3.0215 (2.8020) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [275] [100/156] eta: 0:00:39 lr: 0.000149 min_lr: 0.000149 loss: 3.0215 (2.7932) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [275] [110/156] eta: 0:00:32 lr: 0.000149 min_lr: 0.000149 loss: 2.8279 (2.8076) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [275] [120/156] eta: 0:00:24 lr: 0.000148 min_lr: 0.000148 loss: 2.8756 (2.8042) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [275] [130/156] eta: 0:00:17 lr: 0.000147 min_lr: 0.000147 loss: 2.9965 (2.8180) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0011 max mem: 55573 Epoch: [275] [140/156] eta: 0:00:10 lr: 0.000146 min_lr: 0.000146 loss: 3.0541 (2.8253) weight_decay: 0.0500 (0.0500) time: 0.5854 data: 0.0009 max mem: 55573 Epoch: [275] [150/156] eta: 0:00:04 lr: 0.000146 min_lr: 0.000146 loss: 2.9819 (2.8322) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [275] [155/156] eta: 0:00:00 lr: 0.000145 min_lr: 0.000145 loss: 2.9575 (2.8296) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [275] Total time: 0:01:44 (0.6711 s / it) Averaged stats: lr: 0.000145 min_lr: 0.000145 loss: 2.9575 (2.8688) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8135 (0.8135) acc1: 83.9844 (83.9844) acc5: 97.3958 (97.3958) time: 6.8242 data: 6.5870 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9098 (0.9029) acc1: 81.6406 (80.5440) acc5: 96.8750 (96.0000) time: 1.5102 data: 1.3175 max mem: 55573 Test: Total time: 0:00:07 (1.5282 s / it) * Acc@1 81.776 Acc@5 95.918 loss 0.888 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.78% Test: [0/5] eta: 0:00:32 loss: 0.9827 (0.9827) acc1: 76.1719 (76.1719) acc5: 93.4896 (93.4896) time: 6.4678 data: 6.2318 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0136 (1.0328) acc1: 73.6979 (71.6160) acc5: 93.4896 (91.0720) time: 1.4390 data: 1.2465 max mem: 55573 Test: Total time: 0:00:07 (1.4562 s / it) * Acc@1 72.262 Acc@5 91.212 loss 1.051 Accuracy of the model EMA on 50000 test images: 72.3% Max EMA accuracy: 72.26% Epoch: [276] [ 0/156] eta: 0:28:44 lr: 0.000145 min_lr: 0.000145 loss: 3.0251 (3.0251) weight_decay: 0.0500 (0.0500) time: 11.0558 data: 8.0143 max mem: 55573 Epoch: [276] [ 10/156] eta: 0:03:56 lr: 0.000144 min_lr: 0.000144 loss: 3.1052 (2.9076) weight_decay: 0.0500 (0.0500) time: 1.6186 data: 0.7290 max mem: 55573 Epoch: [276] [ 20/156] eta: 0:02:33 lr: 0.000144 min_lr: 0.000144 loss: 3.1052 (2.9666) weight_decay: 0.0500 (0.0500) time: 0.6332 data: 0.0005 max mem: 55573 Epoch: [276] [ 30/156] eta: 0:02:00 lr: 0.000143 min_lr: 0.000143 loss: 3.1001 (2.9804) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [276] [ 40/156] eta: 0:01:40 lr: 0.000142 min_lr: 0.000142 loss: 3.0867 (2.9893) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [276] [ 50/156] eta: 0:01:26 lr: 0.000141 min_lr: 0.000141 loss: 3.0753 (2.9518) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [276] [ 60/156] eta: 0:01:14 lr: 0.000141 min_lr: 0.000141 loss: 3.0753 (2.9585) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [276] [ 70/156] eta: 0:01:04 lr: 0.000140 min_lr: 0.000140 loss: 2.8266 (2.9242) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [276] [ 80/156] eta: 0:00:55 lr: 0.000139 min_lr: 0.000139 loss: 2.7481 (2.8809) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [276] [ 90/156] eta: 0:00:47 lr: 0.000138 min_lr: 0.000138 loss: 2.9235 (2.8942) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [276] [100/156] eta: 0:00:39 lr: 0.000138 min_lr: 0.000138 loss: 2.9235 (2.8796) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [276] [110/156] eta: 0:00:31 lr: 0.000137 min_lr: 0.000137 loss: 2.8841 (2.8694) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0005 max mem: 55573 Epoch: [276] [120/156] eta: 0:00:24 lr: 0.000136 min_lr: 0.000136 loss: 2.9660 (2.8864) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [276] [130/156] eta: 0:00:17 lr: 0.000135 min_lr: 0.000135 loss: 3.1339 (2.8915) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0010 max mem: 55573 Epoch: [276] [140/156] eta: 0:00:10 lr: 0.000135 min_lr: 0.000135 loss: 3.0665 (2.8919) weight_decay: 0.0500 (0.0500) time: 0.5878 data: 0.0009 max mem: 55573 Epoch: [276] [150/156] eta: 0:00:03 lr: 0.000134 min_lr: 0.000134 loss: 2.9620 (2.8959) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [276] [155/156] eta: 0:00:00 lr: 0.000134 min_lr: 0.000134 loss: 3.0800 (2.9020) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [276] Total time: 0:01:44 (0.6687 s / it) Averaged stats: lr: 0.000134 min_lr: 0.000134 loss: 3.0800 (2.8731) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8573 (0.8573) acc1: 84.5052 (84.5052) acc5: 97.0052 (97.0052) time: 6.7333 data: 6.4964 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9505 (0.9351) acc1: 81.6406 (80.5760) acc5: 96.8750 (95.9680) time: 1.4931 data: 1.2994 max mem: 55573 Test: Total time: 0:00:07 (1.5406 s / it) * Acc@1 81.736 Acc@5 95.908 loss 0.921 Accuracy of the model on the 50000 test images: 81.7% Max accuracy: 81.78% Test: [0/5] eta: 0:00:34 loss: 0.9807 (0.9807) acc1: 76.0417 (76.0417) acc5: 93.6198 (93.6198) time: 6.9378 data: 6.7017 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0116 (1.0308) acc1: 73.6979 (71.5840) acc5: 93.6198 (91.1680) time: 1.5329 data: 1.3404 max mem: 55573 Test: Total time: 0:00:08 (1.6062 s / it) * Acc@1 72.310 Acc@5 91.248 loss 1.049 Accuracy of the model EMA on 50000 test images: 72.3% Max EMA accuracy: 72.31% Epoch: [277] [ 0/156] eta: 0:36:41 lr: 0.000133 min_lr: 0.000133 loss: 3.3783 (3.3783) weight_decay: 0.0500 (0.0500) time: 14.1091 data: 13.5285 max mem: 55573 Epoch: [277] [ 10/156] eta: 0:04:25 lr: 0.000133 min_lr: 0.000133 loss: 2.9981 (2.9181) weight_decay: 0.0500 (0.0500) time: 1.8176 data: 1.2302 max mem: 55573 Epoch: [277] [ 20/156] eta: 0:02:47 lr: 0.000132 min_lr: 0.000132 loss: 2.9981 (2.9133) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [277] [ 30/156] eta: 0:02:09 lr: 0.000131 min_lr: 0.000131 loss: 3.0500 (2.9532) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [277] [ 40/156] eta: 0:01:46 lr: 0.000131 min_lr: 0.000131 loss: 3.1262 (2.9792) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [277] [ 50/156] eta: 0:01:31 lr: 0.000130 min_lr: 0.000130 loss: 3.1377 (2.9543) weight_decay: 0.0500 (0.0500) time: 0.6034 data: 0.0004 max mem: 55573 Epoch: [277] [ 60/156] eta: 0:01:18 lr: 0.000129 min_lr: 0.000129 loss: 3.0087 (2.9553) weight_decay: 0.0500 (0.0500) time: 0.6031 data: 0.0005 max mem: 55573 Epoch: [277] [ 70/156] eta: 0:01:07 lr: 0.000128 min_lr: 0.000128 loss: 3.1087 (2.9526) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [277] [ 80/156] eta: 0:00:57 lr: 0.000128 min_lr: 0.000128 loss: 3.1242 (2.9746) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [277] [ 90/156] eta: 0:00:48 lr: 0.000127 min_lr: 0.000127 loss: 2.9561 (2.9403) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [277] [100/156] eta: 0:00:40 lr: 0.000126 min_lr: 0.000126 loss: 2.8543 (2.9275) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [277] [110/156] eta: 0:00:32 lr: 0.000125 min_lr: 0.000125 loss: 2.9188 (2.9131) weight_decay: 0.0500 (0.0500) time: 0.5943 data: 0.0005 max mem: 55573 Epoch: [277] [120/156] eta: 0:00:25 lr: 0.000125 min_lr: 0.000125 loss: 3.0062 (2.9164) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0004 max mem: 55573 Epoch: [277] [130/156] eta: 0:00:18 lr: 0.000124 min_lr: 0.000124 loss: 2.9783 (2.9227) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0011 max mem: 55573 Epoch: [277] [140/156] eta: 0:00:11 lr: 0.000123 min_lr: 0.000123 loss: 2.9664 (2.9200) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0009 max mem: 55573 Epoch: [277] [150/156] eta: 0:00:04 lr: 0.000123 min_lr: 0.000123 loss: 2.8675 (2.9050) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [277] [155/156] eta: 0:00:00 lr: 0.000122 min_lr: 0.000122 loss: 2.8675 (2.9089) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [277] Total time: 0:01:46 (0.6845 s / it) Averaged stats: lr: 0.000122 min_lr: 0.000122 loss: 2.8675 (2.8693) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8202 (0.8202) acc1: 84.2448 (84.2448) acc5: 97.3958 (97.3958) time: 6.9426 data: 6.7053 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9114 (0.8918) acc1: 81.3802 (80.7360) acc5: 97.1354 (96.1280) time: 1.5349 data: 1.3411 max mem: 55573 Test: Total time: 0:00:07 (1.5796 s / it) * Acc@1 81.774 Acc@5 95.936 loss 0.877 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.78% Test: [0/5] eta: 0:00:36 loss: 0.9786 (0.9786) acc1: 76.0417 (76.0417) acc5: 93.6198 (93.6198) time: 7.3245 data: 7.0885 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0095 (1.0288) acc1: 73.8281 (71.5840) acc5: 93.6198 (91.2000) time: 1.6102 data: 1.4178 max mem: 55573 Test: Total time: 0:00:08 (1.6292 s / it) * Acc@1 72.360 Acc@5 91.268 loss 1.047 Accuracy of the model EMA on 50000 test images: 72.4% Max EMA accuracy: 72.36% Epoch: [278] [ 0/156] eta: 0:32:32 lr: 0.000122 min_lr: 0.000122 loss: 3.3732 (3.3732) weight_decay: 0.0500 (0.0500) time: 12.5141 data: 10.4160 max mem: 55573 Epoch: [278] [ 10/156] eta: 0:04:10 lr: 0.000122 min_lr: 0.000122 loss: 2.9924 (2.8719) weight_decay: 0.0500 (0.0500) time: 1.7163 data: 0.9565 max mem: 55573 Epoch: [278] [ 20/156] eta: 0:02:40 lr: 0.000121 min_lr: 0.000121 loss: 2.9924 (2.9340) weight_decay: 0.0500 (0.0500) time: 0.6133 data: 0.0055 max mem: 55573 Epoch: [278] [ 30/156] eta: 0:02:04 lr: 0.000120 min_lr: 0.000120 loss: 3.0376 (2.9080) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [278] [ 40/156] eta: 0:01:43 lr: 0.000119 min_lr: 0.000119 loss: 3.0005 (2.9066) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [278] [ 50/156] eta: 0:01:28 lr: 0.000119 min_lr: 0.000119 loss: 3.0577 (2.9084) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0004 max mem: 55573 Epoch: [278] [ 60/156] eta: 0:01:16 lr: 0.000118 min_lr: 0.000118 loss: 3.0817 (2.9299) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [278] [ 70/156] eta: 0:01:05 lr: 0.000117 min_lr: 0.000117 loss: 3.0773 (2.9269) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [278] [ 80/156] eta: 0:00:56 lr: 0.000117 min_lr: 0.000117 loss: 2.9015 (2.9232) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [278] [ 90/156] eta: 0:00:47 lr: 0.000116 min_lr: 0.000116 loss: 2.9713 (2.9236) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [278] [100/156] eta: 0:00:39 lr: 0.000115 min_lr: 0.000115 loss: 2.9572 (2.9118) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [278] [110/156] eta: 0:00:32 lr: 0.000115 min_lr: 0.000115 loss: 2.8622 (2.9095) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [278] [120/156] eta: 0:00:24 lr: 0.000114 min_lr: 0.000114 loss: 2.8601 (2.8911) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [278] [130/156] eta: 0:00:17 lr: 0.000113 min_lr: 0.000113 loss: 2.6864 (2.8749) weight_decay: 0.0500 (0.0500) time: 0.5922 data: 0.0011 max mem: 55573 Epoch: [278] [140/156] eta: 0:00:10 lr: 0.000113 min_lr: 0.000113 loss: 2.8828 (2.8786) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0009 max mem: 55573 Epoch: [278] [150/156] eta: 0:00:04 lr: 0.000112 min_lr: 0.000112 loss: 2.8997 (2.8695) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [278] [155/156] eta: 0:00:00 lr: 0.000112 min_lr: 0.000112 loss: 2.9700 (2.8790) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [278] Total time: 0:01:45 (0.6761 s / it) Averaged stats: lr: 0.000112 min_lr: 0.000112 loss: 2.9700 (2.8761) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8284 (0.8284) acc1: 84.7656 (84.7656) acc5: 97.2656 (97.2656) time: 6.8877 data: 6.6502 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9250 (0.9071) acc1: 80.8594 (80.8320) acc5: 97.2656 (96.2560) time: 1.5241 data: 1.3301 max mem: 55573 Test: Total time: 0:00:07 (1.5644 s / it) * Acc@1 81.774 Acc@5 95.948 loss 0.896 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.78% Test: [0/5] eta: 0:00:36 loss: 0.9767 (0.9767) acc1: 76.1719 (76.1719) acc5: 93.6198 (93.6198) time: 7.2233 data: 6.9874 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0075 (1.0268) acc1: 73.9583 (71.6800) acc5: 93.6198 (91.2000) time: 1.5900 data: 1.3976 max mem: 55573 Test: Total time: 0:00:08 (1.6082 s / it) * Acc@1 72.418 Acc@5 91.306 loss 1.045 Accuracy of the model EMA on 50000 test images: 72.4% Max EMA accuracy: 72.42% Epoch: [279] [ 0/156] eta: 0:32:15 lr: 0.000112 min_lr: 0.000112 loss: 3.1425 (3.1425) weight_decay: 0.0500 (0.0500) time: 12.4087 data: 8.8257 max mem: 55573 Epoch: [279] [ 10/156] eta: 0:04:04 lr: 0.000111 min_lr: 0.000111 loss: 2.7046 (2.6851) weight_decay: 0.0500 (0.0500) time: 1.6730 data: 0.8028 max mem: 55573 Epoch: [279] [ 20/156] eta: 0:02:37 lr: 0.000110 min_lr: 0.000110 loss: 2.7398 (2.7332) weight_decay: 0.0500 (0.0500) time: 0.5951 data: 0.0004 max mem: 55573 Epoch: [279] [ 30/156] eta: 0:02:02 lr: 0.000110 min_lr: 0.000110 loss: 2.9136 (2.8033) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [279] [ 40/156] eta: 0:01:42 lr: 0.000109 min_lr: 0.000109 loss: 2.9491 (2.8089) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [279] [ 50/156] eta: 0:01:27 lr: 0.000108 min_lr: 0.000108 loss: 2.8078 (2.8036) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [279] [ 60/156] eta: 0:01:15 lr: 0.000108 min_lr: 0.000108 loss: 2.8078 (2.7795) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [279] [ 70/156] eta: 0:01:05 lr: 0.000107 min_lr: 0.000107 loss: 2.9958 (2.7999) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [279] [ 80/156] eta: 0:00:56 lr: 0.000106 min_lr: 0.000106 loss: 3.1198 (2.8321) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [279] [ 90/156] eta: 0:00:47 lr: 0.000106 min_lr: 0.000106 loss: 3.0668 (2.8269) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [279] [100/156] eta: 0:00:39 lr: 0.000105 min_lr: 0.000105 loss: 2.6445 (2.8105) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [279] [110/156] eta: 0:00:32 lr: 0.000104 min_lr: 0.000104 loss: 2.8839 (2.8216) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [279] [120/156] eta: 0:00:24 lr: 0.000104 min_lr: 0.000104 loss: 2.9987 (2.8163) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [279] [130/156] eta: 0:00:17 lr: 0.000103 min_lr: 0.000103 loss: 2.9983 (2.8309) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0011 max mem: 55573 Epoch: [279] [140/156] eta: 0:00:10 lr: 0.000102 min_lr: 0.000102 loss: 2.8902 (2.8361) weight_decay: 0.0500 (0.0500) time: 0.5880 data: 0.0010 max mem: 55573 Epoch: [279] [150/156] eta: 0:00:04 lr: 0.000102 min_lr: 0.000102 loss: 2.9304 (2.8404) weight_decay: 0.0500 (0.0500) time: 0.5837 data: 0.0001 max mem: 55573 Epoch: [279] [155/156] eta: 0:00:00 lr: 0.000101 min_lr: 0.000101 loss: 2.8936 (2.8429) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [279] Total time: 0:01:44 (0.6726 s / it) Averaged stats: lr: 0.000101 min_lr: 0.000101 loss: 2.8936 (2.8742) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8033 (0.8033) acc1: 84.7656 (84.7656) acc5: 97.1354 (97.1354) time: 6.7411 data: 6.5042 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8964 (0.8874) acc1: 82.0312 (80.9920) acc5: 97.0052 (96.0640) time: 1.4935 data: 1.3009 max mem: 55573 Test: Total time: 0:00:07 (1.5106 s / it) * Acc@1 81.848 Acc@5 95.994 loss 0.874 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.85% Test: [0/5] eta: 0:00:34 loss: 0.9745 (0.9745) acc1: 76.1719 (76.1719) acc5: 93.6198 (93.6198) time: 6.8288 data: 6.5927 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0054 (1.0248) acc1: 74.0885 (71.7760) acc5: 93.6198 (91.2000) time: 1.5111 data: 1.3187 max mem: 55573 Test: Total time: 0:00:07 (1.5306 s / it) * Acc@1 72.490 Acc@5 91.336 loss 1.042 Accuracy of the model EMA on 50000 test images: 72.5% Max EMA accuracy: 72.49% Epoch: [280] [ 0/156] eta: 0:33:11 lr: 0.000101 min_lr: 0.000101 loss: 2.3962 (2.3962) weight_decay: 0.0500 (0.0500) time: 12.7652 data: 11.1993 max mem: 55573 Epoch: [280] [ 10/156] eta: 0:04:13 lr: 0.000101 min_lr: 0.000101 loss: 3.0947 (3.0152) weight_decay: 0.0500 (0.0500) time: 1.7384 data: 1.0559 max mem: 55573 Epoch: [280] [ 20/156] eta: 0:02:42 lr: 0.000100 min_lr: 0.000100 loss: 3.0747 (3.0042) weight_decay: 0.0500 (0.0500) time: 0.6144 data: 0.0210 max mem: 55573 Epoch: [280] [ 30/156] eta: 0:02:05 lr: 0.000099 min_lr: 0.000099 loss: 3.0030 (2.9311) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0005 max mem: 55573 Epoch: [280] [ 40/156] eta: 0:01:44 lr: 0.000099 min_lr: 0.000099 loss: 2.9751 (2.9596) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [280] [ 50/156] eta: 0:01:28 lr: 0.000098 min_lr: 0.000098 loss: 3.0363 (2.9141) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [280] [ 60/156] eta: 0:01:16 lr: 0.000097 min_lr: 0.000097 loss: 3.0363 (2.9013) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [280] [ 70/156] eta: 0:01:06 lr: 0.000097 min_lr: 0.000097 loss: 3.0472 (2.8983) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [280] [ 80/156] eta: 0:00:56 lr: 0.000096 min_lr: 0.000096 loss: 3.0177 (2.8970) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0004 max mem: 55573 Epoch: [280] [ 90/156] eta: 0:00:48 lr: 0.000096 min_lr: 0.000096 loss: 2.9567 (2.8680) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [280] [100/156] eta: 0:00:40 lr: 0.000095 min_lr: 0.000095 loss: 2.8161 (2.8537) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [280] [110/156] eta: 0:00:32 lr: 0.000094 min_lr: 0.000094 loss: 2.9916 (2.8603) weight_decay: 0.0500 (0.0500) time: 0.5947 data: 0.0004 max mem: 55573 Epoch: [280] [120/156] eta: 0:00:25 lr: 0.000094 min_lr: 0.000094 loss: 2.9916 (2.8345) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0004 max mem: 55573 Epoch: [280] [130/156] eta: 0:00:17 lr: 0.000093 min_lr: 0.000093 loss: 2.7402 (2.8309) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0010 max mem: 55573 Epoch: [280] [140/156] eta: 0:00:10 lr: 0.000093 min_lr: 0.000093 loss: 3.0753 (2.8456) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0009 max mem: 55573 Epoch: [280] [150/156] eta: 0:00:04 lr: 0.000092 min_lr: 0.000092 loss: 3.0745 (2.8548) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0002 max mem: 55573 Epoch: [280] [155/156] eta: 0:00:00 lr: 0.000092 min_lr: 0.000092 loss: 3.0479 (2.8493) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [280] Total time: 0:01:45 (0.6781 s / it) Averaged stats: lr: 0.000092 min_lr: 0.000092 loss: 3.0479 (2.8715) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7918 (0.7918) acc1: 85.4167 (85.4167) acc5: 97.3958 (97.3958) time: 6.8633 data: 6.6259 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8881 (0.8745) acc1: 81.2500 (80.8640) acc5: 97.1354 (96.1280) time: 1.5191 data: 1.3253 max mem: 55573 Test: Total time: 0:00:07 (1.5686 s / it) * Acc@1 81.742 Acc@5 95.994 loss 0.860 Accuracy of the model on the 50000 test images: 81.7% Max accuracy: 81.85% Test: [0/5] eta: 0:00:36 loss: 0.9722 (0.9722) acc1: 76.1719 (76.1719) acc5: 93.6198 (93.6198) time: 7.2602 data: 7.0242 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0031 (1.0224) acc1: 74.2188 (71.8400) acc5: 93.6198 (91.2000) time: 1.5977 data: 1.4049 max mem: 55573 Test: Total time: 0:00:08 (1.6147 s / it) * Acc@1 72.546 Acc@5 91.372 loss 1.040 Accuracy of the model EMA on 50000 test images: 72.5% Max EMA accuracy: 72.55% Epoch: [281] [ 0/156] eta: 0:32:06 lr: 0.000092 min_lr: 0.000092 loss: 2.1736 (2.1736) weight_decay: 0.0500 (0.0500) time: 12.3490 data: 10.6832 max mem: 55573 Epoch: [281] [ 10/156] eta: 0:04:06 lr: 0.000091 min_lr: 0.000091 loss: 3.0667 (2.8989) weight_decay: 0.0500 (0.0500) time: 1.6903 data: 0.9716 max mem: 55573 Epoch: [281] [ 20/156] eta: 0:02:38 lr: 0.000090 min_lr: 0.000090 loss: 3.0390 (2.9200) weight_decay: 0.0500 (0.0500) time: 0.6096 data: 0.0004 max mem: 55573 Epoch: [281] [ 30/156] eta: 0:02:03 lr: 0.000090 min_lr: 0.000090 loss: 2.8059 (2.8474) weight_decay: 0.0500 (0.0500) time: 0.5956 data: 0.0004 max mem: 55573 Epoch: [281] [ 40/156] eta: 0:01:43 lr: 0.000089 min_lr: 0.000089 loss: 2.7517 (2.8580) weight_decay: 0.0500 (0.0500) time: 0.5934 data: 0.0005 max mem: 55573 Epoch: [281] [ 50/156] eta: 0:01:27 lr: 0.000089 min_lr: 0.000089 loss: 3.0054 (2.8560) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [281] [ 60/156] eta: 0:01:15 lr: 0.000088 min_lr: 0.000088 loss: 2.7002 (2.8261) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [281] [ 70/156] eta: 0:01:05 lr: 0.000087 min_lr: 0.000087 loss: 2.9783 (2.8512) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0006 max mem: 55573 Epoch: [281] [ 80/156] eta: 0:00:56 lr: 0.000087 min_lr: 0.000087 loss: 2.9783 (2.8571) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0006 max mem: 55573 Epoch: [281] [ 90/156] eta: 0:00:47 lr: 0.000086 min_lr: 0.000086 loss: 2.9241 (2.8747) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [281] [100/156] eta: 0:00:39 lr: 0.000086 min_lr: 0.000086 loss: 3.0839 (2.8883) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [281] [110/156] eta: 0:00:32 lr: 0.000085 min_lr: 0.000085 loss: 3.0449 (2.8735) weight_decay: 0.0500 (0.0500) time: 0.5936 data: 0.0004 max mem: 55573 Epoch: [281] [120/156] eta: 0:00:24 lr: 0.000084 min_lr: 0.000084 loss: 2.6024 (2.8660) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [281] [130/156] eta: 0:00:17 lr: 0.000084 min_lr: 0.000084 loss: 3.0602 (2.8673) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0012 max mem: 55573 Epoch: [281] [140/156] eta: 0:00:10 lr: 0.000083 min_lr: 0.000083 loss: 3.0709 (2.8785) weight_decay: 0.0500 (0.0500) time: 0.5860 data: 0.0011 max mem: 55573 Epoch: [281] [150/156] eta: 0:00:04 lr: 0.000083 min_lr: 0.000083 loss: 2.9858 (2.8694) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [281] [155/156] eta: 0:00:00 lr: 0.000082 min_lr: 0.000082 loss: 2.9858 (2.8795) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [281] Total time: 0:01:45 (0.6748 s / it) Averaged stats: lr: 0.000082 min_lr: 0.000082 loss: 2.9858 (2.8654) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8159 (0.8159) acc1: 85.0260 (85.0260) acc5: 97.1354 (97.1354) time: 6.7516 data: 6.5145 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9061 (0.8973) acc1: 81.6406 (80.8000) acc5: 97.1354 (95.9360) time: 1.4968 data: 1.3030 max mem: 55573 Test: Total time: 0:00:07 (1.5451 s / it) * Acc@1 81.778 Acc@5 95.934 loss 0.879 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.85% Test: [0/5] eta: 0:00:36 loss: 0.9698 (0.9698) acc1: 76.0417 (76.0417) acc5: 93.6198 (93.6198) time: 7.2125 data: 6.9765 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 1.0008 (1.0201) acc1: 74.2188 (71.8080) acc5: 93.6198 (91.2320) time: 1.5878 data: 1.3954 max mem: 55573 Test: Total time: 0:00:08 (1.6046 s / it) * Acc@1 72.590 Acc@5 91.394 loss 1.038 Accuracy of the model EMA on 50000 test images: 72.6% Max EMA accuracy: 72.59% Epoch: [282] [ 0/156] eta: 0:36:14 lr: 0.000082 min_lr: 0.000082 loss: 3.0202 (3.0202) weight_decay: 0.0500 (0.0500) time: 13.9371 data: 11.6352 max mem: 55573 Epoch: [282] [ 10/156] eta: 0:04:23 lr: 0.000082 min_lr: 0.000082 loss: 2.9689 (2.9441) weight_decay: 0.0500 (0.0500) time: 1.8027 data: 1.0581 max mem: 55573 Epoch: [282] [ 20/156] eta: 0:02:46 lr: 0.000081 min_lr: 0.000081 loss: 2.8565 (2.8519) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [282] [ 30/156] eta: 0:02:08 lr: 0.000081 min_lr: 0.000081 loss: 2.7889 (2.7863) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0004 max mem: 55573 Epoch: [282] [ 40/156] eta: 0:01:46 lr: 0.000080 min_lr: 0.000080 loss: 2.8706 (2.8569) weight_decay: 0.0500 (0.0500) time: 0.5942 data: 0.0004 max mem: 55573 Epoch: [282] [ 50/156] eta: 0:01:30 lr: 0.000079 min_lr: 0.000079 loss: 2.9835 (2.8911) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [282] [ 60/156] eta: 0:01:17 lr: 0.000079 min_lr: 0.000079 loss: 3.0733 (2.8991) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [282] [ 70/156] eta: 0:01:07 lr: 0.000078 min_lr: 0.000078 loss: 2.9419 (2.8848) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [282] [ 80/156] eta: 0:00:57 lr: 0.000078 min_lr: 0.000078 loss: 2.8939 (2.8882) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [282] [ 90/156] eta: 0:00:48 lr: 0.000077 min_lr: 0.000077 loss: 3.0421 (2.8891) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [282] [100/156] eta: 0:00:40 lr: 0.000077 min_lr: 0.000077 loss: 3.0722 (2.8761) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0004 max mem: 55573 Epoch: [282] [110/156] eta: 0:00:32 lr: 0.000076 min_lr: 0.000076 loss: 2.9012 (2.8743) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [282] [120/156] eta: 0:00:25 lr: 0.000076 min_lr: 0.000076 loss: 3.0392 (2.8910) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [282] [130/156] eta: 0:00:18 lr: 0.000075 min_lr: 0.000075 loss: 3.0219 (2.8991) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0010 max mem: 55573 Epoch: [282] [140/156] eta: 0:00:10 lr: 0.000074 min_lr: 0.000074 loss: 3.0189 (2.8973) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0009 max mem: 55573 Epoch: [282] [150/156] eta: 0:00:04 lr: 0.000074 min_lr: 0.000074 loss: 3.0009 (2.8926) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [282] [155/156] eta: 0:00:00 lr: 0.000074 min_lr: 0.000074 loss: 2.9796 (2.8898) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [282] Total time: 0:01:46 (0.6821 s / it) Averaged stats: lr: 0.000074 min_lr: 0.000074 loss: 2.9796 (2.8730) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8014 (0.8014) acc1: 84.6354 (84.6354) acc5: 97.2656 (97.2656) time: 7.1694 data: 6.9319 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8963 (0.8847) acc1: 81.6406 (80.6400) acc5: 97.2656 (96.0640) time: 1.5804 data: 1.3865 max mem: 55573 Test: Total time: 0:00:08 (1.6311 s / it) * Acc@1 81.884 Acc@5 95.984 loss 0.869 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.88% Test: [0/5] eta: 0:00:35 loss: 0.9674 (0.9674) acc1: 76.0417 (76.0417) acc5: 93.6198 (93.6198) time: 7.0038 data: 6.7676 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9984 (1.0179) acc1: 74.0885 (71.8080) acc5: 93.6198 (91.2320) time: 1.5462 data: 1.3536 max mem: 55573 Test: Total time: 0:00:07 (1.5661 s / it) * Acc@1 72.644 Acc@5 91.416 loss 1.035 Accuracy of the model EMA on 50000 test images: 72.6% Max EMA accuracy: 72.64% Epoch: [283] [ 0/156] eta: 0:32:09 lr: 0.000074 min_lr: 0.000074 loss: 3.0168 (3.0168) weight_decay: 0.0500 (0.0500) time: 12.3688 data: 11.7568 max mem: 55573 Epoch: [283] [ 10/156] eta: 0:04:15 lr: 0.000073 min_lr: 0.000073 loss: 3.1233 (3.0783) weight_decay: 0.0500 (0.0500) time: 1.7478 data: 1.1411 max mem: 55573 Epoch: [283] [ 20/156] eta: 0:02:42 lr: 0.000072 min_lr: 0.000072 loss: 3.1132 (3.0332) weight_decay: 0.0500 (0.0500) time: 0.6378 data: 0.0400 max mem: 55573 Epoch: [283] [ 30/156] eta: 0:02:06 lr: 0.000072 min_lr: 0.000072 loss: 3.0557 (2.9833) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [283] [ 40/156] eta: 0:01:44 lr: 0.000071 min_lr: 0.000071 loss: 3.1337 (3.0206) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0005 max mem: 55573 Epoch: [283] [ 50/156] eta: 0:01:29 lr: 0.000071 min_lr: 0.000071 loss: 3.2321 (3.0197) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0004 max mem: 55573 Epoch: [283] [ 60/156] eta: 0:01:16 lr: 0.000070 min_lr: 0.000070 loss: 2.9087 (2.9625) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0004 max mem: 55573 Epoch: [283] [ 70/156] eta: 0:01:06 lr: 0.000070 min_lr: 0.000070 loss: 2.8530 (2.9575) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [283] [ 80/156] eta: 0:00:56 lr: 0.000069 min_lr: 0.000069 loss: 3.0562 (2.9503) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0004 max mem: 55573 Epoch: [283] [ 90/156] eta: 0:00:48 lr: 0.000069 min_lr: 0.000069 loss: 2.9420 (2.9419) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [283] [100/156] eta: 0:00:40 lr: 0.000068 min_lr: 0.000068 loss: 3.0544 (2.9514) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0005 max mem: 55573 Epoch: [283] [110/156] eta: 0:00:32 lr: 0.000068 min_lr: 0.000068 loss: 3.0339 (2.9367) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0004 max mem: 55573 Epoch: [283] [120/156] eta: 0:00:25 lr: 0.000067 min_lr: 0.000067 loss: 2.8495 (2.9319) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [283] [130/156] eta: 0:00:17 lr: 0.000067 min_lr: 0.000067 loss: 2.9518 (2.9290) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0012 max mem: 55573 Epoch: [283] [140/156] eta: 0:00:10 lr: 0.000066 min_lr: 0.000066 loss: 2.7645 (2.9049) weight_decay: 0.0500 (0.0500) time: 0.5866 data: 0.0011 max mem: 55573 Epoch: [283] [150/156] eta: 0:00:04 lr: 0.000066 min_lr: 0.000066 loss: 2.9573 (2.9131) weight_decay: 0.0500 (0.0500) time: 0.5836 data: 0.0001 max mem: 55573 Epoch: [283] [155/156] eta: 0:00:00 lr: 0.000065 min_lr: 0.000065 loss: 3.0765 (2.9156) weight_decay: 0.0500 (0.0500) time: 0.5834 data: 0.0001 max mem: 55573 Epoch: [283] Total time: 0:01:45 (0.6783 s / it) Averaged stats: lr: 0.000065 min_lr: 0.000065 loss: 3.0765 (2.8682) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8314 (0.8314) acc1: 84.5052 (84.5052) acc5: 97.3958 (97.3958) time: 6.9747 data: 6.7373 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9257 (0.9127) acc1: 81.6406 (80.7040) acc5: 97.3958 (96.2240) time: 1.5414 data: 1.3475 max mem: 55573 Test: Total time: 0:00:07 (1.5851 s / it) * Acc@1 81.772 Acc@5 96.022 loss 0.896 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.88% Test: [0/5] eta: 0:00:36 loss: 0.9649 (0.9649) acc1: 76.3021 (76.3021) acc5: 93.6198 (93.6198) time: 7.3321 data: 7.0960 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9959 (1.0154) acc1: 74.3490 (72.0640) acc5: 93.6198 (91.2320) time: 1.6118 data: 1.4193 max mem: 55573 Test: Total time: 0:00:08 (1.6319 s / it) * Acc@1 72.742 Acc@5 91.460 loss 1.032 Accuracy of the model EMA on 50000 test images: 72.7% Max EMA accuracy: 72.74% Epoch: [284] [ 0/156] eta: 0:36:12 lr: 0.000065 min_lr: 0.000065 loss: 2.0253 (2.0253) weight_decay: 0.0500 (0.0500) time: 13.9276 data: 9.3562 max mem: 55573 Epoch: [284] [ 10/156] eta: 0:04:23 lr: 0.000065 min_lr: 0.000065 loss: 2.9073 (2.7983) weight_decay: 0.0500 (0.0500) time: 1.8067 data: 0.8509 max mem: 55573 Epoch: [284] [ 20/156] eta: 0:02:46 lr: 0.000064 min_lr: 0.000064 loss: 2.7498 (2.7712) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [284] [ 30/156] eta: 0:02:08 lr: 0.000064 min_lr: 0.000064 loss: 2.7498 (2.8487) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [284] [ 40/156] eta: 0:01:46 lr: 0.000063 min_lr: 0.000063 loss: 2.9884 (2.8287) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [284] [ 50/156] eta: 0:01:30 lr: 0.000063 min_lr: 0.000063 loss: 3.0362 (2.8830) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0004 max mem: 55573 Epoch: [284] [ 60/156] eta: 0:01:17 lr: 0.000062 min_lr: 0.000062 loss: 3.1333 (2.8965) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [284] [ 70/156] eta: 0:01:07 lr: 0.000062 min_lr: 0.000062 loss: 2.9779 (2.8773) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [284] [ 80/156] eta: 0:00:57 lr: 0.000061 min_lr: 0.000061 loss: 2.9779 (2.9010) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [284] [ 90/156] eta: 0:00:48 lr: 0.000061 min_lr: 0.000061 loss: 2.9566 (2.8996) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [284] [100/156] eta: 0:00:40 lr: 0.000060 min_lr: 0.000060 loss: 2.9100 (2.8927) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [284] [110/156] eta: 0:00:32 lr: 0.000060 min_lr: 0.000060 loss: 2.8789 (2.8678) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [284] [120/156] eta: 0:00:25 lr: 0.000059 min_lr: 0.000059 loss: 2.7212 (2.8564) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [284] [130/156] eta: 0:00:18 lr: 0.000059 min_lr: 0.000059 loss: 2.9992 (2.8617) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0024 max mem: 55573 Epoch: [284] [140/156] eta: 0:00:10 lr: 0.000058 min_lr: 0.000058 loss: 2.9992 (2.8591) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0022 max mem: 55573 Epoch: [284] [150/156] eta: 0:00:04 lr: 0.000058 min_lr: 0.000058 loss: 2.8909 (2.8476) weight_decay: 0.0500 (0.0500) time: 0.5832 data: 0.0001 max mem: 55573 Epoch: [284] [155/156] eta: 0:00:00 lr: 0.000058 min_lr: 0.000058 loss: 2.9291 (2.8522) weight_decay: 0.0500 (0.0500) time: 0.5827 data: 0.0001 max mem: 55573 Epoch: [284] Total time: 0:01:46 (0.6822 s / it) Averaged stats: lr: 0.000058 min_lr: 0.000058 loss: 2.9291 (2.8765) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.7862 (0.7862) acc1: 84.6354 (84.6354) acc5: 97.2656 (97.2656) time: 6.6780 data: 6.4408 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8816 (0.8692) acc1: 81.9010 (80.8960) acc5: 97.1354 (96.0960) time: 1.4821 data: 1.2883 max mem: 55573 Test: Total time: 0:00:07 (1.5303 s / it) * Acc@1 81.904 Acc@5 96.012 loss 0.854 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.90% Test: [0/5] eta: 0:00:34 loss: 0.9623 (0.9623) acc1: 76.5625 (76.5625) acc5: 93.7500 (93.7500) time: 6.9725 data: 6.7365 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9935 (1.0130) acc1: 74.2188 (72.1600) acc5: 93.7500 (91.2640) time: 1.5398 data: 1.3474 max mem: 55573 Test: Total time: 0:00:07 (1.5574 s / it) * Acc@1 72.846 Acc@5 91.492 loss 1.030 Accuracy of the model EMA on 50000 test images: 72.8% Max EMA accuracy: 72.85% Epoch: [285] [ 0/156] eta: 0:35:37 lr: 0.000058 min_lr: 0.000058 loss: 2.3760 (2.3760) weight_decay: 0.0500 (0.0500) time: 13.7018 data: 12.5015 max mem: 55573 Epoch: [285] [ 10/156] eta: 0:04:20 lr: 0.000057 min_lr: 0.000057 loss: 3.1132 (2.9954) weight_decay: 0.0500 (0.0500) time: 1.7849 data: 1.1369 max mem: 55573 Epoch: [285] [ 20/156] eta: 0:02:45 lr: 0.000057 min_lr: 0.000057 loss: 3.0209 (2.8511) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0004 max mem: 55573 Epoch: [285] [ 30/156] eta: 0:02:08 lr: 0.000056 min_lr: 0.000056 loss: 2.9831 (2.8225) weight_decay: 0.0500 (0.0500) time: 0.5954 data: 0.0004 max mem: 55573 Epoch: [285] [ 40/156] eta: 0:01:45 lr: 0.000056 min_lr: 0.000056 loss: 3.0889 (2.8674) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0004 max mem: 55573 Epoch: [285] [ 50/156] eta: 0:01:30 lr: 0.000055 min_lr: 0.000055 loss: 2.9176 (2.8097) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [285] [ 60/156] eta: 0:01:17 lr: 0.000055 min_lr: 0.000055 loss: 2.7905 (2.8267) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0005 max mem: 55573 Epoch: [285] [ 70/156] eta: 0:01:06 lr: 0.000054 min_lr: 0.000054 loss: 2.7905 (2.7906) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0004 max mem: 55573 Epoch: [285] [ 80/156] eta: 0:00:57 lr: 0.000054 min_lr: 0.000054 loss: 2.9389 (2.7998) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0004 max mem: 55573 Epoch: [285] [ 90/156] eta: 0:00:48 lr: 0.000053 min_lr: 0.000053 loss: 3.0323 (2.8112) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [285] [100/156] eta: 0:00:40 lr: 0.000053 min_lr: 0.000053 loss: 2.8982 (2.7953) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [285] [110/156] eta: 0:00:32 lr: 0.000052 min_lr: 0.000052 loss: 2.8344 (2.7935) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [285] [120/156] eta: 0:00:25 lr: 0.000052 min_lr: 0.000052 loss: 3.0923 (2.8005) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [285] [130/156] eta: 0:00:17 lr: 0.000051 min_lr: 0.000051 loss: 3.0472 (2.8067) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0010 max mem: 55573 Epoch: [285] [140/156] eta: 0:00:10 lr: 0.000051 min_lr: 0.000051 loss: 3.0472 (2.8179) weight_decay: 0.0500 (0.0500) time: 0.5864 data: 0.0009 max mem: 55573 Epoch: [285] [150/156] eta: 0:00:04 lr: 0.000051 min_lr: 0.000051 loss: 3.0727 (2.8310) weight_decay: 0.0500 (0.0500) time: 0.5824 data: 0.0001 max mem: 55573 Epoch: [285] [155/156] eta: 0:00:00 lr: 0.000050 min_lr: 0.000050 loss: 3.0727 (2.8288) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [285] Total time: 0:01:46 (0.6814 s / it) Averaged stats: lr: 0.000050 min_lr: 0.000050 loss: 3.0727 (2.8518) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8161 (0.8161) acc1: 84.7656 (84.7656) acc5: 97.1354 (97.1354) time: 7.0845 data: 6.8471 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9113 (0.8990) acc1: 82.0312 (80.8320) acc5: 97.1354 (96.0320) time: 1.5634 data: 1.3695 max mem: 55573 Test: Total time: 0:00:08 (1.6085 s / it) * Acc@1 81.890 Acc@5 96.024 loss 0.884 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.90% Test: [0/5] eta: 0:00:35 loss: 0.9599 (0.9599) acc1: 76.5625 (76.5625) acc5: 93.7500 (93.7500) time: 7.1427 data: 6.9066 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9911 (1.0105) acc1: 74.2188 (72.2560) acc5: 93.7500 (91.3600) time: 1.5739 data: 1.3814 max mem: 55573 Test: Total time: 0:00:07 (1.5906 s / it) * Acc@1 72.920 Acc@5 91.532 loss 1.027 Accuracy of the model EMA on 50000 test images: 72.9% Max EMA accuracy: 72.92% Epoch: [286] [ 0/156] eta: 0:35:22 lr: 0.000050 min_lr: 0.000050 loss: 2.5102 (2.5102) weight_decay: 0.0500 (0.0500) time: 13.6086 data: 10.7925 max mem: 55573 Epoch: [286] [ 10/156] eta: 0:04:18 lr: 0.000050 min_lr: 0.000050 loss: 3.1600 (2.8889) weight_decay: 0.0500 (0.0500) time: 1.7730 data: 0.9816 max mem: 55573 Epoch: [286] [ 20/156] eta: 0:02:44 lr: 0.000049 min_lr: 0.000049 loss: 3.0893 (2.8497) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [286] [ 30/156] eta: 0:02:07 lr: 0.000049 min_lr: 0.000049 loss: 3.1179 (2.8952) weight_decay: 0.0500 (0.0500) time: 0.5893 data: 0.0005 max mem: 55573 Epoch: [286] [ 40/156] eta: 0:01:45 lr: 0.000048 min_lr: 0.000048 loss: 3.1147 (2.8701) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0004 max mem: 55573 Epoch: [286] [ 50/156] eta: 0:01:29 lr: 0.000048 min_lr: 0.000048 loss: 2.9239 (2.8835) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [286] [ 60/156] eta: 0:01:17 lr: 0.000048 min_lr: 0.000048 loss: 2.7915 (2.8409) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [286] [ 70/156] eta: 0:01:06 lr: 0.000047 min_lr: 0.000047 loss: 2.7915 (2.8474) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [286] [ 80/156] eta: 0:00:57 lr: 0.000047 min_lr: 0.000047 loss: 2.8335 (2.8279) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [286] [ 90/156] eta: 0:00:48 lr: 0.000046 min_lr: 0.000046 loss: 2.8335 (2.8383) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [286] [100/156] eta: 0:00:40 lr: 0.000046 min_lr: 0.000046 loss: 3.1145 (2.8624) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0006 max mem: 55573 Epoch: [286] [110/156] eta: 0:00:32 lr: 0.000045 min_lr: 0.000045 loss: 3.1145 (2.8739) weight_decay: 0.0500 (0.0500) time: 0.5890 data: 0.0005 max mem: 55573 Epoch: [286] [120/156] eta: 0:00:25 lr: 0.000045 min_lr: 0.000045 loss: 3.1064 (2.8774) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0004 max mem: 55573 Epoch: [286] [130/156] eta: 0:00:17 lr: 0.000045 min_lr: 0.000045 loss: 3.0182 (2.8813) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0010 max mem: 55573 Epoch: [286] [140/156] eta: 0:00:10 lr: 0.000044 min_lr: 0.000044 loss: 2.9897 (2.8798) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [286] [150/156] eta: 0:00:04 lr: 0.000044 min_lr: 0.000044 loss: 2.9253 (2.8724) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [286] [155/156] eta: 0:00:00 lr: 0.000044 min_lr: 0.000044 loss: 2.9253 (2.8699) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [286] Total time: 0:01:46 (0.6795 s / it) Averaged stats: lr: 0.000044 min_lr: 0.000044 loss: 2.9253 (2.8477) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7928 (0.7928) acc1: 84.3750 (84.3750) acc5: 97.1354 (97.1354) time: 6.8678 data: 6.6306 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8845 (0.8724) acc1: 82.0312 (80.9280) acc5: 97.1354 (96.1600) time: 1.5198 data: 1.3262 max mem: 55573 Test: Total time: 0:00:07 (1.5672 s / it) * Acc@1 81.880 Acc@5 96.008 loss 0.857 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.90% Test: [0/5] eta: 0:00:35 loss: 0.9572 (0.9572) acc1: 76.6927 (76.6927) acc5: 93.7500 (93.7500) time: 7.0800 data: 6.8415 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9883 (1.0078) acc1: 74.3490 (72.3200) acc5: 93.7500 (91.3600) time: 1.5614 data: 1.3684 max mem: 55573 Test: Total time: 0:00:07 (1.5840 s / it) * Acc@1 72.992 Acc@5 91.570 loss 1.024 Accuracy of the model EMA on 50000 test images: 73.0% Max EMA accuracy: 72.99% Epoch: [287] [ 0/156] eta: 0:32:03 lr: 0.000043 min_lr: 0.000043 loss: 3.1491 (3.1491) weight_decay: 0.0500 (0.0500) time: 12.3294 data: 8.6382 max mem: 55573 Epoch: [287] [ 10/156] eta: 0:04:17 lr: 0.000043 min_lr: 0.000043 loss: 3.1491 (2.8690) weight_decay: 0.0500 (0.0500) time: 1.7654 data: 0.7859 max mem: 55573 Epoch: [287] [ 20/156] eta: 0:02:43 lr: 0.000043 min_lr: 0.000043 loss: 2.9434 (2.8666) weight_decay: 0.0500 (0.0500) time: 0.6497 data: 0.0005 max mem: 55573 Epoch: [287] [ 30/156] eta: 0:02:06 lr: 0.000042 min_lr: 0.000042 loss: 3.0048 (2.8840) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [287] [ 40/156] eta: 0:01:45 lr: 0.000042 min_lr: 0.000042 loss: 3.0175 (2.8688) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0005 max mem: 55573 Epoch: [287] [ 50/156] eta: 0:01:29 lr: 0.000041 min_lr: 0.000041 loss: 2.9220 (2.8303) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [287] [ 60/156] eta: 0:01:17 lr: 0.000041 min_lr: 0.000041 loss: 2.9421 (2.8355) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [287] [ 70/156] eta: 0:01:06 lr: 0.000041 min_lr: 0.000041 loss: 2.7052 (2.8216) weight_decay: 0.0500 (0.0500) time: 0.5926 data: 0.0005 max mem: 55573 Epoch: [287] [ 80/156] eta: 0:00:57 lr: 0.000040 min_lr: 0.000040 loss: 2.6833 (2.8006) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [287] [ 90/156] eta: 0:00:48 lr: 0.000040 min_lr: 0.000040 loss: 2.8434 (2.8047) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0005 max mem: 55573 Epoch: [287] [100/156] eta: 0:00:40 lr: 0.000039 min_lr: 0.000039 loss: 2.8239 (2.7876) weight_decay: 0.0500 (0.0500) time: 0.5898 data: 0.0005 max mem: 55573 Epoch: [287] [110/156] eta: 0:00:32 lr: 0.000039 min_lr: 0.000039 loss: 2.8239 (2.7935) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [287] [120/156] eta: 0:00:25 lr: 0.000039 min_lr: 0.000039 loss: 2.9736 (2.8074) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [287] [130/156] eta: 0:00:17 lr: 0.000038 min_lr: 0.000038 loss: 2.9736 (2.8018) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0011 max mem: 55573 Epoch: [287] [140/156] eta: 0:00:10 lr: 0.000038 min_lr: 0.000038 loss: 2.7893 (2.7998) weight_decay: 0.0500 (0.0500) time: 0.5881 data: 0.0009 max mem: 55573 Epoch: [287] [150/156] eta: 0:00:04 lr: 0.000037 min_lr: 0.000037 loss: 3.0011 (2.8013) weight_decay: 0.0500 (0.0500) time: 0.5829 data: 0.0001 max mem: 55573 Epoch: [287] [155/156] eta: 0:00:00 lr: 0.000037 min_lr: 0.000037 loss: 2.9931 (2.8003) weight_decay: 0.0500 (0.0500) time: 0.5823 data: 0.0001 max mem: 55573 Epoch: [287] Total time: 0:01:45 (0.6794 s / it) Averaged stats: lr: 0.000037 min_lr: 0.000037 loss: 2.9931 (2.8369) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.7850 (0.7850) acc1: 85.1562 (85.1562) acc5: 97.1354 (97.1354) time: 7.2335 data: 6.9961 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8828 (0.8641) acc1: 81.6406 (80.9920) acc5: 96.7448 (95.9680) time: 1.5949 data: 1.3993 max mem: 55573 Test: Total time: 0:00:08 (1.6515 s / it) * Acc@1 81.872 Acc@5 95.924 loss 0.849 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.90% Test: [0/5] eta: 0:00:36 loss: 0.9544 (0.9544) acc1: 76.9531 (76.9531) acc5: 93.7500 (93.7500) time: 7.3306 data: 7.0946 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9856 (1.0051) acc1: 74.6094 (72.4480) acc5: 93.7500 (91.3920) time: 1.6114 data: 1.4190 max mem: 55573 Test: Total time: 0:00:08 (1.6294 s / it) * Acc@1 73.078 Acc@5 91.616 loss 1.021 Accuracy of the model EMA on 50000 test images: 73.1% Max EMA accuracy: 73.08% Epoch: [288] [ 0/156] eta: 0:32:50 lr: 0.000037 min_lr: 0.000037 loss: 3.0875 (3.0875) weight_decay: 0.0500 (0.0500) time: 12.6303 data: 11.2286 max mem: 55573 Epoch: [288] [ 10/156] eta: 0:04:11 lr: 0.000037 min_lr: 0.000037 loss: 3.0723 (2.7767) weight_decay: 0.0500 (0.0500) time: 1.7203 data: 1.0214 max mem: 55573 Epoch: [288] [ 20/156] eta: 0:02:40 lr: 0.000036 min_lr: 0.000036 loss: 3.0515 (2.8677) weight_decay: 0.0500 (0.0500) time: 0.6103 data: 0.0005 max mem: 55573 Epoch: [288] [ 30/156] eta: 0:02:04 lr: 0.000036 min_lr: 0.000036 loss: 3.0456 (2.8959) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0004 max mem: 55573 Epoch: [288] [ 40/156] eta: 0:01:43 lr: 0.000036 min_lr: 0.000036 loss: 3.0456 (2.8856) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [288] [ 50/156] eta: 0:01:28 lr: 0.000035 min_lr: 0.000035 loss: 3.0522 (2.8742) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [288] [ 60/156] eta: 0:01:16 lr: 0.000035 min_lr: 0.000035 loss: 2.7860 (2.8451) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [288] [ 70/156] eta: 0:01:05 lr: 0.000035 min_lr: 0.000035 loss: 2.8750 (2.8649) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0005 max mem: 55573 Epoch: [288] [ 80/156] eta: 0:00:56 lr: 0.000034 min_lr: 0.000034 loss: 3.0016 (2.8752) weight_decay: 0.0500 (0.0500) time: 0.5935 data: 0.0005 max mem: 55573 Epoch: [288] [ 90/156] eta: 0:00:48 lr: 0.000034 min_lr: 0.000034 loss: 2.9417 (2.8646) weight_decay: 0.0500 (0.0500) time: 0.5940 data: 0.0005 max mem: 55573 Epoch: [288] [100/156] eta: 0:00:40 lr: 0.000033 min_lr: 0.000033 loss: 2.8958 (2.8622) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [288] [110/156] eta: 0:00:32 lr: 0.000033 min_lr: 0.000033 loss: 2.9906 (2.8730) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [288] [120/156] eta: 0:00:25 lr: 0.000033 min_lr: 0.000033 loss: 2.9906 (2.8747) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [288] [130/156] eta: 0:00:17 lr: 0.000032 min_lr: 0.000032 loss: 2.8610 (2.8681) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0010 max mem: 55573 Epoch: [288] [140/156] eta: 0:00:10 lr: 0.000032 min_lr: 0.000032 loss: 2.7924 (2.8670) weight_decay: 0.0500 (0.0500) time: 0.5865 data: 0.0009 max mem: 55573 Epoch: [288] [150/156] eta: 0:00:04 lr: 0.000032 min_lr: 0.000032 loss: 3.0264 (2.8810) weight_decay: 0.0500 (0.0500) time: 0.5821 data: 0.0001 max mem: 55573 Epoch: [288] [155/156] eta: 0:00:00 lr: 0.000031 min_lr: 0.000031 loss: 3.0339 (2.8875) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [288] Total time: 0:01:45 (0.6766 s / it) Averaged stats: lr: 0.000031 min_lr: 0.000031 loss: 3.0339 (2.8531) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8507 (0.8507) acc1: 84.8958 (84.8958) acc5: 97.2656 (97.2656) time: 7.0260 data: 6.7886 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9471 (0.9332) acc1: 81.2500 (80.6720) acc5: 96.8750 (95.9680) time: 1.5514 data: 1.3578 max mem: 55573 Test: Total time: 0:00:07 (1.5972 s / it) * Acc@1 81.810 Acc@5 96.010 loss 0.917 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.90% Test: [0/5] eta: 0:00:36 loss: 0.9516 (0.9516) acc1: 77.0833 (77.0833) acc5: 93.7500 (93.7500) time: 7.2111 data: 6.9750 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9830 (1.0024) acc1: 74.6094 (72.5120) acc5: 93.7500 (91.4240) time: 1.5875 data: 1.3951 max mem: 55573 Test: Total time: 0:00:08 (1.6099 s / it) * Acc@1 73.158 Acc@5 91.648 loss 1.018 Accuracy of the model EMA on 50000 test images: 73.2% Max EMA accuracy: 73.16% Epoch: [289] [ 0/156] eta: 0:31:40 lr: 0.000031 min_lr: 0.000031 loss: 3.3554 (3.3554) weight_decay: 0.0500 (0.0500) time: 12.1840 data: 10.2032 max mem: 55573 Epoch: [289] [ 10/156] eta: 0:04:10 lr: 0.000031 min_lr: 0.000031 loss: 3.0822 (3.0132) weight_decay: 0.0500 (0.0500) time: 1.7170 data: 0.9280 max mem: 55573 Epoch: [289] [ 20/156] eta: 0:02:40 lr: 0.000031 min_lr: 0.000031 loss: 3.0480 (2.9721) weight_decay: 0.0500 (0.0500) time: 0.6304 data: 0.0005 max mem: 55573 Epoch: [289] [ 30/156] eta: 0:02:04 lr: 0.000030 min_lr: 0.000030 loss: 3.0946 (2.9647) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [289] [ 40/156] eta: 0:01:43 lr: 0.000030 min_lr: 0.000030 loss: 3.0263 (2.9218) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [289] [ 50/156] eta: 0:01:28 lr: 0.000030 min_lr: 0.000030 loss: 2.9952 (2.9236) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [289] [ 60/156] eta: 0:01:16 lr: 0.000029 min_lr: 0.000029 loss: 3.0477 (2.9167) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [289] [ 70/156] eta: 0:01:05 lr: 0.000029 min_lr: 0.000029 loss: 3.0477 (2.9165) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [289] [ 80/156] eta: 0:00:56 lr: 0.000029 min_lr: 0.000029 loss: 3.0076 (2.8888) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [289] [ 90/156] eta: 0:00:47 lr: 0.000028 min_lr: 0.000028 loss: 2.9021 (2.8940) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [289] [100/156] eta: 0:00:39 lr: 0.000028 min_lr: 0.000028 loss: 2.9760 (2.9047) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [289] [110/156] eta: 0:00:32 lr: 0.000028 min_lr: 0.000028 loss: 2.8991 (2.8903) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [289] [120/156] eta: 0:00:24 lr: 0.000027 min_lr: 0.000027 loss: 2.9122 (2.8848) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [289] [130/156] eta: 0:00:17 lr: 0.000027 min_lr: 0.000027 loss: 2.9122 (2.8825) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0012 max mem: 55573 Epoch: [289] [140/156] eta: 0:00:10 lr: 0.000027 min_lr: 0.000027 loss: 2.8368 (2.8846) weight_decay: 0.0500 (0.0500) time: 0.5862 data: 0.0010 max mem: 55573 Epoch: [289] [150/156] eta: 0:00:04 lr: 0.000026 min_lr: 0.000026 loss: 3.0024 (2.8832) weight_decay: 0.0500 (0.0500) time: 0.5822 data: 0.0001 max mem: 55573 Epoch: [289] [155/156] eta: 0:00:00 lr: 0.000026 min_lr: 0.000026 loss: 2.9468 (2.8817) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [289] Total time: 0:01:45 (0.6756 s / it) Averaged stats: lr: 0.000026 min_lr: 0.000026 loss: 2.9468 (2.8537) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7988 (0.7988) acc1: 84.7656 (84.7656) acc5: 97.2656 (97.2656) time: 6.8551 data: 6.6180 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8995 (0.8808) acc1: 81.3802 (80.8000) acc5: 97.0052 (96.0320) time: 1.5173 data: 1.3237 max mem: 55573 Test: Total time: 0:00:07 (1.5641 s / it) * Acc@1 81.844 Acc@5 96.022 loss 0.867 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.90% Test: [0/5] eta: 0:00:34 loss: 0.9487 (0.9487) acc1: 77.0833 (77.0833) acc5: 93.7500 (93.7500) time: 6.9733 data: 6.7371 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9803 (0.9995) acc1: 74.6094 (72.5760) acc5: 93.7500 (91.4560) time: 1.5400 data: 1.3475 max mem: 55573 Test: Total time: 0:00:07 (1.5645 s / it) * Acc@1 73.224 Acc@5 91.700 loss 1.015 Accuracy of the model EMA on 50000 test images: 73.2% Max EMA accuracy: 73.22% Epoch: [290] [ 0/156] eta: 0:34:02 lr: 0.000026 min_lr: 0.000026 loss: 2.0972 (2.0972) weight_decay: 0.0500 (0.0500) time: 13.0960 data: 11.2859 max mem: 55573 Epoch: [290] [ 10/156] eta: 0:04:12 lr: 0.000026 min_lr: 0.000026 loss: 2.2559 (2.5711) weight_decay: 0.0500 (0.0500) time: 1.7300 data: 1.0264 max mem: 55573 Epoch: [290] [ 20/156] eta: 0:02:41 lr: 0.000026 min_lr: 0.000026 loss: 2.8997 (2.7508) weight_decay: 0.0500 (0.0500) time: 0.5931 data: 0.0005 max mem: 55573 Epoch: [290] [ 30/156] eta: 0:02:05 lr: 0.000025 min_lr: 0.000025 loss: 2.9835 (2.8070) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [290] [ 40/156] eta: 0:01:44 lr: 0.000025 min_lr: 0.000025 loss: 3.0443 (2.8403) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [290] [ 50/156] eta: 0:01:28 lr: 0.000025 min_lr: 0.000025 loss: 3.0395 (2.8589) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [290] [ 60/156] eta: 0:01:16 lr: 0.000024 min_lr: 0.000024 loss: 3.0842 (2.8842) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [290] [ 70/156] eta: 0:01:06 lr: 0.000024 min_lr: 0.000024 loss: 3.1245 (2.9165) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [290] [ 80/156] eta: 0:00:56 lr: 0.000024 min_lr: 0.000024 loss: 3.0605 (2.9162) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [290] [ 90/156] eta: 0:00:48 lr: 0.000023 min_lr: 0.000023 loss: 2.7673 (2.8780) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [290] [100/156] eta: 0:00:40 lr: 0.000023 min_lr: 0.000023 loss: 2.7673 (2.8760) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [290] [110/156] eta: 0:00:32 lr: 0.000023 min_lr: 0.000023 loss: 3.0256 (2.8713) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0004 max mem: 55573 Epoch: [290] [120/156] eta: 0:00:25 lr: 0.000022 min_lr: 0.000022 loss: 2.9084 (2.8601) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0004 max mem: 55573 Epoch: [290] [130/156] eta: 0:00:17 lr: 0.000022 min_lr: 0.000022 loss: 2.6264 (2.8512) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0010 max mem: 55573 Epoch: [290] [140/156] eta: 0:00:10 lr: 0.000022 min_lr: 0.000022 loss: 2.5372 (2.8217) weight_decay: 0.0500 (0.0500) time: 0.5859 data: 0.0009 max mem: 55573 Epoch: [290] [150/156] eta: 0:00:04 lr: 0.000022 min_lr: 0.000022 loss: 2.8465 (2.8413) weight_decay: 0.0500 (0.0500) time: 0.5828 data: 0.0001 max mem: 55573 Epoch: [290] [155/156] eta: 0:00:00 lr: 0.000021 min_lr: 0.000021 loss: 2.9433 (2.8471) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [290] Total time: 0:01:45 (0.6771 s / it) Averaged stats: lr: 0.000021 min_lr: 0.000021 loss: 2.9433 (2.8529) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.8424 (0.8424) acc1: 84.7656 (84.7656) acc5: 97.0052 (97.0052) time: 7.1317 data: 6.8937 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9341 (0.9199) acc1: 81.5104 (80.8640) acc5: 97.0052 (95.9680) time: 1.5728 data: 1.3788 max mem: 55573 Test: Total time: 0:00:08 (1.6161 s / it) * Acc@1 81.848 Acc@5 95.966 loss 0.908 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.90% Test: [0/5] eta: 0:00:35 loss: 0.9460 (0.9460) acc1: 77.0833 (77.0833) acc5: 93.7500 (93.7500) time: 7.1119 data: 6.8758 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9776 (0.9967) acc1: 74.8698 (72.6400) acc5: 93.7500 (91.5200) time: 1.5677 data: 1.3753 max mem: 55573 Test: Total time: 0:00:07 (1.5866 s / it) * Acc@1 73.308 Acc@5 91.756 loss 1.012 Accuracy of the model EMA on 50000 test images: 73.3% Max EMA accuracy: 73.31% Epoch: [291] [ 0/156] eta: 0:33:05 lr: 0.000021 min_lr: 0.000021 loss: 2.7024 (2.7024) weight_decay: 0.0500 (0.0500) time: 12.7284 data: 8.9519 max mem: 55573 Epoch: [291] [ 10/156] eta: 0:04:31 lr: 0.000021 min_lr: 0.000021 loss: 2.9492 (2.8120) weight_decay: 0.0500 (0.0500) time: 1.8567 data: 0.8142 max mem: 55573 Epoch: [291] [ 20/156] eta: 0:02:50 lr: 0.000021 min_lr: 0.000021 loss: 2.8525 (2.8054) weight_decay: 0.0500 (0.0500) time: 0.6800 data: 0.0004 max mem: 55573 Epoch: [291] [ 30/156] eta: 0:02:10 lr: 0.000021 min_lr: 0.000021 loss: 3.0332 (2.8431) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [291] [ 40/156] eta: 0:01:47 lr: 0.000020 min_lr: 0.000020 loss: 3.0845 (2.8431) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [291] [ 50/156] eta: 0:01:31 lr: 0.000020 min_lr: 0.000020 loss: 2.9757 (2.8458) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [291] [ 60/156] eta: 0:01:18 lr: 0.000020 min_lr: 0.000020 loss: 2.8873 (2.8316) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [291] [ 70/156] eta: 0:01:07 lr: 0.000019 min_lr: 0.000019 loss: 2.8873 (2.8301) weight_decay: 0.0500 (0.0500) time: 0.5929 data: 0.0005 max mem: 55573 Epoch: [291] [ 80/156] eta: 0:00:58 lr: 0.000019 min_lr: 0.000019 loss: 2.8086 (2.8173) weight_decay: 0.0500 (0.0500) time: 0.5925 data: 0.0005 max mem: 55573 Epoch: [291] [ 90/156] eta: 0:00:49 lr: 0.000019 min_lr: 0.000019 loss: 2.9851 (2.8454) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [291] [100/156] eta: 0:00:40 lr: 0.000019 min_lr: 0.000019 loss: 3.0320 (2.8265) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [291] [110/156] eta: 0:00:32 lr: 0.000018 min_lr: 0.000018 loss: 2.9695 (2.8355) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0005 max mem: 55573 Epoch: [291] [120/156] eta: 0:00:25 lr: 0.000018 min_lr: 0.000018 loss: 2.7717 (2.8188) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [291] [130/156] eta: 0:00:18 lr: 0.000018 min_lr: 0.000018 loss: 2.4876 (2.8157) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0014 max mem: 55573 Epoch: [291] [140/156] eta: 0:00:11 lr: 0.000018 min_lr: 0.000018 loss: 2.4876 (2.7896) weight_decay: 0.0500 (0.0500) time: 0.5868 data: 0.0012 max mem: 55573 Epoch: [291] [150/156] eta: 0:00:04 lr: 0.000017 min_lr: 0.000017 loss: 2.7828 (2.8030) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [291] [155/156] eta: 0:00:00 lr: 0.000017 min_lr: 0.000017 loss: 2.8447 (2.8027) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [291] Total time: 0:01:47 (0.6860 s / it) Averaged stats: lr: 0.000017 min_lr: 0.000017 loss: 2.8447 (2.8537) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:36 loss: 0.7961 (0.7961) acc1: 84.5052 (84.5052) acc5: 97.2656 (97.2656) time: 7.3960 data: 7.1586 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8927 (0.8782) acc1: 81.9010 (80.9600) acc5: 97.0052 (96.0640) time: 1.6257 data: 1.4318 max mem: 55573 Test: Total time: 0:00:08 (1.6678 s / it) * Acc@1 81.890 Acc@5 96.038 loss 0.864 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.90% Test: [0/5] eta: 0:00:34 loss: 0.9429 (0.9429) acc1: 77.2135 (77.2135) acc5: 93.7500 (93.7500) time: 6.9735 data: 6.7371 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9748 (0.9937) acc1: 74.7396 (72.6400) acc5: 93.7500 (91.5520) time: 1.5910 data: 1.3985 max mem: 55573 Test: Total time: 0:00:08 (1.6125 s / it) * Acc@1 73.438 Acc@5 91.808 loss 1.009 Accuracy of the model EMA on 50000 test images: 73.4% Max EMA accuracy: 73.44% Epoch: [292] [ 0/156] eta: 0:33:30 lr: 0.000017 min_lr: 0.000017 loss: 3.3552 (3.3552) weight_decay: 0.0500 (0.0500) time: 12.8871 data: 12.2592 max mem: 55573 Epoch: [292] [ 10/156] eta: 0:04:13 lr: 0.000017 min_lr: 0.000017 loss: 3.1688 (2.9962) weight_decay: 0.0500 (0.0500) time: 1.7383 data: 1.1149 max mem: 55573 Epoch: [292] [ 20/156] eta: 0:02:42 lr: 0.000017 min_lr: 0.000017 loss: 2.8273 (2.8098) weight_decay: 0.0500 (0.0500) time: 0.6076 data: 0.0004 max mem: 55573 Epoch: [292] [ 30/156] eta: 0:02:05 lr: 0.000016 min_lr: 0.000016 loss: 2.7934 (2.8302) weight_decay: 0.0500 (0.0500) time: 0.5917 data: 0.0004 max mem: 55573 Epoch: [292] [ 40/156] eta: 0:01:44 lr: 0.000016 min_lr: 0.000016 loss: 3.0566 (2.8688) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0004 max mem: 55573 Epoch: [292] [ 50/156] eta: 0:01:28 lr: 0.000016 min_lr: 0.000016 loss: 3.0120 (2.8432) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [292] [ 60/156] eta: 0:01:16 lr: 0.000016 min_lr: 0.000016 loss: 2.9895 (2.8602) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [292] [ 70/156] eta: 0:01:06 lr: 0.000015 min_lr: 0.000015 loss: 2.8553 (2.8443) weight_decay: 0.0500 (0.0500) time: 0.5895 data: 0.0005 max mem: 55573 Epoch: [292] [ 80/156] eta: 0:00:56 lr: 0.000015 min_lr: 0.000015 loss: 2.7597 (2.8301) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0005 max mem: 55573 Epoch: [292] [ 90/156] eta: 0:00:48 lr: 0.000015 min_lr: 0.000015 loss: 2.9450 (2.8240) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [292] [100/156] eta: 0:00:40 lr: 0.000015 min_lr: 0.000015 loss: 2.7164 (2.8015) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [292] [110/156] eta: 0:00:32 lr: 0.000014 min_lr: 0.000014 loss: 2.7164 (2.8103) weight_decay: 0.0500 (0.0500) time: 0.5927 data: 0.0005 max mem: 55573 Epoch: [292] [120/156] eta: 0:00:25 lr: 0.000014 min_lr: 0.000014 loss: 3.1511 (2.8291) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0004 max mem: 55573 Epoch: [292] [130/156] eta: 0:00:17 lr: 0.000014 min_lr: 0.000014 loss: 2.9493 (2.8365) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0010 max mem: 55573 Epoch: [292] [140/156] eta: 0:00:10 lr: 0.000014 min_lr: 0.000014 loss: 2.9234 (2.8423) weight_decay: 0.0500 (0.0500) time: 0.5863 data: 0.0009 max mem: 55573 Epoch: [292] [150/156] eta: 0:00:04 lr: 0.000013 min_lr: 0.000013 loss: 3.0988 (2.8602) weight_decay: 0.0500 (0.0500) time: 0.5831 data: 0.0002 max mem: 55573 Epoch: [292] [155/156] eta: 0:00:00 lr: 0.000013 min_lr: 0.000013 loss: 2.8884 (2.8471) weight_decay: 0.0500 (0.0500) time: 0.5825 data: 0.0001 max mem: 55573 Epoch: [292] Total time: 0:01:45 (0.6774 s / it) Averaged stats: lr: 0.000013 min_lr: 0.000013 loss: 2.8884 (2.8553) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.7828 (0.7828) acc1: 84.8958 (84.8958) acc5: 97.2656 (97.2656) time: 6.7270 data: 6.4900 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8845 (0.8684) acc1: 81.5104 (80.8960) acc5: 97.1354 (96.0320) time: 1.4914 data: 1.2981 max mem: 55573 Test: Total time: 0:00:07 (1.5295 s / it) * Acc@1 81.888 Acc@5 96.022 loss 0.854 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.90% Test: [0/5] eta: 0:00:36 loss: 0.9399 (0.9399) acc1: 77.2135 (77.2135) acc5: 93.8802 (93.8802) time: 7.2509 data: 7.0149 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9719 (0.9907) acc1: 75.0000 (72.7680) acc5: 93.8802 (91.5840) time: 1.5955 data: 1.4031 max mem: 55573 Test: Total time: 0:00:08 (1.6260 s / it) * Acc@1 73.538 Acc@5 91.832 loss 1.006 Accuracy of the model EMA on 50000 test images: 73.5% Max EMA accuracy: 73.54% Epoch: [293] [ 0/156] eta: 0:31:09 lr: 0.000013 min_lr: 0.000013 loss: 3.1664 (3.1664) weight_decay: 0.0500 (0.0500) time: 11.9865 data: 10.7557 max mem: 55573 Epoch: [293] [ 10/156] eta: 0:04:07 lr: 0.000013 min_lr: 0.000013 loss: 3.0679 (2.9283) weight_decay: 0.0500 (0.0500) time: 1.6925 data: 0.9782 max mem: 55573 Epoch: [293] [ 20/156] eta: 0:02:38 lr: 0.000013 min_lr: 0.000013 loss: 3.0037 (2.9609) weight_decay: 0.0500 (0.0500) time: 0.6269 data: 0.0005 max mem: 55573 Epoch: [293] [ 30/156] eta: 0:02:03 lr: 0.000013 min_lr: 0.000013 loss: 2.9378 (2.8642) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [293] [ 40/156] eta: 0:01:42 lr: 0.000012 min_lr: 0.000012 loss: 2.8914 (2.8820) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [293] [ 50/156] eta: 0:01:27 lr: 0.000012 min_lr: 0.000012 loss: 2.8585 (2.8478) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [293] [ 60/156] eta: 0:01:15 lr: 0.000012 min_lr: 0.000012 loss: 2.8590 (2.8530) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [293] [ 70/156] eta: 0:01:05 lr: 0.000012 min_lr: 0.000012 loss: 2.9792 (2.8673) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [293] [ 80/156] eta: 0:00:56 lr: 0.000012 min_lr: 0.000012 loss: 3.0380 (2.8762) weight_decay: 0.0500 (0.0500) time: 0.5923 data: 0.0004 max mem: 55573 Epoch: [293] [ 90/156] eta: 0:00:47 lr: 0.000011 min_lr: 0.000011 loss: 2.8843 (2.8363) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0004 max mem: 55573 Epoch: [293] [100/156] eta: 0:00:39 lr: 0.000011 min_lr: 0.000011 loss: 2.5096 (2.8184) weight_decay: 0.0500 (0.0500) time: 0.5920 data: 0.0004 max mem: 55573 Epoch: [293] [110/156] eta: 0:00:32 lr: 0.000011 min_lr: 0.000011 loss: 2.8958 (2.8242) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0005 max mem: 55573 Epoch: [293] [120/156] eta: 0:00:24 lr: 0.000011 min_lr: 0.000011 loss: 3.0192 (2.8396) weight_decay: 0.0500 (0.0500) time: 0.5957 data: 0.0004 max mem: 55573 Epoch: [293] [130/156] eta: 0:00:17 lr: 0.000011 min_lr: 0.000011 loss: 3.0357 (2.8336) weight_decay: 0.0500 (0.0500) time: 0.5938 data: 0.0011 max mem: 55573 Epoch: [293] [140/156] eta: 0:00:10 lr: 0.000010 min_lr: 0.000010 loss: 2.8001 (2.8309) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0010 max mem: 55573 Epoch: [293] [150/156] eta: 0:00:04 lr: 0.000010 min_lr: 0.000010 loss: 2.6256 (2.8197) weight_decay: 0.0500 (0.0500) time: 0.5820 data: 0.0001 max mem: 55573 Epoch: [293] [155/156] eta: 0:00:00 lr: 0.000010 min_lr: 0.000010 loss: 2.9359 (2.8148) weight_decay: 0.0500 (0.0500) time: 0.5818 data: 0.0001 max mem: 55573 Epoch: [293] Total time: 0:01:45 (0.6748 s / it) Averaged stats: lr: 0.000010 min_lr: 0.000010 loss: 2.9359 (2.8447) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.7408 (0.7408) acc1: 84.6354 (84.6354) acc5: 97.2656 (97.2656) time: 6.9570 data: 6.7197 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8427 (0.8257) acc1: 82.4219 (81.1200) acc5: 97.2656 (96.0960) time: 1.5378 data: 1.3440 max mem: 55573 Test: Total time: 0:00:07 (1.5848 s / it) * Acc@1 81.958 Acc@5 96.046 loss 0.809 Accuracy of the model on the 50000 test images: 82.0% Max accuracy: 81.96% Test: [0/5] eta: 0:00:33 loss: 0.9367 (0.9367) acc1: 77.2135 (77.2135) acc5: 93.8802 (93.8802) time: 6.7995 data: 6.5634 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9690 (0.9875) acc1: 75.3906 (72.8640) acc5: 93.8802 (91.6160) time: 1.5052 data: 1.3128 max mem: 55573 Test: Total time: 0:00:07 (1.5225 s / it) * Acc@1 73.622 Acc@5 91.876 loss 1.003 Accuracy of the model EMA on 50000 test images: 73.6% Max EMA accuracy: 73.62% Epoch: [294] [ 0/156] eta: 0:30:41 lr: 0.000010 min_lr: 0.000010 loss: 2.1934 (2.1934) weight_decay: 0.0500 (0.0500) time: 11.8042 data: 10.5507 max mem: 55573 Epoch: [294] [ 10/156] eta: 0:04:03 lr: 0.000010 min_lr: 0.000010 loss: 2.5740 (2.5848) weight_decay: 0.0500 (0.0500) time: 1.6663 data: 0.9638 max mem: 55573 Epoch: [294] [ 20/156] eta: 0:02:37 lr: 0.000010 min_lr: 0.000010 loss: 2.7381 (2.7451) weight_decay: 0.0500 (0.0500) time: 0.6226 data: 0.0028 max mem: 55573 Epoch: [294] [ 30/156] eta: 0:02:02 lr: 0.000009 min_lr: 0.000009 loss: 2.9648 (2.7495) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0005 max mem: 55573 Epoch: [294] [ 40/156] eta: 0:01:42 lr: 0.000009 min_lr: 0.000009 loss: 2.9194 (2.7514) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [294] [ 50/156] eta: 0:01:27 lr: 0.000009 min_lr: 0.000009 loss: 2.8902 (2.7494) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [294] [ 60/156] eta: 0:01:15 lr: 0.000009 min_lr: 0.000009 loss: 3.0295 (2.7813) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [294] [ 70/156] eta: 0:01:05 lr: 0.000009 min_lr: 0.000009 loss: 3.0529 (2.8079) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [294] [ 80/156] eta: 0:00:56 lr: 0.000009 min_lr: 0.000009 loss: 3.0601 (2.8344) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [294] [ 90/156] eta: 0:00:47 lr: 0.000008 min_lr: 0.000008 loss: 3.0654 (2.8370) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0005 max mem: 55573 Epoch: [294] [100/156] eta: 0:00:39 lr: 0.000008 min_lr: 0.000008 loss: 2.9787 (2.8429) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [294] [110/156] eta: 0:00:32 lr: 0.000008 min_lr: 0.000008 loss: 3.0866 (2.8529) weight_decay: 0.0500 (0.0500) time: 0.5918 data: 0.0005 max mem: 55573 Epoch: [294] [120/156] eta: 0:00:24 lr: 0.000008 min_lr: 0.000008 loss: 3.0026 (2.8445) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0005 max mem: 55573 Epoch: [294] [130/156] eta: 0:00:17 lr: 0.000008 min_lr: 0.000008 loss: 3.0026 (2.8460) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0011 max mem: 55573 Epoch: [294] [140/156] eta: 0:00:10 lr: 0.000008 min_lr: 0.000008 loss: 3.0376 (2.8545) weight_decay: 0.0500 (0.0500) time: 0.5878 data: 0.0009 max mem: 55573 Epoch: [294] [150/156] eta: 0:00:04 lr: 0.000007 min_lr: 0.000007 loss: 3.0376 (2.8525) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [294] [155/156] eta: 0:00:00 lr: 0.000007 min_lr: 0.000007 loss: 3.0042 (2.8550) weight_decay: 0.0500 (0.0500) time: 0.5819 data: 0.0001 max mem: 55573 Epoch: [294] Total time: 0:01:44 (0.6720 s / it) Averaged stats: lr: 0.000007 min_lr: 0.000007 loss: 3.0042 (2.8448) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8175 (0.8175) acc1: 84.7656 (84.7656) acc5: 97.2656 (97.2656) time: 6.9018 data: 6.6644 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9137 (0.8983) acc1: 81.5104 (80.8000) acc5: 97.0052 (96.0320) time: 1.5268 data: 1.3329 max mem: 55573 Test: Total time: 0:00:07 (1.5661 s / it) * Acc@1 81.872 Acc@5 96.010 loss 0.885 Accuracy of the model on the 50000 test images: 81.9% Max accuracy: 81.96% Test: [0/5] eta: 0:00:34 loss: 0.9337 (0.9337) acc1: 77.3438 (77.3438) acc5: 93.8802 (93.8802) time: 6.8451 data: 6.6091 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9662 (0.9845) acc1: 75.3906 (72.8960) acc5: 93.8802 (91.6160) time: 1.5144 data: 1.3219 max mem: 55573 Test: Total time: 0:00:07 (1.5349 s / it) * Acc@1 73.720 Acc@5 91.926 loss 0.999 Accuracy of the model EMA on 50000 test images: 73.7% Max EMA accuracy: 73.72% Epoch: [295] [ 0/156] eta: 0:30:57 lr: 0.000007 min_lr: 0.000007 loss: 2.8298 (2.8298) weight_decay: 0.0500 (0.0500) time: 11.9045 data: 7.7112 max mem: 55573 Epoch: [295] [ 10/156] eta: 0:04:00 lr: 0.000007 min_lr: 0.000007 loss: 2.9205 (2.8992) weight_decay: 0.0500 (0.0500) time: 1.6492 data: 0.7015 max mem: 55573 Epoch: [295] [ 20/156] eta: 0:02:35 lr: 0.000007 min_lr: 0.000007 loss: 2.9603 (2.8803) weight_decay: 0.0500 (0.0500) time: 0.6070 data: 0.0005 max mem: 55573 Epoch: [295] [ 30/156] eta: 0:02:01 lr: 0.000007 min_lr: 0.000007 loss: 2.9223 (2.8307) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0004 max mem: 55573 Epoch: [295] [ 40/156] eta: 0:01:41 lr: 0.000007 min_lr: 0.000007 loss: 2.9042 (2.8310) weight_decay: 0.0500 (0.0500) time: 0.5944 data: 0.0004 max mem: 55573 Epoch: [295] [ 50/156] eta: 0:01:27 lr: 0.000007 min_lr: 0.000007 loss: 2.9042 (2.8300) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0004 max mem: 55573 Epoch: [295] [ 60/156] eta: 0:01:15 lr: 0.000006 min_lr: 0.000006 loss: 2.9355 (2.8455) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0004 max mem: 55573 Epoch: [295] [ 70/156] eta: 0:01:05 lr: 0.000006 min_lr: 0.000006 loss: 2.9355 (2.8250) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0004 max mem: 55573 Epoch: [295] [ 80/156] eta: 0:00:55 lr: 0.000006 min_lr: 0.000006 loss: 3.0327 (2.8322) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [295] [ 90/156] eta: 0:00:47 lr: 0.000006 min_lr: 0.000006 loss: 3.0606 (2.8363) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [295] [100/156] eta: 0:00:39 lr: 0.000006 min_lr: 0.000006 loss: 3.0606 (2.8595) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [295] [110/156] eta: 0:00:32 lr: 0.000006 min_lr: 0.000006 loss: 3.0415 (2.8386) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [295] [120/156] eta: 0:00:24 lr: 0.000006 min_lr: 0.000006 loss: 2.8471 (2.8469) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [295] [130/156] eta: 0:00:17 lr: 0.000005 min_lr: 0.000005 loss: 3.0761 (2.8581) weight_decay: 0.0500 (0.0500) time: 0.5916 data: 0.0010 max mem: 55573 Epoch: [295] [140/156] eta: 0:00:10 lr: 0.000005 min_lr: 0.000005 loss: 3.0761 (2.8585) weight_decay: 0.0500 (0.0500) time: 0.5858 data: 0.0008 max mem: 55573 Epoch: [295] [150/156] eta: 0:00:04 lr: 0.000005 min_lr: 0.000005 loss: 2.9832 (2.8604) weight_decay: 0.0500 (0.0500) time: 0.5815 data: 0.0001 max mem: 55573 Epoch: [295] [155/156] eta: 0:00:00 lr: 0.000005 min_lr: 0.000005 loss: 3.0793 (2.8625) weight_decay: 0.0500 (0.0500) time: 0.5812 data: 0.0001 max mem: 55573 Epoch: [295] Total time: 0:01:44 (0.6713 s / it) Averaged stats: lr: 0.000005 min_lr: 0.000005 loss: 3.0793 (2.8447) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8398 (0.8398) acc1: 84.5052 (84.5052) acc5: 97.0052 (97.0052) time: 6.6415 data: 6.4045 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9391 (0.9234) acc1: 80.8594 (80.6720) acc5: 96.8750 (95.9040) time: 1.4737 data: 1.2810 max mem: 55573 Test: Total time: 0:00:07 (1.4941 s / it) * Acc@1 81.816 Acc@5 95.898 loss 0.910 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.96% Test: [0/5] eta: 0:00:36 loss: 0.9307 (0.9307) acc1: 77.3438 (77.3438) acc5: 93.8802 (93.8802) time: 7.3595 data: 7.1235 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9632 (0.9815) acc1: 75.3906 (72.9280) acc5: 93.8802 (91.6160) time: 1.6172 data: 1.4248 max mem: 55573 Test: Total time: 0:00:08 (1.6436 s / it) * Acc@1 73.824 Acc@5 91.966 loss 0.996 Accuracy of the model EMA on 50000 test images: 73.8% Max EMA accuracy: 73.82% Epoch: [296] [ 0/156] eta: 0:32:43 lr: 0.000005 min_lr: 0.000005 loss: 2.9076 (2.9076) weight_decay: 0.0500 (0.0500) time: 12.5873 data: 10.3984 max mem: 55573 Epoch: [296] [ 10/156] eta: 0:04:07 lr: 0.000005 min_lr: 0.000005 loss: 2.9076 (2.8473) weight_decay: 0.0500 (0.0500) time: 1.6922 data: 0.9457 max mem: 55573 Epoch: [296] [ 20/156] eta: 0:02:38 lr: 0.000005 min_lr: 0.000005 loss: 3.0570 (2.9805) weight_decay: 0.0500 (0.0500) time: 0.5963 data: 0.0004 max mem: 55573 Epoch: [296] [ 30/156] eta: 0:02:03 lr: 0.000005 min_lr: 0.000005 loss: 2.9175 (2.8956) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [296] [ 40/156] eta: 0:01:42 lr: 0.000005 min_lr: 0.000005 loss: 2.7863 (2.8858) weight_decay: 0.0500 (0.0500) time: 0.5924 data: 0.0005 max mem: 55573 Epoch: [296] [ 50/156] eta: 0:01:27 lr: 0.000004 min_lr: 0.000004 loss: 2.9910 (2.8860) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [296] [ 60/156] eta: 0:01:15 lr: 0.000004 min_lr: 0.000004 loss: 2.9123 (2.8721) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [296] [ 70/156] eta: 0:01:05 lr: 0.000004 min_lr: 0.000004 loss: 2.8518 (2.8483) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0004 max mem: 55573 Epoch: [296] [ 80/156] eta: 0:00:56 lr: 0.000004 min_lr: 0.000004 loss: 2.7960 (2.8551) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0005 max mem: 55573 Epoch: [296] [ 90/156] eta: 0:00:47 lr: 0.000004 min_lr: 0.000004 loss: 2.8421 (2.8562) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [296] [100/156] eta: 0:00:39 lr: 0.000004 min_lr: 0.000004 loss: 2.9746 (2.8466) weight_decay: 0.0500 (0.0500) time: 0.5915 data: 0.0004 max mem: 55573 Epoch: [296] [110/156] eta: 0:00:32 lr: 0.000004 min_lr: 0.000004 loss: 3.0020 (2.8633) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0005 max mem: 55573 Epoch: [296] [120/156] eta: 0:00:24 lr: 0.000004 min_lr: 0.000004 loss: 3.0276 (2.8741) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0005 max mem: 55573 Epoch: [296] [130/156] eta: 0:00:17 lr: 0.000004 min_lr: 0.000004 loss: 2.7416 (2.8525) weight_decay: 0.0500 (0.0500) time: 0.5894 data: 0.0013 max mem: 55573 Epoch: [296] [140/156] eta: 0:00:10 lr: 0.000003 min_lr: 0.000003 loss: 2.9587 (2.8688) weight_decay: 0.0500 (0.0500) time: 0.5856 data: 0.0011 max mem: 55573 Epoch: [296] [150/156] eta: 0:00:04 lr: 0.000003 min_lr: 0.000003 loss: 3.0616 (2.8664) weight_decay: 0.0500 (0.0500) time: 0.5814 data: 0.0001 max mem: 55573 Epoch: [296] [155/156] eta: 0:00:00 lr: 0.000003 min_lr: 0.000003 loss: 3.0616 (2.8744) weight_decay: 0.0500 (0.0500) time: 0.5816 data: 0.0001 max mem: 55573 Epoch: [296] Total time: 0:01:45 (0.6738 s / it) Averaged stats: lr: 0.000003 min_lr: 0.000003 loss: 3.0616 (2.8562) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:34 loss: 0.8350 (0.8350) acc1: 84.7656 (84.7656) acc5: 97.2656 (97.2656) time: 6.9584 data: 6.7195 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9331 (0.9170) acc1: 81.2500 (80.9280) acc5: 96.8750 (95.9680) time: 1.5383 data: 1.3440 max mem: 55573 Test: Total time: 0:00:07 (1.5860 s / it) * Acc@1 81.832 Acc@5 95.970 loss 0.903 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.96% Test: [0/5] eta: 0:00:34 loss: 0.9274 (0.9274) acc1: 77.4740 (77.4740) acc5: 93.8802 (93.8802) time: 6.9398 data: 6.7037 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9601 (0.9782) acc1: 75.5208 (73.0240) acc5: 93.8802 (91.6480) time: 1.5333 data: 1.3408 max mem: 55573 Test: Total time: 0:00:07 (1.5578 s / it) * Acc@1 73.920 Acc@5 92.020 loss 0.993 Accuracy of the model EMA on 50000 test images: 73.9% Max EMA accuracy: 73.92% Epoch: [297] [ 0/156] eta: 0:30:27 lr: 0.000003 min_lr: 0.000003 loss: 3.3890 (3.3890) weight_decay: 0.0500 (0.0500) time: 11.7165 data: 8.3041 max mem: 55573 Epoch: [297] [ 10/156] eta: 0:04:05 lr: 0.000003 min_lr: 0.000003 loss: 3.0997 (3.0449) weight_decay: 0.0500 (0.0500) time: 1.6789 data: 0.7553 max mem: 55573 Epoch: [297] [ 20/156] eta: 0:02:37 lr: 0.000003 min_lr: 0.000003 loss: 2.9268 (2.8493) weight_decay: 0.0500 (0.0500) time: 0.6336 data: 0.0005 max mem: 55573 Epoch: [297] [ 30/156] eta: 0:02:03 lr: 0.000003 min_lr: 0.000003 loss: 2.9268 (2.8785) weight_decay: 0.0500 (0.0500) time: 0.5919 data: 0.0005 max mem: 55573 Epoch: [297] [ 40/156] eta: 0:01:42 lr: 0.000003 min_lr: 0.000003 loss: 3.1059 (2.9361) weight_decay: 0.0500 (0.0500) time: 0.5913 data: 0.0005 max mem: 55573 Epoch: [297] [ 50/156] eta: 0:01:27 lr: 0.000003 min_lr: 0.000003 loss: 2.9491 (2.8898) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [297] [ 60/156] eta: 0:01:15 lr: 0.000003 min_lr: 0.000003 loss: 2.8951 (2.8834) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [297] [ 70/156] eta: 0:01:05 lr: 0.000003 min_lr: 0.000003 loss: 2.9032 (2.8841) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [297] [ 80/156] eta: 0:00:56 lr: 0.000003 min_lr: 0.000003 loss: 3.0608 (2.8876) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [297] [ 90/156] eta: 0:00:47 lr: 0.000002 min_lr: 0.000002 loss: 2.9876 (2.8791) weight_decay: 0.0500 (0.0500) time: 0.5911 data: 0.0004 max mem: 55573 Epoch: [297] [100/156] eta: 0:00:39 lr: 0.000002 min_lr: 0.000002 loss: 2.9018 (2.8710) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0004 max mem: 55573 Epoch: [297] [110/156] eta: 0:00:32 lr: 0.000002 min_lr: 0.000002 loss: 2.7992 (2.8557) weight_decay: 0.0500 (0.0500) time: 0.5901 data: 0.0004 max mem: 55573 Epoch: [297] [120/156] eta: 0:00:24 lr: 0.000002 min_lr: 0.000002 loss: 2.9701 (2.8668) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0004 max mem: 55573 Epoch: [297] [130/156] eta: 0:00:17 lr: 0.000002 min_lr: 0.000002 loss: 2.9675 (2.8693) weight_decay: 0.0500 (0.0500) time: 0.5909 data: 0.0010 max mem: 55573 Epoch: [297] [140/156] eta: 0:00:10 lr: 0.000002 min_lr: 0.000002 loss: 2.9558 (2.8775) weight_decay: 0.0500 (0.0500) time: 0.5879 data: 0.0009 max mem: 55573 Epoch: [297] [150/156] eta: 0:00:04 lr: 0.000002 min_lr: 0.000002 loss: 2.9080 (2.8565) weight_decay: 0.0500 (0.0500) time: 0.5835 data: 0.0001 max mem: 55573 Epoch: [297] [155/156] eta: 0:00:00 lr: 0.000002 min_lr: 0.000002 loss: 2.8371 (2.8540) weight_decay: 0.0500 (0.0500) time: 0.5833 data: 0.0001 max mem: 55573 Epoch: [297] Total time: 0:01:45 (0.6735 s / it) Averaged stats: lr: 0.000002 min_lr: 0.000002 loss: 2.8371 (2.8571) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:35 loss: 0.7598 (0.7598) acc1: 84.6354 (84.6354) acc5: 97.2656 (97.2656) time: 7.0852 data: 6.8478 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.8607 (0.8456) acc1: 82.1615 (81.0560) acc5: 96.8750 (96.0320) time: 1.5636 data: 1.3696 max mem: 55573 Test: Total time: 0:00:08 (1.6068 s / it) * Acc@1 81.966 Acc@5 96.024 loss 0.830 Accuracy of the model on the 50000 test images: 82.0% Max accuracy: 81.97% Test: [0/5] eta: 0:00:33 loss: 0.9244 (0.9244) acc1: 77.4740 (77.4740) acc5: 93.8802 (93.8802) time: 6.7108 data: 6.4748 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9574 (0.9752) acc1: 75.5208 (72.9600) acc5: 93.8802 (91.7120) time: 1.4875 data: 1.2951 max mem: 55573 Test: Total time: 0:00:07 (1.5027 s / it) * Acc@1 74.014 Acc@5 92.078 loss 0.989 Accuracy of the model EMA on 50000 test images: 74.0% Max EMA accuracy: 74.01% Epoch: [298] [ 0/156] eta: 0:36:39 lr: 0.000002 min_lr: 0.000002 loss: 3.1110 (3.1110) weight_decay: 0.0500 (0.0500) time: 14.0979 data: 13.5134 max mem: 55573 Epoch: [298] [ 10/156] eta: 0:04:26 lr: 0.000002 min_lr: 0.000002 loss: 2.6630 (2.6563) weight_decay: 0.0500 (0.0500) time: 1.8227 data: 1.2288 max mem: 55573 Epoch: [298] [ 20/156] eta: 0:02:47 lr: 0.000002 min_lr: 0.000002 loss: 2.5014 (2.6700) weight_decay: 0.0500 (0.0500) time: 0.5921 data: 0.0003 max mem: 55573 Epoch: [298] [ 30/156] eta: 0:02:09 lr: 0.000002 min_lr: 0.000002 loss: 2.9936 (2.7282) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0004 max mem: 55573 Epoch: [298] [ 40/156] eta: 0:01:46 lr: 0.000002 min_lr: 0.000002 loss: 3.0350 (2.7538) weight_decay: 0.0500 (0.0500) time: 0.5914 data: 0.0005 max mem: 55573 Epoch: [298] [ 50/156] eta: 0:01:30 lr: 0.000002 min_lr: 0.000002 loss: 3.0350 (2.7672) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [298] [ 60/156] eta: 0:01:18 lr: 0.000002 min_lr: 0.000002 loss: 2.9931 (2.7770) weight_decay: 0.0500 (0.0500) time: 0.5908 data: 0.0005 max mem: 55573 Epoch: [298] [ 70/156] eta: 0:01:07 lr: 0.000002 min_lr: 0.000002 loss: 2.8794 (2.7603) weight_decay: 0.0500 (0.0500) time: 0.5907 data: 0.0005 max mem: 55573 Epoch: [298] [ 80/156] eta: 0:00:57 lr: 0.000002 min_lr: 0.000002 loss: 2.9347 (2.7858) weight_decay: 0.0500 (0.0500) time: 0.5905 data: 0.0005 max mem: 55573 Epoch: [298] [ 90/156] eta: 0:00:48 lr: 0.000002 min_lr: 0.000002 loss: 3.0463 (2.8106) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0005 max mem: 55573 Epoch: [298] [100/156] eta: 0:00:40 lr: 0.000001 min_lr: 0.000001 loss: 3.0166 (2.8102) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [298] [110/156] eta: 0:00:32 lr: 0.000001 min_lr: 0.000001 loss: 2.9596 (2.8126) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [298] [120/156] eta: 0:00:25 lr: 0.000001 min_lr: 0.000001 loss: 3.0473 (2.8286) weight_decay: 0.0500 (0.0500) time: 0.5906 data: 0.0004 max mem: 55573 Epoch: [298] [130/156] eta: 0:00:18 lr: 0.000001 min_lr: 0.000001 loss: 3.0072 (2.8131) weight_decay: 0.0500 (0.0500) time: 0.5900 data: 0.0011 max mem: 55573 Epoch: [298] [140/156] eta: 0:00:10 lr: 0.000001 min_lr: 0.000001 loss: 2.6989 (2.8125) weight_decay: 0.0500 (0.0500) time: 0.5861 data: 0.0009 max mem: 55573 Epoch: [298] [150/156] eta: 0:00:04 lr: 0.000001 min_lr: 0.000001 loss: 2.9530 (2.8272) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [298] [155/156] eta: 0:00:00 lr: 0.000001 min_lr: 0.000001 loss: 3.0692 (2.8344) weight_decay: 0.0500 (0.0500) time: 0.5817 data: 0.0001 max mem: 55573 Epoch: [298] Total time: 0:01:46 (0.6829 s / it) Averaged stats: lr: 0.000001 min_lr: 0.000001 loss: 3.0692 (2.8486) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:33 loss: 0.8633 (0.8633) acc1: 84.8958 (84.8958) acc5: 97.2656 (97.2656) time: 6.6935 data: 6.4565 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9571 (0.9422) acc1: 81.3802 (80.8000) acc5: 97.1354 (96.0640) time: 1.4851 data: 1.2914 max mem: 55573 Test: Total time: 0:00:07 (1.5400 s / it) * Acc@1 81.784 Acc@5 96.028 loss 0.929 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.97% Test: [0/5] eta: 0:00:35 loss: 0.9210 (0.9210) acc1: 77.6042 (77.6042) acc5: 94.0104 (94.0104) time: 7.0315 data: 6.7951 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9542 (0.9717) acc1: 75.5208 (72.9600) acc5: 94.0104 (91.9040) time: 1.5516 data: 1.3591 max mem: 55573 Test: Total time: 0:00:07 (1.5677 s / it) * Acc@1 74.100 Acc@5 92.128 loss 0.986 Accuracy of the model EMA on 50000 test images: 74.1% Max EMA accuracy: 74.10% Epoch: [299] [ 0/156] eta: 0:31:59 lr: 0.000001 min_lr: 0.000001 loss: 3.1710 (3.1710) weight_decay: 0.0500 (0.0500) time: 12.3018 data: 9.0384 max mem: 55573 Epoch: [299] [ 10/156] eta: 0:04:02 lr: 0.000001 min_lr: 0.000001 loss: 2.9251 (2.8009) weight_decay: 0.0500 (0.0500) time: 1.6597 data: 0.8221 max mem: 55573 Epoch: [299] [ 20/156] eta: 0:02:36 lr: 0.000001 min_lr: 0.000001 loss: 2.8581 (2.7667) weight_decay: 0.0500 (0.0500) time: 0.5928 data: 0.0005 max mem: 55573 Epoch: [299] [ 30/156] eta: 0:02:02 lr: 0.000001 min_lr: 0.000001 loss: 2.9887 (2.8109) weight_decay: 0.0500 (0.0500) time: 0.5912 data: 0.0005 max mem: 55573 Epoch: [299] [ 40/156] eta: 0:01:41 lr: 0.000001 min_lr: 0.000001 loss: 2.9588 (2.7911) weight_decay: 0.0500 (0.0500) time: 0.5904 data: 0.0005 max mem: 55573 Epoch: [299] [ 50/156] eta: 0:01:27 lr: 0.000001 min_lr: 0.000001 loss: 2.9431 (2.8041) weight_decay: 0.0500 (0.0500) time: 0.5896 data: 0.0005 max mem: 55573 Epoch: [299] [ 60/156] eta: 0:01:15 lr: 0.000001 min_lr: 0.000001 loss: 2.9431 (2.7788) weight_decay: 0.0500 (0.0500) time: 0.5910 data: 0.0005 max mem: 55573 Epoch: [299] [ 70/156] eta: 0:01:05 lr: 0.000001 min_lr: 0.000001 loss: 2.8625 (2.7956) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0005 max mem: 55573 Epoch: [299] [ 80/156] eta: 0:00:55 lr: 0.000001 min_lr: 0.000001 loss: 2.8719 (2.8019) weight_decay: 0.0500 (0.0500) time: 0.5899 data: 0.0004 max mem: 55573 Epoch: [299] [ 90/156] eta: 0:00:47 lr: 0.000001 min_lr: 0.000001 loss: 2.7964 (2.7788) weight_decay: 0.0500 (0.0500) time: 0.5902 data: 0.0004 max mem: 55573 Epoch: [299] [100/156] eta: 0:00:39 lr: 0.000001 min_lr: 0.000001 loss: 2.7309 (2.7743) weight_decay: 0.0500 (0.0500) time: 0.5903 data: 0.0004 max mem: 55573 Epoch: [299] [110/156] eta: 0:00:32 lr: 0.000001 min_lr: 0.000001 loss: 2.7309 (2.7686) weight_decay: 0.0500 (0.0500) time: 0.5939 data: 0.0004 max mem: 55573 Epoch: [299] [120/156] eta: 0:00:24 lr: 0.000001 min_lr: 0.000001 loss: 2.9476 (2.7835) weight_decay: 0.0500 (0.0500) time: 0.5930 data: 0.0004 max mem: 55573 Epoch: [299] [130/156] eta: 0:00:17 lr: 0.000001 min_lr: 0.000001 loss: 3.0920 (2.7981) weight_decay: 0.0500 (0.0500) time: 0.5897 data: 0.0010 max mem: 55573 Epoch: [299] [140/156] eta: 0:00:10 lr: 0.000001 min_lr: 0.000001 loss: 3.0089 (2.8152) weight_decay: 0.0500 (0.0500) time: 0.5869 data: 0.0009 max mem: 55573 Epoch: [299] [150/156] eta: 0:00:04 lr: 0.000001 min_lr: 0.000001 loss: 3.0798 (2.8276) weight_decay: 0.0500 (0.0500) time: 0.5830 data: 0.0001 max mem: 55573 Epoch: [299] [155/156] eta: 0:00:00 lr: 0.000001 min_lr: 0.000001 loss: 3.1008 (2.8296) weight_decay: 0.0500 (0.0500) time: 0.5826 data: 0.0001 max mem: 55573 Epoch: [299] Total time: 0:01:44 (0.6717 s / it) Averaged stats: lr: 0.000001 min_lr: 0.000001 loss: 3.1008 (2.8573) weight_decay: 0.0500 (0.0500) Test: [0/5] eta: 0:00:32 loss: 0.8475 (0.8475) acc1: 84.6354 (84.6354) acc5: 97.0052 (97.0052) time: 6.5963 data: 6.3595 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9444 (0.9285) acc1: 81.1198 (80.7360) acc5: 97.0052 (95.9680) time: 1.4646 data: 1.2720 max mem: 55573 Test: Total time: 0:00:07 (1.4876 s / it) * Acc@1 81.800 Acc@5 95.938 loss 0.915 Accuracy of the model on the 50000 test images: 81.8% Max accuracy: 81.97% Test: [0/5] eta: 0:00:35 loss: 0.9177 (0.9177) acc1: 77.4740 (77.4740) acc5: 94.0104 (94.0104) time: 7.1367 data: 6.9008 max mem: 55573 Test: [4/5] eta: 0:00:01 loss: 0.9512 (0.9685) acc1: 75.6510 (73.0880) acc5: 94.0104 (91.9680) time: 1.5727 data: 1.3802 max mem: 55573 Test: Total time: 0:00:07 (1.5884 s / it) * Acc@1 74.194 Acc@5 92.164 loss 0.982 Accuracy of the model EMA on 50000 test images: 74.2% Max EMA accuracy: 74.19% Training time 10:21:37